Compare commits

...

2 Commits

4 changed files with 308 additions and 593 deletions

File diff suppressed because one or more lines are too long

View File

@ -267,9 +267,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
## 🆘 Support ## 🆘 Support
- **Issues**: [GitHub Issues](https://github.com/your-org/FScanpy/issues)
- **Documentation**: [Tutorial](tutorial/tutorial.md) - **Documentation**: [Tutorial](tutorial/tutorial.md)
- **Examples**: [Demo Notebook](FScanpy_Demo.ipynb) - **Usage Example**: [Demo Notebook](FScanpy_Demo.ipynb)
- **Prediction Result Explanation**: [Prediction Result Explanation](tutorial/predict_sample.ipynb)
## 🏗️ Dependencies ## 🏗️ Dependencies

View File

@ -1,362 +0,0 @@
#!/usr/bin/env python3
"""
FScanpy 序列预测绘图示例
展示如何使用新的 plot_prf_prediction 函数绘制序列的移码概率预测结果
包含集成权重参数的使用示例
"""
import matplotlib.pyplot as plt
import os
from FScanpy import plot_prf_prediction, PRFPredictor
def example_basic_plotting():
    """Plot frameshift probabilities for a demo sequence with default settings.

    Passes only the sequence and a title to ``plot_prf_prediction``, prints a
    short summary of the returned results, and displays the figure.

    Returns:
        tuple: ``(results, fig)`` as returned by ``plot_prf_prediction``, or
        ``(None, None)`` if any step raised an exception.
    """
    banner = "=" * 50
    print(banner)
    print("基础绘图示例")
    print(banner)

    # Demo sequence; swap in a real sequence of interest.
    fragments = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    )
    example_sequence = "".join(fragments)

    try:
        # No ensemble_weight is passed, so the plotting function's own default
        # applies (the messages below describe it as Short 0.4 / Long 0.6).
        prediction = plot_prf_prediction(
            sequence=example_sequence,
            title="示例序列的移码概率预测 (默认集成权重 4:6)",
        )
        results, fig = prediction

        print(f"预测完成!共处理 {len(results)} 个位置")
        nonzero_rows = results[results['Ensemble_Probability'] > 0]
        print(f"满足阈值条件的位点数: {len(nonzero_rows)}")
        print("使用集成权重比例: Short模型 0.4, Long模型 0.6")

        # Display the figure.
        plt.show()
    except Exception as exc:
        # Best-effort demo: report the problem and signal failure to the caller.
        print(f"绘图过程中出错: {exc}")
        return None, None
    return results, fig
def example_custom_ensemble_weights():
    """Plot the demo sequence under several custom ensemble weights.

    For each ``(ensemble_weight, description)`` pair the sequence is passed to
    ``plot_prf_prediction``, summary statistics of the returned results are
    printed, and the figure is shown. A failure in one configuration is
    reported and does not stop the remaining configurations.

    Returns:
        bool: ``True`` if every configuration was plotted without raising,
        ``False`` if at least one failed. (Previously the function returned
        nothing, so callers could not tell success from failure.)
    """
    print("=" * 50)
    print("自定义集成权重绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
    )

    # Ensemble weights to try; the printed ratio below treats the value as the
    # Short-model share and ``1 - value`` as the Long-model share.
    weight_configs = [
        (0.2, "Long模型主导 (2:8)"),
        (0.5, "等权重组合 (5:5)"),
        (0.7, "Short模型主导 (7:3)"),
    ]

    all_succeeded = True
    for ensemble_weight, description in weight_configs:
        print(f"\n测试集成权重配置: {description}")
        try:
            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 - {description}",
                figsize=(14, 7),
            )
            print(f"预测完成!共处理 {len(results)} 个位置")
            print(f"满足阈值条件的位点数: {len(results[results['Ensemble_Probability'] > 0])}")

            # Summary statistics for this configuration.
            print("预测统计信息:")
            print(f" Short模型平均概率: {results['Short_Probability'].mean():.3f}")
            print(f" Long模型平均概率: {results['Long_Probability'].mean():.3f}")
            print(f" 集成平均概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f" 集成权重比例: Short:{ensemble_weight:.1f}, Long:{1-ensemble_weight:.1f}")

            plt.show()
        except Exception as e:
            # Keep going with the remaining configurations, but remember the failure.
            all_succeeded = False
            print(f"集成权重 {ensemble_weight} 绘图时出错: {str(e)}")

    return all_succeeded
def example_ensemble_comparison():
    """Compare ensemble predictions for several weights in stacked bar charts.

    Creates one ``PRFPredictor``, runs ``predict_sequence`` once per ensemble
    weight, draws each result's ``Ensemble_Probability`` against ``Position``
    in its own subplot, shows the figure, and prints per-weight statistics.

    Returns:
        tuple: ``(all_results, fig)`` where ``all_results`` holds one result
        per weight in order, or ``(None, None)`` if any step raised. On
        failure a figure that was already created is closed, so it is not
        displayed by a later ``plt.show()`` call.
    """
    print("=" * 50)
    print("集成权重对比绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    # Tracked outside the try block so a partially built figure can be closed
    # if a later step fails.
    fig = None
    try:
        predictor = PRFPredictor()

        # Three ensemble weights and their display labels.
        weights = [0.3, 0.4, 0.6]
        weight_names = ["Long主导 (3:7)", "默认权重 (4:6)", "Short主导 (6:4)"]

        # One subplot row per weight.
        fig, axes = plt.subplots(len(weights), 1, figsize=(15, 12))
        fig.suptitle('不同集成权重配置的预测结果对比', fontsize=16)

        all_results = []
        last_row = len(weights) - 1
        for i, (weight, name) in enumerate(zip(weights, weight_names)):
            results = predictor.predict_sequence(
                sequence=example_sequence,
                ensemble_weight=weight,
            )
            all_results.append(results)

            # Bar chart of the ensemble probability at each position.
            ax = axes[i]
            ax.bar(results['Position'], results['Ensemble_Probability'],
                   alpha=0.7, color=f'C{i}', width=2)
            ax.set_title(f'{name} - 平均概率: {results["Ensemble_Probability"].mean():.3f}')
            ax.set_ylabel('概率')
            ax.grid(True, alpha=0.3)
            ax.set_ylim(0, 1)
            # Only the bottom subplot carries the x-axis label.
            if i == last_row:
                ax.set_xlabel('序列位置')

        plt.tight_layout()
        plt.show()

        # Per-weight comparison statistics.
        print("\n集成权重对比统计:")
        for name, results in zip(weight_names, all_results):
            print(f"{name}:")
            print(f" 平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f" 最大集成概率: {results['Ensemble_Probability'].max():.3f}")
            print(f" 非零预测数量: {(results['Ensemble_Probability'] > 0).sum()}")

        return all_results, fig
    except Exception as e:
        print(f"集成权重对比时出错: {str(e)}")
        if fig is not None:
            # Do not leave a half-drawn figure registered with pyplot.
            plt.close(fig)
        return None, None
def example_save_plot():
    """Save prediction plots for several ensemble weights without showing them.

    Ensures the ``prediction_plots`` directory exists, then calls
    ``plot_prf_prediction`` once per weight with a ``save_path`` inside that
    directory and closes each returned figure.

    Returns:
        bool: ``True`` if every configuration completed, ``False`` as soon as
        any step inside the plotting loop raised an exception.
    """
    separator = "=" * 50
    print(separator)
    print("保存图片示例")
    print(separator)

    # Output directory (created if missing).
    save_dir = "prediction_plots"
    os.makedirs(save_dir, exist_ok=True)

    # Demo sequence.
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))

    try:
        # (ensemble weight, file-name suffix) pairs to render.
        for ensemble_weight, file_suffix in (
            (0.3, "long_dominant"),
            (0.5, "equal_weight"),
            (0.7, "short_dominant"),
        ):
            file_name = f"prediction_{file_suffix}.png"
            save_path = os.path.join(save_dir, file_name)
            plot_title = f"移码概率预测 (集成权重 {ensemble_weight:.1f}:{1-ensemble_weight:.1f})"

            _, figure = plot_prf_prediction(
                sequence=example_sequence,
                short_threshold=0.6,
                long_threshold=0.75,
                ensemble_weight=ensemble_weight,
                title=plot_title,
                save_path=save_path,
                dpi=300,
            )
            print(f"图片已保存至: {save_path}")

            # Close instead of showing the figure.
            plt.close(figure)

        print("所有集成权重配置的图片都已保存完成")
    except Exception as exc:
        print(f"保存图片过程中出错: {exc}")
        return False
    return True
def example_direct_predictor_usage():
    """Plot through ``PRFPredictor.plot_sequence_prediction`` directly.

    Builds a predictor, plots a demo sequence with custom thresholds and an
    ensemble weight of 0.3, prints the first ten rows of the main probability
    columns, and shows the figure.

    Returns:
        tuple: ``(results, fig)`` from ``plot_sequence_prediction``, or
        ``(None, None)`` if any step raised an exception.
    """
    rule = "=" * 50
    print(rule)
    print("直接使用PRFPredictor类绘图示例")
    print(rule)

    try:
        # Instantiate the predictor directly.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
            "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        ))

        # Class-method plotting with a custom ensemble weight.
        plot_options = {
            "sequence": example_sequence,
            "short_threshold": 0.65,
            "long_threshold": 0.8,
            "ensemble_weight": 0.3,
            "title": "使用PRFPredictor类的绘图结果 (集成权重 3:7)",
        }
        results, fig = predictor.plot_sequence_prediction(**plot_options)

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(f"使用集成权重比例: Short:{0.3:.1f}, Long:{0.7:.1f}")

        # Preview of the main probability columns.
        print("\n前10个预测结果:")
        preview_columns = [
            'Position',
            'Short_Probability',
            'Long_Probability',
            'Ensemble_Probability',
        ]
        print(results[preview_columns].head(10))

        # The weight description column is printed only when present.
        if 'Ensemble_Weights' in results.columns:
            first_weights = results['Ensemble_Weights'].iloc[0]
            print(f"\n集成权重配置: {first_weights}")

        plt.show()
    except Exception as exc:
        print(f"使用PRFPredictor类时出错: {exc}")
        return None, None
    return results, fig
def example_new_api_usage():
    """Demonstrate ``predict_sequence`` and ``predict_regions`` on demo data.

    Runs ``predict_sequence`` on a short demo sequence, then pads that
    sequence with ``A`` to 399 characters and passes it to
    ``predict_regions``. Prints the result sizes, the probability-related
    column names, and the mean ``Ensemble_Probability`` of each result.

    Returns:
        tuple: ``(results, region_results)``, or ``(None, None)`` if any step
        raised an exception.
    """
    rule = "=" * 50
    print(rule)
    print("新API方法使用示例")
    print(rule)

    try:
        # Instantiate the predictor directly.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        ))

        print("1. 使用新的 predict_sequence() 方法:")
        results = predictor.predict_sequence(
            sequence=example_sequence,
            ensemble_weight=0.3,
        )
        print(f" 序列预测完成: {len(results)} 个位置")
        probability_columns = [name for name in results.columns if 'Probability' in name]
        print(f" 主要输出字段: {probability_columns}")

        print("\n2. 使用新的 predict_regions() 方法:")
        # Simulate a 399-character region by right-padding the demo sequence with "A".
        padded_region = example_sequence.ljust(399, "A")
        region_sequences = [padded_region]
        region_results = predictor.predict_regions(
            sequences=region_sequences,
            ensemble_weight=0.4,
        )
        print(f" 区域预测完成: {len(region_results)} 个序列")
        region_columns = [
            name
            for name in region_results.columns
            if 'Probability' in name or 'Sequence' in name
        ]
        print(f" 主要输出字段: {region_columns}")

        # Summary statistics.
        print("\n3. 结果统计:")
        print(f" 序列预测平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
        print(f" 区域预测平均集成概率: {region_results['Ensemble_Probability'].mean():.3f}")
    except Exception as exc:
        print(f"新API使用时出错: {exc}")
        return None, None
    return results, region_results
def _example_succeeded(result):
    """Return True when an example's return value signals success.

    Failure is signalled by ``None``, ``False``, or a tuple whose items are
    all ``None`` (e.g. ``(None, None)``); anything else counts as success.
    Identity checks are used so the result objects are never compared with
    ``==``/``!=``.
    """
    if result is None or result is False:
        return False
    if isinstance(result, tuple) and all(item is None for item in result):
        return False
    return True


def main(examples=None):
    """Run the demo examples in order and print a feature summary.

    Args:
        examples: Optional iterable of ``(label, callable)`` pairs to run.
            When omitted, the six ``example_*`` functions of this script are
            used.

    Each callable is invoked with no arguments. An exception is reported as
    an execution error; otherwise the return value decides between the
    success and failure messages (see ``_example_succeeded``). Note that an
    example returning nothing is therefore reported as failed.
    """
    print("FScanpy 序列预测绘图功能演示")
    print("=" * 60)
    print("新功能:规范化的集成权重参数 (ensemble_weight)")
    print("权重范围0.0 到 1.0 (对应 Short模型的权重Long模型权重 = 1 - ensemble_weight)")
    print("新命名Ensemble_Probability 替代 Voting_Probability")
    print("=" * 60)

    if examples is None:
        examples = [
            ("1. 基础绘图示例", example_basic_plotting),
            ("2. 自定义集成权重示例", example_custom_ensemble_weights),
            ("3. 集成权重对比示例", example_ensemble_comparison),
            ("4. 保存图片示例", example_save_plot),
            ("5. 直接使用PRFPredictor类示例", example_direct_predictor_usage),
            ("6. 新API方法使用示例", example_new_api_usage),
        ]

    for name, func in examples:
        print(f"\n{name}")
        try:
            result = func()
        except Exception as e:
            # One broken example must not stop the remaining ones.
            print(f"✗ 示例执行出错: {str(e)}")
        else:
            if _example_succeeded(result):
                print("✓ 示例执行成功")
            else:
                print("✗ 示例执行失败")
        print("-" * 50)

    print("\n演示完成!")
    print("\n📊 新功能总结:")
    print("1. plot_prf_prediction(): 便捷的绘图函数")
    print("2. PRFPredictor.plot_sequence_prediction(): 类方法绘图")
    print("3. PRFPredictor.predict_sequence(): 序列滑动窗口预测替代predict_full")
    print("4. PRFPredictor.predict_regions(): 区域批量预测替代predict_region")
    print("5. 支持自定义阈值、标题、保存路径等参数")
    print("6. 新增 ensemble_weight 参数,可调节两个模型的集成权重比例")
    print("\n⚖️ 集成权重示例:")
    print(" - ensemble_weight=0.2: Short模型20%, Long模型80% (Long主导)")
    print(" - ensemble_weight=0.4: Short模型40%, Long模型60% (默认平衡)")
    print(" - ensemble_weight=0.5: Short模型50%, Long模型50% (等权重)")
    print(" - ensemble_weight=0.7: Short模型70%, Long模型30% (Short主导)")
    print("\n📂 输出字段:")
    print(" - Short_Probability: Short模型(HistGB)预测概率")
    print(" - Long_Probability: Long模型(BiLSTM-CNN)预测概率")
    print(" - Ensemble_Probability: 集成预测概率(主要结果)")
    print(" - Ensemble_Weights: 权重配置信息")
    print(" - Short_Sequence: 33bp序列")
    print(" - Long_Sequence: 399bp序列")
    print("7. 自动保存PNG和PDF两种格式")


if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long