#!/usr/bin/env python3
"""
FScanpy 序列预测绘图示例
展示如何使用新的 plot_prf_prediction 函数绘制序列的移码概率预测结果
包含集成权重参数的使用示例
"""
import matplotlib.pyplot as plt
import os
from FScanpy import plot_prf_prediction, PRFPredictor
def example_basic_plotting():
    """Plot frameshift probabilities for a demo sequence with default settings.

    Calls ``plot_prf_prediction`` with only a sequence and a title, prints a
    short summary of the returned table, and shows the figure.

    Returns:
        tuple: ``(results, fig)`` exactly as returned by
        ``plot_prf_prediction`` on success, or ``(None, None)`` if any step
        of the demo raised.
    """
    print("=" * 50)
    print("基础绘图示例")
    print("=" * 50)

    # Demo sequence; replace it with a real sequence as needed.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    try:
        # No ensemble_weight is passed here. The messages below describe the
        # default as 0.4 (Short) / 0.6 (Long) -- presumably the library
        # default; verify against plot_prf_prediction.
        results, fig = plot_prf_prediction(
            sequence=example_sequence,
            title="示例序列的移码概率预测 (默认集成权重 4:6)"
        )

        # Count rows with a non-zero ensemble probability via a boolean mask.
        # Assumes `results` is a pandas-style table -- confirm in FScanpy.
        positive_sites = int((results['Ensemble_Probability'] > 0).sum())

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(f"满足阈值条件的位点数: {positive_sites}")
        print("使用集成权重比例: Short模型 0.4, Long模型 0.6")

        # Display the figure.
        plt.show()
        return results, fig
    except Exception as exc:
        # Demo boundary: report the problem and signal failure to the caller
        # instead of aborting the whole demonstration run.
        print(f"绘图过程中出错: {exc}")
        return None, None
def example_custom_ensemble_weights():
    """Plot the same demo sequence under several ensemble weights.

    Each configuration is attempted independently: a failure in one is
    reported and the loop moves on to the next.

    Returns:
        bool: ``True`` when every configuration was plotted without error,
        ``False`` otherwise. (Previously the function returned ``None``
        unconditionally, which the driver in this file treats as a failure
        even when all plots succeeded.)
    """
    print("=" * 50)
    print("自定义集成权重绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
    )

    # (ensemble_weight, label) pairs to try. The weight is the Short-model
    # share; the Long-model share is printed below as 1 - ensemble_weight.
    weight_configs = [
        (0.2, "Long模型主导 (2:8)"),
        (0.5, "等权重组合 (5:5)"),
        (0.7, "Short模型主导 (7:3)")
    ]

    all_succeeded = True
    for ensemble_weight, description in weight_configs:
        print(f"\n测试集成权重配置: {description}")
        try:
            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 - {description}",
                figsize=(14, 7)
            )

            positive_sites = int((results['Ensemble_Probability'] > 0).sum())
            print(f"预测完成!共处理 {len(results)} 个位置")
            print(f"满足阈值条件的位点数: {positive_sites}")

            # Summary statistics for this configuration.
            print("预测统计信息:")
            print(f" Short模型平均概率: {results['Short_Probability'].mean():.3f}")
            print(f" Long模型平均概率: {results['Long_Probability'].mean():.3f}")
            print(f" 集成平均概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f" 集成权重比例: Short:{ensemble_weight:.1f}, Long:{1-ensemble_weight:.1f}")

            plt.show()
            # Release the figure so it is not drawn again by a later
            # plt.show() and does not accumulate across iterations.
            plt.close(fig)
        except Exception as exc:
            # Best-effort demo: remember the failure, keep trying the rest.
            all_succeeded = False
            print(f"集成权重 {ensemble_weight} 绘图时出错: {exc}")

    return all_succeeded
def example_ensemble_comparison():
    """Compare ensemble probabilities for several weights in stacked bar plots.

    Runs ``PRFPredictor.predict_sequence`` once per weight, draws one bar
    chart per weight in a single figure, shows it, and prints per-weight
    statistics.

    Returns:
        tuple: ``(all_results, fig)`` where ``all_results`` is the list of
        prediction tables in the same order as the weights, or
        ``(None, None)`` if any step raised.
    """
    print("=" * 50)
    print("集成权重对比绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    # Three ensemble weights to compare, with their display labels.
    weights = [0.3, 0.4, 0.6]
    weight_names = ["Long主导 (3:7)", "默认权重 (4:6)", "Short主导 (6:4)"]

    # Tracked outside the try so the handler can release a half-built figure.
    fig = None
    try:
        predictor = PRFPredictor()

        # One subplot row per weight.
        fig, axes = plt.subplots(len(weights), 1, figsize=(15, 12))
        fig.suptitle('不同集成权重配置的预测结果对比', fontsize=16)

        all_results = []
        for i, (ax, weight, name) in enumerate(zip(axes, weights, weight_names)):
            results = predictor.predict_sequence(
                sequence=example_sequence,
                ensemble_weight=weight
            )
            all_results.append(results)

            # Bar chart of the ensemble probability along the sequence;
            # color cycles through matplotlib's C0, C1, C2.
            ax.bar(results['Position'], results['Ensemble_Probability'],
                   alpha=0.7, color=f'C{i}', width=2)
            ax.set_title(f'{name} - 平均概率: {results["Ensemble_Probability"].mean():.3f}')
            ax.set_ylabel('概率')
            ax.grid(True, alpha=0.3)
            ax.set_ylim(0, 1)

        # Only the bottom subplot carries the shared x-axis label.
        axes[-1].set_xlabel('序列位置')

        plt.tight_layout()
        plt.show()

        # Per-weight comparison statistics.
        print("\n集成权重对比统计:")
        for name, results in zip(weight_names, all_results):
            print(f"{name}:")
            print(f" 平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f" 最大集成概率: {results['Ensemble_Probability'].max():.3f}")
            print(f" 非零预测数量: {(results['Ensemble_Probability'] > 0).sum()}")

        return all_results, fig
    except Exception as exc:
        # Without this, a failure after plt.subplots() would leave an empty
        # figure registered with pyplot, and a later plt.show() would
        # display it.
        if fig is not None:
            plt.close(fig)
        print(f"集成权重对比时出错: {exc}")
        return None, None
def example_save_plot():
    """Render the demo sequence under three weights and save each plot to disk.

    Files are written as ``prediction_<suffix>.png`` inside the
    ``prediction_plots`` directory (created if missing). Figures are closed
    right after saving and never shown.

    Returns:
        bool: ``True`` when every plot was produced, ``False`` if any step
        (including creating the output directory) raised.
    """
    print("=" * 50)
    print("保存图片示例")
    print("=" * 50)

    save_dir = "prediction_plots"

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    # (ensemble_weight, file-name suffix) pairs to render.
    weight_configs = [
        (0.3, "long_dominant"),
        (0.5, "equal_weight"),
        (0.7, "short_dominant")
    ]

    try:
        # Created inside the try so a filesystem error is reported through
        # the same True/False contract as plotting errors.
        os.makedirs(save_dir, exist_ok=True)

        for ensemble_weight, file_suffix in weight_configs:
            save_path = os.path.join(save_dir, f"prediction_{file_suffix}.png")
            # Only the figure handle is needed here; the table is discarded.
            _, fig = plot_prf_prediction(
                sequence=example_sequence,
                short_threshold=0.6,
                long_threshold=0.75,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 (集成权重 {ensemble_weight:.1f}:{1-ensemble_weight:.1f})",
                save_path=save_path,
                dpi=300
            )
            print(f"图片已保存至: {save_path}")
            # Do not display the figure; close it straight away.
            plt.close(fig)

        print("所有集成权重配置的图片都已保存完成")
        return True
    except Exception as exc:
        print(f"保存图片过程中出错: {exc}")
        return False
def example_direct_predictor_usage():
    """Plot a demo sequence through ``PRFPredictor.plot_sequence_prediction``.

    Shows the class-based entry point with custom thresholds and a custom
    ensemble weight, prints the first rows of the returned table, and shows
    the figure.

    Returns:
        tuple: ``(results, fig)`` as returned by
        ``plot_sequence_prediction`` on success, or ``(None, None)`` if any
        step raised.
    """
    print("=" * 50)
    print("直接使用PRFPredictor类绘图示例")
    print("=" * 50)

    # Single source of truth for the weight used in the call and in the
    # summary line below (they were duplicated literals before).
    ensemble_weight = 0.3

    try:
        # Instantiate the predictor directly.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = (
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
            "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        )

        # Plot via the class method with a custom ensemble weight.
        results, fig = predictor.plot_sequence_prediction(
            sequence=example_sequence,
            short_threshold=0.65,
            long_threshold=0.8,
            ensemble_weight=ensemble_weight,
            title="使用PRFPredictor类的绘图结果 (集成权重 3:7)"
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(f"使用集成权重比例: Short:{ensemble_weight:.1f}, Long:{1 - ensemble_weight:.1f}")

        # Show the first ten prediction rows.
        print("\n前10个预测结果:")
        columns_to_show = ['Position', 'Short_Probability', 'Long_Probability', 'Ensemble_Probability']
        print(results[columns_to_show].head(10))

        # The weight description column is optional, so only print it when
        # the returned table actually carries it.
        if 'Ensemble_Weights' in results.columns:
            print(f"\n集成权重配置: {results['Ensemble_Weights'].iloc[0]}")

        plt.show()
        return results, fig
    except Exception as exc:
        print(f"使用PRFPredictor类时出错: {exc}")
        return None, None
def example_new_api_usage():
    """Exercise ``PRFPredictor.predict_sequence`` and ``predict_regions``.

    No plotting is done here; the function only runs both prediction
    methods on demo input and prints summaries of the returned tables.

    Returns:
        tuple: ``(results, region_results)`` from the two prediction calls,
        or ``(None, None)`` if any step raised.
    """
    print("=" * 50)
    print("新API方法使用示例")
    print("=" * 50)

    # Length the demo region is padded to; the messages in this file refer
    # to 399bp region sequences.
    region_length = 399

    try:
        # Instantiate the predictor directly.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = (
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        )

        print("1. 使用新的 predict_sequence() 方法:")
        results = predictor.predict_sequence(
            sequence=example_sequence,
            ensemble_weight=0.3
        )
        probability_columns = [col for col in results.columns if 'Probability' in col]
        print(f" 序列预测完成: {len(results)} 个位置")
        print(f" 主要输出字段: {probability_columns}")

        print("\n2. 使用新的 predict_regions() 方法:")
        # Simulate one region by right-padding the demo sequence with "A".
        # ljust never truncates, so a longer sequence passes through as-is.
        region_sequences = [example_sequence.ljust(region_length, "A")]
        region_results = predictor.predict_regions(
            sequences=region_sequences,
            ensemble_weight=0.4
        )
        region_columns = [
            col for col in region_results.columns
            if 'Probability' in col or 'Sequence' in col
        ]
        print(f" 区域预测完成: {len(region_results)} 个序列")
        print(f" 主要输出字段: {region_columns}")

        # Summary statistics for both calls.
        print("\n3. 结果统计:")
        print(f" 序列预测平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
        print(f" 区域预测平均集成概率: {region_results['Ensemble_Probability'].mean():.3f}")

        return results, region_results
    except Exception as exc:
        print(f"新API使用时出错: {exc}")
        return None, None
def _example_succeeded(result):
    """Return True when an example's return value signals success.

    The examples in this file signal failure in three ways: ``None``,
    ``False``, or a tuple made up entirely of ``None`` (e.g.
    ``(None, None)``). Anything else counts as success.
    """
    if result is None or result is False:
        return False
    if isinstance(result, tuple):
        # A failure tuple such as (None, None) is itself "not None", so it
        # has to be inspected element by element.
        return any(item is not None for item in result)
    return True


def main():
    """Run every example in order and print a feature summary.

    Each example is isolated: an exception escaping one example is reported
    and the remaining examples still run.
    """
    print("FScanpy 序列预测绘图功能演示")
    print("=" * 60)
    print("新功能:规范化的集成权重参数 (ensemble_weight)")
    print("权重范围0.0 到 1.0 (对应 Short模型的权重Long模型权重 = 1 - ensemble_weight)")
    print("新命名Ensemble_Probability 替代 Voting_Probability")
    print("=" * 60)

    examples = [
        ("1. 基础绘图示例", example_basic_plotting),
        ("2. 自定义集成权重示例", example_custom_ensemble_weights),
        ("3. 集成权重对比示例", example_ensemble_comparison),
        ("4. 保存图片示例", example_save_plot),
        ("5. 直接使用PRFPredictor类示例", example_direct_predictor_usage),
        ("6. 新API方法使用示例", example_new_api_usage)
    ]

    for name, func in examples:
        print(f"\n{name}")
        try:
            result = func()
        except Exception as exc:
            print(f"✗ 示例执行出错: {exc}")
        else:
            # The old check (`is not None and != False`) reported the
            # (None, None) failure tuples as success.
            if _example_succeeded(result):
                print("✓ 示例执行成功")
            else:
                print("✗ 示例执行失败")
        print("-" * 50)

    print("\n演示完成!")

    print("\n📊 新功能总结:")
    print("1. plot_prf_prediction(): 便捷的绘图函数")
    print("2. PRFPredictor.plot_sequence_prediction(): 类方法绘图")
    print("3. PRFPredictor.predict_sequence(): 序列滑动窗口预测替代predict_full")
    print("4. PRFPredictor.predict_regions(): 区域批量预测替代predict_region")
    print("5. 支持自定义阈值、标题、保存路径等参数")
    print("6. 新增 ensemble_weight 参数,可调节两个模型的集成权重比例")
    # Item 7 belongs to the numbered feature list; it used to be printed
    # after the output-field section below.
    print("7. 自动保存PNG和PDF两种格式")

    print("\n⚖️ 集成权重示例:")
    print(" - ensemble_weight=0.2: Short模型20%, Long模型80% (Long主导)")
    print(" - ensemble_weight=0.4: Short模型40%, Long模型60% (默认平衡)")
    print(" - ensemble_weight=0.5: Short模型50%, Long模型50% (等权重)")
    print(" - ensemble_weight=0.7: Short模型70%, Long模型30% (Short主导)")

    print("\n📂 输出字段:")
    print(" - Short_Probability: Short模型(HistGB)预测概率")
    print(" - Long_Probability: Long模型(BiLSTM-CNN)预测概率")
    print(" - Ensemble_Probability: 集成预测概率(主要结果)")
    print(" - Ensemble_Weights: 权重配置信息")
    print(" - Short_Sequence: 33bp序列")
    print(" - Long_Sequence: 399bp序列")
# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    main()