#!/usr/bin/env python3
"""FScanpy sequence-prediction plotting examples.

Shows how to use ``plot_prf_prediction`` and ``PRFPredictor`` to plot the
predicted frameshift probability along a sequence, including usage of the
``ensemble_weight`` parameter.

Every ``example_*`` function prints its progress, catches its own errors and
returns a failure sentinel (``False`` or a tuple of ``None``) instead of
raising, so that ``main()`` can run all examples in sequence.
"""

import os

import matplotlib.pyplot as plt

from FScanpy import plot_prf_prediction, PRFPredictor


# Demo sequence, stored as the chunks the examples are built from.  Replace
# these with your own sequence when adapting the examples.
_SEQUENCE_HEAD = "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
_SEQUENCE_REPEAT = (
    "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
    "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
    "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
)
# One head chunk followed by three repetitions of the repeat unit (10 chunks).
_SEQUENCE_CHUNKS = (_SEQUENCE_HEAD,) + _SEQUENCE_REPEAT * 3


def _example_sequence(chunk_count):
    """Return the demo sequence made of the first ``chunk_count`` chunks."""
    return "".join(_SEQUENCE_CHUNKS[:chunk_count])


def _print_banner(title):
    """Print ``title`` framed by two 50-character separator lines."""
    print("=" * 50)
    print(title)
    print("=" * 50)


def _count_positive(results):
    """Return how many rows have ``Ensemble_Probability`` greater than zero."""
    return int((results["Ensemble_Probability"] > 0).sum())


def _example_succeeded(result):
    """Tell whether an example's return value signals success.

    The examples report failure as ``None``, ``False`` or a tuple made only
    of ``None`` values (e.g. ``(None, None)``); anything else is a success.
    Identity checks are used so that no comparison is ever evaluated on the
    returned prediction tables.
    """
    if result is None or result is False:
        return False
    if isinstance(result, tuple):
        return any(item is not None for item in result)
    return True


def example_basic_plotting():
    """Plot a sequence with the default parameters.

    Returns:
        ``(results, fig)`` on success, ``(None, None)`` if anything failed.
    """
    _print_banner("基础绘图示例")

    example_sequence = _example_sequence(10)

    try:
        # Default parameters (the demo text describes them as a 0.4:0.6
        # short:long ensemble weighting).
        results, fig = plot_prf_prediction(
            sequence=example_sequence,
            title="示例序列的移码概率预测 (默认集成权重 4:6)",
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(f"满足阈值条件的位点数: {_count_positive(results)}")
        print("使用集成权重比例: Short模型 0.4, Long模型 0.6")

        plt.show()
        return results, fig

    except Exception as e:
        print(f"绘图过程中出错: {str(e)}")
        return None, None


def example_custom_ensemble_weights():
    """Plot the same sequence with several ``ensemble_weight`` values.

    Each configuration is attempted independently; a failure in one does not
    stop the others.

    Returns:
        ``True`` if every configuration was plotted, ``False`` otherwise.
    """
    _print_banner("自定义集成权重绘图示例")

    example_sequence = _example_sequence(5)

    # (ensemble_weight, description); the weight is the Short model's share.
    weight_configs = [
        (0.2, "Long模型主导 (2:8)"),
        (0.5, "等权重组合 (5:5)"),
        (0.7, "Short模型主导 (7:3)"),
    ]

    all_succeeded = True
    for ensemble_weight, description in weight_configs:
        print(f"\n测试集成权重配置: {description}")

        try:
            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 - {description}",
                figsize=(14, 7),
            )

            print(f"预测完成!共处理 {len(results)} 个位置")
            print(f"满足阈值条件的位点数: {_count_positive(results)}")

            print("预测统计信息:")
            print(f" Short模型平均概率: {results['Short_Probability'].mean():.3f}")
            print(f" Long模型平均概率: {results['Long_Probability'].mean():.3f}")
            print(f" 集成平均概率: {results['Ensemble_Probability'].mean():.3f}")
            print(
                f" 集成权重比例: Short:{ensemble_weight:.1f}, "
                f"Long:{1-ensemble_weight:.1f}"
            )

            plt.show()
            # The figure is not returned, so release it; otherwise figures
            # pile up across iterations on non-interactive backends.
            plt.close(fig)

        except Exception as e:
            all_succeeded = False
            print(f"集成权重 {ensemble_weight} 绘图时出错: {str(e)}")

    return all_succeeded


def example_ensemble_comparison():
    """Compare several ensemble weights side by side in one figure.

    Returns:
        ``(all_results, fig)`` where ``all_results`` holds one prediction
        table per weight, or ``(None, None)`` if anything failed.
    """
    _print_banner("集成权重对比绘图示例")

    example_sequence = _example_sequence(4)

    # (ensemble_weight, label) for each subplot.
    weight_configs = [
        (0.3, "Long主导 (3:7)"),
        (0.4, "默认权重 (4:6)"),
        (0.6, "Short主导 (6:4)"),
    ]

    try:
        predictor = PRFPredictor()

        # Run all predictions before creating the figure so that a failing
        # prediction does not leave an empty figure behind.
        all_results = [
            predictor.predict_sequence(
                sequence=example_sequence,
                ensemble_weight=weight,
            )
            for weight, _ in weight_configs
        ]

        fig, axes = plt.subplots(len(weight_configs), 1, figsize=(15, 12))
        fig.suptitle('不同集成权重配置的预测结果对比', fontsize=16)

        last_index = len(weight_configs) - 1
        for i, ((_, name), results) in enumerate(zip(weight_configs, all_results)):
            ax = axes[i]
            ax.bar(
                results['Position'],
                results['Ensemble_Probability'],
                alpha=0.7,
                color=f'C{i}',
                width=2,
            )
            mean_probability = results["Ensemble_Probability"].mean()
            ax.set_title(f'{name} - 平均概率: {mean_probability:.3f}')
            ax.set_ylabel('概率')
            ax.grid(True, alpha=0.3)
            ax.set_ylim(0, 1)

            # Only the bottom subplot carries the x-axis label.
            if i == last_index:
                ax.set_xlabel('序列位置')

        plt.tight_layout()
        plt.show()

        print("\n集成权重对比统计:")
        for (_, name), results in zip(weight_configs, all_results):
            print(f"{name}:")
            print(f" 平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f" 最大集成概率: {results['Ensemble_Probability'].max():.3f}")
            print(f" 非零预测数量: {(results['Ensemble_Probability'] > 0).sum()}")

        return all_results, fig

    except Exception as e:
        print(f"集成权重对比时出错: {str(e)}")
        return None, None


def example_save_plot():
    """Save plots for several ensemble weights without displaying them.

    Images are written to the ``prediction_plots`` directory, which is
    created if missing.

    Returns:
        ``True`` if every plot was saved, ``False`` if anything failed.
    """
    _print_banner("保存图片示例")

    save_dir = "prediction_plots"
    os.makedirs(save_dir, exist_ok=True)

    example_sequence = _example_sequence(4)

    try:
        # (ensemble_weight, file-name suffix)
        weight_configs = [
            (0.3, "long_dominant"),
            (0.5, "equal_weight"),
            (0.7, "short_dominant"),
        ]

        for ensemble_weight, file_suffix in weight_configs:
            save_path = os.path.join(save_dir, f"prediction_{file_suffix}.png")

            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                short_threshold=0.6,
                long_threshold=0.75,
                ensemble_weight=ensemble_weight,
                title=(
                    f"移码概率预测 (集成权重 "
                    f"{ensemble_weight:.1f}:{1-ensemble_weight:.1f})"
                ),
                save_path=save_path,
                dpi=300,
            )

            print(f"图片已保存至: {save_path}")

            # Close without showing; the image is already on disk.
            plt.close(fig)

        print("所有集成权重配置的图片都已保存完成")
        return True

    except Exception as e:
        print(f"保存图片过程中出错: {str(e)}")
        return False


def example_direct_predictor_usage():
    """Plot through ``PRFPredictor.plot_sequence_prediction`` directly.

    Returns:
        ``(results, fig)`` on success, ``(None, None)`` if anything failed.
    """
    _print_banner("直接使用PRFPredictor类绘图示例")

    try:
        predictor = PRFPredictor()

        example_sequence = _example_sequence(3)

        # Single source of truth for the weight used in the call and in the
        # report printed below.
        ensemble_weight = 0.3

        results, fig = predictor.plot_sequence_prediction(
            sequence=example_sequence,
            short_threshold=0.65,
            long_threshold=0.8,
            ensemble_weight=ensemble_weight,
            title="使用PRFPredictor类的绘图结果 (集成权重 3:7)",
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(
            f"使用集成权重比例: Short:{ensemble_weight:.1f}, "
            f"Long:{1 - ensemble_weight:.1f}"
        )

        print("\n前10个预测结果:")
        columns_to_show = [
            'Position',
            'Short_Probability',
            'Long_Probability',
            'Ensemble_Probability',
        ]
        print(results[columns_to_show].head(10))

        # The weights column is optional; report it only when present.
        if 'Ensemble_Weights' in results.columns:
            print(f"\n集成权重配置: {results['Ensemble_Weights'].iloc[0]}")

        plt.show()
        return results, fig

    except Exception as e:
        print(f"使用PRFPredictor类时出错: {str(e)}")
        return None, None


def example_new_api_usage():
    """Demonstrate ``predict_sequence`` and ``predict_regions``.

    Returns:
        ``(results, region_results)`` on success, ``(None, None)`` if
        anything failed.
    """
    _print_banner("新API方法使用示例")

    try:
        predictor = PRFPredictor()

        example_sequence = _example_sequence(2)

        print("1. 使用新的 predict_sequence() 方法:")
        results = predictor.predict_sequence(
            sequence=example_sequence,
            ensemble_weight=0.3,
        )
        probability_columns = [col for col in results.columns if 'Probability' in col]
        print(f" 序列预测完成: {len(results)} 个位置")
        print(f" 主要输出字段: {probability_columns}")

        print("\n2. 使用新的 predict_regions() 方法:")
        # Simulate a 399 bp region by padding the demo sequence with "A".
        region_sequences = [example_sequence + "A" * (399 - len(example_sequence))]
        region_results = predictor.predict_regions(
            sequences=region_sequences,
            ensemble_weight=0.4,
        )
        region_columns = [
            col
            for col in region_results.columns
            if 'Probability' in col or 'Sequence' in col
        ]
        print(f" 区域预测完成: {len(region_results)} 个序列")
        print(f" 主要输出字段: {region_columns}")

        print("\n3. 结果统计:")
        print(f" 序列预测平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
        print(f" 区域预测平均集成概率: {region_results['Ensemble_Probability'].mean():.3f}")

        return results, region_results

    except Exception as e:
        print(f"新API使用时出错: {str(e)}")
        return None, None


def main():
    """Run every example in order and print a feature summary."""
    print("FScanpy 序列预测绘图功能演示")
    print("=" * 60)
    print("新功能:规范化的集成权重参数 (ensemble_weight)")
    print("权重范围:0.0 到 1.0 (对应 Short模型的权重,Long模型权重 = 1 - ensemble_weight)")
    print("新命名:Ensemble_Probability 替代 Voting_Probability")
    print("=" * 60)

    examples = [
        ("1. 基础绘图示例", example_basic_plotting),
        ("2. 自定义集成权重示例", example_custom_ensemble_weights),
        ("3. 集成权重对比示例", example_ensemble_comparison),
        ("4. 保存图片示例", example_save_plot),
        ("5. 直接使用PRFPredictor类示例", example_direct_predictor_usage),
        ("6. 新API方法使用示例", example_new_api_usage),
    ]

    for name, func in examples:
        print(f"\n{name}")
        try:
            result = func()
            # Examples signal failure with None, False or (None, None); a
            # plain truthiness / != False test would count (None, None) as
            # a success.
            if _example_succeeded(result):
                print("✓ 示例执行成功")
            else:
                print("✗ 示例执行失败")
        except Exception as e:
            print(f"✗ 示例执行出错: {str(e)}")

        print("-" * 50)

    print("\n演示完成!")
    print("\n📊 新功能总结:")
    print("1. plot_prf_prediction(): 便捷的绘图函数")
    print("2. PRFPredictor.plot_sequence_prediction(): 类方法绘图")
    print("3. PRFPredictor.predict_sequence(): 序列滑动窗口预测(替代predict_full)")
    print("4. PRFPredictor.predict_regions(): 区域批量预测(替代predict_region)")
    print("5. 支持自定义阈值、标题、保存路径等参数")
    print("6. 新增 ensemble_weight 参数,可调节两个模型的集成权重比例")
    print("7. 自动保存PNG和PDF两种格式")
    print("\n⚖️ 集成权重示例:")
    print(" - ensemble_weight=0.2: Short模型20%, Long模型80% (Long主导)")
    print(" - ensemble_weight=0.4: Short模型40%, Long模型60% (默认平衡)")
    print(" - ensemble_weight=0.5: Short模型50%, Long模型50% (等权重)")
    print(" - ensemble_weight=0.7: Short模型70%, Long模型30% (Short主导)")
    print("\n📂 输出字段:")
    print(" - Short_Probability: Short模型(HistGB)预测概率")
    print(" - Long_Probability: Long模型(BiLSTM-CNN)预测概率")
    print(" - Ensemble_Probability: 集成预测概率(主要结果)")
    print(" - Ensemble_Weights: 权重配置信息")
    print(" - Short_Sequence: 33bp序列")
    print(" - Long_Sequence: 399bp序列")


if __name__ == "__main__":
    main()