Compare commits
	
		
			No commits in common. "96b61d34d822b481dab9debb2a455dfae95884f0" and "96ae7ace4bce79a24e1d77f65d6a962f3d099639" have entirely different histories.
		
	
	
		
			96b61d34d8
			...
			96ae7ace4b
		
	
		
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							|  | @ -267,9 +267,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file | ||||||
| 
 | 
 | ||||||
| ## 🆘 Support | ## 🆘 Support | ||||||
| 
 | 
 | ||||||
|  | - **Issues**: [GitHub Issues](https://github.com/your-org/FScanpy/issues) | ||||||
| - **Documentation**: [Tutorial](tutorial/tutorial.md) | - **Documentation**: [Tutorial](tutorial/tutorial.md) | ||||||
| - **Usage Example**: [Demo Notebook](FScanpy_Demo.ipynb) | - **Examples**: [Demo Notebook](FScanpy_Demo.ipynb) | ||||||
| - **Prediction Result Explanation**: [Prediction Result Explanation](tutorial/predict_sample.ipynb) |  | ||||||
| 
 | 
 | ||||||
| ## 🏗️ Dependencies | ## 🏗️ Dependencies | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,362 @@ | ||||||
|  | #!/usr/bin/env python3 | ||||||
|  | """ | ||||||
|  | FScanpy 序列预测绘图示例 | ||||||
|  | 
 | ||||||
|  | 展示如何使用新的 plot_prf_prediction 函数绘制序列的移码概率预测结果 | ||||||
|  | 包含集成权重参数的使用示例 | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | import matplotlib.pyplot as plt | ||||||
|  | import os | ||||||
|  | from FScanpy import plot_prf_prediction, PRFPredictor | ||||||
|  | 
 | ||||||
def example_basic_plotting():
    """Plot the prediction for a demo sequence using default plot settings.

    Calls ``plot_prf_prediction`` with only a sequence and a title, prints a
    short summary of the returned table and then shows the figure.

    Returns:
        The ``(results, fig)`` pair produced by ``plot_prf_prediction``, or
        ``(None, None)`` when any step raised an exception.
    """
    banner = "=" * 50
    print(banner)
    print("基础绘图示例")
    print(banner)

    # Demo sequence; replace it with a real sequence of interest.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    try:
        # No ensemble_weight is passed, so the library default applies
        # (the messages below describe it as 0.4 short / 0.6 long).
        results, fig = plot_prf_prediction(
            title="示例序列的移码概率预测 (默认集成权重 4:6)",
            sequence=example_sequence,
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        positive_rows = results[results['Ensemble_Probability'] > 0]
        print(f"满足阈值条件的位点数: {len(positive_rows)}")
        print("使用集成权重比例: Short模型 0.4, Long模型 0.6")

        # Display the figure.
        plt.show()
    except Exception as e:
        print(f"绘图过程中出错: {e!s}")
        return None, None

    return results, fig
|  | 
 | ||||||
def example_custom_ensemble_weights():
    """Plot the same demo sequence under several ensemble weights.

    For each configured weight, calls ``plot_prf_prediction``, prints summary
    statistics of the returned table and shows the figure. A failure in one
    configuration is reported and does not stop the remaining ones.

    Returns:
        ``True`` when every configuration was plotted without raising,
        ``False`` otherwise. (Previously the function returned ``None``,
        which the ``main()`` runner always reported as a failed example.)
    """
    print("=" * 50)
    print("自定义集成权重绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
    )

    # (ensemble_weight, human-readable label) pairs to try; the printed
    # summary treats the weight as the Short model's share.
    weight_configs = [
        (0.2, "Long模型主导 (2:8)"),
        (0.5, "等权重组合 (5:5)"),
        (0.7, "Short模型主导 (7:3)"),
    ]

    all_succeeded = True
    for ensemble_weight, description in weight_configs:
        print(f"\n测试集成权重配置: {description}")
        try:
            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 - {description}",
                figsize=(14, 7)
            )

            print(f"预测完成!共处理 {len(results)} 个位置")
            print(f"满足阈值条件的位点数: {len(results[results['Ensemble_Probability'] > 0])}")

            # Summary statistics for this configuration.
            print("预测统计信息:")
            print(f"  Short模型平均概率: {results['Short_Probability'].mean():.3f}")
            print(f"  Long模型平均概率: {results['Long_Probability'].mean():.3f}")
            print(f"  集成平均概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f"  集成权重比例: Short:{ensemble_weight:.1f}, Long:{1-ensemble_weight:.1f}")

            plt.show()

        except Exception as e:
            # Best-effort demo: report the failure and continue, but remember
            # it so the caller gets an accurate overall status.
            print(f"集成权重 {ensemble_weight} 绘图时出错: {str(e)}")
            all_succeeded = False

    return all_succeeded
|  | 
 | ||||||
def example_ensemble_comparison():
    """Draw stacked bar charts comparing several ensemble weights.

    Runs ``PRFPredictor.predict_sequence`` once per weight, plots the
    ``Ensemble_Probability`` column against ``Position`` in one subplot per
    weight, shows the figure and prints comparison statistics.

    Returns:
        ``(all_results, fig)`` where ``all_results`` is the list of per-weight
        prediction tables, or ``(None, None)`` if any step raised.
    """
    print("=" * 50)
    print("集成权重对比绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    # Weights to compare and their display labels (kept in lockstep).
    weights = [0.3, 0.4, 0.6]
    weight_names = ["Long主导 (3:7)", "默认权重 (4:6)", "Short主导 (6:4)"]

    fig = None  # lets the error path know whether a figure needs closing
    try:
        # Create the predictor instance.
        predictor = PRFPredictor()

        # One row per weight; squeeze=False keeps `axes` two-dimensional so
        # the indexing below also works if the list is reduced to one entry.
        fig, axes = plt.subplots(len(weights), 1, figsize=(15, 12), squeeze=False)
        fig.suptitle('不同集成权重配置的预测结果对比', fontsize=16)

        all_results = []

        for i, (weight, name) in enumerate(zip(weights, weight_names)):
            # Predict with this weight.
            results = predictor.predict_sequence(
                sequence=example_sequence,
                ensemble_weight=weight
            )
            all_results.append(results)

            # Bar chart of the ensemble probability per position.
            ax = axes[i, 0]
            ax.bar(results['Position'], results['Ensemble_Probability'],
                   alpha=0.7, color=f'C{i}', width=2)
            ax.set_title(f'{name} - 平均概率: {results["Ensemble_Probability"].mean():.3f}')
            ax.set_ylabel('概率')
            ax.grid(True, alpha=0.3)
            ax.set_ylim(0, 1)

            # Only the bottom subplot gets an x-axis label.
            if i == len(weights) - 1:
                ax.set_xlabel('序列位置')

        plt.tight_layout()
        plt.show()

        # Print comparison statistics.
        print("\n集成权重对比统计:")
        for name, results in zip(weight_names, all_results):
            ensemble = results['Ensemble_Probability']
            print(f"{name}:")
            print(f"  平均集成概率: {ensemble.mean():.3f}")
            print(f"  最大集成概率: {ensemble.max():.3f}")
            print(f"  非零预测数量: {(ensemble > 0).sum()}")

        return all_results, fig

    except Exception as e:
        print(f"集成权重对比时出错: {str(e)}")
        # Do not leave a half-built figure open for later pyplot calls.
        if fig is not None:
            plt.close(fig)
        return None, None
|  | 
 | ||||||
def example_save_plot():
    """Save prediction plots for several ensemble weights to PNG files.

    Creates the ``prediction_plots`` directory if needed, then calls
    ``plot_prf_prediction`` once per weight with a ``save_path`` inside that
    directory and closes each figure without displaying it.

    Returns:
        ``True`` when every plot call completed, ``False`` if one raised.
        Errors from creating the directory are not caught here.
    """
    divider = "=" * 50
    print(divider)
    print("保存图片示例")
    print(divider)

    # Output directory for the saved plots.
    save_dir = "prediction_plots"
    os.makedirs(save_dir, exist_ok=True)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    try:
        # (ensemble_weight, file-name suffix) for each plot to save.
        weight_configs = (
            (0.3, "long_dominant"),
            (0.5, "equal_weight"),
            (0.7, "short_dominant"),
        )

        for ensemble_weight, file_suffix in weight_configs:
            file_name = f"prediction_{file_suffix}.png"
            save_path = os.path.join(save_dir, file_name)
            plot_title = f"移码概率预测 (集成权重 {ensemble_weight:.1f}:{1-ensemble_weight:.1f})"

            # The prediction table is not needed here, only the figure.
            _, fig = plot_prf_prediction(
                sequence=example_sequence,
                short_threshold=0.6,
                long_threshold=0.75,
                ensemble_weight=ensemble_weight,
                title=plot_title,
                save_path=save_path,
                dpi=300,
            )

            print(f"图片已保存至: {save_path}")

            # Close the figure straight away instead of showing it.
            plt.close(fig)

        print("所有集成权重配置的图片都已保存完成")
    except Exception as e:
        print(f"保存图片过程中出错: {e!s}")
        return False

    return True
|  | 
 | ||||||
def example_direct_predictor_usage():
    """Plot through a ``PRFPredictor`` instance instead of the helper function.

    Builds a predictor, calls its ``plot_sequence_prediction`` method with
    explicit thresholds and an ensemble weight of 0.3, prints the first ten
    rows of selected columns and shows the figure.

    Returns:
        The ``(results, fig)`` pair from ``plot_sequence_prediction``, or
        ``(None, None)`` if any step raised an exception.
    """
    rule = "=" * 50
    print(rule)
    print("直接使用PRFPredictor类绘图示例")
    print(rule)

    try:
        # Instantiate the predictor directly.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = (
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
            "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        )

        # Plot via the instance method with a custom ensemble weight.
        plot_options = {
            "sequence": example_sequence,
            "short_threshold": 0.65,
            "long_threshold": 0.8,
            "ensemble_weight": 0.3,
            "title": "使用PRFPredictor类的绘图结果 (集成权重 3:7)",
        }
        results, fig = predictor.plot_sequence_prediction(**plot_options)

        print(f"预测完成!共处理 {len(results)} 个位置")
        print("使用集成权重比例: Short:0.3, Long:0.7")

        # Show the first rows of the main probability columns.
        print("\n前10个预测结果:")
        preview = results[
            ['Position', 'Short_Probability', 'Long_Probability', 'Ensemble_Probability']
        ]
        print(preview.head(10))

        # Report the weight configuration when the table carries it.
        has_weight_column = 'Ensemble_Weights' in results.columns
        if has_weight_column:
            print(f"\n集成权重配置: {results['Ensemble_Weights'].iloc[0]}")

        plt.show()
    except Exception as e:
        print(f"使用PRFPredictor类时出错: {e!s}")
        return None, None

    return results, fig
|  | 
 | ||||||
def example_new_api_usage():
    """Exercise ``predict_sequence`` and ``predict_regions`` without plotting.

    Runs a sequence prediction on a short demo sequence, then a region
    prediction on that sequence padded with ``A`` up to 399 characters, and
    prints the matching column names and the mean ensemble probabilities.

    Returns:
        ``(results, region_results)`` from the two predictor calls, or
        ``(None, None)`` if any step raised an exception.
    """
    separator = "=" * 50
    print(separator)
    print("新API方法使用示例")
    print(separator)

    try:
        # Instantiate the predictor directly.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = (
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        )

        print("1. 使用新的 predict_sequence() 方法:")
        results = predictor.predict_sequence(
            sequence=example_sequence,
            ensemble_weight=0.3,
        )

        print(f"   序列预测完成: {len(results)} 个位置")
        probability_columns = [
            column for column in results.columns if 'Probability' in column
        ]
        print(f"   主要输出字段: {probability_columns}")

        print("\n2. 使用新的 predict_regions() 方法:")
        # Simulate a 399 bp region by right-padding the demo sequence with "A".
        padded_region = example_sequence.ljust(399, "A")
        region_results = predictor.predict_regions(
            sequences=[padded_region],
            ensemble_weight=0.4,
        )

        print(f"   区域预测完成: {len(region_results)} 个序列")
        region_columns = [
            column
            for column in region_results.columns
            if 'Probability' in column or 'Sequence' in column
        ]
        print(f"   主要输出字段: {region_columns}")

        # Summary statistics.
        print("\n3. 结果统计:")
        print(f"   序列预测平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
        print(f"   区域预测平均集成概率: {region_results['Ensemble_Probability'].mean():.3f}")
    except Exception as e:
        print(f"新API使用时出错: {e!s}")
        return None, None

    return results, region_results
|  | 
 | ||||||
def _example_succeeded(result):
    """Return True when an example's return value signals success.

    The examples in this script report failure as ``None``, ``False`` or a
    tuple made up only of ``None`` values such as ``(None, None)``.
    """
    if result is None or result is False:
        return False
    if isinstance(result, tuple):
        return any(item is not None for item in result)
    return True


def main():
    """Run every example in order and print a per-example status line.

    Each example is called inside its own ``try`` so one failing example does
    not stop the others. After all examples have run, a summary of the
    demonstrated API and output fields is printed.
    """
    print("FScanpy 序列预测绘图功能演示")
    print("=" * 60)
    print("新功能:规范化的集成权重参数 (ensemble_weight)")
    print("权重范围:0.0 到 1.0 (对应 Short模型的权重,Long模型权重 = 1 - ensemble_weight)")
    print("新命名:Ensemble_Probability 替代 Voting_Probability")
    print("=" * 60)

    examples = [
        ("1. 基础绘图示例", example_basic_plotting),
        ("2. 自定义集成权重示例", example_custom_ensemble_weights),
        ("3. 集成权重对比示例", example_ensemble_comparison),
        ("4. 保存图片示例", example_save_plot),
        ("5. 直接使用PRFPredictor类示例", example_direct_predictor_usage),
        ("6. 新API方法使用示例", example_new_api_usage)
    ]

    for name, func in examples:
        print(f"\n{name}")
        try:
            result = func()
        except Exception as e:
            print(f"✗ 示例执行出错: {str(e)}")
        else:
            # The old check `result != False` treated the (None, None)
            # failure tuple as success; inspect the tuple contents instead.
            if _example_succeeded(result):
                print("✓ 示例执行成功")
            else:
                print("✗ 示例执行失败")

        print("-" * 50)

    print("\n演示完成!")
    print("\n📊 新功能总结:")
    print("1. plot_prf_prediction(): 便捷的绘图函数")
    print("2. PRFPredictor.plot_sequence_prediction(): 类方法绘图")
    print("3. PRFPredictor.predict_sequence(): 序列滑动窗口预测(替代predict_full)")
    print("4. PRFPredictor.predict_regions(): 区域批量预测(替代predict_region)")
    print("5. 支持自定义阈值、标题、保存路径等参数")
    print("6. 新增 ensemble_weight 参数,可调节两个模型的集成权重比例")
    print("\n⚖️ 集成权重示例:")
    print("   - ensemble_weight=0.2: Short模型20%, Long模型80% (Long主导)")
    print("   - ensemble_weight=0.4: Short模型40%, Long模型60% (默认平衡)")
    print("   - ensemble_weight=0.5: Short模型50%, Long模型50% (等权重)")
    print("   - ensemble_weight=0.7: Short模型70%, Long模型30% (Short主导)")
    print("\n📂 输出字段:")
    print("   - Short_Probability: Short模型(HistGB)预测概率")
    print("   - Long_Probability: Long模型(BiLSTM-CNN)预测概率")
    print("   - Ensemble_Probability: 集成预测概率(主要结果)")
    print("   - Ensemble_Weights: 权重配置信息")
    print("   - Short_Sequence: 33bp序列")
    print("   - Long_Sequence: 399bp序列")
    print("7. 自动保存PNG和PDF两种格式")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Loading…
	
		Reference in New Issue