Compare commits
	
		
			2 Commits
		
	
	
		
			96ae7ace4b
			...
			96b61d34d8
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | 96b61d34d8 | |
|  | cce177429c | 
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							|  | @ -267,9 +267,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file | ||||||
| 
 | 
 | ||||||
| ## 🆘 Support | ## 🆘 Support | ||||||
| 
 | 
 | ||||||
| - **Issues**: [GitHub Issues](https://github.com/your-org/FScanpy/issues) |  | ||||||
| - **Documentation**: [Tutorial](tutorial/tutorial.md) | - **Documentation**: [Tutorial](tutorial/tutorial.md) | ||||||
| - **Examples**: [Demo Notebook](FScanpy_Demo.ipynb) | - **Usage Example**: [Demo Notebook](FScanpy_Demo.ipynb) | ||||||
|  | - **Prediction Result Explanation**: [Prediction Result Explanation](tutorial/predict_sample.ipynb) | ||||||
| 
 | 
 | ||||||
| ## 🏗️ Dependencies | ## 🏗️ Dependencies | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,362 +0,0 @@ | ||||||
| #!/usr/bin/env python3 |  | ||||||
| """ |  | ||||||
| FScanpy 序列预测绘图示例 |  | ||||||
| 
 |  | ||||||
| 展示如何使用新的 plot_prf_prediction 函数绘制序列的移码概率预测结果 |  | ||||||
| 包含集成权重参数的使用示例 |  | ||||||
| """ |  | ||||||
| 
 |  | ||||||
| import matplotlib.pyplot as plt |  | ||||||
| import os |  | ||||||
| from FScanpy import plot_prf_prediction, PRFPredictor |  | ||||||
| 
 |  | ||||||
def example_basic_plotting():
    """Run the basic plotting demo on a built-in sample sequence.

    Calls ``plot_prf_prediction`` with only a sequence and a title, prints
    two summary counts derived from the returned table, and shows the figure.

    Returns:
        tuple: ``(results, fig)`` as returned by ``plot_prf_prediction``, or
        ``(None, None)`` if any step inside the guarded section raised.
    """
    banner = "=" * 50
    print(banner)
    print("基础绘图示例")
    print(banner)

    # Sample sequence (replace with your own sequence as needed).
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))

    try:
        # No weight or threshold is passed, so the plotting function's own
        # defaults apply (the messages below describe them as 0.4 : 0.6).
        results, fig = plot_prf_prediction(
            sequence=example_sequence,
            title="示例序列的移码概率预测 (默认集成权重 4:6)",
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(f"满足阈值条件的位点数: {len(results[results['Ensemble_Probability'] > 0])}")
        print(f"使用集成权重比例: Short模型 0.4, Long模型 0.6")

        # Display the figure.
        plt.show()
    except Exception as e:
        # Demo-level boundary: report the problem and signal failure.
        print(f"绘图过程中出错: {str(e)}")
        return None, None

    return results, fig
| 
 |  | ||||||
def example_custom_ensemble_weights():
    """Plot the same sample sequence under several ensemble weights.

    For each ``(ensemble_weight, description)`` pair the function calls
    ``plot_prf_prediction``, prints the mean of the short, long and ensemble
    probability columns, and shows the figure. A failure in one configuration
    is reported and the remaining configurations are still attempted.

    Returns:
        bool: ``True`` if every configuration was plotted without raising,
        ``False`` if at least one failed. The function previously returned
        ``None`` implicitly, which made ``main()`` report this example as
        failed even when all plots succeeded.
    """
    print("=" * 50)
    print("自定义集成权重绘图示例")
    print("=" * 50)

    # Sample sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
    )

    # Ensemble weights to try, each paired with the label used in the output.
    weight_configs = [
        (0.2, "Long模型主导 (2:8)"),
        (0.5, "等权重组合 (5:5)"),
        (0.7, "Short模型主导 (7:3)"),
    ]

    all_succeeded = True
    for ensemble_weight, description in weight_configs:
        print(f"\n测试集成权重配置: {description}")
        try:
            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 - {description}",
                figsize=(14, 7),
            )

            print(f"预测完成!共处理 {len(results)} 个位置")
            print(f"满足阈值条件的位点数: {len(results[results['Ensemble_Probability'] > 0])}")

            # Summary statistics for this configuration.
            print("预测统计信息:")
            print(f"  Short模型平均概率: {results['Short_Probability'].mean():.3f}")
            print(f"  Long模型平均概率: {results['Long_Probability'].mean():.3f}")
            print(f"  集成平均概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f"  集成权重比例: Short:{ensemble_weight:.1f}, Long:{1-ensemble_weight:.1f}")

            plt.show()

        except Exception as e:
            # Best-effort demo: record the failure and continue with the
            # remaining configurations.
            print(f"集成权重 {ensemble_weight} 绘图时出错: {str(e)}")
            all_succeeded = False

    return all_succeeded
| 
 |  | ||||||
def example_ensemble_comparison():
    """Compare three ensemble weights in one stacked bar-chart figure.

    Creates a ``PRFPredictor``, calls ``predict_sequence`` once per weight,
    draws each result's ``Ensemble_Probability`` against ``Position`` on its
    own subplot, shows the figure and prints per-weight statistics.

    Returns:
        tuple: ``(all_results, fig)`` where ``all_results`` holds one
        prediction result per weight in the order tried, or ``(None, None)``
        if any step raised.
    """
    separator = "=" * 50
    print(separator)
    print("集成权重对比绘图示例")
    print(separator)

    # Sample sequence.
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))

    try:
        # One predictor instance is reused for all weights.
        predictor = PRFPredictor()

        # Three weights and the labels shown in subplot titles and the report.
        weights = [0.3, 0.4, 0.6]
        weight_names = ["Long主导 (3:7)", "默认权重 (4:6)", "Short主导 (6:4)"]

        # One row per weight.
        fig, axes = plt.subplots(3, 1, figsize=(15, 12))
        fig.suptitle('不同集成权重配置的预测结果对比', fontsize=16)

        all_results = []
        last_index = len(weights) - 1

        for i, name in enumerate(weight_names):
            # Predict with this weight and keep the result for the report.
            results = predictor.predict_sequence(
                sequence=example_sequence,
                ensemble_weight=weights[i],
            )
            all_results.append(results)

            # Bar chart of the ensemble probability at each position.
            ax = axes[i]
            ax.bar(
                results['Position'],
                results['Ensemble_Probability'],
                alpha=0.7,
                color=f'C{i}',
                width=2,
            )
            ax.set_title(f'{name} - 平均概率: {results["Ensemble_Probability"].mean():.3f}')
            ax.set_ylabel('概率')
            ax.grid(True, alpha=0.3)
            ax.set_ylim(0, 1)

            # Only the bottom subplot carries the x-axis label.
            if i == last_index:
                ax.set_xlabel('序列位置')

        plt.tight_layout()
        plt.show()

        # Per-weight comparison statistics.
        print("\n集成权重对比统计:")
        for name, results in zip(weight_names, all_results):
            print(f"{name}:")
            print(f"  平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f"  最大集成概率: {results['Ensemble_Probability'].max():.3f}")
            print(f"  非零预测数量: {(results['Ensemble_Probability'] > 0).sum()}")

    except Exception as e:
        print(f"集成权重对比时出错: {str(e)}")
        return None, None

    return all_results, fig
| 
 |  | ||||||
def example_save_plot():
    """Render plots for three ensemble weights with a ``save_path`` set.

    Ensures the ``prediction_plots`` directory exists, then calls
    ``plot_prf_prediction`` once per weight with explicit thresholds, a
    per-weight ``save_path`` and ``dpi=300``. Each figure is closed right
    after the call instead of being shown.

    Returns:
        bool: ``True`` if all configurations completed, ``False`` if any
        call inside the guarded section raised.
    """
    rule = "=" * 50
    print(rule)
    print("保存图片示例")
    print(rule)

    # Output directory; created outside the guarded section, so a failure
    # here propagates to the caller.
    save_dir = "prediction_plots"
    os.makedirs(save_dir, exist_ok=True)

    # Sample sequence.
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))

    # Ensemble weight paired with the suffix used in the output file name.
    weight_configs = (
        (0.3, "long_dominant"),
        (0.5, "equal_weight"),
        (0.7, "short_dominant"),
    )

    try:
        for ensemble_weight, file_suffix in weight_configs:
            save_path = os.path.join(save_dir, f"prediction_{file_suffix}.png")
            _, fig = plot_prf_prediction(
                sequence=example_sequence,
                short_threshold=0.6,
                long_threshold=0.75,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 (集成权重 {ensemble_weight:.1f}:{1-ensemble_weight:.1f})",
                save_path=save_path,
                dpi=300,
            )

            print(f"图片已保存至: {save_path}")

            # Close without displaying the figure.
            plt.close(fig)

        print("所有集成权重配置的图片都已保存完成")
    except Exception as e:
        print(f"保存图片过程中出错: {str(e)}")
        return False

    return True
| 
 |  | ||||||
def example_direct_predictor_usage():
    """Plot through ``PRFPredictor.plot_sequence_prediction`` directly.

    Instantiates ``PRFPredictor``, plots a sample sequence with explicit
    thresholds and an ensemble weight of 0.3, prints the first ten rows of
    the probability columns, and shows the figure.

    Returns:
        tuple: ``(results, fig)`` from ``plot_sequence_prediction``, or
        ``(None, None)`` if any step raised.
    """
    divider = "=" * 50
    print(divider)
    print("直接使用PRFPredictor类绘图示例")
    print(divider)

    try:
        # Build the predictor inside the guarded section so a construction
        # failure is reported like any other error.
        predictor = PRFPredictor()

        # Sample sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
            "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        ))

        # Plot via the class method with a custom ensemble weight.
        results, fig = predictor.plot_sequence_prediction(
            sequence=example_sequence,
            short_threshold=0.65,
            long_threshold=0.8,
            ensemble_weight=0.3,
            title="使用PRFPredictor类的绘图结果 (集成权重 3:7)",
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(f"使用集成权重比例: Short:{0.3:.1f}, Long:{0.7:.1f}")

        # Show the first rows of the position and probability columns.
        print("\n前10个预测结果:")
        preview_columns = [
            'Position',
            'Short_Probability',
            'Long_Probability',
            'Ensemble_Probability',
        ]
        print(results[preview_columns].head(10))

        # The weight description column is printed only when it is present.
        if 'Ensemble_Weights' in results.columns:
            print(f"\n集成权重配置: {results['Ensemble_Weights'].iloc[0]}")

        plt.show()

    except Exception as e:
        print(f"使用PRFPredictor类时出错: {str(e)}")
        return None, None

    return results, fig
| 
 |  | ||||||
def example_new_api_usage():
    """Exercise ``predict_sequence`` and ``predict_regions`` without plotting.

    Runs ``predict_sequence`` on a short sample, then ``predict_regions`` on
    a single sequence made by right-padding that sample with ``"A"`` to 399
    characters, and prints the probability-related column names and the mean
    ensemble probability of each result.

    Returns:
        tuple: ``(results, region_results)``, or ``(None, None)`` if any
        step raised.
    """
    line = "=" * 50
    print(line)
    print("新API方法使用示例")
    print(line)

    try:
        predictor = PRFPredictor()

        # Sample sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        ))

        print("1. 使用新的 predict_sequence() 方法:")
        results = predictor.predict_sequence(
            sequence=example_sequence,
            ensemble_weight=0.3,
        )

        print(f"   序列预测完成: {len(results)} 个位置")
        print(f"   主要输出字段: {[col for col in results.columns if 'Probability' in col]}")

        print("\n2. 使用新的 predict_regions() 方法:")
        # Simulate one 399-character region by right-padding with "A"; a
        # sample already at least that long is left unchanged.
        region_sequences = [example_sequence.ljust(399, "A")]
        region_results = predictor.predict_regions(
            sequences=region_sequences,
            ensemble_weight=0.4,
        )

        print(f"   区域预测完成: {len(region_results)} 个序列")
        print(f"   主要输出字段: {[col for col in region_results.columns if 'Probability' in col or 'Sequence' in col]}")

        # Summary statistics for both calls.
        print("\n3. 结果统计:")
        print(f"   序列预测平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
        print(f"   区域预测平均集成概率: {region_results['Ensemble_Probability'].mean():.3f}")

    except Exception as e:
        print(f"新API使用时出错: {str(e)}")
        return None, None

    return results, region_results
| 
 |  | ||||||
def _example_succeeded(result):
    """Return True when an example's return value signals success.

    The examples report failure in three ways: an implicit ``None``, the
    boolean ``False``, or a tuple whose elements are all ``None`` (for
    example ``(None, None)``). Anything else counts as success.
    """
    if result is None or result is False:
        return False
    if isinstance(result, tuple) and result and all(item is None for item in result):
        return False
    return True


def main():
    """Run every example in order and print a feature summary.

    Each example runs inside its own ``try`` block so one failing example
    does not stop the rest. A check mark or cross is printed per example
    based on its return value.

    The previous check, ``result is not None and result != False``, treated
    the ``(None, None)`` failure tuple as success because a non-empty tuple
    is never equal to ``False``. Failure tuples are now recognised.
    """
    print("FScanpy 序列预测绘图功能演示")
    print("=" * 60)
    print("新功能:规范化的集成权重参数 (ensemble_weight)")
    print("权重范围:0.0 到 1.0 (对应 Short模型的权重,Long模型权重 = 1 - ensemble_weight)")
    print("新命名:Ensemble_Probability 替代 Voting_Probability")
    print("=" * 60)

    # (label, callable) pairs, executed in this order.
    examples = [
        ("1. 基础绘图示例", example_basic_plotting),
        ("2. 自定义集成权重示例", example_custom_ensemble_weights),
        ("3. 集成权重对比示例", example_ensemble_comparison),
        ("4. 保存图片示例", example_save_plot),
        ("5. 直接使用PRFPredictor类示例", example_direct_predictor_usage),
        ("6. 新API方法使用示例", example_new_api_usage),
    ]

    for name, func in examples:
        print(f"\n{name}")
        try:
            result = func()
            if _example_succeeded(result):
                print("✓ 示例执行成功")
            else:
                print("✗ 示例执行失败")
        except Exception as e:
            # Top-level boundary of the demo: report and move on.
            print(f"✗ 示例执行出错: {str(e)}")

        print("-" * 50)

    print("\n演示完成!")
    print("\n📊 新功能总结:")
    print("1. plot_prf_prediction(): 便捷的绘图函数")
    print("2. PRFPredictor.plot_sequence_prediction(): 类方法绘图")
    print("3. PRFPredictor.predict_sequence(): 序列滑动窗口预测(替代predict_full)")
    print("4. PRFPredictor.predict_regions(): 区域批量预测(替代predict_region)")
    print("5. 支持自定义阈值、标题、保存路径等参数")
    print("6. 新增 ensemble_weight 参数,可调节两个模型的集成权重比例")
    # Item 7 belongs to the numbered feature list; it used to be printed
    # after the output-field section.
    print("7. 自动保存PNG和PDF两种格式")
    print("\n⚖️ 集成权重示例:")
    print("   - ensemble_weight=0.2: Short模型20%, Long模型80% (Long主导)")
    print("   - ensemble_weight=0.4: Short模型40%, Long模型60% (默认平衡)")
    print("   - ensemble_weight=0.5: Short模型50%, Long模型50% (等权重)")
    print("   - ensemble_weight=0.7: Short模型70%, Long模型30% (Short主导)")
    print("\n📂 输出字段:")
    print("   - Short_Probability: Short模型(HistGB)预测概率")
    print("   - Long_Probability: Long模型(BiLSTM-CNN)预测概率")
    print("   - Ensemble_Probability: 集成预测概率(主要结果)")
    print("   - Ensemble_Weights: 权重配置信息")
    print("   - Short_Sequence: 33bp序列")
    print("   - Long_Sequence: 399bp序列")
| 
 |  | ||||||
# Run the full demo only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Loading…
	
		Reference in New Issue