重新运行demo，并进行路径管理

2025-08-14 16:06:49 +08:00 · 2025-08-14 16:06:49 +08:00 · cce177429c
parent 6f7510455a
commit cce177429c
4 changed files with 308 additions and 593 deletions
--- a/FScanpy_Demo.ipynb
+++ b/FScanpy_Demo.ipynb
--- a/README.md
+++ b/README.md
@@ -267,9 +267,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ## 🆘 Support
 - **Issues**: [GitHub Issues](https://github.com/your-org/FScanpy/issues)
 - **Documentation**: [Tutorial](tutorial/tutorial.md)
-- **Examples**: [Demo Notebook](FScanpy_Demo.ipynb)
+- **Usage Example**: [Demo Notebook](FScanpy_Demo.ipynb)
 - **Predict Result Explain**: [Predict Result Explain](tutorial/predict_sample.ipynb)
 ## 🏗️ Dependencies
--- a/example_plot_prediction.py
+++ b/example_plot_prediction.py
@@ -1,362 +0,0 @@
 #!/usr/bin/env python3
 """
 FScanpy 序列预测绘图示例
 展示如何使用新的 plot_prf_prediction 函数绘制序列的移码概率预测结果
 包含集成权重参数的使用示例
 """
 import matplotlib.pyplot as plt
 import os
 from FScanpy import plot_prf_prediction, PRFPredictor
 def example_basic_plotting():
    """Plot a prediction for a demo sequence with default parameters.

    Passes only the sequence and a title to ``plot_prf_prediction``, prints
    how many rows came back and how many have ``Ensemble_Probability > 0``,
    then shows the figure.

    Returns:
        tuple: ``(results, fig)`` from ``plot_prf_prediction`` on success, or
        ``(None, None)`` if anything inside the ``try`` raised.
    """
    rule = "=" * 50
    print(rule)
    print("基础绘图示例")
    print(rule)
    # Placeholder sequence for the demo; replace it with a real one as needed.
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))
    try:
        # No ensemble_weight is passed, so the callee's default applies; the
        # title and the message below describe it as 0.4 (Short) : 0.6 (Long).
        results, fig = plot_prf_prediction(
            title="示例序列的移码概率预测 (默认集成权重 4:6)",
            sequence=example_sequence,
        )
        position_count = len(results)
        print(f"预测完成！共处理 {position_count} 个位置")
        above_zero = results[results['Ensemble_Probability'] > 0]
        print(f"满足阈值条件的位点数: {len(above_zero)}")
        print("使用集成权重比例: Short模型 0.4, Long模型 0.6")
        # Display the figure.
        plt.show()
    except Exception as e:
        print(f"绘图过程中出错: {e!s}")
        return None, None
    else:
        return results, fig
 def example_custom_ensemble_weights():
    """Plot the same demo sequence under several ensemble-weight settings.

    For each ``(ensemble_weight, description)`` pair the prediction is
    plotted via ``plot_prf_prediction``, summary statistics are printed and
    the figure is shown. A failure for one weight is reported and does not
    stop the remaining weights from being tried.

    Returns:
        bool: ``True`` when every configuration completed without raising,
        ``False`` if at least one failed. The function used to fall off the
        end and return ``None``, which ``main`` reports as a failed example
        even when all plots succeeded.
    """
    print("=" * 50)
    print("自定义集成权重绘图示例")
    print("=" * 50)
    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
    )
    # ensemble_weight is the Short-model share; the Long model gets the rest.
    weight_configs = [
        (0.2, "Long模型主导 (2:8)"),
        (0.5, "等权重组合 (5:5)"),
        (0.7, "Short模型主导 (7:3)"),
    ]
    all_succeeded = True
    for ensemble_weight, description in weight_configs:
        print(f"\n测试集成权重配置: {description}")
        try:
            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 - {description}",
                figsize=(14, 7)
            )
            print(f"预测完成！共处理 {len(results)} 个位置")
            print(f"满足阈值条件的位点数: {len(results[results['Ensemble_Probability'] > 0])}")
            # Summary statistics for this weight.
            print("预测统计信息:")
            print(f"  Short模型平均概率: {results['Short_Probability'].mean():.3f}")
            print(f"  Long模型平均概率: {results['Long_Probability'].mean():.3f}")
            print(f"  集成平均概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f"  集成权重比例: Short:{ensemble_weight:.1f}, Long:{1-ensemble_weight:.1f}")
            plt.show()
        except Exception as e:
            # Keep going with the next weight, but remember the failure so the
            # caller can tell the example did not fully succeed.
            all_succeeded = False
            print(f"集成权重 {ensemble_weight} 绘图时出错: {str(e)}")
    return all_succeeded
 def example_ensemble_comparison():
    """Draw one bar chart per ensemble weight for the same demo sequence.

    Builds a ``PRFPredictor``, runs ``predict_sequence`` with the weights
    0.3, 0.4 and 0.6, plots each result's ``Ensemble_Probability`` against
    ``Position`` in its own subplot, shows the figure, and prints the mean,
    the maximum and the count of non-zero values for each weight.

    Returns:
        tuple: ``(all_results, fig)`` where ``all_results`` lists the three
        prediction results in weight order, or ``(None, None)`` if any step
        raised.
    """
    divider = "=" * 50
    print(divider)
    print("集成权重对比绘图示例")
    print(divider)
    # Demo sequence.
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))
    try:
        predictor = PRFPredictor()
        weights = [0.3, 0.4, 0.6]
        weight_names = ["Long主导 (3:7)", "默认权重 (4:6)", "Short主导 (6:4)"]
        # One subplot row per weight.
        fig, axes = plt.subplots(3, 1, figsize=(15, 12))
        fig.suptitle('不同集成权重配置的预测结果对比', fontsize=16)
        all_results = []
        last_row = len(weights) - 1
        for row, (weight, label) in enumerate(zip(weights, weight_names)):
            prediction = predictor.predict_sequence(
                sequence=example_sequence,
                ensemble_weight=weight,
            )
            all_results.append(prediction)
            panel = axes[row]
            panel.bar(
                prediction['Position'],
                prediction['Ensemble_Probability'],
                alpha=0.7,
                color=f'C{row}',
                width=2,
            )
            mean_probability = prediction["Ensemble_Probability"].mean()
            panel.set_title(f'{label} - 平均概率: {mean_probability:.3f}')
            panel.set_ylabel('概率')
            panel.grid(True, alpha=0.3)
            panel.set_ylim(0, 1)
            # Only the bottom subplot carries the x-axis label.
            if row == last_row:
                panel.set_xlabel('序列位置')
        plt.tight_layout()
        plt.show()
        # Per-weight comparison statistics.
        print("\n集成权重对比统计:")
        for label, prediction in zip(weight_names, all_results):
            print(f"{label}:")
            ensemble = prediction['Ensemble_Probability']
            print(f"  平均集成概率: {ensemble.mean():.3f}")
            print(f"  最大集成概率: {ensemble.max():.3f}")
            print(f"  非零预测数量: {(ensemble > 0).sum()}")
    except Exception as e:
        print(f"集成权重对比时出错: {e!s}")
        return None, None
    else:
        return all_results, fig
 def example_save_plot():
    """Save prediction plots for three ensemble weights without showing them.

    Creates the ``prediction_plots`` directory if needed, then calls
    ``plot_prf_prediction`` once per weight with a ``save_path`` inside that
    directory and closes each figure right away.

    Returns:
        bool: ``True`` if every plot call completed, ``False`` if any call
        inside the ``try`` raised. Directory creation happens before the
        ``try``, so its errors propagate to the caller.
    """
    bar = "=" * 50
    print(bar)
    print("保存图片示例")
    print(bar)
    # Output directory for the generated images.
    save_dir = "prediction_plots"
    os.makedirs(save_dir, exist_ok=True)
    # Demo sequence.
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))
    try:
        # (Short-model weight, file-name suffix) for each saved image.
        weight_configs = (
            (0.3, "long_dominant"),
            (0.5, "equal_weight"),
            (0.7, "short_dominant"),
        )
        for ensemble_weight, file_suffix in weight_configs:
            file_name = f"prediction_{file_suffix}.png"
            save_path = os.path.join(save_dir, file_name)
            long_weight = 1 - ensemble_weight
            _, fig = plot_prf_prediction(
                sequence=example_sequence,
                short_threshold=0.6,
                long_threshold=0.75,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 (集成权重 {ensemble_weight:.1f}:{long_weight:.1f})",
                save_path=save_path,
                dpi=300,
            )
            print(f"图片已保存至: {save_path}")
            # Close straight away instead of displaying the figure.
            plt.close(fig)
        print("所有集成权重配置的图片都已保存完成")
    except Exception as e:
        print(f"保存图片过程中出错: {e!s}")
        return False
    else:
        return True
 def example_direct_predictor_usage():
    """Plot through a ``PRFPredictor`` instance rather than the module helper.

    Calls ``plot_sequence_prediction`` with explicit thresholds and an
    ensemble weight of 0.3, prints the row count and the first ten rows of
    the probability columns, prints the first ``Ensemble_Weights`` value when
    that column exists, and shows the figure.

    Returns:
        tuple: ``(results, fig)`` from ``plot_sequence_prediction``, or
        ``(None, None)`` if any step raised.
    """
    header = "=" * 50
    print(header)
    print("直接使用PRFPredictor类绘图示例")
    print(header)
    try:
        predictor = PRFPredictor()
        # Demo sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
            "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        ))
        # Custom thresholds plus a custom ensemble weight (0.3 for Short).
        plot_options = {
            "short_threshold": 0.65,
            "long_threshold": 0.8,
            "ensemble_weight": 0.3,
            "title": "使用PRFPredictor类的绘图结果 (集成权重 3:7)",
        }
        results, fig = predictor.plot_sequence_prediction(
            sequence=example_sequence,
            **plot_options,
        )
        print(f"预测完成！共处理 {len(results)} 个位置")
        print("使用集成权重比例: Short:0.3, Long:0.7")
        # Preview of the first ten rows.
        print("\n前10个预测结果:")
        preview = results[
            ['Position', 'Short_Probability', 'Long_Probability', 'Ensemble_Probability']
        ].head(10)
        print(preview)
        # The weight description column is optional in the result.
        if 'Ensemble_Weights' in results.columns:
            first_weights = results['Ensemble_Weights'].iloc[0]
            print(f"\n集成权重配置: {first_weights}")
        plt.show()
    except Exception as e:
        print(f"使用PRFPredictor类时出错: {e!s}")
        return None, None
    else:
        return results, fig
 def example_new_api_usage():
    """Exercise ``predict_sequence`` and ``predict_regions`` on a predictor.

    Runs ``predict_sequence`` on a short demo sequence, then pads that same
    sequence with ``"A"`` up to 399 characters and passes it as a one-element
    list to ``predict_regions``. Prints the row counts, the matching column
    names and the mean ``Ensemble_Probability`` of both results.

    Returns:
        tuple: ``(results, region_results)``, or ``(None, None)`` if any step
        raised.
    """
    line = "=" * 50
    print(line)
    print("新API方法使用示例")
    print(line)
    try:
        predictor = PRFPredictor()
        # Demo sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        ))
        print("1. 使用新的 predict_sequence() 方法:")
        results = predictor.predict_sequence(
            sequence=example_sequence,
            ensemble_weight=0.3,
        )
        print(f"   序列预测完成: {len(results)} 个位置")
        probability_fields = [name for name in results.columns if 'Probability' in name]
        print(f"   主要输出字段: {probability_fields}")
        print("\n2. 使用新的 predict_regions() 方法:")
        # Simulate a 399 bp region by right-padding the demo sequence with "A".
        padded_region = example_sequence.ljust(399, "A")
        region_results = predictor.predict_regions(
            sequences=[padded_region],
            ensemble_weight=0.4,
        )
        print(f"   区域预测完成: {len(region_results)} 个序列")
        region_fields = [
            name
            for name in region_results.columns
            if 'Probability' in name or 'Sequence' in name
        ]
        print(f"   主要输出字段: {region_fields}")
        # Summary statistics.
        print("\n3. 结果统计:")
        sequence_mean = results['Ensemble_Probability'].mean()
        print(f"   序列预测平均集成概率: {sequence_mean:.3f}")
        region_mean = region_results['Ensemble_Probability'].mean()
        print(f"   区域预测平均集成概率: {region_mean:.3f}")
    except Exception as e:
        print(f"新API使用时出错: {e!s}")
        return None, None
    else:
        return results, region_results
 def _example_succeeded(result):
    """Tell whether an example's return value signals success.

    The examples in this file report failure as ``None``, ``False`` or a
    tuple made up entirely of ``None`` (for instance ``(None, None)``).
    Any other value counts as success.
    """
    if result is None or result is False:
        return False
    if isinstance(result, tuple) and result and all(item is None for item in result):
        return False
    return True

 def main(examples=None):
    """Run the demo examples in order and print a feature summary.

    Each example is called inside its own ``try`` so that one failing example
    does not stop the rest. The outcome printed per example is based on its
    return value: the previous check (``result is not None and result !=
    False``) treated the ``(None, None)`` failure value as success.

    Args:
        examples: Optional iterable of ``(label, callable)`` pairs to run.
            When omitted, the six examples defined in this file are run.
    """
    print("FScanpy 序列预测绘图功能演示")
    print("=" * 60)
    print("新功能：规范化的集成权重参数 (ensemble_weight)")
    print("权重范围：0.0 到 1.0 (对应 Short模型的权重，Long模型权重 = 1 - ensemble_weight)")
    print("新命名：Ensemble_Probability 替代 Voting_Probability")
    print("=" * 60)
    if examples is None:
        examples = [
            ("1. 基础绘图示例", example_basic_plotting),
            ("2. 自定义集成权重示例", example_custom_ensemble_weights),
            ("3. 集成权重对比示例", example_ensemble_comparison),
            ("4. 保存图片示例", example_save_plot),
            ("5. 直接使用PRFPredictor类示例", example_direct_predictor_usage),
            ("6. 新API方法使用示例", example_new_api_usage),
        ]
    for name, func in examples:
        print(f"\n{name}")
        try:
            result = func()
            if _example_succeeded(result):
                print("✓ 示例执行成功")
            else:
                print("✗ 示例执行失败")
        except Exception as e:
            # Top-level boundary: report the error and move on to the next example.
            print(f"✗ 示例执行出错: {str(e)}")
        print("-" * 50)
    print("\n演示完成！")
    print("\n📊 新功能总结:")
    print("1. plot_prf_prediction(): 便捷的绘图函数")
    print("2. PRFPredictor.plot_sequence_prediction(): 类方法绘图")
    print("3. PRFPredictor.predict_sequence(): 序列滑动窗口预测（替代predict_full）")
    print("4. PRFPredictor.predict_regions(): 区域批量预测（替代predict_region）")
    print("5. 支持自定义阈值、标题、保存路径等参数")
    print("6. 新增 ensemble_weight 参数，可调节两个模型的集成权重比例")
    # Item 7 continues the numbered list above; it used to be printed after
    # the output-field section, detached from items 1-6.
    print("7. 自动保存PNG和PDF两种格式")
    print("\n⚖️ 集成权重示例:")
    print("   - ensemble_weight=0.2: Short模型20%, Long模型80% (Long主导)")
    print("   - ensemble_weight=0.4: Short模型40%, Long模型60% (默认平衡)")
    print("   - ensemble_weight=0.5: Short模型50%, Long模型50% (等权重)")
    print("   - ensemble_weight=0.7: Short模型70%, Long模型30% (Short主导)")
    print("\n📂 输出字段:")
    print("   - Short_Probability: Short模型(HistGB)预测概率")
    print("   - Long_Probability: Long模型(BiLSTM-CNN)预测概率")
    print("   - Ensemble_Probability: 集成预测概率（主要结果）")
    print("   - Ensemble_Weights: 权重配置信息")
    print("   - Short_Sequence: 33bp序列")
    print("   - Long_Sequence: 399bp序列")
 # Run the full demo only when this file is executed as a script.
 if __name__ == "__main__":
    main()
--- a/tutorial/predict_sample.ipynb
+++ b/tutorial/predict_sample.ipynb