重新运行demo,并进行路径管理
This commit is contained in:
parent
6f7510455a
commit
cce177429c
File diff suppressed because one or more lines are too long
|
@ -267,9 +267,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
||||||
|
|
||||||
## 🆘 Support
|
## 🆘 Support
|
||||||
|
|
||||||
- **Issues**: [GitHub Issues](https://github.com/your-org/FScanpy/issues)
|
|
||||||
- **Documentation**: [Tutorial](tutorial/tutorial.md)
|
- **Documentation**: [Tutorial](tutorial/tutorial.md)
|
||||||
- **Examples**: [Demo Notebook](FScanpy_Demo.ipynb)
|
- **Usage Example**: [Demo Notebook](FScanpy_Demo.ipynb)
|
||||||
|
- **Prediction Results Explained**: [Prediction Results Explained](tutorial/predict_sample.ipynb)
|
||||||
|
|
||||||
## 🏗️ Dependencies
|
## 🏗️ Dependencies
|
||||||
|
|
||||||
|
|
|
@ -1,362 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
FScanpy 序列预测绘图示例
|
|
||||||
|
|
||||||
展示如何使用新的 plot_prf_prediction 函数绘制序列的移码概率预测结果
|
|
||||||
包含集成权重参数的使用示例
|
|
||||||
"""
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import os
|
|
||||||
from FScanpy import plot_prf_prediction, PRFPredictor
|
|
||||||
|
|
||||||
def example_basic_plotting():
    """Run the basic plotting demo using the library's default settings.

    Plots the frameshift-probability prediction of a built-in demo sequence
    with ``plot_prf_prediction`` (only the title is overridden), prints a
    short summary and shows the figure.

    Returns:
        The ``(results, fig)`` pair returned by ``plot_prf_prediction``, or
        ``(None, None)`` if any step of the demo raised.
    """
    banner = "=" * 50
    print(banner)
    print("基础绘图示例")
    print(banner)

    # Demo sequence; replace it with a real sequence as needed.
    example_sequence = "".join([
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ])

    try:
        # No ensemble_weight is passed, so the library default applies; the
        # messages below describe it as 0.4 (short) : 0.6 (long).
        results, fig = plot_prf_prediction(
            sequence=example_sequence,
            title="示例序列的移码概率预测 (默认集成权重 4:6)",
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        positive_sites = results[results['Ensemble_Probability'] > 0]
        print(f"满足阈值条件的位点数: {len(positive_sites)}")
        print("使用集成权重比例: Short模型 0.4, Long模型 0.6")

        # Display the figure.
        plt.show()
    except Exception as e:
        print(f"绘图过程中出错: {str(e)}")
        return None, None

    return results, fig
|
|
||||||
|
|
||||||
def example_custom_ensemble_weights():
    """Plot the demo sequence under several ensemble-weight settings.

    For every configured weight the sequence is plotted with
    ``plot_prf_prediction``, summary statistics of the three probability
    columns are printed and the figure is shown. A failure in one
    configuration is reported and does not stop the remaining ones.

    Returns:
        bool: ``True`` when every configuration was plotted without raising,
        ``False`` if at least one failed. (Previously the function returned
        ``None`` implicitly, which the runner in ``main`` reports as a
        failed example even when all plots succeeded.)
    """
    print("=" * 50)
    print("自定义集成权重绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
    )

    # (short-model weight, human-readable description) pairs to try.
    weight_configs = [
        (0.2, "Long模型主导 (2:8)"),
        (0.5, "等权重组合 (5:5)"),
        (0.7, "Short模型主导 (7:3)"),
    ]

    all_succeeded = True
    for ensemble_weight, description in weight_configs:
        print(f"\n测试集成权重配置: {description}")
        try:
            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 - {description}",
                figsize=(14, 7),
            )

            print(f"预测完成!共处理 {len(results)} 个位置")
            print(f"满足阈值条件的位点数: {len(results[results['Ensemble_Probability'] > 0])}")

            # Summary statistics for this configuration.
            print("预测统计信息:")
            print(f" Short模型平均概率: {results['Short_Probability'].mean():.3f}")
            print(f" Long模型平均概率: {results['Long_Probability'].mean():.3f}")
            print(f" 集成平均概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f" 集成权重比例: Short:{ensemble_weight:.1f}, Long:{1-ensemble_weight:.1f}")

            plt.show()
        except Exception as e:
            # Keep going with the remaining configurations, but remember
            # that the example as a whole did not fully succeed.
            all_succeeded = False
            print(f"集成权重 {ensemble_weight} 绘图时出错: {str(e)}")

    return all_succeeded
|
|
||||||
|
|
||||||
def example_ensemble_comparison():
    """Compare several ensemble weights side by side in one figure.

    Runs ``PRFPredictor.predict_sequence`` on a demo sequence once per
    configured weight, draws each result's ``Ensemble_Probability`` as a bar
    chart in its own subplot row, shows the figure and prints comparison
    statistics.

    Returns:
        ``(all_results, fig)`` where ``all_results`` holds one prediction
        result per weight (in configuration order), or ``(None, None)`` if
        any step raised. On failure the partially drawn figure is closed so
        it does not stay registered with pyplot.
    """
    print("=" * 50)
    print("集成权重对比绘图示例")
    print("=" * 50)

    # Demo sequence.
    example_sequence = (
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT"
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG"
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC"
    )

    # (short-model weight, label) pairs; one subplot row is drawn per entry.
    configs = [
        (0.3, "Long主导 (3:7)"),
        (0.4, "默认权重 (4:6)"),
        (0.6, "Short主导 (6:4)"),
    ]

    fig = None
    try:
        predictor = PRFPredictor()

        # squeeze=False always yields a 2-D axes array, so the row indexing
        # below keeps working if the number of configurations changes.
        fig, axes = plt.subplots(len(configs), 1, figsize=(15, 12), squeeze=False)
        fig.suptitle('不同集成权重配置的预测结果对比', fontsize=16)

        all_results = []
        for i, (weight, name) in enumerate(configs):
            results = predictor.predict_sequence(
                sequence=example_sequence,
                ensemble_weight=weight,
            )
            all_results.append(results)

            # One bar chart per configuration.
            ax = axes[i, 0]
            ax.bar(results['Position'], results['Ensemble_Probability'],
                   alpha=0.7, color=f'C{i}', width=2)
            ax.set_title(f'{name} - 平均概率: {results["Ensemble_Probability"].mean():.3f}')
            ax.set_ylabel('概率')
            ax.grid(True, alpha=0.3)
            ax.set_ylim(0, 1)

        # Only the bottom subplot carries the shared x-axis label.
        axes[-1, 0].set_xlabel('序列位置')

        plt.tight_layout()
        plt.show()

        # Print the comparison statistics.
        print("\n集成权重对比统计:")
        for (_, name), results in zip(configs, all_results):
            print(f"{name}:")
            print(f" 平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
            print(f" 最大集成概率: {results['Ensemble_Probability'].max():.3f}")
            print(f" 非零预测数量: {(results['Ensemble_Probability'] > 0).sum()}")

        return all_results, fig

    except Exception as e:
        # Release the half-built figure instead of leaking it.
        if fig is not None:
            plt.close(fig)
        print(f"集成权重对比时出错: {str(e)}")
        return None, None
|
|
||||||
|
|
||||||
def example_save_plot():
    """Save prediction plots for several ensemble weights to disk.

    Creates the ``prediction_plots`` directory if needed, then calls
    ``plot_prf_prediction`` once per configured weight with a ``save_path``
    inside that directory and closes each figure without showing it.

    Returns:
        bool: ``True`` once all configurations were processed, ``False`` if
        plotting or saving raised.
    """
    separator = "=" * 50
    print(separator)
    print("保存图片示例")
    print(separator)

    # Make sure the output directory exists.
    save_dir = "prediction_plots"
    os.makedirs(save_dir, exist_ok=True)

    # Demo sequence.
    example_sequence = "".join((
        "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
        "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        "CTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
    ))

    # Short-model weight -> file-name suffix, processed in this order.
    suffix_by_weight = {
        0.3: "long_dominant",
        0.5: "equal_weight",
        0.7: "short_dominant",
    }

    try:
        for ensemble_weight, file_suffix in suffix_by_weight.items():
            file_name = f"prediction_{file_suffix}.png"
            save_path = os.path.join(save_dir, file_name)
            long_weight = 1 - ensemble_weight

            results, fig = plot_prf_prediction(
                sequence=example_sequence,
                short_threshold=0.6,
                long_threshold=0.75,
                ensemble_weight=ensemble_weight,
                title=f"移码概率预测 (集成权重 {ensemble_weight:.1f}:{long_weight:.1f})",
                save_path=save_path,
                dpi=300,
            )
            print(f"图片已保存至: {save_path}")

            # The figure is only needed on disk; close it without showing.
            plt.close(fig)
    except Exception as e:
        print(f"保存图片过程中出错: {str(e)}")
        return False

    print("所有集成权重配置的图片都已保存完成")
    return True
|
|
||||||
|
|
||||||
def example_direct_predictor_usage():
    """Plot a prediction through a ``PRFPredictor`` instance directly.

    Instantiates ``PRFPredictor``, calls its ``plot_sequence_prediction``
    method with custom thresholds and a custom ensemble weight, prints the
    first ten rows of the probability columns (plus the ``Ensemble_Weights``
    value when that column is present) and shows the figure.

    Returns:
        The ``(results, fig)`` pair from ``plot_sequence_prediction``, or
        ``(None, None)`` if any step raised.
    """
    rule = "=" * 50
    print(rule)
    print("直接使用PRFPredictor类绘图示例")
    print(rule)

    # Short-model weight used for this demo; the long model gets the rest.
    short_weight = 0.3
    probability_columns = [
        'Position',
        'Short_Probability',
        'Long_Probability',
        'Ensemble_Probability',
    ]

    try:
        # Build the predictor ourselves instead of using the helper function.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
            "AGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCTAGCTAGCTAGCTAG",
        ))

        # Plot via the instance method with a custom ensemble weight.
        results, fig = predictor.plot_sequence_prediction(
            sequence=example_sequence,
            short_threshold=0.65,
            long_threshold=0.8,
            ensemble_weight=short_weight,
            title="使用PRFPredictor类的绘图结果 (集成权重 3:7)",
        )

        print(f"预测完成!共处理 {len(results)} 个位置")
        print(f"使用集成权重比例: Short:{short_weight:.1f}, Long:{1 - short_weight:.1f}")

        # Show the first rows of the probability columns.
        print("\n前10个预测结果:")
        print(results[probability_columns].head(10))

        # Report the weight configuration when the result carries it.
        if 'Ensemble_Weights' in results.columns:
            weights_info = results['Ensemble_Weights'].iloc[0]
            print(f"\n集成权重配置: {weights_info}")

        plt.show()
    except Exception as e:
        print(f"使用PRFPredictor类时出错: {str(e)}")
        return None, None

    return results, fig
|
|
||||||
|
|
||||||
def example_new_api_usage():
    """Demonstrate ``predict_sequence`` and ``predict_regions``.

    Runs ``PRFPredictor.predict_sequence`` on a demo sequence, then pads that
    sequence with ``"A"`` up to 399 characters and passes it as a single
    region to ``PRFPredictor.predict_regions``. Prints the probability
    related column names and the mean ensemble probability of both results.

    Returns:
        ``(results, region_results)`` on success, or ``(None, None)`` if any
        step raised.
    """
    divider = "=" * 50
    print(divider)
    print("新API方法使用示例")
    print(divider)

    try:
        # Build the predictor directly.
        predictor = PRFPredictor()

        # Demo sequence.
        example_sequence = "".join((
            "ATGCGTACGTTAGCGATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC",
            "GATCGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCGATCGATCGTAGCT",
        ))

        print("1. 使用新的 predict_sequence() 方法:")
        results = predictor.predict_sequence(
            sequence=example_sequence,
            ensemble_weight=0.3,
        )
        print(f" 序列预测完成: {len(results)} 个位置")
        sequence_fields = [col for col in results.columns if 'Probability' in col]
        print(f" 主要输出字段: {sequence_fields}")

        print("\n2. 使用新的 predict_regions() 方法:")
        # Simulate a 399 bp region by padding the demo sequence with "A".
        padding = "A" * (399 - len(example_sequence))
        region_sequences = [example_sequence + padding]
        region_results = predictor.predict_regions(
            sequences=region_sequences,
            ensemble_weight=0.4,
        )
        print(f" 区域预测完成: {len(region_results)} 个序列")
        region_fields = [
            col for col in region_results.columns
            if 'Probability' in col or 'Sequence' in col
        ]
        print(f" 主要输出字段: {region_fields}")

        # Summary statistics for both calls.
        print("\n3. 结果统计:")
        print(f" 序列预测平均集成概率: {results['Ensemble_Probability'].mean():.3f}")
        print(f" 区域预测平均集成概率: {region_results['Ensemble_Probability'].mean():.3f}")
    except Exception as e:
        print(f"新API使用时出错: {str(e)}")
        return None, None

    return results, region_results
|
|
||||||
|
|
||||||
def _example_succeeded(result):
    """Return True when an example's return value signals success.

    ``None`` and ``False`` mean failure, and so does a non-empty tuple made
    only of ``None`` values: several examples return ``(None, None)`` from
    their error handler, which a plain ``is not None`` check would wrongly
    count as a success.
    """
    if result is None or result is False:
        return False
    if isinstance(result, tuple) and len(result) > 0:
        # Identity checks only: tuple items may be objects (e.g. data
        # frames) whose ``==`` does not yield a plain bool.
        if all(item is None for item in result):
            return False
    return True


def main():
    """Run every demo in order and print a feature summary.

    Each example is called inside its own ``try`` so one failing example does
    not prevent the others from running. After each call a success or failure
    line is printed based on the example's return value.
    """
    print("FScanpy 序列预测绘图功能演示")
    print("=" * 60)
    print("新功能:规范化的集成权重参数 (ensemble_weight)")
    print("权重范围:0.0 到 1.0 (对应 Short模型的权重,Long模型权重 = 1 - ensemble_weight)")
    print("新命名:Ensemble_Probability 替代 Voting_Probability")
    print("=" * 60)

    # (label, callable) pairs, executed in this order.
    examples = [
        ("1. 基础绘图示例", example_basic_plotting),
        ("2. 自定义集成权重示例", example_custom_ensemble_weights),
        ("3. 集成权重对比示例", example_ensemble_comparison),
        ("4. 保存图片示例", example_save_plot),
        ("5. 直接使用PRFPredictor类示例", example_direct_predictor_usage),
        ("6. 新API方法使用示例", example_new_api_usage),
    ]

    for name, func in examples:
        print(f"\n{name}")
        try:
            result = func()
        except Exception as e:
            # Last-resort boundary so the remaining demos still run.
            print(f"✗ 示例执行出错: {str(e)}")
        else:
            if _example_succeeded(result):
                print("✓ 示例执行成功")
            else:
                print("✗ 示例执行失败")

        print("-" * 50)

    print("\n演示完成!")
    print("\n📊 新功能总结:")
    print("1. plot_prf_prediction(): 便捷的绘图函数")
    print("2. PRFPredictor.plot_sequence_prediction(): 类方法绘图")
    print("3. PRFPredictor.predict_sequence(): 序列滑动窗口预测(替代predict_full)")
    print("4. PRFPredictor.predict_regions(): 区域批量预测(替代predict_region)")
    print("5. 支持自定义阈值、标题、保存路径等参数")
    print("6. 新增 ensemble_weight 参数,可调节两个模型的集成权重比例")
    print("\n⚖️ 集成权重示例:")
    print(" - ensemble_weight=0.2: Short模型20%, Long模型80% (Long主导)")
    print(" - ensemble_weight=0.4: Short模型40%, Long模型60% (默认平衡)")
    print(" - ensemble_weight=0.5: Short模型50%, Long模型50% (等权重)")
    print(" - ensemble_weight=0.7: Short模型70%, Long模型30% (Short主导)")
    print("\n📂 输出字段:")
    print(" - Short_Probability: Short模型(HistGB)预测概率")
    print(" - Long_Probability: Long模型(BiLSTM-CNN)预测概率")
    print(" - Ensemble_Probability: 集成预测概率(主要结果)")
    print(" - Ensemble_Weights: 权重配置信息")
    print(" - Short_Sequence: 33bp序列")
    print(" - Long_Sequence: 399bp序列")
    print("7. 自动保存PNG和PDF两种格式")


if __name__ == "__main__":
    main()
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue