完善真实样本的预测以及结果解读

This commit is contained in:
Chenlab 2025-06-12 02:52:35 +08:00
parent f320a1696f
commit 6f7510455a
5 changed files with 412 additions and 6 deletions

View File

@ -22,6 +22,7 @@ def get_test_data_path(filename: str) -> str:
>>> blastx_file = get_test_data_path('blastx_example.xlsx')
>>> mrna_file = get_test_data_path('mrna_example.fasta')
>>> region_file = get_test_data_path('region_example.csv')
>>> full_seq_file = get_test_data_path('full_seq.xlsx')
"""
current_dir = Path(__file__).parent
test_data_dir = current_dir / "test_data"
@ -93,7 +94,8 @@ def print_test_data_info():
file_descriptions = {
'blastx_example.xlsx': '🧬 BLASTX比对结果示例 (1000条记录)',
'mrna_example.fasta': '🧬 mRNA序列示例数据',
'region_example.csv': '🎯 PRF区域验证数据 (含标签)'
'region_example.csv': '🎯 PRF区域验证数据 (含标签)',
'full_seq.xlsx': '🧬 完整序列示例数据'
}
for filename in files:
@ -112,7 +114,7 @@ def print_test_data_info():
print(" blastx_file = get_test_data_path('blastx_example.xlsx')")
print(" mrna_file = get_test_data_path('mrna_example.fasta')")
print(" region_file = get_test_data_path('region_example.csv')")
print(" full_seq_file = get_test_data_path('full_seq.xlsx')")
except Exception as e:
print(f"❌ 获取数据信息时出错: {e}")

Binary file not shown.

402
predict_sample.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -15,7 +15,8 @@ dependencies = [
"matplotlib>=3.9.4",
"joblib>=1.4.2",
"biopython>=1.85",
"wrapt>=1.17.0"
"wrapt>=1.17.0",
"openpyxl>=3.1.5"
]
requires-python = ">=3.9"

View File

@ -369,6 +369,7 @@ list_test_data()
blastx_file = get_test_data_path('blastx_example.xlsx')
mrna_file = get_test_data_path('mrna_example.fasta')
region_file = get_test_data_path('region_example.csv')
seq_file = get_test_data_path('full_seq.xlsx')
```
## Complete Workflow Examples
@ -380,15 +381,15 @@ from FScanpy import predict_prf, plot_prf_prediction
import matplotlib.pyplot as plt
import pandas as pd
# Define sequence
sequence = "ATGCGTACGTATGCGTACGTATGCGTACGTAAGCCCTTTGAACCCAAAGGG"
full_seq = pd.read_excel(seq_file)
# Method 1: Simple prediction
results = predict_prf(sequence=sequence)
results = predict_prf(sequence=full_seq.iloc[0]['full_seq'])
print(f"Found {len(results)} potential sites")
# Method 2: Prediction with visualization
results, fig = plot_prf_prediction(
sequence=sequence,
sequence=full_seq.iloc[0]['full_seq'],
window_size=1, # Scan every position
short_threshold=0.3, # Display sites above 0.3
long_threshold=0.4, # Display sites above 0.4