完善真实样本的预测以及结果解读
This commit is contained in:
parent
f320a1696f
commit
6f7510455a
|
|
@ -22,6 +22,7 @@ def get_test_data_path(filename: str) -> str:
|
|||
>>> blastx_file = get_test_data_path('blastx_example.xlsx')
|
||||
>>> mrna_file = get_test_data_path('mrna_example.fasta')
|
||||
>>> region_file = get_test_data_path('region_example.csv')
|
||||
>>> full_seq_file = get_test_data_path('full_seq.xlsx')
|
||||
"""
|
||||
current_dir = Path(__file__).parent
|
||||
test_data_dir = current_dir / "test_data"
|
||||
|
|
@ -93,7 +94,8 @@ def print_test_data_info():
|
|||
file_descriptions = {
|
||||
'blastx_example.xlsx': '🧬 BLASTX比对结果示例 (1000条记录)',
|
||||
'mrna_example.fasta': '🧬 mRNA序列示例数据',
|
||||
'region_example.csv': '🎯 PRF区域验证数据 (含标签)'
|
||||
'region_example.csv': '🎯 PRF区域验证数据 (含标签)',
|
||||
'full_seq.xlsx': '🧬 完整序列示例数据'
|
||||
}
|
||||
|
||||
for filename in files:
|
||||
|
|
@ -112,7 +114,7 @@ def print_test_data_info():
|
|||
print(" blastx_file = get_test_data_path('blastx_example.xlsx')")
|
||||
print(" mrna_file = get_test_data_path('mrna_example.fasta')")
|
||||
print(" region_file = get_test_data_path('region_example.csv')")
|
||||
|
||||
print(" full_seq_file = get_test_data_path('full_seq.xlsx')")
|
||||
except Exception as e:
|
||||
print(f"❌ 获取数据信息时出错: {e}")
|
||||
|
||||
|
|
|
|||
Binary file not shown.
File diff suppressed because one or more lines are too long
|
|
@ -15,7 +15,8 @@ dependencies = [
|
|||
"matplotlib>=3.9.4",
|
||||
"joblib>=1.4.2",
|
||||
"biopython>=1.85",
|
||||
"wrapt>=1.17.0"
|
||||
"wrapt>=1.17.0",
|
||||
"openpyxl>=3.1.5"
|
||||
]
|
||||
requires-python = ">=3.9"
|
||||
|
||||
|
|
|
|||
|
|
@ -369,6 +369,7 @@ list_test_data()
|
|||
blastx_file = get_test_data_path('blastx_example.xlsx')
|
||||
mrna_file = get_test_data_path('mrna_example.fasta')
|
||||
region_file = get_test_data_path('region_example.csv')
|
||||
seq_file = get_test_data_path('full_seq.xlsx')
|
||||
```
|
||||
|
||||
## Complete Workflow Examples
|
||||
|
|
@ -380,15 +381,15 @@ from FScanpy import predict_prf, plot_prf_prediction
|
|||
import matplotlib.pyplot as plt
|
||||
|
||||
# Define sequence
|
||||
sequence = "ATGCGTACGTATGCGTACGTATGCGTACGTAAGCCCTTTGAACCCAAAGGG"
|
||||
full_seq = pd.read_excel(seq_file)
|
||||
|
||||
# Method 1: Simple prediction
|
||||
results = predict_prf(sequence=sequence)
|
||||
results = predict_prf(sequence=full_seq.iloc[0]['full_seq'])
|
||||
print(f"Found {len(results)} potential sites")
|
||||
|
||||
# Method 2: Prediction with visualization
|
||||
results, fig = plot_prf_prediction(
|
||||
sequence=sequence,
|
||||
sequence=full_seq.iloc[0]['full_seq'],
|
||||
window_size=1, # Scan every position
|
||||
short_threshold=0.3, # Display sites above 0.3
|
||||
long_threshold=0.4, # Display sites above 0.4
|
||||
|
|
|
|||
Loading…
Reference in New Issue