完善真实样本的预测以及结果解读

2025-06-12 02:52:35 +08:00 · 2025-06-12 02:52:35 +08:00 · 6f7510455a
parent f320a1696f
commit 6f7510455a
5 changed files with 412 additions and 6 deletions
--- a/FScanpy/data/__init__.py
+++ b/FScanpy/data/__init__.py
@@ -22,6 +22,7 @@ def get_test_data_path(filename: str) -> str:
        >>> blastx_file = get_test_data_path('blastx_example.xlsx')
        >>> mrna_file = get_test_data_path('mrna_example.fasta')
        >>> region_file = get_test_data_path('region_example.csv')
+        >>> full_seq_file = get_test_data_path('full_seq.xlsx')
    """
    current_dir = Path(__file__).parent
    test_data_dir = current_dir / "test_data"
@@ -93,7 +94,8 @@ def print_test_data_info():
        file_descriptions = {
            'blastx_example.xlsx': '🧬 BLASTX比对结果示例 (1000条记录)',
            'mrna_example.fasta': '🧬 mRNA序列示例数据',
-            'region_example.csv': '🎯 PRF区域验证数据 (含标签)'
+            'region_example.csv': '🎯 PRF区域验证数据 (含标签)',
+            'full_seq.xlsx': '🧬 完整序列示例数据'
        }
        
        for filename in files:
@@ -112,7 +114,7 @@ def print_test_data_info():
        print("  blastx_file = get_test_data_path('blastx_example.xlsx')")
        print("  mrna_file = get_test_data_path('mrna_example.fasta')")
        print("  region_file = get_test_data_path('region_example.csv')")
-        
+        print("  full_seq_file = get_test_data_path('full_seq.xlsx')")
    except Exception as e:
        print(f"❌ 获取数据信息时出错: {e}")

--- a/FScanpy/data/test_data/full_seq.xlsx
+++ b/FScanpy/data/test_data/full_seq.xlsx
--- a/predict_sample.ipynb
+++ b/predict_sample.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,8 @@ dependencies = [
    "matplotlib>=3.9.4",
    "joblib>=1.4.2",
    "biopython>=1.85",
-    "wrapt>=1.17.0"
+    "wrapt>=1.17.0",
+    "openpyxl>=3.1.5"
 ]
 requires-python = ">=3.9"

--- a/tutorial/tutorial.md
+++ b/tutorial/tutorial.md
@@ -369,6 +369,7 @@ list_test_data()
 blastx_file = get_test_data_path('blastx_example.xlsx')
 mrna_file = get_test_data_path('mrna_example.fasta')
 region_file = get_test_data_path('region_example.csv')
+seq_file = get_test_data_path('full_seq.xlsx')
 ```

 ## Complete Workflow Examples
@@ -380,15 +381,15 @@ from FScanpy import predict_prf, plot_prf_prediction
 import matplotlib.pyplot as plt

 # Define sequence
-sequence = "ATGCGTACGTATGCGTACGTATGCGTACGTAAGCCCTTTGAACCCAAAGGG"
+full_seq = pd.read_excel(seq_file)

 # Method 1: Simple prediction
-results = predict_prf(sequence=sequence)
+results = predict_prf(sequence=full_seq.iloc[0]['full_seq'])
 print(f"Found {len(results)} potential sites")

 # Method 2: Prediction with visualization
 results, fig = plot_prf_prediction(
-    sequence=sequence,
+    sequence=full_seq.iloc[0]['full_seq'],
    window_size=1,              # Scan every position
    short_threshold=0.3,        # Display sites above 0.3
    long_threshold=0.4,         # Display sites above 0.4