FScanpy-package/predict_sample.ipynb

403 lines
268 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Import FScanpy related modules\n",
"from FScanpy import PRFPredictor, predict_prf, plot_prf_prediction\n",
"from FScanpy.data import get_test_data_path, list_test_data\n",
"from FScanpy.utils import fscanr, extract_prf_regions"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" GB_Probability CNN_Probability Voting_Probability Position Codon \\\n",
"0 0.883519 0.950972 0.923991 114 GCC \n",
"1 0.910635 0.988631 0.957433 1794 CCC \n",
"2 0.890379 0.979877 0.944078 1821 AAC \n",
"3 0.953772 0.962459 0.958984 1881 AAC \n",
"4 0.941618 0.946840 0.944751 1194 CTT \n",
"\n",
" 33bp \\\n",
"0 TCTGGAAGAAGTAAACGCCGAGCTGGAACAGCC \n",
"1 GGGGCAGTCCCCTAGCCCCGCTCAAAAGGGGGA \n",
"2 ACCACCCCATCAGGGAAACCGGGTGGAGGGGCC \n",
"3 CACCGGGCCAGGAAATAACCCAGTATTCCCAGT \n",
"4 ACTAATAGAGGGGGGACTTAGCGCCCCCCAAAC \n",
"\n",
" 399bp Sequence_ID \\\n",
"0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN... 0 \n",
"1 GACAGGACACATCAGAAAAGACTGTAAGGATGAAAAGGGCTCAAAA... 1 \n",
"2 AAATAAAGAGAAAGGAGGGTGTTGCTTTAAATGCGGTAAAAAAGGA... 2 \n",
"3 CCTGTACCTCCCTGAGGCAAAAAGGCCGCCTGTAATCTTGCCAATA... 3 \n",
"4 GCCCGGGCCTCGGCAACCGGCCCCCAAAAGGCCCCCCCCGGGACCA... 4 \n",
"\n",
" Full_Sequence \n",
"0 ATGTTTGAAATTAACCCGGTGAATAACCGCATTCAGGACCTCACGG... \n",
"1 ATGGGGGTCTCGGGATCAAAAGGGCAGAAACTCTTTGTTTCTGTTC... \n",
"2 ATGGGGCAAGAATTAAGCCAGCATGAACGTTATGTAGAACAATTGA... \n",
"3 ATGGGCCAAATCTTTTCCCGTAGCGCTAGCCCTATTCCGCGGCCGC... \n",
"4 ATGGGAAATTCCCCCTCCTATAACCCCCCCGCTGGTATCTCCCCCT... \n"
]
}
],
"source": [
"# Load the FScanpy test-data table (full_seq.xlsx) and preview its first five rows.\n",
"data = pd.read_excel(\n",
"    get_test_data_path('full_seq.xlsx')\n",
")\n",
"print(data.head(5))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator _BinMapper from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator HistGradientBoostingClassifier from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"Exception in thread Thread-9 (_readerthread):\n",
"Traceback (most recent call last):\n",
" File \"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\threading.py\", line 1016, in _bootstrap_inner\n",
" self.run()\n",
" File \"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\ipykernel\\ipkernel.py\", line 766, in run_closure\n",
" _threading_Thread_run(self)\n",
" File \"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\threading.py\", line 953, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\subprocess.py\", line 1515, in _readerthread\n",
" buffer.append(fh.read())\n",
" File \"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\codecs.py\", line 322, in decode\n",
" (result, consumed) = self._buffer_decode(data, self.errors, final)\n",
"UnicodeDecodeError: 'utf-8' codec can't decode byte 0xce in position 4: invalid continuation byte\n",
"a:\\Code\\fscanpy-package\\FScanpy\\predictor.py:347: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.\n",
" plt.tight_layout()\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABScAAAL0CAYAAAD+wYLfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAeThJREFUeJzt3QecHGX9P/AnhSS00FsgEhDpEDrSexUERYmINCEEAWmCEERCEVFQfhGlK0V/dERQaUKkSFF6kSY9AST0JLSEJPt/fZ/ffy97l7vL5W7v5vb2/X699nVbZmfn5pmZnfnsU3qVSqVSAgAAAADoYr27+gMBAAAAAIJwEgAAAAAohHASAAAAACiEcBIAAAAAKIRwEgAAAAAohHASAAAAACiEcBIAAAAAKIRwEgAAAAAohHASAAAAACiEcBIAeqi77ror9erVK3344Yf58aWXXprmn3/+Ds2zGvOoFSeddFJaY401OmW+iy22WC6bG264oerzp3P8+Mc/TgceeGDq7vbdd9+06667Fr0YdMBxxx2Xvv/97xe9GADQZYSTANSluICPcChu/fr1S8stt1w65ZRT0tSpUxsFe+XbIossknbcccf01FNPtTifytuLL77Y7Oc2nW+EVLvttlt6+eWXO/1/HjZsWPrPf/7T5umHDBmSRo8e3aF5tNfmm2/esI4GDBiQll9++XT66aenUqmUajn0efbZZ9PJJ5+cLrjggvTf//437bDDDqmannjiifTVr341Lbroonm9RRlGmb399ttV/Zxa9uSTT6ZNNtkkr5/BgwenM844Y5bveeutt9KvfvWr9KMf/ajFfX+hhRZK22+/fZ5/Z4nt/8ILL0zrr79+mmeeefIPBeuss07eTz/55JNUq4F9Wz/n1Vdfzev68ccfr9pndcY8O+roo49Ol112WZd8LwBAdyCcBKBuRZAQAdELL7yQfvCDH+QL4jPPPLPRNM8//3ye5rbbbkuTJ09OX/nKV9KUKVOanU/lbZlllmn1s2O+b775Zrr22mvT008/nXbeeec0bdq0maaLMKIcmHbUnHPOmUOroufRVsOHD8/rMtbVyJEj04knnpjOP//8VMteeuml/HeXXXZJiy++eOrfv3+75vP555/P9Nw777yTttpqq7Tgggvm7TWC0EsuuSQNGjQoffzxxx1e9p5g4sSJadttt01LL710euSRR/L+Hvt9BH6t+e1vf5s23HDD/L6W9v0xY8akvn37pp122qlDy9j0+FJpr732SkcccUTefu68884cqEWNzhtvvDH97W9/S52lo8ehCHJjPdM2Cy+8cNpuu+3SeeedV/SiAECXEE4CULciGIqAKAKH733ve2nrrbdOf/7znxtNE0FcTLPWWmvlUGDcuHHpueeea3Y+lbc+ffq0+tkx3yWWWCJtuummOXR75plncm3Lcs3KW265Ja299tp53vfee2+aPn16rjkYoWcEhEOHDk3XXXddo3nefPPNuYZhvL7FFlvkGkGzapL9l7/8Ja277rq5FllcEH/ta19rqLn42muvpSOPPLKhZlhL84gL6C9+8Yu5BuoKK6yQ/vCHPzR6Pd4b4U7Me6655kpf+tKXZlrPzYlpy+Wz3377pdVXXz3dfvvtDa9HWBw1jJZccsk099xz59pksf7KYvkj9F1ggQXy66usskpeRy39H9HEuvx/NhXBStRkihCovD7isyJIOvTQQ3NZxjqMZY1yamkesTyhd+/eDZ8VZRu1dpdaaqlc3lFj7NZbb52pZtfVV1+dNttss/w5l19++Uzzv++++9KECRPyul5zzTXzthLbwf/8z/80Csv//e9/5xqbUfMuau5G4PXuu+82vB5B5t57751fj//rl7/8Zd4eYvsva65JeqzPWK9lsa/svvvu+fkITCNQq9wmyzVRf/GLX+TPiZqHhxxySKPgNcr42GOPzTUcY91EDeff/e53bf5fmor1FmV28cUX5+3hW9/6VjrssM
PSWWedlVpz1VVXNZRdS/t+lFs0x43/O4Lislj+2C9je1522WVzmFj5P5ZrCUa5RTlF+Tbnmmuuyct/5ZVXpuOPPz7vt1EzNtbr3//+91zWlVpbr7GPRo3LeeedNy/7t7/97Ua1a5s7Dv3v//5vrvUbtXPL+0BleRdlVttA7Esbb7xx3g5jXUR4XP6RIJT3jdhn4n+Kbb1y+/zpT3+a5xvvL9euP+aYY/I2Hfts/ABQqa3lHbWnY7uO6WI/iX23Umxvsd0BQD0QTgLA/xehXku1luLCsXyhGCFctT83VH52hBw/+9nPcu23COUi8Pr973+faw5GTcsIDb/zne+ku+++O08fgcjXv/71fEEbtakOOOCAPI/W3HTTTTkwjObqjz32WK75td566+XXrr/++nzhHRfj5ZphzfnTn/6UDj/88FzzNEKCESNG5CAxanVVilAjLsCjyWt83p577pnef//9Ntfa+sc//pFD4cp1H6HgAw88kMsl5vvNb34z12SLmrAhApkIt+65557cHP/nP/95DjDaI0LQWP7KmnJRk+7ss8/OQWsER1HDM8KjCIxamkc5yKhcp9FcOALACJPi/4gaU9E0u/x/lEV5xrqObSKmaSpCpghOokxaav4e/Y9uueWWOYh5+OGHc3Azfvz4/L+VRfAS21W5Nl4EVY8++uhsra8IY2IZI/yKsovgNNZ9rL/K7Ty2kwiK4m+EvxF2VQZeEZJGGBfrOf7vCHTKZdiW/6Wp2F7iB4HK7SiWM8rugw8+aPY9sZ3GjwcR5rXmo48+ygFeBKgRgpXFOoj/KeYRZX3RRRflwLhS/DDxxz/+Me93LTUvjm0rwv8II5uKUG2++eZr83qN8jn11FNz0Bghc4TGEcY1VXkc2mabbfJ+HqFuefuNLgOK1JZtIML2o446Kr8ex7j4YSCOe/GjQHjwwQfz3zvuuCP/T1EGZRH6Rg33OIZEgD1q1KgcbsYPHv/617/SQQcdlI95r7/++myXdxwz4sehWOY4/h588MGNpoljccy36Y9MANAjlQCgDu2zzz6lXXbZJd+fPn166fbbby/179+/dPTRR+fn7rzzzkh3SnPPPXe+xf24ffWrX51pPn369GmYLm7f+MY3Wvzc8nw/+OCD/PjNN98sbbjhhqUll1yyNHny5IbXb7jhhob3fPbZZ6W55pqrdP/99zea1/7771/aY4898v2RI0eWVl555UavH3vssY0+65JLLinNN998Da9vsMEGpT333LPFZV166aVL//M//9PouabziGUfPnx4o2m++c1vlnbccceGx7EMJ5xwQsPjjz76KD93yy23tPjZm222WWmOOebI6zP+xvQDBgwo3Xffffn11157La/3N954o9H7ttpqq7wuwmqrrVY66aSTmp1/0/8j/OlPf8qfUzZq1KjS0KFDm91myr7//e+Xttxyy7wNtUXTzwiDBg0qnXbaaY2eW3fddUsHH3xwvv/KK6/k94wePXqW8z/++ONLffv2LS244IKl7bffvnTGGWeU3nrrrYbXTz311NK2227b6D3jxo3L83/++edLkyZNKvXr1690zTXXNLz+3nvvleacc87S4Ycf3vBcTB//S6VYn7Fewx/+8IfSCius0Gi9xPYd87ntttsa1mdsY1OnTm207QwbNizfj+WJz4l9szmz+l+as80225QOPPDARs89/fTT+T3PPPNMs+957LHH8utjx45tdd+PaZZYYonSI488UmrNmWeeWVp77bUbbWexjb/99tutvm+llVaa6fjTnFmt1+Y89NBDefmj/ENzx6Hm9om2imWK97ZVTNu7d+9Gx9W4xXEwlivKpL3bwDvvvJNff+qppxrtX+V5Nl2P06ZNa3gutulNNtmk4XGs41iuK6+8crbKO7ab119/veG5OBbG//vf//634bkJEybk5brrrrvatM4AoJb1LTocBYCi/PWvf821sKIWUdSiiaaNTftFi1pf0ezun//8Z27e11yfh9Gcsr
JvsGhCPCtRKzEynhjEIppoR62pytpclbW0opZNTBc1lypFDbSoMRSiZlM0a660wQYbtLoMUUMr+nXsiPjcpiMYb7T
"text/plain": [
"<Figure size 1600x800 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Run the PRF prediction plot for test sequence 0 (row 0 of `data`).\n",
"# The title is built from the Sequence_ID stored in that same row.\n",
"sequence_results0, fig0 = plot_prf_prediction(\n",
"    sequence=str(data['Full_Sequence'].iloc[0]),\n",
"    title=f\"PRF Prediction Results for Sequence {data.iloc[0]['Sequence_ID']} (Bar Chart + Heatmap)\",\n",
"    window_size=3,\n",
"    ensemble_weight=0.6,\n",
"    short_threshold=0.2,\n",
"    long_threshold=0.2,\n",
"    figsize=(16, 8),\n",
"    dpi=150,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 结果解读Sequence0\n",
"### 真实情况\n",
"该序列核糖体程序性移码发生于第114nt处。\n",
"### 图上信息\n",
"在该处我们可以看到一个显著的最高峰,并且明显较粗。"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator _BinMapper from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator HistGradientBoostingClassifier from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"a:\\Code\\fscanpy-package\\FScanpy\\predictor.py:347: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.\n",
" plt.tight_layout()\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABS4AAAL0CAYAAAAC1KkVAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAkYtJREFUeJzs3QeYHVXZAOCz6QkQegsEAqhApIMgIEW6KIqiICpNRBARBUVAkYDIj91YULAAoiCIBVEQBARRAelFKUpN6KEmkJBkN/d/vrM7m7ubzWZ3s2Xu3vd9npvsnXvvzJk5M2dmvjmloVKpVBIAAAAAQIkMGegEAAAAAAC0J3AJAAAAAJSOwCUAAAAAUDoClwAAAABA6QhcAgAAAAClI3AJAAAAAJSOwCUAAAAAUDoClwAAAABA6QhcAgAAAAClI3AJAIPQ9ddfnxoaGtLLL7+c35933nlpmWWWWax59sY8asUpp5ySNtlkkz6Z78orr5zz5tJLL+31+dM3fvazn6Xddtst1et+S/8566yz0l577TXQyQCA0hC4BKDuHHzwwTlwFK8RI0akN7zhDenLX/5yamxsbBP0K14rrrhi2nPPPdO999670PlUvx566KEOl9t+vhHA2meffdIjjzzS5+u83377pf/+979d/v6ECRPS5MmTF2sePbXjjju2bqNRo0alN73pTemMM85IlUolDZTI67333nux5nH//fenU089NZ199tnp6aefTu94xztSb7r77rvTu9/97rTSSivl7RZ5GHn23HPP9epyatXrr7+e83HDDTdMw4YN63J+xu++9KUvpUmTJrUJEFYfy0svvXTabrvt0t/+9rc+XIOUfvvb3+bjI5a35JJLpo022iiXXS+++GLqa/314KKz5fRFwL9sDxE++tGPpjvuuCP9/e9/H+ikAEApCFwCUJf22GOPHDz63//+lz772c/mQMQ3vvGNNt958MEH83euuuqqNHv27PTOd74zzZkzp8P5VL/WWmutTpcd833qqafSJZdckv7zn//k2jVNTU0LfC8CdUUwdXGNHj06B7QGeh5dddhhh+VtGdvqxBNPTCeffHKuiVTLHn744fz/e97znrTKKqukkSNH9mg+c+fOXWDatGnT0s4775yWW265vL9GkPTcc89N48aNS6+99tpip30wiGMs9uGjjz467bLLLl3+3W9+85s0duzYtO2227aZ/uY3v7n1mL/pppvSG9/4xvSud70rvfLKK4uVxnnz5nX42Re/+MUciH7LW96S/vznP6d///vf6Vvf+lYOWP/iF79Ifamjfa6romyNgDFdEw/TPvShD6Xvfe97A50UACgFgUsA6lIEjSJ4tOaaa6ZPfOITOZBx2WWXtflOBOniO5tttln6zGc+k6ZOnZoeeOCBDudT/Ro6dGiny475rrrqqmn77bfPAbn77rsv19IsamRGUGLzzTfP8/7HP/6RAxlR4zACohF42XjjjXMwpdoVV1yRaybG529/+9vTY489tshaTH/84x9zECRq562wwgrpve99b54eNboef/zxdMwxx7TWKFvYPH70ox+lddZZJ99sr7vuugsEUOK3P/3pT/O8x4wZk4M77bdzR+K7Rf4ccsghuWbZ1Vdf3fp5BJI/97nPpdVWWy0tscQSaauttsrbrxDpj4Dwsssumz+PIFNso4WtR9S4Ktazo8DLz3/+8/SHP/yhdXvEsiKIfdRRR+W8jG0YaY18Wtg8iuafQ4YMaV1W5G3UmFt99dVzfkcz3yuvvLL1d5GP8d2LL7447bDDDnk5F1xwwQLz/+c//5kDZrGtN91007yvxH7wne98p00gPYJdUdMzautFjd8DDjggPf/8862fR5DzwAMPzJ/HekVgLPaH2P87q6EW2zO2ayGOlX333TdPj2BqBGur98miBus3v/nNvJzll18+ffKTn2wTIIs8Pv7449P48ePztoma0dFku6vr0l7sB7G/RlA89q2uuu
iiizpsuhu1NotjfuLEiTkfX3311Ta1kr/97W/nGp6x7FiPI488Mn+nUOyLcUzEPGI9p0yZssCybrnllvR///d/OT/iAcs222yTa9TuuuuuuRbmQQcd1Ob7cRzG51Ez84Mf/GCaMWNG62exf73tbW/Ly43tHsHWIqje2T4Xx2HsY8UxEPv0QFvUfnbrrbfmbRTlW2yLWJ+ozViIbRSifIp1Kt4XTe7POeectMYaa+R9LPIuAstf//rXc55HOX766ae3SU9X8zuOnygLY9vuvvvueT2qxf4W+8SsWbP6bNsBQK0QuASAltqE7WtTFuJmPYIXIQJ0vb3cUL3sE044IX31q1/NteYiYBfBsPPPPz/XOIwamhFQ/MhHPtLaLDVuet/3vvflm9277rorfexjH8vz6Mzll1+eb9ajCfydd96Zrr322rTlllvmz373u9/lQFoEYooaZR35/e9/nz796U/nGqsRRDr88MNzcOO6665r871oHh3BhXvuuScv78Mf/nCXm7ZGrdNoMhkB4+ptHwHDqOUW+RLz/cAHPpBrv0YN2hBBsAh83XDDDbmJ/9e+9rUcfOiJCJBG+qtr10bgKGpERXDh17/+da4ZGsGdIvDR0TyiBmSo3qbf/e53czAqAnixHhHEiObexXoUIj9jW8c+Ed9pLwIpUTs38mRhTeqjv9OddtopBzZvu+22HMB69tln87oVjjvuuLxfRZD2L3/5Sw7QVgd6uiKCj5HGpZZaKuddBFVj28f2q97PYz+JgFn8H4HhCOpUBz8jgPqrX/0qb+dY72hiX+RhV9alt8TDgy222KLT78S+FvkbQakI4BciSB3pj+M21vGvf/1r+vznP9/mtzNnzsz7ZwSd43sd1WqOfasInnWkOhAf2zQCY3/605/yK/IzypPq4PSxxx6bt1sc95HGKAva1/Ss3uciCB5dR0TN02L/jX16IHVlP4uAbQR1Iw9vvvnmHCyMMqgI5EZgM0TexToV74vtGA+RYt+K/TCC5lHr/oknnsjbNPLspJNOSv/617+6nd8R8IwyPdIc+3IEl6vF/hbHc/W8AaBuVQCgzhx00EGV97znPfnvefPmVa6++urKyJEjK5/73OfytOuuuy4iP5Ulllgiv+LveL373e9eYD5Dhw5t/V683v/+9y90ucV8X3rppfz+qaeeqmyzzTaV1VZbrTJ79uzWzy+99NLW37z++uuVMWPGVG688cY28zr00EMr+++/f/77xBNPrEycOLHN58cff3ybZZ177rmVpZdeuvXzrbfeuvLhD394oWldc801K9/5znfaTGs/j0j7YYcd1uY7H/jAByp77rln6/tIw0knndT6/tVXX83T/vznPy902TvssENl+PDheXvG//H9UaNGVf75z3/mzx9//PG83Z988sk2v9t5553ztggbbrhh5ZRTTulw/u3XI/z+97/PyylMmjSpsvHGG3e4zxQ+9alPVXbaaae8D3VF+2WEcePGVU4//fQ2097ylrdUjjzyyPz3o48+mn8zefLkRc7/C1/4QmXYsGGV5ZZbrrLHHntUvv71r1eeeeaZ1s9PO+20ym677dbmN1OnTs3zf/DBByszZsyojBgxovLrX/+69fMXXnihMnr06MqnP/3p1mnx/ViXarE9Y7uGX/ziF5V11123zXaJ/Tvmc9VVV7Vuz9jHGhsb2+w7++23X/470hPLiWOzI4tal0XpKD87EsdPzPOGG25oMz32jyFDhrQe9w0NDZWxY8d2ul+HSy65pLL88su3vo9tFvO/6667Ov3dO97xjspGG220yPRGuqK8mD59euu04447rrLVVlst9DfTpk3Labj33ns73ec6Om66ItIU27urim1SXa5Wl8XFvteV/ay9pqamylJLLVX54x//2On+3NF23H333SsTJkzI8yjE8s8444xu5/fNN9/cOu3+++/P0/71r3+1+e2yyy5bOe+88xa5vQBgsBs20IFTABgIURMpaudErZ2oaRR9ir
Vv+hi1eKLJctTUiWaaHfWxGDWRovlpIZoILkrUZoz75ah5E82+o6lndW3C6tpd0YQ8vhfNHatFjaKobRaiRlQ0la6
"text/plain": [
"<Figure size 1600x800 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Run the PRF prediction plot for test sequence 1 (row 1 of `data`) with window_size=3.\n",
"# The title is built from the Sequence_ID stored in that same row.\n",
"sequence_results1, fig1 = plot_prf_prediction(\n",
"    sequence=str(data['Full_Sequence'].iloc[1]),\n",
"    title=f\"PRF Prediction Results for Sequence {data.iloc[1]['Sequence_ID']} (Bar Chart + Heatmap)\",\n",
"    window_size=3,\n",
"    ensemble_weight=0.6,\n",
"    short_threshold=0.2,\n",
"    long_threshold=0.2,\n",
"    figsize=(16, 8),\n",
"    dpi=150,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 结果解读Sequence1\n",
"\n",
"### 真实情况\n",
"该序列核糖体程序性移码发生于第1794nt处。\n",
"### 图上信息\n",
"在该处我们可以看到一个显著的高峰,但是肉眼难以分辨该高峰与其他位置高峰的差异,因此需要提高分辨率:将 window_size 参数调小,查看每个高峰周围碱基的移码概率,并基于高概率位点的集中程度判断其移码可能性。\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator _BinMapper from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator HistGradientBoostingClassifier from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"a:\\Code\\fscanpy-package\\FScanpy\\predictor.py:347: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.\n",
" plt.tight_layout()\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABS4AAAL0CAYAAAAC1KkVAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAndxJREFUeJzs3QeYJEX9MODay3fAkTMnh6ggkkEQEEEEQRTFiBFERBAxYUD8I4joh1kMKBgAE4IYMIAgIogKSEZQQMlHTsIRjovzPb/a7b3ZuZnd2b0NPTPv+zx7N9Mz013d1VVd/euq7q5KpVJJAAAAAAAlMm6sEwAAAAAAUEvgEgAAAAAoHYFLAAAAAKB0BC4BAAAAgNIRuAQAAAAASkfgEgAAAAAoHYFLAAAAAKB0BC4BAAAAgNIRuAQAAAAASkfgEgDa0EUXXZS6urrSY489lt+feuqpaYUVVliqeQ7HPFrFpz/96bT55puPyHxXX331nDdnnXXWsM+fkfGDH/wgvfzlL0+dut8yek488cS01157jXUyAKA0BC4B6DjvfOc7c+Ao/iZNmpSe85znpM985jNpwYIFfYJ+xd+qq66a9txzz3T99dc3nE/13y233FJ3ubXzjQDW61//+nTbbbeN+Drvs88+6T//+U/T3585c2Y6/vjjl2oeQ7Xzzjv3bqMpU6ak5z3veem4445LlUoljZXI67333nup5nHjjTemY445Jp100knpvvvuS694xSvScLruuuvSq1/96rTaaqvl7RZ5GHn24IMPDutyWtUzzzyT83GTTTZJEyZMaDo/43ef+tSn0tFHH90nQFhdlpdffvm04447pr/85S8juAYp/fKXv8zlI5a37LLLpk033TTXXY8++mgaaaN14aK/5YxEwL9sFxHe9a53pauvvjr99a9/HeukAEApCFwC0JH22GOPHDz673//mz7ykY/kQMSXvvSlPt+5+eab83fOO++8NHfu3PTKV74yzZs3r+58qv/WW2+9fpcd87333nvTmWeemf71r3/l3jULFy5c4nsRqCuCqUtr6tSpOaA11vNo1oEHHpi3ZWyrI444Ih111FG5J1Iru/XWW/P/r3nNa9Iaa6yRJk+ePKT5zJ8/f4lpDz30UHrZy16WVlpppby/RpD0lFNOSWuttVZ66qmnljrt7SDKWOzDH/jAB9Kuu+7a9O9+8YtfpOnTp6cddtihz/QXvOAFvWX+0ksvTc997nPTq171qvT4448vVRoXLVpU97P/+7//y4HoF77whekPf/hDuuGGG9JXvvKVHLD+8Y9/nEZSvX2uWVG3RsCY5sTFtLe+9a3pG9/4xlgnBQBKQeASgI4UQaMIHq277rrpve99bw5k/Pa3v+3znQjSxXe23HLL9KEPfSjNmjUr3XTTTXXnU/03fvz4fpcd811zzTXTS17ykhyQ+/e//517aRY9MiMosdVWW+V5/+1vf8uBjOhxGAHRCLxsttlmOZhS7Zxzzsk9E+Pzl770pemOO+4YsBfT7373uxwEid55q6yySnrta1+bp0ePrjvvvDN9+MMf7u1R1mge3/nOd9L666+fT7Y32GCDJQIo8dvvf//7ed7Tpk3LwZ3a7VxPfLfIn/333z/3LDv//PN7P49A8kc/+tG09tprp2WWWSZtu+22efsVIv0REF5xxRXz5xFkim3UaD2ix1WxnvUCLz/84Q/Tb37zm97tEcuKIPahhx6a8zK2YaQ18qnRPIrhn+PGjetdVuRt9JhbZ511cn7HMN9zzz2393eRj/HdM844I+200055OT/96U+XmP/f//73HDCLbb3FFlvkfSX2g6997Wt9AukR7IqentFbL3r8vuMd70gPP/xw7+cR5Nx3333z57FeERiL/SH2//56qMX2jO1aiLLypje9KU+PYGoEa6v3yaIH65e//OW8nJVXXjm9733v6xMgizw+/PDD04wZM/K2iZ7RMWS72XWpFftB7K8RFI99q1
mnn3563aG70WuzKPMbbbRRzscnn3yyT6/kr371q7mHZyw71uOQQw7J3ykU+2KUiZhHrOddd921xLIuv/zy9P/+3//L+REXWLbffvvco3a33XbLvTD322+/Pt+PchifR8/MN7/5zemJJ57o/Sz2rxe/+MV5ubHdI9haBNX72+eiHMY+VpSB2KfH2kD72RVXXJG3UdRvsS1ifaI3YyG2UYj6KdapeF8MuT/55JPTs571rLyPRd5FYPmLX/xizvOoxz/3uc/1SU+z+R3lJ+rC2La77757Xo9qsb/FPjFnzpwR23YA0CoELgGgpzdhbW/KQpysR/AiRIBuuJcbqpf9iU98In3+85/PveYiYBfBsB/96Ee5x2H00IyA4tvf/vbeYalx0vu6170un+xee+216d3vfneeR3/OPvvsfLIeQ+CvueaadMEFF6Rtttkmf/arX/0qB9IiEFP0KKvn17/+dfrgBz+Ye6xGEOmggw7KwY0LL7ywz/dieHQEF/75z3/m5b3tbW9remhr9DqNIZMRMK7e9hEwjF5ukS8x3ze+8Y2592v0oA0RBIvA18UXX5yH+H/hC1/IwYehiABppL+6d20EjqJHVAQXfv7zn+eeoRHcKQIf9eYRPSBD9Tb9+te/noNREcCL9YggRgz3LtajEPkZ2zr2ifhOrQikRO/cyJNGQ+rjfqe77LJLDmxeeeWVOYD1wAMP5HUrfOxjH8v7VQRp//jHP+YAbXWgpxkRfIw0LrfccjnvIqga2z62X/V+HvtJBMzi/wgMR1CnOvgZAdSf/exneTvHescQ+yIPm1mX4RIXD7beeut+vxP7WuRvBKUigF+IIHWkP8ptrOOf//zn9PGPf7zPb59++um8f0bQOb5Xr1dz7FtF8Kye6kB8bNMIjP3+97/Pf5GfUZ9UB6cPO+ywvN2i3Ecaoy6o7elZvc9FEDxuHRE9T4v9N/bpsdTMfhYB2wjqRh5edtllOVgYdVARyI3AZoi8i3Uq3hfbMS4ixb4V+2EEzaPX/d133523aeTZkUcemf7xj38MOr8j4Bl1eqQ59uUILleL/S3Kc/W8AaBjVQCgw+y3336V17zmNfn1okWLKueff35l8uTJlY9+9KN52oUXXhiRn8oyyyyT/+J1/L361a9eYj7jx4/v/V78veENb2i43GK+//vf//L7e++9t7L99ttX1l577crcuXN7Pz/rrLN6f/PMM89Upk2bVrnkkkv6zOuAAw6ovOUtb8mvjzjiiMpGG23U5/PDDz+8z7JOOeWUyvLLL9/7+XbbbVd529ve1jCt6667buVrX/tan2m184i0H3jggX2+88Y3vrGy55579r6PNBx55JG975988sk87Q9/+EPDZe+0006ViRMn5u0Z/8f3p0yZUvn73/+eP7/zzjvzdr/nnnv6/O5lL3tZ3hZhk002qXz605+uO//a9Qi//vWv83IKRx99dGWzzTaru88U3v/+91d22WWXvA81o3YZYa211qp87nOf6zPthS98YeWQQw7Jr2+//fb8m+OPP37A+X/yk5+sTJgwobLSSitV9thjj8oXv/jFyv3339/7+bHHHlt5+ctf3uc3s2bNyvO/+eabK0888URl0qRJlZ///Oe9nz/yyCOVqVOnVj74wQ/2Tovvx7pUi+0Z2zX8+Mc/rmywwQZ9tkvs3zGf8847r3d7xj62YMGCPvvOPvvsk19HemI5UTbrGWhdBlIvP+uJ8hPzvPjii/tMj/1j3LhxveW+q6urMn369H7363DmmWdWVl555d73sc1i/tdee22/v3vFK15R2XTTTQdMb6Qr6ovZs2f3TvvYxz5W2XbbbRv+5qGHHsppuP766/vd5+qVm2ZEmmJ7N6vYJtX1anVdXOx7zexntRYuXFhZbrnlKr/73e/63Z/rbcfdd9+9MnPmzDyPQiz/uOOOG3R+X3bZZb3TbrzxxjztH//4R5/frrjiipVTTz11wO0FAO1uwlgHTgFgLERPpOidE712oq
dR3FOsduhj9OKJIcvRUyeGada7x2L0RIrhp4UYIjiQ6M0Y58vR8yaGfcdQz+rehNW9u2IIeXwvhjtWix5F0dssRI+
"text/plain": [
"<Figure size 1600x800 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Repeat the plot for test sequence 1 with window_size=1 (the previous cell used 3).\n",
"# Note: this rebinds sequence_results1 / fig1, replacing the window_size=3 results.\n",
"sequence_results1, fig1 = plot_prf_prediction(\n",
"    sequence=str(data['Full_Sequence'].iloc[1]),\n",
"    title=f\"PRF Prediction Results for Sequence {data.iloc[1]['Sequence_ID']} (Bar Chart + Heatmap)\",\n",
"    window_size=1,\n",
"    ensemble_weight=0.6,\n",
"    short_threshold=0.2,\n",
"    long_threshold=0.2,\n",
"    figsize=(16, 8),\n",
"    dpi=150,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 高分辨率的结果解读Sequence1\n",
"\n",
"### 真实情况\n",
"该序列核糖体程序性移码发生于第1794nt处。\n",
"### 图上信息\n",
"在该位置周围集中存在大量高概率碱基,而其他高峰周围并不存在这种集中,因此该位置的PRF可能性高于其余高峰。\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator _BinMapper from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator HistGradientBoostingClassifier from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"a:\\Code\\fscanpy-package\\FScanpy\\predictor.py:347: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.\n",
" plt.tight_layout()\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABR8AAALtCAYAAAChPBNAAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZjtJREFUeJzt3QeYXFX5P/CTQhIgdEIgIRCKgLSEGkJvEkBQEBUBaUII0kH4QRAJRUBAMSotgIAoSJOi0oQAAoLSe5GeSAtFktBS5/+8x/8sm83uZndn7pbs5/M8k0y5M3Nn5uydO9/7nnO6lEqlUgIAAAAAqLKu1X5AAAAAAIAgfAQAAAAACiF8BAAAAAAKIXwEAAAAAAohfAQAAAAACiF8BAAAAAAKIXwEAAAAAAohfAQAAAAACiF8BAAAAAAKIXwEgLnUvffem7p06ZI+/vjjfPnyyy9PCy+8cEWPWY3H6ChOOumkNHjw4EIet2/fvvmzuemmm6r++BTjJz/5STrggANSe7fPPvuknXbaqa1Xgwocd9xx6dBDD23r1QCAqhE+AtApxQ/0CH/i1KNHj7TiiiumU045JU2fPn2W4K586tOnT9p+++3TM8880+Dj1D698sor9T5v3ceNEGqXXXZJr732WuGvedddd03//ve/m7z8wIED0+jRoyt6jJbafPPNa96jXr16pZVWWimdccYZqVQqpY4c6rzwwgvp5JNPTmPGjEnvvPNO2m677VI1PfXUU+kb3/hGWmKJJfL7Fp9hfGYTJkyo6vN0VBH81vf3Ov/88zd6v3fffTf96le/Sj/+8Y8b/NtfbLHF0rbbbpuefvrpwtY/2v9FF12UhgwZknr37p0PBKy77rr57/Szzz5LHTWQb+rzvPHGG/m9fvLJJ6v2XEU8ZqWOPvro9Lvf/a5VvhcAoDUIHwHotCIoiADo5ZdfTj/60Y/yD96zzz57lmVeeumlvMwdd9yRpkyZkr7+9a+nqVOn1vs4tU/LLbdco88dj/v222+n6667Lj333HNpxx13TDNmzKg3bCgHopWad955cyjV1o/RVMOHD8/vZbxXI0eOTCeeeGK68MILW+W5i/Lqq6/m/7/5zW+mJZdcMvXs2bNFjzNt2rTZrnv//ffTVlttlRZddNHcXiPovOyyy1K/fv3Sp59+WvG6zw0i1Kn7t7rqqqum73znO43e75JLLkkbbrhhWnbZZRv82x87dmzq3r172mGHHSpax7rbl9r23HPPdMQRR+T2c8899+TALCoyb7755vS3v/0tFaXS7VAEtbF9pWkWX3zxNGzYsHTBBRe09aoAQFUIHwHotCL4iQAoAoUf/vCHaeutt05//vOfZ1kmgrZYZu21184/+sePH59efPHFeh+n9qlbt26NPnc87lJLLZU23XTTHKo9//zzuVqyXBl52223pXXWWSc/9gMPPJBmzpyZK/8i1IwAcNCgQen666+f5TFvvfXWXCEYt2+xxRa5omdOXab/8pe/pPXWWy9XycUP3p133rmm8vDNN99MRx55ZE1lV0OPET+QV1hhhVxBuvLKK6ff//73s9we943wJh57vvnmS1/5yldme5/rE8uWP5999903rbnmmunOO++suT3C4AiT+vfvnyvXohos3r+yWP8IdRdZZJF8+2qrrZbfo4ZeR3SBLr/OuiI4iUqkCHnK70c8VwRFhxxySP4s4z2MdY3PqaHHiPUJXbt2rXmu+Gyj6nbppZfOn3dUfN1+++2zVWZdc801abPNNsvPc+WVV872+P/4xz/SxIkT83u91lpr5bYS7eCXv/zlLGH4s88+mysuo3IuKm8j0Prggw9qbo+gcq+99sq3x+v6xS9+kdtDtP+y+rqMx/sZ72tZ/K1897vfzddHIBqBWe02Wa4k/fnPf56fJyoHDz744FmC1fiMjz322DRgwID83kSF8m9/+9smv5a6Yrnaf6fvvfde/tvbb7/9UmOuvvrqms+uob/9+N
yiu2y87giCy2L94+8y2vPyyy+fw8Lar7Fc5RefW3xO8fnW59prr82f+x//+Md0/PHH57/bqGyN9/Xuu+/On3Vtjb2v8TcaFZMLLLBAXvfdd999lurY+rZDf/jDH3LVblTXlv8Gan/ebWVObSD+ljbeeOPcDuO9iHC4fBAglP824m8mXlO09drt8/TTT8+PG/cvV8cfc8wxuU3H32wE/LU19fOO6udo17Fc/J3E325t0d6i3QHA3ED4CAD/X4R2DVUdxQ/D8g/BCNmq/byh9nNHiPGzn/0sV69F6BaB1hVXXJEr/6JSMkLB73//++nvf/97Xj4Cj29961v5B2tUQ+2///75MRpzyy235EAwupM/8cQTuXJr/fXXz7fdcMMN+Yd1/NguV3bV58Ybb0yHH354rhyNEGDEiBE5KIyqrNoitIgf2NElNZ5vjz32SB999FGTq67uv//+HPrWfu8j9HvooYfy5xKPG9VrUYkWlawhApcIr+67777cXf7MM8/MAUVLRMgZ61+70i0q4X7961/nIDWCoajQjHAoAqGGHqMcVNR+T6M7bwR8ERbF64iKp+g6XX4dZfF5xnsdbSKWqStCpAhG4jNpqHt6jP+55ZZb5qDl0UcfzcFMBHDx2soiWIl2Va6miyDq8ccfb9b7FWFLrGOEW/HZRTAa7328f7XbebSTCILi/wh3I8yqHWhFCBphW7zP8bojsCl/hk15LXMSgV8ERZtsskmDy0Q7jYAywrrGfPLJJzmgi4A0Qq6yeA/iNcVjxGd98cUX50C4tjjw8Kc//Sn/3TXU/TfaVoT7ETbWFaHZQgst1OT3NT6fU089NQeJESJHKBxhW121t0Nf+9rX8t95hPjl9htd+ttSU9pAhOlHHXVUvj22cRH8x3YvQv/w8MMP5//vuuuu/JriMyiLUDcq1GMbcs4556RRo0bl8DIOaPzrX/9KBx54YN7m/ec//2n25x3bjDj4E+sc29+DDjpolmViWxyPW/cgEgB0SCUA6IT23nvv0je/+c18fubMmaU777yz1LNnz9LRRx+dr7vnnnsivSnNP//8+RTn4/SNb3xjtsfp1q1bzXJx+va3v93g85Yf97///W++/Pbbb5c23HDDUv/+/UtTpkypuf2mm26quc8XX3xRmm+++UoPPvjgLI+13377lXbbbbd8fuTIkaVVV111ltuPPfbYWZ7rsssuKy200EI1tw8dOrS0xx57NLiuyy67bOmXv/zlLNfVfYxY9+HDh8+yzHe+853S9ttvX3M51uGEE06oufzJJ5/k62677bYGn3uzzTYrzTPPPPn9jP9j+V69epX+8Y9/5NvffPPN/L6/9dZbs9xvq622yu9FWGONNUonnXRSvY9f93WEG2+8MT9P2ahRo0qDBg2qt82UHXrooaUtt9wyt6GmqPscoV+/fqXTTjttluvWW2+90kEHHZTPv/766/k+o0ePnuPjH3/88aXu3buXFl100dK2225bOuuss0rvvvtuze2nnnpqaZtttpnlPuPHj8+P/9JLL5UmT55c6tGjR+naa6+tuf3DDz8szTvvvKXDDz+85rpYPl5LbfF+xvsafv/735dWXnnlWd6XaN/xOHfccUfN+xltbPr06bO0nV133TWfj/WJ54m/zfrM6bXMyeeff15aZJFFSmeeeWajyz3xxBP5MceNG9fo334ss9RSS5Uee+yxRh/v7LPPLq2zzjqztLNo4xMmTGj0fl/96ldn2/7UZ07va30eeeSRvP7x+Yf6tkP1/U00VaxT3LepYtmuXbvOsl2NU2wHY73iM2lpG3j//ffz7c8888wsf1/lx6z7Ps6YMaPmumjTm2yySc3leI9jvf74xz826/OOdvOf//yn5rrYFsbrfeedd2qumzhxYl6ve++9t0nvGQC0Z93bOvwEgLby17/+NVdRRRVQVMFE18O645JF1VZ0i/vnP/+Zu9/VN+ZgdHesPTbXnCavCFFVGBlOTBIRXaij6ql2VV/tKq
uokonlovKotqggi4qfEJVJ0e24tqFDhza6DlFhFeMqViKet+4MwBtttFGu+Kktqjdrvz8LLrjgHCdBierImODjv//
"text/plain": [
"<Figure size 1600x800 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Run the PRF prediction plot for test sequence 2 (row 2 of `data`).\n",
"# The title must read Sequence_ID from the same row as the plotted sequence;\n",
"# it previously used row 12, which labelled the figure with another sequence's ID.\n",
"sequence_results2, fig2 = plot_prf_prediction(\n",
"    sequence=str(data.iloc[2]['Full_Sequence']),\n",
"    window_size=3,\n",
"    short_threshold=0.2,\n",
"    long_threshold=0.2,\n",
"    ensemble_weight=0.6,\n",
"    title=f\"PRF Prediction Results for Sequence {data.iloc[2]['Sequence_ID']} (Bar Chart + Heatmap)\",\n",
"    figsize=(16, 8),\n",
"    dpi=150\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator _BinMapper from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator HistGradientBoostingClassifier from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"a:\\Code\\fscanpy-package\\FScanpy\\predictor.py:347: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.\n",
" plt.tight_layout()\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABR8AAALtCAYAAAChPBNAAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZE9JREFUeJzt3Qmc3PP9P/BPDklE3EdIhDiKKhJHRahbBaWl2qaoq0TUfZYo4iiK0rR1U1RLXXVUXSUoKuq+j7qTOhKRSuJKJJn/4/35/WftbnY3uzv7ze5mn8/HY5LZme/MfGfmM9/5zuv7/nw+nUqlUikBAAAAALSwzi19hwAAAAAAQfgIAAAAABRC+AgAAAAAFEL4CAAAAAAUQvgIAAAAABRC+AgAAAAAFEL4CAAAAAAUQvgIAAAAABRC+AgAAAAAFEL4CADzqAceeCB16tQpffzxx/nvK6+8Mi2yyCIV3WdL3Ed7cdJJJ6WBAwcWcr+9e/fO780tt9zS4vdPMU444YS03377pbZur732SjvuuGNrrwYVOPbYY9PBBx/c2qsBAC1G+AhAhxQ/0CP8iVO3bt3SyiuvnE455ZQ0Y8aMGsFd+bTkkkum7bbbLj3//PP13k/10+uvv17n49a+3wihdt555/Tmm28W/pyHDh2a/vOf/zR6+f79+6dRo0ZVdB/Ntdlmm1W9Rj169EirrLJKOuOMM1KpVErtOdR5+eWX08knn5wuvvji9P7776dtt902taRnn302ffe7301LLbVUft3iPYz3bMKECS36OO3V22+/Xefn9dFHH23wdh988EH67W9/m37xi1/U+9lffPHF0zbbbJOee+65wtY/2v8ll1ySBg0alHr16pUPBKy33nr5c/rZZ5+l9hrIN/Zxyu/fM88802KPVcR9Vuqoo45Kf/zjH+fK9wIAzA3CRwA6rAgKIgB67bXX0pFHHpl/8J599tk1lnn11VfzMnfffXeaNm1a+s53vpOmT59e5/1UP62wwgoNPnbc73vvvZduuOGG9OKLL6YddtghzZw5s86woRyIVmr++efPoVRr30djDRs2LL+W8VqNGDEinXjiiemiiy6aK49dlDfeeCP//73vfS8tvfTSqXv37s26ny+//HK2yz788MO05ZZbpsUWWyy31wg6r7jiitSnT5/06aefVrzu85J77723xud13XXXbXD5yy67LG244YZp+eWXr/ezP3r06NS1a9e0/fbbV7Rutbcv1e2+++7psMMOy+3n/vvvz4FZVGTeeuut6R//+EcqSqXboQhqY/tK4yyxxBJpyJAh6cILL2ztVQGAFiF8BKDDiuAnAqAIFH72s5+lrbbaKv3tb3+rsUwEbbHMOuusk3/0jxs3Lr3yyit13k/1U5cuXRp87LjfZZZZJm2yySY5VHvppZdytWS5MvLOO+/MgUjc98MPP5xmzZqVK/8i1IwAcMCAAenGG2+scZ933HFHrhCM6zfffPNc0TOnLtO33XZb+uY3v5mr5OIH70477VRVefjOO++kww8/vKqyq777iB/IK620Uq4gXXXVVdOf/vSnGtfHbSO8ifvu2bNn+trXvjbb61yXWLb8/uy9995prbXWSvfcc0/V9REGR4VQ37590wILLJCrweL1K4v1j1B30UUXzdd/4xvfyK9Rfc8jukCXn2dtEZxEJVKEPOXXIx4rgqKDDjoov5fxGsa6xvtU333E+oTOnTtXPVa8t1F1u+yyy+b3Oyq+7rrrrtkqs6677rq06aab5se5+uqrZ7v/f/3rX2ny5Mn5tV577bVzW4l28Jvf/KZGGP7CCy/kisuonIvK2wi0Jk6cWHV9BJV77LFHvj6e1znnnJPbQ7T/srq6jMfrGa9rWXxWfvSjH+XLIxCNwKx6myxXkv7617/OjxOVgwceeGCNYDXe42OOOSb169cvvzZRofyHP/yh0c+lPvFY1T+v8803X4PLX3vttVXvXX2f/XjfortsPO8Igs
ti/eNzGe15xRVXzGFh9edYrvKL9y3ep3h/63L99dfn9/0vf/lLOu644/LnNipb43W977778ntdXUOva3xGo2JywQUXzOu+66671qiOrWs79Oc//zlX7UZ1bfkzUP39bi1zagPxWfrWt76V22G8FhEOlw8ChPJnIz4z8ZyirVdvn6effnq+37h9uTr+6KOPzm06PrMR8FfX2Pc7qp+jXcdy8TmJz2510d6i3QHAvED4CAD/X4R29VUdxQ/D8g/BCNla+nFD9ceOEONXv/pVrl6L0C0CrauuuipX/kWlZISCP/nJT9I///nPvHwEHt///vfzD9aohtp3333zfTTk9ttvz4FgdCd/+umnc+XW+uuvn6+76aab8g/r+LFdruyqy80335wOPfTQXDkaIcDw4cNzUBhVWdVFaBE/sKNLajzebrvtliZNmtToqquHHnooh77VX/sI/caMGZPfl7jfH/7wh7kSLSpZQwQuEV49+OCDubv8mWeemQOK5oiQM9a/eqVbVML97ne/y0FqBENRoRnhUARC9d1HOaio/ppGd94I+CIsiucRFU/Rdbr8PMri/YzXOtpELFNbhEgRjMR7Ul/39Bj/c4sttshByxNPPJGDmfHjx+fnVhbBSrSrcjVdBFFPPfVUk16vCFtiHSPcivcugtF47eP1q97Oo51EEBT/R7gbYVb1QCtC0Ajb4nWO5x2BTfk9bMxzqU+5a3qEUnMKwqOdxsGBCOsa8sknn+SALgLSCLnK4jWI5xT3Ee/1pZdemgPh6uLAw1//+tf8uauv+2+0rQj3I2ysLUKzhRdeuNGva7w/p556ag4SI0SOUDjCttqqb4e+/e1v5895hPjl9htd+ltTY9pAhOlHHHFEvj62cRH8x3YvQv/w2GOP1aiGjfegLELdqFCPbci5556bRo4cmcPLOKDx73//O+2///55m/ff//63ye93bDPi4E+sc2x/DzjggBrLxLY47rf2QSQAaJdKANAB7bnnnqXvfe97+fysWbNK99xzT6l79+6lo446Kl92//33R3pTWmCBBfIpzsfpu9/97mz306VLl6rl4vSDH/yg3sct3+///ve//Pd7771X2nDDDUt9+/YtTZs2rer6W265peo2X3zxRalnz56lRx55pMZ97bPPPqVddtklnx8xYkRp9dVXr3H9McccU+OxrrjiitLCCy9cdf3gwYNLu+22W73ruvzyy5d+85vf1Lis9n3Eug8bNqzGMj/84Q9L2223XdXfsQ7HH3981d+ffPJJvuzOO++s97E33XTT0nzzzZdfz/g/lu/Ro0fpX//6V77+nXfeya/7u+++W+N2W265ZX4twpprrlk66aST6rz/2s8j3HzzzflxykaOHFkaMGBAnW2m7OCDDy5tscUWuQ01Ru3HCH369CmddtppNS775je/WTrggAPy+bfeeivfZtSoUXO8/+OOO67UtWvX0mKLLVbaZpttSmeddVbpgw8+qLr+1FNPLW299dY1bjNu3Lh8/6+++mpp6tSppW7dupWuv/76qus/+uij0vzzz1869NBDqy6L5eO5VBevZ7yu4U9/+lNp1VVXrfG6RPuO+7n77rurXs9oYzNmzKjRdoYOHZrPx/rE48Rnsy5zei51+fDDD0vnnHNO6dFHHy099thj+TPSqVOn0q233lrva/r000/n+xw7dmyDn/1YZplllik9+eSTpYacffbZpXXXXbdGO4s2PmHChAZv9/Wvf3227U9d5vS61uXxxx/P6x/vf6hrO1TXZ6KxYp3ito0Vy3bu3LnGdjVOsR2M9Yr3pJI2ENc///zzNT5f5fus/TrOnDmz6rJo0xtvvHHV3/Eax3r95S9/adL7He3mv//9b9VlsS2M5/v+++9XXTZ58uS8Xg888ECjXjMAaMu6tnb4CQCt5e9//3uuoooqoKiCia6Htccli6qt6BYXE1JE97u6xhyM7o7Vx+aKLr5zElWFkeHEJBHRhTqqnqpX9VWvsooqmVguKo
+qiwqyqPgJUZkU3Y6rGzx4cIPrEBVWMa5iJeJxa88AvNFGG+WKn+qierP667PQQgvNcRKUqI6MCT7+97//5YqjqDS
"text/plain": [
"<Figure size 1600x800 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Run the PRF prediction plot for test sequence 3 (row 3 of `data`).\n",
"# Results are bound to sequence_results3 / fig3 so that the Sequence 0 results\n",
"# (sequence_results0 / fig0, assigned in an earlier cell) are not overwritten.\n",
"sequence_results3, fig3 = plot_prf_prediction(\n",
"    sequence=str(data.iloc[3]['Full_Sequence']),\n",
"    window_size=3,\n",
"    short_threshold=0.2,\n",
"    long_threshold=0.2,\n",
"    ensemble_weight=0.6,\n",
"    title=f\"PRF Prediction Results for Sequence {data.iloc[3]['Sequence_ID']} (Bar Chart + Heatmap)\",\n",
"    figsize=(16, 8),\n",
"    dpi=150\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator _BinMapper from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"c:\\Users\\31598\\.conda\\envs\\fs\\lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator HistGradientBoostingClassifier from version 1.6.0 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
"https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
" warnings.warn(\n",
"a:\\Code\\fscanpy-package\\FScanpy\\predictor.py:347: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.\n",
" plt.tight_layout()\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABSYAAAL0CAYAAAARA+nhAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAe3VJREFUeJzt3QeYXFX9P+CTQgolFKmBSEAEDSU0QToISpEqYgSkSQkC0hQpIgEREFCMJUBEigUEQUSRJiKICEovSq+JlNBJaKnzf77n95/N7GZ3s7uZvbOz877PM8/utDtn7pl7597PnNKnVCqVEgAAAABAgfoW+WIAAAAAAEEwCQAAAAAUTjAJAAAAABROMAkAAAAAFE4wCQAAAAAUTjAJAAAAABROMAkAAAAAFE4wCQAAAAAUTjAJAAAAABROMAkAvcBtt92W+vTpk95+++18/ZJLLkmLLLLIPC2zGsuoFyeffHJac801u2W5Sy21VK6ba665purLp3b22muvdPrpp6eebvPNN09HHnlkrYvBPPjyl7+cfvjDH9a6GADQLQSTAPR6++67bw6G4jJgwIC00korpe9+97tpxowZzUK98mWJJZZI2223XXrkkUfaXE7l5emnn271dVsuNwKqXXfdNT377LPd/p5HjRqVnnzyyQ4/fvjw4Wns2LHztIx5CU7K62jQoEFp5ZVXTmeccUYqlUqpVqKud95553laxmOPPZZOOeWUNH78+PTyyy+nbbfdNlXTQw89lHbccce05JJL5vUWdRh19uqrr1b1derVhx9+mOtx9dVXT/37959rff7zn//Mj+tIQB3r/vrrr0+HH354q5/j8va+2267pRdeeCF1l2nTpqWzzjorjRw5Ms0///xp8cUXTxtttFG6+OKL0/Tp01M9bCfz8jotf5Cphu5Y5rw68cQT02mnnZbeeeedWhcFAKpOMAlAQ9hmm21yOPTUU0+lb3zjG7kl29lnn93sMU888UR+zE033ZSmTp2aPv/5z+cT/9aWU3lZYYUV2n3tWO5LL72UrrzyyvTf//437bDDDmnmzJlzPC6CuHJYOq8GDx6cA6taL6OjDjzwwLwuY10df/zx6aSTTkrnn39+qmfPPPNM/rvTTjulpZdeOg0cOLBLy2ktYHrttdfSlltumRZbbLH8eY0QNMKooUOHpvfee2+ey94bxDYWn+EID7faaqt2Hxsh1N57753XaUf89Kc/zaHjggsu2OrnOLb3P/7xj2nixInpK1/5yjy9j5b7oMrbt9566/T9738/HXTQQenOO+9Md999dzr00ENz+WJf053rdtasWV1+foS40SKbjllttdXSxz72sfSb3/ym1kUBgKoTTALQECIUinBo+eWXT1/72tdyUPGnP/2p2WMihIvHrL322rnrY4QKjz/+eKvLqbz069ev3deO5S6zzDJp0003zYHbo48+mltZllvm3HDDDWmdddbJy77jjjvyCX+0GIzAM4KVaA111VVXNVtmtNaKloVx/xZbbJGef/75uXbDvvbaa9OnPvWp3LouWlbtsssuTSFBtOo66qijmlp7tbWM8847L58gR8vTVVZZJf36179udn889xe/+EVedrTg+vjHPz7Hem5NPLZcP/vtt19aY4010s0339x0fwTF3/zmN9Oyyy6bFlhggbT++uvn9VcW5Y/Ad9FFF833r7rqqnkdtfU+olt1+X22FKH1L3/5yxwslddHvFYEQYcddliuy1iHUdaop7aWEeUJffv2bXqtqNtorbvccsvl+o7WeTfeeGPT86Ie47FXXHFF2myzzfLrXHrppa227ovWU7Gu11prrfxZic/Bj370o2ZB+X/+85/cUjMCtGjBF92PX3/99ab7I8SMQC7uj/cV3UVbdv1trRt6rM/KYCm2lS996Uv59ghLI4yt/EyWW7z94Ac/yK/zkY98JAdolaFr1PGxxx6bhg0bltdNtGy+8MILO/xeWorPQXxeIyyMz1
Z7Dj744LTHHnukDTbYIHUklIvtsVy/rX2O4z1++tOfzp+X+++/v9lz999//6ZtO7ahH//4x82WUV5X0UIuguZ4TGuihfPtt9+ebrnllrwu47O04oor5vfx73//O297ZfG5+9a3vpXrJsoXn89K55xzTm5ZGuss1v8hhxyS3n333ab7y9tQbMsjRozI9fPVr3611e2k1mIfuskmm+T1G+8lgunKsD72Weuuu25aaKGF8rqI9VVuZRyf2diOQuxL4j1FfYTYLr7+9a/nbSPui8/gBRdckJcd+6xYXnxmY3/elfqO1tXRWn/IkCH589gykI7P2+WXX96t6w4AakEwCUBDipPEtloiReBTPgGMAK7arxsqX/u4447LrZ6i1VsEchF2/epXv8otBqPVUwSG0erq73//e1MI9IUvfCGfqD744IPpgAMOyMtoz3XXXZfDwuii/sADD+QwY7311sv3XX311Tkoi8Cs3Aq0NX/4wx/SEUcckVucRkg0evTofEJ+6623NntcnGBHSPXwww/n19tzzz3Tm2++2aH1E61G//GPf+RAuHLdR8Bz11135XqJ5UZrtWi9Gi1gQwQzEWxFUBNd8M8888w5WrN1VASgUf7K1rEbbrhh+slPfpKDmd/97ne5ZWcEhtF9uq1lRAvGULlOI5SI8C8Cungf0eItumOX30dZ1Ges6/hMxGNaikAlWtdGnbTV5T1aAX7mM5/JweW9996bA9BJkybl91Z2zDHH5M9VhEt/+ctfcrBUGaR1RISLUcYIZqLuIjSNdR/rr/JzHp+TaEUafyPQirCrMtyMgPS3v/1tXs/xvqMLfLkOO/JeuirqKYZXGDNmTIceH/UW+4gIt9oTn/n4rESIXhkQxrYWrafjB4r4oeKEE07Ij6sU22d8xiKc//Of/9zq8uPzFz+wxDppab755sshY1ms77gegWV0/Y5tvTL4j/A81nvsb+Kxf/vb33KQWen999/P21WE4fG4eHxr20ktxecryhNDZkQ9RcAfQWXsPyo/r6eeemrujh+Be4SR5fAxgszf//73zVrQVwaJsW7iR51omRohZfzIFfuieN+x3Xzuc5/LgXmsq87Wd3zmY/uLbSD2ybEfrRT763jd2M8BQK9SAoBebp999inttNNO+f9Zs2aVbr755tLAgQNL3/zmN/Ntt956ayQ7pQUWWCBf4v+47LjjjnMsp1+/fk2Pi8sXv/jFNl+3vNy33norX3/ppZdKG264YWnZZZctTZ06ten+a665puk5H374YWn++ecv3Xnnnc2Wtf/++5d23333/P/xxx9fGjFiRLP7jz322GavdfHFF5cWXnjhpvs32GCD0p577tlmWZdffvnSj370o2a3tVxGlP3AAw9s9pjddtuttN122zVdjzKceOKJTdfffffdfNsNN9zQ5mtvttlmpfnmmy+vz/gbjx80aFDpn//8Z77/hRdeyOv9xRdfbPa8LbfcMq+LsPrqq5dOPvnkVpff8n2EP/zhD/l1ysaMGVMaOXJkq5+Zsq9//eulz3zmM/kz1BEtXyMMHTq0dNpppzW77VOf+lTpkEMOyf8/99xz+Tljx46d6/JPOOGEUv/+/UuLLbZYaZtttimdddZZpVdeeaXp/lNPPbX0uc99rtlzJk6cmJf/xBNPlKZMmVIaMGBA6Xe/+13T/W+88UZp8ODBpSOOOKLptnh8vJdKsT5jvYZf//rXpVVWWaXZeonPdyznpptualqf8RmbMWNGs8/OqFGj8v9Rnnid2DZbM7f3Mjet1Wd48sknS0suuWTTMlp+DloT6yI+jy0/B5Wf49iGo2wrr7xyrtP2HHrooaVdd921WVmXWmqpvA7bE+v38MMPb/cx5XJtvPHGc3zmYp/RliuvvLL0kY98pOl61HW8nwcffLBD67UjZSp/fjqitX1vXGI/Ubnfi/3kQQcd1Oy5//jHP0p9+/YtffDBB60u+5577snLiO2htf12W+sxPstRhr322qvptpdffj
k/96677upUfcc2/N577zXddt5555UWXHDB0syZM5tue+ihh/Kyn3/++Q6tMwCoF1pMAtAQotVRtL6KrrHRHTQmCWn
"text/plain": [
"<Figure size 1600x800 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sequence_results4, fig4 = plot_prf_prediction(\n",
" sequence=str(data.iloc[4]['Full_Sequence']),\n",
" window_size=3,\n",
" short_threshold=0.2,\n",
" long_threshold=0.2,\n",
" ensemble_weight=0.4,\n",
" title=f\"PRF Prediction Results for Sequence {data.iloc[4]['Sequence_ID']} (Bar Chart + Heatmap)\",\n",
" figsize=(16, 8),\n",
" dpi=150\n",
")"
]
},
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 结果解读Sequence4\n",
    "### 真实情况\n",
    "该序列的核糖体程序性移码发生于第216nt处。\n",
    "### 图上信息\n",
    "我们的算法并不能总是解决问题。在该处，我们可以看到三个显著的高峰，其中80nt左右和216nt左右的高峰并无明显差距。我们需要通过湿实验验证位点的真实性。"
   ]
  }
],
"metadata": {
"kernelspec": {
"display_name": "fs",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}