diff --git a/FScanpy.egg-info/PKG-INFO b/FScanpy.egg-info/PKG-INFO new file mode 100644 index 0000000..31947af --- /dev/null +++ b/FScanpy.egg-info/PKG-INFO @@ -0,0 +1,14 @@ +Metadata-Version: 2.4 +Name: FScanpy +Version: 1.0.0 +Summary: PRF prediction tool +Author: FScanpy Developer +Author-email: FScanpy Developer +Requires-Python: >=3.7 +Requires-Dist: numpy +Requires-Dist: pandas +Requires-Dist: tensorflow +Requires-Dist: scikit-learn +Requires-Dist: wrapt>=1.10.11 +Dynamic: author +Dynamic: requires-python diff --git a/FScanpy.egg-info/SOURCES.txt b/FScanpy.egg-info/SOURCES.txt new file mode 100644 index 0000000..1ca5e1d --- /dev/null +++ b/FScanpy.egg-info/SOURCES.txt @@ -0,0 +1,14 @@ +README.md +pyproject.toml +setup.py +FScanpy/__init__.py +FScanpy/predictor.py +FScanpy/utils.py +FScanpy.egg-info/PKG-INFO +FScanpy.egg-info/SOURCES.txt +FScanpy.egg-info/dependency_links.txt +FScanpy.egg-info/requires.txt +FScanpy.egg-info/top_level.txt +FScanpy/features/__init__.py +FScanpy/features/cnn_input.py +FScanpy/features/sequence.py \ No newline at end of file diff --git a/FScanpy.egg-info/dependency_links.txt b/FScanpy.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/FScanpy.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/FScanpy.egg-info/requires.txt b/FScanpy.egg-info/requires.txt new file mode 100644 index 0000000..8351bce --- /dev/null +++ b/FScanpy.egg-info/requires.txt @@ -0,0 +1,5 @@ +numpy +pandas +tensorflow +scikit-learn +wrapt>=1.10.11 diff --git a/FScanpy.egg-info/top_level.txt b/FScanpy.egg-info/top_level.txt new file mode 100644 index 0000000..729703a --- /dev/null +++ b/FScanpy.egg-info/top_level.txt @@ -0,0 +1 @@ +FScanpy diff --git a/FScanpy/predictor.py b/FScanpy/predictor.py index 3e3c199..b57542e 100644 --- a/FScanpy/predictor.py +++ b/FScanpy/predictor.py @@ -26,11 +26,8 @@ class PRFPredictor: try: # 加载模型 self.gb_model = self._load_pickle(os.path.join(model_dir, 'GradientBoosting_all.pkl')) - self.cnn_model = self._load_pickle(os.path.join(model_dir, 'BiLSTM-CNN_all.pkl')) - - self.voting_model = self._load_pickle(os.path.join(model_dir, 'Voting_all.pkl')) - + # 初始化特征提取器和CNN处理器,使用与训练时相同的序列长度 self.gb_seq_length = 33 # HistGradientBoosting使用的序列长度 self.cnn_seq_length = 399 # BiLSTM-CNN使用的序列长度 @@ -145,14 +142,12 @@ class PRFPredictor: # 出错时设置概率为0 cnn_prob = 0.0 - # 投票模型预测 + # 使用4:6的加权平均替代投票模型 try: - # 确保投票模型输入是二维数组 (1, n_features) - voting_input = np.array([[gb_prob, cnn_prob]]) - voting_prob = self.voting_model.predict_proba(voting_input)[0][1] + voting_prob = 0.4 * gb_prob + 0.6 * cnn_prob except Exception as e: - print(f"投票模型预测时出错: {str(e)}") - # 出错时使用两个模型的平均值 + print(f"计算加权平均时出错: {str(e)}") + # 出错时使用简单平均 voting_prob = (gb_prob + cnn_prob) / 2 return { @@ -373,14 +368,12 @@ class PRFPredictor: # 出错时设置概率为0 cnn_prob = 0.0 - # 投票模型预测 + # 使用4:6的加权平均替代投票模型 try: - # 确保投票模型输入是二维数组 (1, n_features) - voting_input = np.array([[gb_prob, cnn_prob]]) - voting_prob = self.voting_model.predict_proba(voting_input)[0][1] + voting_prob = 0.4 * gb_prob + 0.6 * cnn_prob except Exception as e: - print(f"投票模型预测序列 {i+1} 时出错: {str(e)}") - # 出错时使用两个模型的平均值 + print(f"计算加权平均时出错: {str(e)}") + # 出错时使用简单平均 voting_prob = (gb_prob + cnn_prob) / 2 results.append({ diff --git a/FScanpy/pretrained/BiLSTM-CNN_tokenizer.pickle b/FScanpy/pretrained/BiLSTM-CNN_tokenizer.pickle deleted file mode 100644 index 4e2ecce..0000000 Binary files a/FScanpy/pretrained/BiLSTM-CNN_tokenizer.pickle and /dev/null differ diff --git a/FScanpy/pretrained/Voting_all.pkl b/FScanpy/pretrained/Voting_all.pkl deleted file mode 100644 index fae86b6..0000000 Binary files a/FScanpy/pretrained/Voting_all.pkl and /dev/null differ