""" RNA Structure Prediction and MFE Calculation """ import csv import RNA import pandas as pd from utils.config import BaseConfig def predict_rna_structure_multiple(csv_file, predictions): """ Predict RNA structure for multiple regions and write results back to CSV file Args: csv_file: CSV file path predictions: List of prediction configurations, each containing: - start: Starting position (1-based) - length: Prediction length - column_name: Column name for results in CSV """ # Read CSV file df = pd.read_csv(csv_file) # Process each sequence for index, row in df.iterrows(): sequence = row['full_seq'] # Process each prediction configuration for pred in predictions: start = pred['start'] - 1 # Convert to 0-based index length = pred['length'] column_name = pred['column_name'] try: # Extract subsequence from specified region sub_seq = sequence[start:start + length] # Create fold_compound object and predict fc = RNA.fold_compound(sub_seq) (ss, mfe) = fc.mfe() # Store result in DataFrame df.at[index, column_name] = mfe except Exception as e: df.at[index, column_name] = None # Save results to original CSV file df.to_csv(csv_file, index=False) def predict_rna_mfe_batch(sequences, start_pos=198, length=40): """ Batch prediction of RNA MFE for multiple sequences Args: sequences: List of RNA sequences start_pos: Starting position (1-based) length: Length of subsequence for MFE calculation Returns: List of MFE values """ mfe_values = [] start_idx = start_pos - 1 # Convert to 0-based for sequence in sequences: try: # Extract subsequence sub_seq = sequence[start_idx:start_idx + length] # Calculate MFE fc = RNA.fold_compound(sub_seq) (ss, mfe) = fc.mfe() mfe_values.append(mfe) except Exception as e: mfe_values.append(0.0) # Default value on error return mfe_values def calculate_mfe_features(data_file, output_file=None): """ Calculate MFE features for sequences in a data file Args: data_file: Input CSV file path output_file: Output CSV file path (optional, defaults to input file) """ if output_file is None: output_file = data_file # Standard MFE prediction configurations predictions = [ { 'start': 198, 'length': 40, 'column_name': 'mfe_40bp' }, { 'start': 198, 'length': 120, 'column_name': 'mfe_120bp' } ] predict_rna_structure_multiple(data_file, predictions) if __name__ == "__main__": # Example usage with virtual paths data_file = BaseConfig.VALIDATION_DATA calculate_mfe_features(data_file)