Skip to content

Commit

Permalink
Merge pull request #65 from BackofenLab/update_eval
Browse files Browse the repository at this point in the history
Update eval
  • Loading branch information
teresa-m authored Jan 24, 2024
2 parents 1a7c743 + e0774e1 commit 4369dde
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 574 deletions.
525 changes: 5 additions & 520 deletions README.md

Large diffs are not rendered by default.

57 changes: 53 additions & 4 deletions bin/cherri
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import sys
import rrieval.lib as rl
from scipy.sparse import csr_matrix, vstack, hstack, load_npz, save_npz
from ubergauss.tools import loadfile, dumpfile
import csv
import sys
#import logging
## DeBug, INFO
# import subprocess
Expand Down Expand Up @@ -96,7 +98,7 @@ def setup_argument_parser():
# Optional arguments for evaluate.
p_ex.add_argument("-i2", "--occupied_regions",
default="non",
help= "Path to occupied regions python object file containing a dictionary")
help= "Path to occupied regions python object. This file should be used if there are regions which that should be blocked from interactions. One can create this file with the find_occupied_regions.py")
p_ex.add_argument("-c", "--context",
nargs='?',
type=int,
Expand Down Expand Up @@ -231,6 +233,53 @@ def setup_argument_parser():

################################################################################

def test_eval_input(input):
    """Validate the RRI table given to the evaluation mode; exit on failure.

    Two properties of the CSV file are checked:

    1. The header row contains all required coordinate columns.
    2. For both interaction partners, no row has a stop position that is
       smaller than its start position.

    A message is printed for every problem found. The position checks are
    only run when the header is valid, because they index the table by the
    required column names and would otherwise fail with a KeyError instead
    of the intended error message.

    Args:
        input: Path to the RRI table in CSV format.

    Raises:
        SystemExit: With exit code 1 if at least one check fails.
    """
    header = ['chrom1','start1','stop1','strand1','chrom2','start2','stop2','strand2']
    issue = False

    # check header line
    if not check_header_line(input, header):
        print(f'Input ERROR:\nplease proved the headerline:\n{header}\n')
        issue = True
    else:
        print('You provided the corret input header line')

        # check if order of the provided start stop positions are correct
        # (safe only here: the required columns are known to exist)
        if not check_positive_difference(input, 'stop1', 'start1'):
            issue = True
            print('Input ERROR:\nPlease provied a start1 smaller then stop1')
        if not check_positive_difference(input, 'stop2', 'start2'):
            issue = True
            # the original message used the invalid escape '\P' instead of '\n'
            print('Input ERROR:\nPlease provied a start2 smaller then stop2')

    if issue:
        sys.exit(1)


def check_header_line(input, required_headers):
    """Tell whether the first row of a CSV file holds all required columns.

    Args:
        input: Path to the CSV file to inspect.
        required_headers: Iterable of column names that must be present.

    Returns:
        True if every name in ``required_headers`` occurs in the first row
        of the file, False if one is missing or the first row is empty or
        absent.
    """
    with open(input, newline='') as handle:
        # only the first row is needed; None signals a file without rows
        first_row = next(csv.reader(handle), None)

    if not first_row:
        return False

    for column in required_headers:
        if column not in first_row:
            return False
    return True

def check_positive_difference(csv_file_path, pos_end, pos_start):
    """Check that ``pos_end - pos_start`` is never negative in a CSV table.

    Args:
        csv_file_path: Path to the CSV file, read with ``pd.read_csv``.
        pos_end: Name of the column holding the end positions.
        pos_start: Name of the column holding the start positions.

    Returns:
        True if no row has a negative difference, False if at least one
        row does.
    """
    table = pd.read_csv(csv_file_path)

    # row-wise span between end and start position
    spans = table[pos_end] - table[pos_start]

    # a single negative span is enough to fail the check
    return not (spans < 0).any()

def read_RRI_table(file):
"""
Expand Down Expand Up @@ -296,9 +345,6 @@ def main_eval(args):
Output files:
├── date_Cherri_evaluation_mode
| ├── evaluate_RRIs.csv
| ├── date_occ_out
| ├── occupied_regions.obj
| ├── rri_occupied_regions_overlapTH_0.3_scoreTH_1.csv
| ├── positive_instance
| ├── {name}_context_{context}pos.csv
| ├── {name}_context_{context}_block_ends_0_RRI_dataset.csv
Expand Down Expand Up @@ -342,6 +388,9 @@ def main_eval(args):
if not os.path.exists(model_params):
print('Error: please set the path to your feature file of your model')

# test input data: RRIs_table
test_eval_input(RRIs_table)


# define output folder
timestr = time.strftime("%Y%m%d")
Expand Down
6 changes: 5 additions & 1 deletion bin/generate_pos_neg_with_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def decode_Intarna_output(out):

# out, err = process.communicate()
out = out.decode('utf-8').strip().split('\n')
# print(f'IntaRNAout:\n{out}')
#print(f'IntaRNAout:\n{out}')
for idx, line in enumerate(out):
#print(idx)
line = line.strip().split(';')
Expand Down Expand Up @@ -478,6 +478,9 @@ def decode_IntaRNA_call(call, lost_inst, row, list_rows_add, df_data, no_sub_opt
"""
# print(f'####\nIntRNA call: \n{call}####\n')
out = rl.call_script(call,reprot_stdout=True)
#print(out.decode('utf-8').strip().split('\n'))
if 'ERROR' in out.decode('utf-8'):
print(f'\n####\nIntaRNA is complining:\n{out}\nFor call:\n{call}\n####')
#print(call)
df = decode_Intarna_output(out)
#print(df)
Expand Down Expand Up @@ -526,6 +529,7 @@ def get_context_added(input_rris, output_path, genome_file, context,

# adding context by including infors into the df
df_RRIs = extention_df(df_RRIs)
print(f'output dataframe:\ndf_RRIs')
df_target = rl.get_context('target', df_RRIs, output_path,
genome_file, context, chrom_len_file)
#print(df_target)
Expand Down
6 changes: 3 additions & 3 deletions source/docs/documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ Here we search for trusted RRIs, so RRIs which can be found in all replicates. I
| ID | name | description |
|---|---|-----|
| `-i` | `--input_path` | Path to folder storing input data (containing all replicates) |
|`-r`| `--list_of_replicats` | List of file names for all replicates |
|`-r`| `--list_of_replicates` | List of file names for all replicates |
| `-o` | `--overlap_th` | Overlap threshold to find trusted RRIs |
| `-d` | `--output_path` | Path where output folder should be stored |
|`-n` | `--experiment_name` | Name of the data source of positive trusted RRIs |
| `-s` | `--score_th` | Threshold for EM score from ChiRA |
| `-fh` | `--filter_hybrid` | Filter the data for hyprids alrady detected by ChiRA |
| `-fh` | `--filter_hybrid` | Filter the data for hybrids already detected by ChiRA |

#### Output of find_trusted_RRI.py
The filtered set of trusted RRI sites in tabular format.
Expand Down Expand Up @@ -91,7 +91,7 @@ To generate the current features IntaRNA parameters by default are set to:
| intLoopMax | 3 | number of unpaired bases between inter molecular base pairs |


IntaRNA parameters can be changed by specifying a custom IntaRNA parameter file.
IntaRNA parameters can be changed by specifying a custom IntaRNA parameter file. CheRRI's default parameter set can be found [here](https://github.com/BackofenLab/Cherri/tree/master/rrieval/IntaRNA_param).



Expand Down
Loading

0 comments on commit 4369dde

Please sign in to comment.