Merge branch 'master' of https://github.com/RickGelhausen/RiboReport

RickGelhausen · Oct 12, 2021 · 76a8166 · 76a8166
2 parents 2f885b3 + f1d9373
commit 76a8166
Show file tree

Hide file tree

Showing 4 changed files with 296 additions and 188 deletions.
diff --git a/README.md b/README.md
@@ -147,15 +147,48 @@ All **file path** statements have to be replaced by the path to your benchmark f
 ####  Excluding a prediction tool:
 
 Currently, to remove a tool from the analysis simply remove the line from the input and shell part of the mergeConditions rule.
-You can find it in under `rules/postprocessing.smk`:
+You can find it under `rules/postprocessing.smk`.
+
+To remove IRSOM, change:
+~~~~
+rule mergeConditions:
+    input:
+        ribotish="tracks/{condition}.ribotish.gff",
+        reparation="tracks/{condition}.reparation.gff",
+        deepribo="tracks/{condition}.deepribo.gff",
+        irsom="tracks/{condition}.irsom.gff",
+        spectre="tracks/{condition}.spectre.gff",
+        smorfer="tracks/{condition}.smorfer.gff",
+        ribotricer="tracks/{condition}.ribotricer.gff",
+        price="tracks/{condition}.price.gff"
+    output:
+        "tracks/{condition, [a-zA-Z]+}.merged.gff"
+    conda:
+        "../envs/bedtools.yaml"
+    threads: 1
+    shell:
+        """
+        mkdir -p tracks;
+        cat {input.spectre} > {output}.unsorted;
+        cat {input.ribotish} >> {output}.unsorted;
+        cat {input.reparation} >> {output}.unsorted;
+        cat {input.deepribo} >> {output}.unsorted;
+        cat {input.irsom} >> {output}.unsorted;
+        cat {input.smorfer} >> {output}.unsorted;
+        cat {input.ribotricer} >> {output}.unsorted;
+        cat {input.price} >> {output}.unsorted;
+        bedtools sort -i {output}.unsorted > {output};
+        """
+~~~~
+
+to
 
 ~~~~
 rule mergeConditions:
     input:
         ribotish="tracks/{condition}.ribotish.gff",
         reparation="tracks/{condition}.reparation.gff",
         deepribo="tracks/{condition}.deepribo.gff",
-        ~~irsom="tracks/{condition}.irsom.gff",~~
         spectre="tracks/{condition}.spectre.gff",
         smorfer="tracks/{condition}.smorfer.gff",
         ribotricer="tracks/{condition}.ribotricer.gff",
@@ -172,7 +205,6 @@ rule mergeConditions:
         cat {input.ribotish} >> {output}.unsorted;
         cat {input.reparation} >> {output}.unsorted;
         cat {input.deepribo} >> {output}.unsorted;
-        ~~cat {input.irsom} >> {output}.unsorted;~~
         cat {input.smorfer} >> {output}.unsorted;
         cat {input.ribotricer} >> {output}.unsorted;
         cat {input.price} >> {output}.unsorted;

diff --git a/evaluation/prc_plotting.py b/evaluation/prc_plotting.py
@@ -22,34 +22,24 @@ def compute_prc(labels, scores):
 
 def get_list(saved_dir, tool):
     dir_score_list = saved_dir + '/' + tool + '_score_list'
-    #dir_lable_list = saved_dir + '/' + tool + '_label_list'
     if not os.path.isfile(dir_score_list):
         sys.exit('not existing file (PRC): %s' % (dir_score_list))
 
     with open(dir_score_list, 'rb') as handle:
         score_overlap_label_list = pickle.load(handle)
-    #with open(dir_lable_list, 'rb') as handle:
-        #label_list = pickle.load(handle)
-    #print(len(label_list))
-    #print(len(score_overlap_label_list))
 
     return score_overlap_label_list
 
 def get_ranks(score_overlap_label_list, tool):
     #first_indexes = len(score_list)
     rank_list = list(range(len(score_overlap_label_list)))
+    #print('score list:\n',len(score_overlap_label_list))
+    #print('rank list:\n',rank_list)
     list.reverse(rank_list)
 
     #diff = len(score_overlap_label_list)-(len(set(score_overlap_label_list)))
-
     sorted_list = sorted(score_overlap_label_list, key=itemgetter(0,1), reverse=True)
 
-    #print ('#####Ranked List 1:#############')
-    #print (count)
-    #print ('#####END#############')
-    #print ('#####sorted List#############')
-    #print (sorted_list)
-    #print ('#####END#############')
 
     last_score = 0
     last_overlap = 0
@@ -65,9 +55,9 @@ def get_ranks(score_overlap_label_list, tool):
         if trippel[2] == '1':
             count_pos += 1
             #if tool == 'deepribo' and trippel[0]<0:
-
         elif trippel[2] == '0':
             count_neg += 1
+
         # check if the current score the same as the last score
         if trippel[0] == last_score:
             #check if overlaps are the same
@@ -96,14 +86,15 @@ def comput_roc_for_tool(saved_dir, tool):
 
     score_overlap_label_list = get_list(saved_dir, tool)
     #print('score list of %s'%(tool))
+    #print(score_overlap_label_list)
     sort_list = sorted(score_overlap_label_list, key=itemgetter(0), reverse=True)
-    #print(sort_list[0:100])
+    #print(sort_list)
 
     score_list, label_list, base = get_ranks(score_overlap_label_list, tool)
     #print('score ranked list of %s'%(tool))
-    #print(score_list[0:100])
+    #print(score_list)
     #print('lable ranked list of %s'%(tool))
-    #print(label_list[0:100])
+    #print(label_list)
 
     precision, recall, thresholds, auc_prc = compute_prc(label_list, score_list)
 
@@ -132,48 +123,31 @@ def main():
 
 
     overlap = coverage_percent.replace(".", "")
-    # 'deepribo', 'ribotish', 'reparation', 'irsom'
-
+    # 'deepribo', 'ribotish', 'reparation', 'irsom', 'spectre'
 
+    #print('tool: deepribo\n')
     precision_deepribo, recall_deepribo, base, auc_prc_deepribo = comput_roc_for_tool(experiment_dict_path, 'deepribo')
+    #print('tool: ribotish\n' )
     precision_ribotish, recall_ribotish, base, auc_prc_ribotish = comput_roc_for_tool(experiment_dict_path, 'ribotish',)
+    #print('tool: reparation\n' )
     precision_reparation, recall_reparation, base, auc_prc_reparation = comput_roc_for_tool(experiment_dict_path, 'reparation')
+    #print('tool: irsom\n' )
     precision_irsom, recall_irsom, base, auc_prc_irsom = comput_roc_for_tool(experiment_dict_path, 'irsom')
+    #print('tool: spectre\n' )
+    precision_spectre, recall_spectre, base, auc_prc_spectre = comput_roc_for_tool(experiment_dict_path, 'spectre')
+    #print('tool: price\n' )
+    precision_price, recall_price, base, auc_prc_price = comput_roc_for_tool(experiment_dict_path, 'price')
+    #print('tool: ribotricer\n' )
+    precision_ribotricer, recall_ribotricer, base, auc_prc_ribotricer = comput_roc_for_tool(experiment_dict_path, 'ribotricer')
+    #print('tool: smorfer\n' )
+    precision_smorfer, recall_smorfer, base, auc_prc_smorfer = comput_roc_for_tool(experiment_dict_path, 'smorfer')
+    #print('deepribo AUC: %f' % (auc_prc_deepribo))
+    #print('ribotish AUC: %f' %  (auc_prc_ribotish))
+    #print('reparation AUC: %f' %(auc_prc_reparation))
+    #print('irsom AUC: %f' %  (auc_prc_irsom))
+    #print('spectre AUC: %f' %  (auc_prc_spectre))
+
 
-    #print('deepribo AUC: %f' % (auc_deepribo))
-    #print('ribotish AUC: %f' %  (auc_ribotish))
-    #print('reparation AUC: %f' %(auc_reparation))
-    #print('irsom AUC: %f' %  (auc_irsom))
-
-
-#     label_deepribo = 'deepribo AUC: %f' % (auc_deepribo)
-#     label_ribotish = ('ribotish AUC: %f' %  (auc_ribotish))
-#     label_reparation = ('reparation AUC: %f' %(auc_reparation))
-#     label_irsom = ('irsom AUC: %f' %  (auc_irsom))
-#
-#     print('++++\nfpr deepribo:')
-#     print(fpr_deepribo)
-#     print('+++++++++++++++++++++')
-#     print('++++\ntpr deepribo:')
-#     print(tpr_deepribo)
-#     print('+++++++++++++++++++++')
-#     plt.plot(fpr_deepribo, tpr_deepribo, color='orange', label=label_deepribo)
-#     plt.plot(fpr_ribotish, tpr_ribotish, color='blue', label=label_ribotish)
-#     plt.plot(fpr_reparation, tpr_reparation, color='red', label=label_reparation)
-#     plt.plot(fpr_irsom, tpr_irsom, color='green', label=label_irsom)
-#
-#     plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
-#     plt.xlabel('False Positive Rate')
-#     plt.ylabel('True Positive Rate')
-#     plt.title('Receiver Operating Characteristic (ROC) Curve ' + experiment)
-#     plt.legend()
-# #plt.show()
-#     save_roc_diag = plot_dir + experiment + '_roc.pdf'
-#     plt.savefig(save_roc_diag, format='pdf', dpi=300, bbox_inches='tight')
-
-    #
-
-    # plot title Escherichia
     if species == 'EC':
         title = 'E. coli'
     elif species == 'LM':
@@ -182,22 +156,38 @@ def main():
         title = 'P. aeruginosa'
     elif species == 'ST':
         title = 'S. Typhimurium'
+    elif species == 'HV':
+        title = 'Haloferax volcanii'
     else:
         print('Error: unknown species label')
         title = 'unknown species label'
 
 
+    # generate the legend information
     label_deepribo = 'DeepRibo AUC: %.2f' % (auc_prc_deepribo)
     label_ribotish = ('Ribo-TISH AUC: %.2f' %  (auc_prc_ribotish))
     label_reparation = ('Reparation AUC: %.2f' %(auc_prc_reparation))
     label_irsom = ('IRSOM AUC: %.2f' %  (auc_prc_irsom))
-
-    plt.plot(recall_deepribo, precision_deepribo, color='green', label=label_deepribo)
-    plt.plot(recall_reparation, precision_reparation, color='blue', label=label_reparation)
-    plt.plot(recall_ribotish, precision_ribotish, color='yellow', label=label_ribotish)
-    plt.plot(recall_irsom, precision_irsom, color='red', label=label_irsom)
-
-    plt.plot([0, 1], [base, base], color='grey', linestyle='--')
+    label_spectre = ('SPECtre AUC: %.2f' %  (auc_prc_spectre))
+    label_price = ('price AUC: %.2f' %  (auc_prc_price))
+    label_ribotricer = ('ribotricer AUC: %.2f' %  (auc_prc_ribotricer))
+    label_smorfer = ('smorfer AUC: %.2f' %  (auc_prc_smorfer))
+
+    # choosing colours
+
+    # plot the PRC into one plot
+    plt.plot(recall_deepribo, precision_deepribo, color='#009E73', label=label_deepribo) # green
+    plt.plot(recall_reparation, precision_reparation, color='#0072B2', label=label_reparation) #blue
+    plt.plot(recall_ribotish, precision_ribotish, color='#F0E442', label=label_ribotish) #yellow
+    plt.plot(recall_irsom, precision_irsom, color='#D55E00', label=label_irsom) #red
+    plt.plot(recall_spectre, precision_spectre, color='#E69F00', label=label_spectre) #orange
+    plt.plot(recall_price, precision_price, color='#000000', label=label_price) #black
+    plt.plot(recall_ribotricer, precision_ribotricer, color='#56B4E9', label=label_ribotricer) #sky blue
+    plt.plot(recall_smorfer, precision_smorfer, color='#CC79A7', label=label_smorfer) #reddish purple
+
+    # plot the baseline
+    plt.plot([0, 1], [base, base], color='#BBBBBB', linestyle='--') #grey #808080
+    # set axis labels and title
     plt.xlabel('Recall', fontsize=14)
     plt.ylabel('Precision', fontsize=14)
     plt.xticks(fontsize=12, rotation=30)
@@ -206,10 +196,12 @@ def main():
     plt.title(title)
 
     #fontsize=20
-    plt.legend(prop={'size': 14})
+    plt.legend(prop={'size': 12})
     plt.ylim(-0.02, 1.02)
     #plt.show()
-    save_prc_diag = plot_dir + species + '_prc_' + overlap + '_' + experiment+'_.pdf'
+
+    # save plot
+    save_prc_diag = plot_dir + species + '_prc_' + overlap + '_' + experiment + '_.pdf'
     plt.savefig(save_prc_diag, format='pdf', dpi=300, bbox_inches='tight')
 
 if __name__ == '__main__':

diff --git a/evaluation/print_calls.py b/evaluation/print_calls.py
@@ -26,8 +26,8 @@ def main():
                         help="path to the negative reference data.")
     parser.add_argument("-s", "--python_script_dir", action="store",
                         dest="python_script_dir", required=True,
-                        default="/home/muellert/Dokumente/benchmark_ribo_seq/ribo_benchmark/evaluation/",
-                        help="path to your '/ribo_benchmark/evaluation/'' forder where all evaluation scripts are stored.")
+                        default="/home/teresa/Dokumente/benchmark_ribo_seq/RiboReport/evaluation/",
+                        help="path to your '/RiboReport/evaluation/' forder where all evaluation scripts are stored.")
 
 
 
@@ -52,21 +52,28 @@ def main():
 
     shell_script =  result_dir + '/evaluation_calls.sh'
 
-    python_script_dir = '/home/muellert/Dokumente/benchmark_ribo_seq/ribo_benchmark/evaluation/'
+    python_script_dir = '/home/teresa/Dokumente/benchmark_ribo_seq/RiboReport/evaluation/'
 
 
     f= open(shell_script,"w+")
     f.write("#!/usr/bin/env bash\n\ntrap ctrl_c INT\n\nfunction ctrl_c() {\necho \"** Trapped CTRL-C\"\nexit\n}\n\n")
     #f.write('test')
     # what to investigat test:
-    #dataset_list = ['bm01', 'bm06']
-    #experiment_list = ['CDS_labels']
+    #dataset_list = ['bm_01', 'bm_03', 'bm_06', 'bm_12']
+    #dataset_list = ['bm_14']
+    #experiment_list = ['smallORFs_labels']
+
+    #experiment_list = ['CDS_labels', 'smallORFs_labels', 'operons_intersect_labels', 'operons_complement_labels']
     #overlap_list = ['0.01', '0.5']
 
     #bar_plot_call_dict = {}
     # full data:
-    dataset_list = ['bm_01', 'bm_03', 'bm_06', 'bm_12']
-    experiment_list = ['CDS_labels', 'smallORFs_labels', 'operons_intersect_labels', 'operons_complement_labels', 'ncRNAs_labels', 'pseudogenes_labels']
+    #dataset_list = ['bm_01', 'bm_03', 'bm_06', 'bm_12']
+    dataset_list = ['bm_01']
+    #dataset_list = ['bm_06', 'bm_12']
+    #dataset_list = ['bm_03']
+
+    experiment_list = ['CDS_labels', 'smallORFs_labels', 'operons_intersect_labels', 'operons_complement_labels']
     overlap_list =  ['0.01', '0.9',  '0.7']
     for dataset in dataset_list:
         bar_plot_call_dict = {}
@@ -139,7 +146,6 @@ def main():
         #print(experiment)
         experiment_dict[experiment]=[]
         for dataset in dataset_list:
-            #print(dataset)
             if dataset == 'bm_01':
                 lable = 'EC'
             elif dataset == 'bm_03':
@@ -148,8 +154,10 @@ def main():
                 lable = 'PA'
             elif dataset == 'bm_12':
                 lable = 'ST'
+            elif dataset == 'bm_14':
+                lable = 'HV'
             for overlap in overlap_list:
-                #print(overlap)
+                #print(lable)
                 stat_path = result_dir + '/' + dataset + '/' + experiment + '/' + overlap+ '/df_stat.csv'
                 hue_symbel = lable + '_' + overlap
                 experiment_dict[experiment].append((stat_path,hue_symbel, dataset, overlap))
@@ -188,7 +196,7 @@ def main():
     call_script = ('python3  %s/experiment_barplots.py '%(python_script_dir))
     call_exp_barplots = ('%s -e %s -o %s '%(call_script, save_experiment_dict, plot_dir))
     #print(call_exp_barplots)
-    f.write('\n#######################\n%s\n'%(call_exp_barplots))
+    # f.write('\n#######################\n%s\n'%(call_exp_barplots))