From e041be35ba4045f319ac72d3e8badbfab1fb9ffc Mon Sep 17 00:00:00 2001 From: wdecoster Date: Wed, 6 Sep 2023 09:42:35 +0200 Subject: [PATCH] fix histogram dotsize for very small datasets --- src/histograms.rs | 7 ++++--- src/main.rs | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/histograms.rs b/src/histograms.rs index 6f6407e..8917b08 100644 --- a/src/histograms.rs +++ b/src/histograms.rs @@ -1,4 +1,5 @@ use itertools::Itertools; +use std::cmp::max; // the histograms below are fully defined by the step size and the maximum value // the step size is the size of each bin @@ -25,7 +26,7 @@ pub fn make_histogram_lengths(array: &[u64]) { counts[step_count] = array.len() - counts.iter().sum::<usize>(); // the dotsize variable determines how many reads are represented by a single dot // I either have to set this dynamically or experiment with it further - let dotsize = array.len() / 500; + let dotsize = max(array.len() / 500, 1); println!("\n\n# Histogram for read lengths:"); // print every entry in the vector, except the last one which is done separately for (index, entry) in counts.iter().dropping_back(1).enumerate() { @@ -60,7 +61,7 @@ pub fn make_histogram_identities(array: &[f64]) { counts[step_count] = array.len() - counts.iter().sum::<usize>(); // the dotsize variable determines how many reads are represented by a single dot // I either have to set this dynamically or experiment with it further - let dotsize = array.len() / 500; + let dotsize = max(array.len() / 500, 1); println!("\n\n# Histogram for Phred-scaled accuracies:"); // print every entry in the vector, except the last one which is done separately for (index, entry) in counts.iter().dropping_back(1).enumerate() { @@ -100,7 +101,7 @@ pub fn make_histogram_phaseblocks(array: &[i64]) { counts[step_count] = array.len() - counts.iter().sum::<usize>(); // the dotsize variable determines how many reads are represented by a single dot // I either have to set this dynamically or experiment with it further 
- let dotsize = array.len() / 500; + let dotsize = max(array.len() / 500, 1); println!("\n\n# Histogram for phaseblock lengths:"); // print every entry in the vector, except the last one which is done separately for (index, entry) in counts.iter().dropping_back(1).enumerate() { diff --git a/src/main.rs b/src/main.rs index 58c9ef7..c6289cc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -203,9 +203,9 @@ fn extract_ubam() { hist: true, checksum: false, arrow: Some("test.feather".to_string()), - karyotype: true, - phased: true, - spliced: true, + karyotype: false, + phased: false, + spliced: false, ubam: true, }; let metrics = extract_from_bam::extract(&args);