Skip to content

Commit

Permalink
fix histogram dotsize for very small datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
wdecoster committed Sep 6, 2023
1 parent 435ff1f commit e041be3
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
7 changes: 4 additions & 3 deletions src/histograms.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use itertools::Itertools;
+ use std::cmp::max;

// the histograms below are fully defined by the step size and the maximum value
// the step size is the size of each bin
Expand All @@ -25,7 +26,7 @@ pub fn make_histogram_lengths(array: &[u64]) {
counts[step_count] = array.len() - counts.iter().sum::<usize>();
// the dotsize variable determines how many reads are represented by a single dot
// I either have to set this dynamically or experiment with it further
- let dotsize = array.len() / 500;
+ let dotsize = max(array.len() / 500, 1);
println!("\n\n# Histogram for read lengths:");
// print every entry in the vector, except the last one which is done separately
for (index, entry) in counts.iter().dropping_back(1).enumerate() {
Expand Down Expand Up @@ -60,7 +61,7 @@ pub fn make_histogram_identities(array: &[f64]) {
counts[step_count] = array.len() - counts.iter().sum::<usize>();
// the dotsize variable determines how many reads are represented by a single dot
// I either have to set this dynamically or experiment with it further
- let dotsize = array.len() / 500;
+ let dotsize = max(array.len() / 500, 1);
println!("\n\n# Histogram for Phred-scaled accuracies:");
// print every entry in the vector, except the last one which is done separately
for (index, entry) in counts.iter().dropping_back(1).enumerate() {
Expand Down Expand Up @@ -100,7 +101,7 @@ pub fn make_histogram_phaseblocks(array: &[i64]) {
counts[step_count] = array.len() - counts.iter().sum::<usize>();
// the dotsize variable determines how many reads are represented by a single dot
// I either have to set this dynamically or experiment with it further
- let dotsize = array.len() / 500;
+ let dotsize = max(array.len() / 500, 1);
println!("\n\n# Histogram for phaseblock lengths:");
// print every entry in the vector, except the last one which is done separately
for (index, entry) in counts.iter().dropping_back(1).enumerate() {
Expand Down
6 changes: 3 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,9 @@ fn extract_ubam() {
hist: true,
checksum: false,
arrow: Some("test.feather".to_string()),
- karyotype: true,
- phased: true,
- spliced: true,
+ karyotype: false,
+ phased: false,
+ spliced: false,
ubam: true,
};
let metrics = extract_from_bam::extract(&args);
Expand Down

0 comments on commit e041be3

Please sign in to comment.