Skip to content

Commit

Permalink
copy Tim's oscillation detection from python to rust
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Aug 21, 2023
1 parent a164b37 commit a4083b7
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 3 deletions.
31 changes: 30 additions & 1 deletion src/bin/commands/trimmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ pub(crate) struct TrimmerOpts {
#[clap(long, short = 'L', default_value = "15")]
filter_longer: usize,

/// Size of window to look for oscillations.
#[clap(long, default_value = "15")]
osc_window: usize,

/// Number of oscillations in a window to trigger trimming/masking.
#[clap(long, default_value = "4")]
osc_max_oscillations: usize,

/// Difference between adjacent bases to be considered on oscilation.
#[clap(long, default_value = "10")]
osc_delta: usize,

/// Minimum difference in base-quality for one read to correct an overlapping
/// base from the other read.
#[clap(long, short = 'd', default_value = "15")]
Expand Down Expand Up @@ -149,7 +161,24 @@ impl Command for TrimmerOpts {
}
}
Operation::Osc => {
unimplemented!("OSC not implemented")
if let Some(i) = base_quality::identify_trim_point(
r1.qual(),
self.osc_delta as i32,
self.osc_window,
self.osc_max_oscillations,
) {
// TODO [Brent]: allow clip or mask with flag
base_quality::clip_read(&mut r1, 0usize..i);
}
if let Some(i) = base_quality::identify_trim_point(
r2.qual(),
self.osc_delta as i32,
self.osc_window,
self.osc_max_oscillations,
) {
// TODO [Brent]: allow clip or mask with flag
base_quality::clip_read(&mut r2, 0usize..i);
}
}
Operation::LenFilter => {
if r1.seq().len().min(r2.seq().len()) < self.filter_shorter
Expand Down
81 changes: 79 additions & 2 deletions src/lib/base_quality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,48 @@ use seq_io::fastq::OwnedRecord;

use std::ops::Range;

pub fn find_oscillating_quals(_bqs: &[u8]) -> Range<usize> {
unimplemented!("find_oscillating_quals");
/// Returns an optional point at which to trim the read due to quality oscillations.
///
/// # Arguments
///
/// * `quals` - The quality scores for the read.
/// * `osc_delta` - The minimum difference in quality scores to be considered an oscillation.
/// * `window_size` - The number of bases to consider in a window.
/// * `max_oscillations` - The maximum number of oscillations allowed in a window.
pub fn identify_trim_point(
quals: &[u8],
osc_delta: i32,
window_size: usize,
max_oscillations: usize,
) -> Option<usize> {
// Compute a vector of booleans to represent whether each position is immediately
// after an oscillation
let mut is_osc = vec![false; quals.len()];

for i in 1..quals.len() {
is_osc[i] = (quals[i] as i32 - quals[i - 1] as i32).abs() >= osc_delta;
}

// TODO [brent]: this is ~O(n^2/2) and could be ~O(n)
// instead, convert is_osc into a vector of indices of oscillations. then can use a
// window of `max_oscillations` and check if there are at least `window_size` oscillations in the window.
for i in 1..is_osc.len() {
if !is_osc[i] {
continue;
}
let mut n = 1;
#[allow(clippy::needless_range_loop)] // will refactor this later anyway
for j in (i + 1)..std::cmp::min(i + window_size, quals.len()) {
if is_osc[j] {
n += 1;
if n > max_oscillations {
return Some(i);
}
}
}
}

None
}

// Indicates which tail(s) to clip.
Expand Down Expand Up @@ -82,4 +122,41 @@ mod tests {
let range = find_high_quality_bases(bqs, 'I' as u8, 1, Tail::Right);
assert_eq!(range, 0..bqs.len() - 1);
}

#[test]
fn test_identify_trim_point() {
let quals = vec![33; 100];
let oscillation = 10;
let window_size = 20;
let max_oscillations = 3;

let trim_point = identify_trim_point(&quals, oscillation, window_size, max_oscillations);

assert_eq!(trim_point, None);
}

#[test]
fn test_identify_trim_point_not_quite_big_enough_changes() {
let base = [9u8, 18, 27, 36, 27, 18];
let quals: Vec<u8> = base.into_iter().cycle().take(base.len() * 20).collect();

let oscillation = 10;
let window_size = 20;
let max_oscillations = 3;

let trim_point = identify_trim_point(&quals, oscillation, window_size, max_oscillations);
assert_eq!(trim_point, None);
}

#[test]
fn test_identify_trim_point_finds_point() {
let left = vec![30u8; 50];
let right = [15u8, 22, 35, 20, 32];
let quals =
[&left[..], &right.into_iter().cycle().take(right.len() * 5).collect::<Vec<u8>>()]
.concat();

let trim_point = identify_trim_point(&quals, 10, 20, 3);
assert_eq!(trim_point, Some(50));
}
}

0 comments on commit a4083b7

Please sign in to comment.