Skip to content

Commit

Permalink
feat: set a default allow_overlapping_hits=False to avoid breaking ch…
Browse files Browse the repository at this point in the history
…anges
  • Loading branch information
ameynert committed Jan 7, 2025
1 parent e39670e commit cfaca8f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 11 deletions.
44 changes: 34 additions & 10 deletions prymer/offtarget/offtarget_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
OffTargetResult(primer_pair=..., passes=False, cached=False, spans=[], left_primer_spans=[], right_primer_spans=[])
>>> list(detector.check_all(primer_pairs=[primer_pair]).values())
[OffTargetResult(primer_pair=..., passes=False, cached=True, spans=[], left_primer_spans=[], right_primer_spans=[])]
>>> detector = OffTargetDetector(ref=ref_fasta, max_primer_hits=204, max_primer_pair_hits=856, three_prime_region_length=20, max_mismatches_in_three_prime_region=0, max_mismatches=0, min_amplicon_size=10, max_amplicon_size=250)
>>> detector = OffTargetDetector(ref=ref_fasta, max_primer_hits=204, max_primer_pair_hits=856, three_prime_region_length=20, max_mismatches_in_three_prime_region=0, max_mismatches=0, max_amplicon_size=250)
>>> result = detector.check_one(primer_pair=primer_pair)
>>> len(result.spans)
856
Expand Down Expand Up @@ -168,6 +168,7 @@ def __init__(
max_amplicon_size: int,
min_amplicon_size: int = 1,
min_primer_pair_hits: int = 1,
allow_overlapping_hits: bool = False,
cache_results: bool = True,
threads: Optional[int] = None,
keep_spans: bool = True,
Expand All @@ -186,7 +187,8 @@ def __init__(
and `max_primer_hits`.
2. Filtering of individual primers: `max_primer_hits`.
3. Checking of primer pairs: `max_primer_hits`, `min_primer_pair_hits`,
`max_primer_pair_hits`, and `max_amplicon_size`.
`max_primer_pair_hits`, `allow_overlapping_hits`, `min_amplicon_size, and
`max_amplicon_size`.
Args:
ref: the reference genome fasta file (must be indexed with BWA)
Expand Down Expand Up @@ -214,7 +216,12 @@ def __init__(
max_amplicon_size: the maximum amplicon size to consider amplifiable. Must be greater
than 0.
min_amplicon_size: the minimum amplicon size to consider amplifiable. Must be between 1
and `max_amplicon_size` inclusive. Default 1bp.
and `max_amplicon_size` inclusive. If `allow_overlapping_hits` is False, primer
pair hits that overlap each other will not be considered, even if they would create
an amplicon of at least `min_amplicon_size`. Default 1bp.
allow_overlapping_hits: if True, allow hits to the invididual primers in a primer pair
to overlap with each other. If False, no overlap will be allowed, even if
`min_amplicon_size` would otherwise allow it. Default False.
cache_results: if True, cache results for faster re-querying
threads: the number of threads to use when invoking bwa
keep_spans: if True, [[OffTargetResult]] objects will be reported with amplicon spans
Expand Down Expand Up @@ -295,6 +302,7 @@ def __init__(
self._min_primer_pair_hits: int = min_primer_pair_hits
self._max_amplicon_size: int = max_amplicon_size
self._min_amplicon_size: int = min_amplicon_size
self._allow_overlapping_hits: bool = allow_overlapping_hits
self._cache_results: bool = cache_results
self._keep_spans: bool = keep_spans
self._keep_primer_spans: bool = keep_primer_spans
Expand Down Expand Up @@ -342,7 +350,11 @@ def check_all(self, primer_pairs: list[PrimerPair]) -> dict[PrimerPair, OffTarge
This method maps each primer to the specified reference with `bwa aln` to search for
off-target hits. Possible amplicons are identified from the pairwise combinations of these
alignments, up to the specified `max_amplicon_size`.
alignments.
If `allow_overlapping_hits=True`, amplicons in the size range `min_amplicon_size` to
`max_amplicon_size` will be returned. If `allow_overlapping_hits=False`, the size range will
still apply, but will exclude amplicons with overlapping hits.
Primer pairs are marked as passing if both of the following are true:
1. Each primer has no more than `max_primer_hits` alignments to the genome.
Expand Down Expand Up @@ -448,6 +460,7 @@ def _build_off_target_result(
negative_hits=right_negative_hits[refname],
min_len=self._min_amplicon_size,
max_len=self._max_amplicon_size,
allow_overlapping_hits=self._allow_overlapping_hits,
strand=Strand.POSITIVE,
)
)
Expand All @@ -457,6 +470,7 @@ def _build_off_target_result(
negative_hits=left_negative_hits[refname],
min_len=self._min_amplicon_size,
max_len=self._max_amplicon_size,
allow_overlapping_hits=self._allow_overlapping_hits,
strand=Strand.NEGATIVE,
)
)
Expand Down Expand Up @@ -538,14 +552,16 @@ def _to_amplicons(
max_len: int,
strand: Strand,
min_len: int = 1,
allow_overlapping_hits: bool = False,
) -> list[Span]:
"""Takes lists of positive strand hits and negative strand hits and constructs amplicon
mappings anywhere a positive strand hit and a negative strand hit occur where the end of
the negative strand hit is at least `min_len` and no more than `max_len` from the start of
the positive strand hit.
Primer hits may overlap if `min_len` is shorter than the length of the left and right
primers together.
primers together and `allow_overlapping_hits` is True. If `allow_overlapping_hits` is False,
these will not be returned.
Args:
positive_hits: List of hits on the positive strand for one of the primers in the pair.
Expand All @@ -555,7 +571,12 @@ def _to_amplicons(
`positive_hits` are for the left primer and `negative_hits` are for the right
primer. Set to Strand.NEGATIVE if `positive_hits` are for the right primer and
`negative_hits` are for the left primer.
min_len: Minimum length of amplicons to consider. Default 1.
min_len: Minimum length of amplicons to consider. If `allow_overlap` is False, primer
pair hits that overlap each other will not be considered, even if they would create
an amplicon of at least `min_len`. Default 1bp.
allow_overlapping_hits: if True, allow hits to the invididual primers in a primer pair
to overlap with each other. If False, no overlap will be allowed, even if
`min_amplicon_size` would otherwise allow it. Default False.
Raises:
ValueError: If any of the positive hits are not on the positive strand, or any of the
Expand Down Expand Up @@ -595,10 +616,13 @@ def _to_amplicons(
):
# Hit coordinates are 1-based end-inclusive.
amplicon_length: int = negative_hit.end - positive_hit.start + 1
if min_len <= amplicon_length <= max_len:
# If the negative hit starts to the right of the positive hit, and the amplicon
# length is in the correct size range, add it to the list of amplicon hits to
# be returned.
if min_len <= amplicon_length <= max_len and (
allow_overlapping_hits or negative_hit.start > positive_hit.end
):
# If overlapping hits are allowed and the amplicon length is in the correct
# size range, -OR- if overlapping hits are not allowed, the negative hit starts
# after the positive hit ends, and the amplicon length is less than or equal
# to the allowed maximum, add it to the list of amplicon hits to be returned.
amplicons.append(
Span(
refname=positive_hit.refname,
Expand Down
5 changes: 4 additions & 1 deletion tests/offtarget/test_offtarget.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ def _build_detector(
three_prime_region_length: int = 20,
max_mismatches_in_three_prime_region: int = 0,
max_mismatches: int = 0,
min_amplicon_size: int = 10,
min_amplicon_size: int = 1,
max_amplicon_size: int = 250,
allow_overlapping_hits: bool = False,
cache_results: bool = True,
) -> OffTargetDetector:
"""Builds an `OffTargetDetector` with strict defaults"""
Expand All @@ -37,6 +38,7 @@ def _build_detector(
max_mismatches=max_mismatches,
min_amplicon_size=min_amplicon_size,
max_amplicon_size=max_amplicon_size,
allow_overlapping_hits=allow_overlapping_hits,
cache_results=cache_results,
keep_spans=True,
keep_primer_spans=True,
Expand Down Expand Up @@ -349,6 +351,7 @@ def test_to_amplicons_overlapping(
negative_hits=[negative],
min_len=1,
max_len=250,
allow_overlapping_hits=True,
strand=strand,
)
assert actual == expected, test_id
Expand Down

0 comments on commit cfaca8f

Please sign in to comment.