From 3550def16ead856f63676728f40cff7b4806b6c4 Mon Sep 17 00:00:00 2001
From: Jonas Fuchs <78491186+jonas-fuchs@users.noreply.github.com>
Date: Tue, 17 Oct 2023 16:05:05 +0200
Subject: [PATCH] Further tweaks (#21)

* fixed alignment bug, tweaked config, fixed plot warnings
* changed kmer list to tuple to speed up kmer digestion
* now also checks blast installation for qpcr mode
* adjusted best primer selection
* fixed unwanted sorting behaviour
* updated docs
* reworked output and dimer search
* docstring update
* adjusted cutoff for t=1
---
 docs/how_varvamp_works.md    |  2 +-
 varvamp/__init__.py          |  2 +-
 varvamp/command.py           | 14 +++---
 varvamp/scripts/alignment.py |  4 +-
 varvamp/scripts/config.py    |  4 +-
 varvamp/scripts/primers.py   | 26 ++++++++---
 varvamp/scripts/qpcr.py      |  4 +-
 varvamp/scripts/regions.py   |  8 ++--
 varvamp/scripts/reporting.py | 91 ++++++++++++++++++++----------------
 varvamp/scripts/scheme.py    | 85 ++++++++++++++++-----------------
 10 files changed, 128 insertions(+), 112 deletions(-)

diff --git a/docs/how_varvamp_works.md b/docs/how_varvamp_works.md
index 52009ac..610ea36 100644
--- a/docs/how_varvamp_works.md
+++ b/docs/how_varvamp_works.md
@@ -23,7 +23,7 @@ varVAMP searches for potential primer regions as defined by a user-defined numbe
varVAMP uses [`primer3-py`](https://pypi.org/project/primer3-py/) to search for potential primers. Some of the evaluation process, determining if primers match certain criteria, was adapted from [`primalscheme`](www.github.com/aresti/primalscheme). The primer search contains multiple steps:
1. Digest the primer regions into kmers with the min and max length of primers. This is performed on a consensus sequence that does not contain ambiguous characters but is just the majority consensus of the alignment. Therefore, primer parameters will be later calculated for the best fitting primer.
2. Evaluate if these kmers are potential primers independent of their orientation (temperature, GC, size, poly-x repeats and poly dinucleotide repeats) and dependent on their orientation (secondary structure, GC clamp, number of GCs in the last 5 bases of the 3' end and min 3' nucleotides without an ambiguous base). Filter for kmers that satisfy all constraints and calculate their penalties (explained in the last section).
-3. Sanger and tiled mode: Find lowest scoring primer. varVAMP sorts the primers by their score and always takes the best scoring if the primer positions have not been covered by a better primer. This greatly reduces the complexity of the later amplicon search while only retaining the best scoring primer of a set of overlapping primers.
+3. Sanger and tiled mode: Find lowest scoring primer. varVAMP sorts the primers by their score and always takes the best scoring if the middle third of the primer has not been covered by a better primer. This greatly reduces the complexity of the later amplicon search while only retaining the best scoring primer of a set of overlapping primers.

### Amplicon search
diff --git a/varvamp/__init__.py b/varvamp/__init__.py
index 19c42d0..1ae9eb8 100644
--- a/varvamp/__init__.py
+++ b/varvamp/__init__.py
@@ -1,3 +1,3 @@
"""Tool to design amplicons for highly variable virusgenomes"""
_program = "varvamp"
-__version__ = "0.9.3"
+__version__ = "0.9.4"
diff --git a/varvamp/command.py b/varvamp/command.py
index f2eeb1b..7005751 100644
--- a/varvamp/command.py
+++ b/varvamp/command.py
@@ -285,7 +285,7 @@ def sanger_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
    logging.varvamp_progress(
        log_file,
        progress=0.7,
-        job="Considering non-overlapping low scoring primers.",
+        job="Considering low scoring primers.",
        progress_text=f"{len(all_primers['+'])} fw and {len(all_primers['-'])} rw primers"
    )

@@ -345,7 +345,7 @@ def sanger_workflow(args, amplicons, all_primers, log_file):
    return amplicon_scheme


-def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candidates, all_primers, ambiguous_consensus, log_file):
+def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candidates, all_primers, ambiguous_consensus, log_file, results_dir):
    """
    part of the workflow specific for the tiled mode
    """
@@ -371,7 +371,7 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
            f"varVAMP found {len(dimers_not_solved)} primer dimers without replacements. Check the dimer file and perform the PCR for incomaptible amplicons in a sperate reaction.",
            log_file
        )
-        reporting.write_dimers(dir, dimers_not_solved)
+        reporting.write_dimers(results_dir, dimers_not_solved)

    # evaluate coverage
    percent_coverage = round(coverage/len(ambiguous_consensus)*100, 2)
@@ -491,9 +491,8 @@ def main(sysargs=sys.argv[1:]):
    start_time = datetime.datetime.now()
    results_dir, data_dir, log_file = logging.create_dir_structure(args.input[1])
    # check if blast is installed
-    if args.mode == "tiled" or args.mode == "sanger":
-        if args.database is not None:
-            blast.check_BLAST_installation(log_file)
+    if args.database is not None:
+        blast.check_BLAST_installation(log_file)

    # mode unspecific part of the workflow
    alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates = shared_workflow(args, log_file)
@@ -528,7 +527,8 @@ def main(sysargs=sys.argv[1:]):
            right_primer_candidates,
            all_primers,
            ambiguous_consensus,
-            log_file
+            log_file,
+            results_dir
        )
        if args.database is not None:
            blast.write_BLAST_warning(off_target_amplicons, amplicon_scheme, log_file)
diff --git a/varvamp/scripts/alignment.py b/varvamp/scripts/alignment.py
index 5b95eb9..1bbda28 100644
--- a/varvamp/scripts/alignment.py
+++ b/varvamp/scripts/alignment.py
@@ -171,7 +171,7 @@ def clean_gaps(alignment, gaps_to_mask):
            stop = region[0]
            masked_seq_temp = sequence[1][start:stop]
            # check if the deletion is at the start
-            if len(masked_seq_temp) == 0:
+            if start == 0 and len(masked_seq_temp) == 0:
                masked_seq = mask
            # check if deletion is not at start
            elif start == 0 and len(masked_seq_temp) != 0:
@@ -181,7 +181,7 @@ def clean_gaps(alignment, gaps_to_mask):
                masked_seq = masked_seq + mask + masked_seq_temp
            start = region[1]+1
        if max(gaps_to_mask)[1] < len(sequence[1])-1:
-            # append the last gaps if it is not
+            # append the last seq if no gap is at
            # the end of the sequence
            start = max(gaps_to_mask)[1]
            stop = len(sequence[1])-1
diff --git a/varvamp/scripts/config.py b/varvamp/scripts/config.py
index dc91200..3c7b9ec 100644
--- a/varvamp/scripts/config.py
+++ b/varvamp/scripts/config.py
@@ -5,7 +5,7 @@
# CAN BE CHANGED, DO NOT DELETE

# basic primer parameters
-PRIMER_TMP = (57, 63, 60) # melting temperatur (min, max, opt)
+PRIMER_TMP = (56, 63, 60) # melting temperatur (min, max, opt)
PRIMER_GC_RANGE = (35, 65, 50) # gc (min, max, opt)
PRIMER_SIZES = (18, 24, 21) # size (min, max, opt)
PRIMER_MAX_POLYX = 3 # max number of polyx repeats
@@ -40,7 +40,7 @@
PRIMER_TM_PENALTY = 2 # temperature penalty
PRIMER_GC_PENALTY = 0.2 # gc penalty
PRIMER_SIZE_PENALTY = 0.5 # size penalty
-PRIMER_MAX_BASE_PENALTY = 8 # max base penalty for a primer
+PRIMER_MAX_BASE_PENALTY = 10 # max base penalty for a primer
PRIMER_3_PENALTY = (32, 16, 8, 4, 2) # penalties for 3' mismatches
PRIMER_PERMUTATION_PENALTY = 0.1 # penalty for the number of permutations
diff --git a/varvamp/scripts/primers.py b/varvamp/scripts/primers.py
index 2ebd718..4e12e7a 100644
--- a/varvamp/scripts/primers.py
+++ b/varvamp/scripts/primers.py
@@ -5,6 +5,7 @@
# LIBS
from Bio.Seq import Seq
import primer3 as p3
+import math

# varVAMP
from varvamp.scripts import config
@@ -129,7 +130,7 @@ def rev_complement(seq):
    """
    reverse complement a sequence
    """
-    return(str(Seq(seq).reverse_complement()))
+    return str(Seq(seq).reverse_complement())


def calc_permutation_penalty(amb_seq):
@@ -367,21 +368,34 @@ def find_best_primers(left_primer_candidates, right_primer_candidates):
    Primer candidates are likely overlapping. Here, the list of primers
    is sorted for the best to worst scoring. Then, the next best scoring
    is retained if it does not have any nucleotides that have already
-    been covered by a better scoring primer candidate. This significantly
-    reduces the amount of primers while retaining the best scoring ones.
+    been covered by a better scoring primer candidate; only the middle
+    third of the new candidate is considered for this overlap check.
+    This significantly reduces the number of primers while retaining
+    the best scoring ones.
+
+    Example:
+      --------             (score 1)   1
+             ------------- (score 1)   2
+            ------------   (score 0.8) 3
+                 ----------(score 0.9) 4
+
+    --> primer 3 would be retained and primer 2 excluded; primers 4 and 1
+    will, however, be considered in the next set of overlapping primers.
+    """
    all_primers = {}

    for direction, primer_candidates in [("+", left_primer_candidates), ("-", right_primer_candidates)]:
-        # sort the primers for the best scoring
-        primer_candidates.sort(key=lambda x: x[3])
+        # sort the primers by score and, if scores are equal, by start
+        primer_candidates.sort(key=lambda x: (x[3], x[1]))
        # ini everything with the top scoring primer
        to_retain = [primer_candidates[0]]
        primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]+1))
        primer_set = set(primer_ranges)
        for primer in primer_candidates:
-            primer_positions = list(range(primer[1], primer[2]+1))
+            # get the thirds of the primer, only consider the middle
+            thirds_len = int((primer[2] - primer[1])/3)
+            primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len + 1))
            # check if none of the nucleotides of the next primer
            # are already covered by a better primer
            if not any(x in primer_positions for x in primer_set):
diff --git a/varvamp/scripts/qpcr.py b/varvamp/scripts/qpcr.py
index 10bd289..5cdc796 100644
--- a/varvamp/scripts/qpcr.py
+++ b/varvamp/scripts/qpcr.py
@@ -111,8 +111,8 @@ def flanking_primer_subset(primer_list, primer_direction, probe):
        if window_start < primer[1] and primer[2] < window_stop:
            subset.append(primer)

-    # sort by score
-    subset.sort(key=lambda x: x[3])
+    # sort by score and, if scores are equal, by start
+    subset.sort(key=lambda x: (x[3], x[1]))

    return subset
diff --git a/varvamp/scripts/regions.py b/varvamp/scripts/regions.py
index 48b5820..c1b891e 100644
--- a/varvamp/scripts/regions.py
+++ b/varvamp/scripts/regions.py
@@ -69,14 +69,14 @@ def digest_seq(seq, kmer_size):
    """
    digest the sequence into kmers
    """
-    return[[seq[i:i+kmer_size], i, i+len(seq[i:i+kmer_size])] for i in range(len(seq)-kmer_size+1)]
+    return [[seq[i:i+kmer_size], i, i+len(seq[i:i+kmer_size])] for i in range(len(seq)-kmer_size+1)]


def produce_kmers(primer_regions, consensus, sizes=config.PRIMER_SIZES):
    """
    produce kmers for all primer regions
    """
-    kmers = []
+    kmers = set()

    for region in primer_regions:
        sliced_seq = consensus[region[0]:region[1]]
@@ -87,8 +87,6 @@ def produce_kmers(primer_regions, consensus, sizes=config.PRIMER_SIZES):
                kmer_temp[1] = kmer_temp[1]+region[0]
                kmer_temp[2] = kmer_temp[2]+region[0]
                # check if kmer is already in list (overlapping regions)
-                if kmer_temp in kmers:
-                    continue
-                kmers.append(kmer_temp)
+                kmers.add(tuple(kmer_temp))

    return kmers
diff --git a/varvamp/scripts/reporting.py b/varvamp/scripts/reporting.py
index e149d14..c7e11d4 100644
--- a/varvamp/scripts/reporting.py
+++ b/varvamp/scripts/reporting.py
@@ -18,36 +18,36 @@
from varvamp.scripts import config


-def write_fasta(dir, seq_id, seq):
+def write_fasta(path, seq_id, seq):
    """
    write fasta files
    """
    name = f"{seq_id}.fasta"
-    out = os.path.join(dir, name)
+    out = os.path.join(path, name)
    with open(out, 'w') as o:
        print(f">{seq_id}\n{seq}", file=o)


-def write_alignment(dir, alignment):
+def write_alignment(path, alignment):
    """
    write alignment to file
    """
    name = "alignment_cleaned.fasta"
-    out = os.path.join(dir, name)
+    out = os.path.join(path, name)
    with open(out, "w") as o:
        for seq in alignment:
            print(f">{seq[0]}\n{seq[1]}", file=o)


-def write_regions_to_bed(primer_regions, dir, mode=None):
+def write_regions_to_bed(primer_regions, path, mode=None):
    """
    write primer regions as bed file
    """
    if mode == "probe":
-        outfile = f"{dir}probe_regions.bed"
+        outfile = f"{path}probe_regions.bed"
    else:
-        outfile = f"{dir}primer_regions.bed"
+        outfile = f"{path}primer_regions.bed"

    counter = 0
    with open(outfile, 'w') as o:
@@ -80,11 +80,11 @@ def write_primers_to_bed(outfile, primer_name, primer_properties, direction):
    )


-def write_all_primers(dir, all_primers):
+def write_all_primers(path, all_primers):
    """
    write all primers that varVAMP designed as bed file
    """
-    outfile = f"{dir}all_primers.bed"
+    outfile = f"{path}all_primers.bed"

    for direction in all_primers:
        for primer in all_primers[direction]:
@@ -121,15 +121,15 @@ def calc_mean_stats(permutations):
    return round(gc/len(permutations), 1), round(temp/len(permutations), 1)


-def write_qpcr_to_files(dir, final_schemes, ambiguous_consensus):
+def write_qpcr_to_files(path, final_schemes, ambiguous_consensus):
    """
    write all relevant bed files and tsv file for the qPCR design
    """
-    tsv_file = os.path.join(dir, "qpcr_design.tsv")
-    tsv_file_2 = os.path.join(dir, "qpcr_primers.tsv")
-    primer_bed_file = os.path.join(dir, "primers.bed")
-    amplicon_bed_file = os.path.join(dir, "amplicons.bed")
+    tsv_file = os.path.join(path, "qpcr_design.tsv")
+    tsv_file_2 = os.path.join(path, "qpcr_primers.tsv")
+    primer_bed_file = os.path.join(path, "primers.bed")
+    amplicon_bed_file = os.path.join(path, "amplicons.bed")

    with open(tsv_file, "w") as tsv, open(tsv_file_2, "w") as tsv2, open(amplicon_bed_file, "w") as bed:
        print(
@@ -160,7 +160,7 @@ def write_qpcr_to_files(dir, final_schemes, ambiguous_consensus):
                round(final_schemes[scheme]["score"], 1),
                final_schemes[scheme]["deltaG"],
                len(amplicon_seq),
-                amplicon_start,
+                amplicon_start + 1,
                amplicon_stop,
                amplicon_seq,
                sep="\t",
@@ -183,7 +183,7 @@ def write_qpcr_to_files(dir, final_schemes, ambiguous_consensus):
            print(
                scheme,
                type,
-                final_schemes[scheme][type][1],
+                final_schemes[scheme][type][1] + 1,
                final_schemes[scheme][type][2],
                seq,
                len(seq),
@@ -204,14 +204,14 @@ def write_qpcr_to_files(dir, final_schemes, ambiguous_consensus):
            )


-def write_scheme_to_files(dir, amplicon_scheme, ambiguous_consensus, mode):
+def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode):
    """
    write all relevant bed files and a tsv file with all primer stats
    """
-    tsv_file = os.path.join(dir, "primers.tsv")
-    primer_bed_file = os.path.join(dir, "primers.bed")
-    amplicon_bed_file = os.path.join(dir, "amplicons.bed")
-    tabular_file = os.path.join(dir, "primer_to_amplicon_assignment.tabular")
+    tsv_file = os.path.join(path, "primers.tsv")
+    primer_bed_file = os.path.join(path, "primers.bed")
+    amplicon_bed_file = os.path.join(path, "amplicons.bed")
+    tabular_file = os.path.join(path, "primer_to_amplicon_assignment.tabular")

    counter = 0
@@ -219,7 +219,7 @@ def write_scheme_to_files(dir, amplicon_scheme, ambiguous_consensus, mode):
    with open(tsv_file, "w") as tsv, open(amplicon_bed_file, "w") as bed, open(tabular_file, "w") as tabular:
        # write header for primer tsv
        print(
-            "amlicon_name\tprimer_name\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tscore",
+            "amlicon_name\tamplicon_length\tprimer_name\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tscore",
            file=tsv
        )
        counter = 0
@@ -233,6 +233,7 @@ def write_scheme_to_files(dir, amplicon_scheme, ambiguous_consensus, mode):
            primer_names = list(amplicon_scheme[pool][amp].keys())
            left = (primer_names[0], amplicon_scheme[pool][amp][primer_names[0]])
            right = (primer_names[1], amplicon_scheme[pool][amp][primer_names[1]])
+            amp_length = right[1][2] - left[1][1]
            # write amplicon bed
            if mode == "tiled":
                bed_score = pool
@@ -265,9 +266,10 @@ def write_scheme_to_files(dir, amplicon_scheme, ambiguous_consensus, mode):
            # write tsv file
            print(
                new_name,
+                amp_length,
                primer[0],
                pool,
-                primer[1][1],
+                primer[1][1] + 1,
                primer[1][2],
                seq,
                len(primer[1][0]),
@@ -288,24 +290,25 @@ def write_scheme_to_files(dir, amplicon_scheme, ambiguous_consensus, mode):
            )


-def write_dimers(dir, not_solved):
+def write_dimers(path, primer_dimers):
    """
    write dimers for which no replacement was found to file
    """
-    tsv_file = os.path.join(dir, "unsolvable_primer_dimers.tsv")
-    print(
-        "pool\tprimer_name_1\tprimer_name_2\tdimer melting temp",
-        file=tsv_file
-    )
-    for dimers in not_solved:
+    tsv_file = os.path.join(path, "unsolvable_primer_dimers.tsv")
+    with open(tsv_file, "w") as tsv:
        print(
-            dimers[0][0],
-            dimers[0][2],
-            dimers[1][2],
-            round(primers.calc_dimer(dimers[0][3][0], dimers[1][3][0]).tm, 1),
-            sep="\t",
-            file=tsv_file
+            "pool\tprimer_name_1\tprimer_name_2\tdimer melting temp",
+            file=tsv
        )
+        for dimers in primer_dimers:
+            print(
+                dimers[0][0],
+                dimers[0][2],
+                dimers[1][2],
+                round(primers.calc_dimer(dimers[0][3][0], dimers[1][3][0]).tm, 1),
+                sep="\t",
+                file=tsv
+            )


def entropy(pos, states):
@@ -455,7 +458,7 @@ def qpcr_subplot(ax, amplicon_scheme):
    ax[1].hlines(0.75, probe[1], probe[2], linewidth=5, color="darkgrey", label="probe")


-def varvamp_plot(dir, alignment_cleaned, primer_regions, all_primers=None, amplicon_scheme=None, probe_regions=None):
+def varvamp_plot(path, alignment_cleaned, primer_regions, all_primers=None, amplicon_scheme=None, probe_regions=None):
    """
    creates overview plot for the amplicon design and per
    base coverage plots
@@ -463,7 +466,7 @@ def varvamp_plot(dir, alignment_cleaned, primer_regions, all_primers=None, ampli
    # first plot: overview
    # create pdf name
    name = "amplicon_plot.pdf"
-    out = os.path.join(dir, name)
+    out = os.path.join(path, name)
    # ini figure
    fig, ax = plt.subplots(2, 1, figsize=[22, 6], squeeze=True, sharex=True, gridspec_kw={'height_ratios': [4, 1]})
    fig.subplots_adjust(hspace=0)
@@ -491,6 +494,7 @@ def varvamp_plot(dir, alignment_cleaned, primer_regions, all_primers=None, ampli
    # save fig
    fig.savefig(out, bbox_inches='tight')
+    plt.close()


def get_SANGER_TILED_primers_for_plot(amplicon_scheme):
@@ -526,11 +530,11 @@ def get_QPCR_primers_for_plot(amplicon_schemes):
    return amplicon_primers


-def per_base_mismatch_plot(dir, amplicon_scheme, threshold, mode="SANGER/TILED"):
+def per_base_mismatch_plot(path, amplicon_scheme, threshold, mode="SANGER/TILED"):
    """
    per base pair mismatch multiplot
    """
-    out = os.path.join(dir, "per_base_mismatches.pdf")
+    out = os.path.join(path, "per_base_mismatches.pdf")
    if mode == "SANGER/TILED":
        amplicon_primers = get_SANGER_TILED_primers_for_plot(amplicon_scheme)
    elif mode == "QPCR":
@@ -557,6 +561,11 @@ def per_base_mismatch_plot(dir, amplicon_scheme, threshold, mode="SANGER/TILED")
            ax[idx].xaxis.set_ticklabels(x, rotation=45)
            ax[idx].set_ylabel(ylabel="freq of sequences")
            ax[idx].set_xlabel("position")
-            ax[idx].set_ylim(0, 1-threshold)
+            # set yaxis lims reasonably
+            if threshold < 1:
+                ax[idx].set_ylim(0, 1-threshold)
+            else:
+                ax[idx].set_ylim(0, 0.001)
        # - to pdf
        pdf.savefig(fig, bbox_inches='tight')
+        plt.close()
diff --git a/varvamp/scripts/scheme.py b/varvamp/scripts/scheme.py
index 43da059..0dc3ba4 100644
--- a/varvamp/scripts/scheme.py
+++ b/varvamp/scripts/scheme.py
@@ -4,6 +4,7 @@
# BUILT-INS
import heapq
+import math

# varVAMP
from varvamp.scripts import config, primers
@@ -77,8 +78,8 @@ def find_amplicons(all_primers, opt_len, max_len):
            if primers.calc_dimer(left_primer[0], right_primer[0]).tm > config.PRIMER_MAX_DIMER_TMP:
                continue
            # calculate length dependend amplicon costs as the cumulative primer
-            # score multiplied by the fold length of the optimal length.
-            amplicon_costs = (right_primer[3] + left_primer[3])*(amplicon_length/opt_len)
+            # score multiplied by e^(amplicon length/optimal length).
+            amplicon_costs = (right_primer[3] + left_primer[3])*math.exp(amplicon_length/opt_len)
            amplicon_name = "amplicon_"+str(amplicon_number)
            amplicon_dict[amplicon_name] = [
                left_primer[1], # start
@@ -111,18 +112,18 @@ def create_amplicon_graph(amplicons, min_overlap):
        current_amplicon = amplicons[current]
        start = current_amplicon[0] + current_amplicon[4]/2
        stop = current_amplicon[1] - min_overlap
-        for next in amplicons:
-            next_amplicon = amplicons[next]
+        for next_amp in amplicons:
+            next_amplicon = amplicons[next_amp]
            # check if the next amplicon lies within the start/stop range of
            # the current amplicon and if its non-overlapping part is large
            # enough to ensure space for a primer and the min overlap of the
            # following amplicon.
-            if not all((start <= next_amplicon[0] <= stop, next_amplicon[1] > current_amplicon[1] + next_amplicon[4]/2)):
+            if not all([start <= next_amplicon[0] <= stop, next_amplicon[1] > current_amplicon[1] + next_amplicon[4]/2]):
                continue
            if current not in amplicon_graph:
-                amplicon_graph[current] = {next: next_amplicon[5]}
+                amplicon_graph[current] = {next_amp: next_amplicon[5]}
            else:
-                amplicon_graph[current][next] = next_amplicon[5]
+                amplicon_graph[current][next_amp] = next_amplicon[5]

    # return a graph object
    return Graph(nodes, amplicon_graph)
@@ -177,7 +178,7 @@ def get_end_node(previous_nodes, shortest_path, amplicons):
    return min(possible_end_nodes.items(), key=lambda x: x[1])


-def get_min_path(previous_nodes, shortest_path, start_node, target_node):
+def get_min_path(previous_nodes, start_node, target_node):
    """
    get the min path from the start to stop node from the
    previosuly calculated shortest path
@@ -268,7 +269,7 @@ def find_best_covering_scheme(amplicons, amplicon_graph, all_primers):
                break

    if best_previous_nodes:
-        amplicon_scheme = get_min_path(best_previous_nodes, best_shortest_path, best_start_node, best_target_node)
+        amplicon_scheme = get_min_path(best_previous_nodes, best_start_node, best_target_node)
    else:
        # if no previous nodes are found but the single amplicon results in the largest
        # coverage - return as the best scheme
@@ -314,9 +315,10 @@ def get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidat
    # test each primer in dimer
    for primer in dimer:
        overlapping_primers_temp = []
+        thirds_len = int((primer[3][2] - primer[3][1]) / 3)
+        # get the middle third of the primer (this is where the previously excluded primers are located)
+        overlap_set = set(range(primer[3][1] + thirds_len, primer[3][2] - thirds_len + 1))
        # check in which list to look for them
-        overlap_range = range(primer[3][1], primer[3][2]+1)
-        overlap_set = set(overlap_range)
        if "RIGHT" in primer[2]:
            primers_to_test = right_primer_candidates
        else:
@@ -349,45 +351,38 @@ def check_and_solve_heterodimers(amplicon_scheme, left_primer_candidates, right_
    """
    check scheme for heterodimers, try to find new primers
    that overlap and replace the existing ones.
-    this can lead to new primer dimers. therefore the
-    process is repeated until no primer dimers are found
-    in the updated scheme or all found primer dimers have
-    no replacements.
+    this can lead to new primer dimers. therefore the scheme
+    is checked a second time. if there are still primer dimers
+    present, the non-solvable dimers are returned
    """
-    not_solvable = []
    primer_dimers = test_scheme_for_dimers(amplicon_scheme)
-    while primer_dimers:
-        for dimer in primer_dimers:
-            # skip the primer dimers that have not been previously solved
-            if dimer in not_solvable:
-                continue
-            overlapping_primers = get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates)
-            # test all possible primers against each other for dimers
-            new_primers = test_overlaps_for_dimers(overlapping_primers)
-            # now change these primers in the scheme
-            if new_primers:
-                for new in new_primers:
-                    # overwrite in final scheme
-                    amplicon_scheme[new[0]][new[1]][new[2]] = new[3]
-                    # and in all primers
-                    if "LEFT" in new[2]:
-                        strand = "+"
-                    else:
-                        strand = "-"
-                    all_primers[strand][new[2]] = new[3]
-            # or remember the dimers for which varvamp did not find a replacement.
-            else:
-                not_solvable.append(dimer)
-        # none of the primer dimers of this iteration could be solved
-        if all(x in not_solvable for x in primer_dimers):
-            break
-        # some could be solved. lets check the updated scheme again.
-        else:
-            primer_dimers = test_scheme_for_dimers(amplicon_scheme)
+    if primer_dimers:
+        print(f"varVAMP found {len(primer_dimers)} dimer pairs in scheme ... trying to find replacements")
+    else:
+        return []
+
+    for dimer in primer_dimers:
+        # get overlapping primers that have not been considered
+        overlapping_primers = get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates)
+        # test all possible primers against each other for dimers
+        new_primers = test_overlaps_for_dimers(overlapping_primers)
+        # now change these primers in the scheme
+        if new_primers:
+            for new in new_primers:
+                # overwrite in final scheme
+                amplicon_scheme[new[0]][new[1]][new[2]] = new[3]
+                # and in all primers
+                if "LEFT" in new[2]:
+                    strand = "+"
+                else:
+                    strand = "-"
+                all_primers[strand][new[2]] = new[3]
+
+    primer_dimers = test_scheme_for_dimers(amplicon_scheme)

-    return not_solvable
+    return primer_dimers


def find_sanger_amplicons(amplicons, all_primers, n):
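
Note (illustration only, not part of the patch): a minimal stand-alone sketch of the middle-third overlap rule that the docs change and the new find_best_primers()/get_overlapping_primers() logic above describe. The primer tuples (seq, start, stop, penalty) and all positions are made up; the real implementation lives in varvamp/scripts/primers.py.

def select_non_redundant_primers(candidates):
    """keep only the best scoring primer of each set of overlapping primers"""
    # sort by penalty and, for equal penalties, by start position
    candidates = sorted(candidates, key=lambda x: (x[3], x[1]))
    covered, retained = set(), []
    for seq, start, stop, penalty in candidates:
        # only the middle third of the candidate is compared against
        # positions already covered by better scoring primers
        third = (stop - start) // 3
        middle = set(range(start + third, stop - third + 1))
        if middle & covered:
            continue
        retained.append((seq, start, stop, penalty))
        covered.update(range(start, stop + 1))
    return retained


# toy candidates: the primer whose middle third is already covered by a
# better scoring primer is dropped, the ones that only touch its ends are kept
candidate_primers = [
    ("A" * 19, 85, 103, 1.0),   # overlaps only the 5' end -> kept
    ("C" * 19, 101, 119, 0.8),  # lowest penalty -> retained first
    ("G" * 19, 104, 122, 1.0),  # middle third already covered -> dropped
    ("T" * 19, 117, 135, 0.9),  # overlaps only the 3' end -> kept
]
print(select_non_redundant_primers(candidate_primers))

Sorting by (penalty, start) also makes the selection deterministic when two candidates share a penalty, which is what the new sort keys in primers.py and qpcr.py address.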
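
Note (illustration only, not part of the patch): a quick comparison of the previous linear length factor and the new exponential factor used for amplicon costs in scheme.find_amplicons(); the penalty and length values below are arbitrary.

import math

opt_len = 1000          # assumed optimal amplicon length
primer_penalties = 2.5  # assumed cumulative penalty of both primers

for amplicon_length in (1000, 1200, 1500, 2000):
    linear_cost = primer_penalties * (amplicon_length / opt_len)       # before this patch
    exp_cost = primer_penalties * math.exp(amplicon_length / opt_len)  # after this patch
    print(f"{amplicon_length}: linear {linear_cost:.2f} vs exponential {exp_cost:.2f}")

Amplicons that exceed the optimal length are now penalized much more strongly, so the graph search favors schemes with amplicons close to the optimal size.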
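
Note (illustration only, not part of the patch): why produce_kmers() now collects hashable tuples in a set instead of lists in a list — duplicates from overlapping primer regions are dropped automatically and membership checks are O(1). The kmer values are made up.

kmers = set()
# kmers as (seq, start, stop) tuples; the repeated kmer from an
# overlapping region is silently deduplicated
for kmer in [("ACGTACGT", 10, 18), ("CGTACGTA", 11, 19), ("ACGTACGT", 10, 18)]:
    kmers.add(kmer)
print(len(kmers))  # 2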