Skip to content

Commit

Permalink
added --name option and now bed primer files are reported in ARTIC v3…
Browse files Browse the repository at this point in the history
… style
  • Loading branch information
jonas-fuchs committed Aug 30, 2024
1 parent 1bf9c55 commit 74dd62d
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 63 deletions.
2 changes: 1 addition & 1 deletion varvamp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Tool to design amplicons for highly variable virusgenomes"""
_program = "varvamp"
__version__ = "1.2.0"
__version__ = "1.2.1"
26 changes: 18 additions & 8 deletions varvamp/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ def get_args(sysargs):
type=int,
default=1
)
par.add_argument(
"--name",
help="name of the scheme",
metavar="varVAMP",
type=str,
default="varVAMP"
)
for par in (SINGLE_parser, TILED_parser):
par.add_argument(
"-ol",
Expand Down Expand Up @@ -496,10 +503,10 @@ def main(sysargs=sys.argv[1:]):
alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates = shared_workflow(args, log_file)

# write files that are shared in all modes
reporting.write_regions_to_bed(primer_regions, data_dir)
reporting.write_regions_to_bed(primer_regions, args.name, data_dir)
reporting.write_alignment(data_dir, alignment_cleaned)
reporting.write_fasta(data_dir, "majority_consensus", majority_consensus)
reporting.write_fasta(results_dir, "ambiguous_consensus", ambiguous_consensus)
reporting.write_fasta(data_dir, f"majority_consensus", f"{args.name}_consensus",majority_consensus)
reporting.write_fasta(results_dir, f"ambiguous_consensus", f"{args.name}_consensus", ambiguous_consensus)

# Functions called from here on return lists of amplicons that are refined step-wise into final schemes.
# These lists that are passed between functions and later used for reporting consist of dictionary elemnts,
Expand Down Expand Up @@ -549,22 +556,24 @@ def main(sysargs=sys.argv[1:]):
else:
# make sure amplicons with no off-target products and with low penalties get the lowest numbers
amplicon_scheme.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
reporting.write_all_primers(data_dir, all_primers)
reporting.write_all_primers(data_dir, args.name, all_primers)
reporting.write_scheme_to_files(
results_dir,
amplicon_scheme,
ambiguous_consensus,
args.name,
args.mode,
log_file
)
reporting.varvamp_plot(
results_dir,
alignment_cleaned,
primer_regions,
args.name,
all_primers=all_primers,
amplicon_scheme=amplicon_scheme,
)
reporting.per_base_mismatch_plot(results_dir, amplicon_scheme, args.threshold)
reporting.per_base_mismatch_plot(results_dir, amplicon_scheme, args.threshold, args.name)

# QPCR mode
if args.mode == "qpcr":
Expand All @@ -583,16 +592,17 @@ def main(sysargs=sys.argv[1:]):

# make sure amplicons with no off-target products and with low penalties get the lowest numbers
final_schemes.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
reporting.write_regions_to_bed(probe_regions, data_dir, "probe")
reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus, log_file)
reporting.write_regions_to_bed(probe_regions, args.name, data_dir, "probe")
reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus, args.name, log_file)
reporting.varvamp_plot(
results_dir,
alignment_cleaned,
primer_regions,
args.name,
probe_regions=probe_regions,
amplicon_scheme=final_schemes
)
reporting.per_base_mismatch_plot(results_dir, final_schemes, args.threshold, mode="QPCR")
reporting.per_base_mismatch_plot(results_dir, final_schemes, args.threshold, args.name, mode="QPCR")

# varVAMP finished
logging.varvamp_progress(log_file, progress=1, start_time=start_time)
Expand Down
9 changes: 9 additions & 0 deletions varvamp/scripts/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,5 +581,14 @@ def goodbye_message():
"Barba non facit philosophum.",
"Task failed successfully.",
"Never gonna give you up, never gonna let you down.",
"Have you tried turing it off and on again?",
"Look I am your primer scheme.",
"Quod erat demonstrandum.",
"Miau?",
"This is an automated message informing you that you are awsome.",
"Why was the negative-sense virus angry at the positive-sense virus?\nBecause he was left stranded!",
"If you see this message twice, you are an experienced user.",
"No one expects the spanish inquisition!",
"Primer design you must."
]
print(f"\n{random.choice(messages)}")
107 changes: 53 additions & 54 deletions varvamp/scripts/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
from varvamp.scripts import logging


def write_fasta(path, seq_id, seq):
def write_fasta(path, file_name, seq_id, seq):
"""
write fasta files
"""
name = f"{seq_id}.fasta"
name = f"{file_name}.fasta"
out = os.path.join(path, name)
with open(out, 'w') as o:
print(f">{seq_id}\n{seq}", file=o)
Expand All @@ -40,7 +40,7 @@ def write_alignment(path, alignment):
print(f">{seq[0]}\n{seq[1]}", file=o)


def write_regions_to_bed(primer_regions, path, mode=None):
def write_regions_to_bed(primer_regions, scheme_name, path, mode=None):
"""
write primer regions as bed file
"""
Expand All @@ -53,7 +53,7 @@ def write_regions_to_bed(primer_regions, path, mode=None):
with open(outfile, 'w') as o:
for counter, region in enumerate(primer_regions):
print(
"ambiguous_consensus",
f"{scheme_name}_consensus",
region[0],
region[1],
"REGION_"+str(counter),
Expand All @@ -62,32 +62,38 @@ def write_regions_to_bed(primer_regions, path, mode=None):
)


def write_primers_to_bed(outfile, primer_name, primer_properties, direction):
def write_primers_to_bed(outfile, scheme_name, primer_name, primer_properties, numeric_value, direction, sequence=None):
"""
write primers as bed file
"""
with open(outfile, 'a') as o:
print(
"ambiguous_consensus",
# write header for primer bed
if os.path.getsize(outfile) == 0 and sequence is not None:
print("#chrom\tchromStart\tchromEnd\tprimer-name\tpool\tstrand\tprimer-sequence", file=o)
data = [f"{scheme_name}_consensus",
primer_properties[1], # start
primer_properties[2], # stop
primer_name,
round(primer_properties[3], 1), # penalty
direction,
numeric_value, # can be pool or score
direction]
if sequence is not None:
data.append(sequence)
print(
*data,
sep="\t",
file=o
)


def write_all_primers(path, all_primers):
def write_all_primers(path, scheme_name, all_primers):
"""
write all primers that varVAMP designed as bed file
"""
outfile = f"{path}all_primers.bed"

for direction in all_primers:
for primer in all_primers[direction]:
write_primers_to_bed(outfile, primer, all_primers[direction][primer], direction)
write_primers_to_bed(outfile, scheme_name, primer, all_primers[direction][primer], round(all_primers[direction][primer][3], 2), direction)


def get_permutations(seq):
Expand Down Expand Up @@ -120,7 +126,7 @@ def calc_mean_stats(permutations):
return round(gc/len(permutations), 1), round(temp/len(permutations), 1)


def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file):
def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, scheme_name, log_file):
"""
write all relevant bed files and tsv file for the qPCR design
"""
Expand All @@ -141,10 +147,10 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file):
file=tsv
)
for n, amp in enumerate(final_schemes):
amp_name = f"QPCR_SCHEME_{n}"
amp_name = f"{scheme_name}_{n}"
# write bed amplicon file
print(
"ambiguous_consensus",
f"{scheme_name}_consensus",
amp["LEFT"][1],
amp["RIGHT"][2],
amp_name,
Expand Down Expand Up @@ -188,6 +194,7 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file):

permutations = get_permutations(seq)
gc, temp = calc_mean_stats(permutations)
primer_name = f"{amp_name}_{oligo_type}"

print(
amp_name,
Expand All @@ -208,15 +215,18 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file):
# write primer bed file
write_primers_to_bed(
primer_bed_file,
f"{amp_name}_{oligo_type}",
scheme_name,
primer_name,
amp[oligo_type],
direction
round(amp[oligo_type][3], 2),
direction,
seq.upper()
)
# write fasta
print(f">{amp_name}_{oligo_type}\n{seq.upper()}", file=fasta)
print(f">{primer_name}\n{seq.upper()}", file=fasta)


def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_file):
def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_name, mode, log_file):
"""
write all relevant bed files and a tsv file with all primer stats
"""
Expand All @@ -229,7 +239,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_
with open(tsv_file, "w") as tsv, open(amplicon_bed_file, "w") as bed, open(tabular_file, "w") as tabular:
# write header for primer tsv
print(
"amlicon_name\tamplicon_length\tprimer_name\talternate_primer_name\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty\toff_target_amplicons",
"amlicon_name\tamplicon_length\tprimer_name\tprimer_name_all_primers\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty\toff_target_amplicons",
file=tsv
)
amplicon_bed_records = []
Expand All @@ -245,7 +255,6 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_
for counter, amp in enumerate(amplicon_scheme[pool::len(pools)]):
# give a new amplicon name
amplicon_index = counter*len(pools) + pool
new_name = f"AMPLICON_{amplicon_index}"
# get left and right primers and their names
amp_length = amp["RIGHT"][2] - amp["LEFT"][1]
if "off_targets" in amp:
Expand All @@ -265,33 +274,33 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_
(
amp["LEFT"][1],
amp["RIGHT"][2],
new_name,
f"{scheme_name}_{amplicon_index}",
bed_score
)
)
primer_assignment_records.append(
(
# will need amplicon_index for sorting
amplicon_index,
(f"{new_name}_LEFT", f"{new_name}_RIGHT")
(f"{scheme_name}_{amplicon_index}_LEFT", f"{scheme_name}_{amplicon_index}_RIGHT")
)
)
# write primer tsv and primer bed
for direction, primer in [("+", amp["LEFT"]), ("-", amp["RIGHT"])]:
seq = ambiguous_consensus[primer[1]:primer[2]]
if direction == "-":
seq = primers.rev_complement(seq)
primer_name = f"{new_name}_RIGHT"
primer_name = f"{scheme_name}_{amplicon_index}_RIGHT"
else:
primer_name = f"{new_name}_LEFT"
primer_name = f"{scheme_name}_{amplicon_index}_LEFT"
# write primers to fasta pool file
print(f">{primer_name}\n{seq.upper()}", file=primer_fasta)
# calc primer parameters for all permutations
permutations = get_permutations(seq)
gc, temp = calc_mean_stats(permutations)
# write tsv file
print(
new_name,
f"{scheme_name}_{amplicon_index}",
amp_length,
primer_name,
primer[-1],
Expand All @@ -313,13 +322,13 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_
(
# will need amplicon_index for sorting
amplicon_index,
(primer_name, primer, direction)
(primer_name, primer, pool, direction, seq.upper())
)
)
# write amplicon bed with amplicons sorted by start position
for record in sorted(amplicon_bed_records, key=lambda x: x[0]):
print(
"ambiguous_consensus",
f"{scheme_name}_consensus",
*record,
".",
sep="\t",
Expand All @@ -336,6 +345,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_
for record in sorted(primer_bed_records):
write_primers_to_bed(
primer_bed_file,
scheme_name,
*record[1]
)

Expand Down Expand Up @@ -405,7 +415,7 @@ def alignment_entropy(alignment_cleaned):
return entropy_df


def entropy_subplot(ax, alignment_cleaned):
def entropy_subplot(ax, alignment_cleaned, scheme_name):
"""
creates the entropy subplot
"""
Expand All @@ -417,7 +427,7 @@ def entropy_subplot(ax, alignment_cleaned):
ax[0].set_ylim((0, 1))
ax[0].set_xlim(0, max(entropy_df["position"]))
ax[0].set_ylabel("normalized Shannon's entropy")
ax[0].set_title("final amplicon design")
ax[0].set_title(f"{scheme_name} amplicon design")
ax[0].spines['top'].set_visible(False)
ax[0].spines['right'].set_visible(False)

Expand Down Expand Up @@ -501,7 +511,7 @@ def qpcr_subplot(ax, amplicon_scheme):
ax[1].hlines(0.75, probe[1], probe[2], linewidth=5, color="darkgrey", label="probe")


def varvamp_plot(path, alignment_cleaned, primer_regions, all_primers=None, amplicon_scheme=None, probe_regions=None):
def varvamp_plot(path, alignment_cleaned, primer_regions, scheme_name, all_primers=None, amplicon_scheme=None, probe_regions=None):
"""
creates overview plot for the amplicon design
and per base coverage plots
Expand All @@ -514,7 +524,7 @@ def varvamp_plot(path, alignment_cleaned, primer_regions, all_primers=None, ampl
fig, ax = plt.subplots(2, 1, figsize=[22, 6], squeeze=True, sharex=True, gridspec_kw={'height_ratios': [4, 1]})
fig.subplots_adjust(hspace=0)
# entropy plot
entropy_subplot(ax, alignment_cleaned)
entropy_subplot(ax, alignment_cleaned, scheme_name)
# primer regions plot
region_subplot(ax, primer_regions)
# probe region plot for probes
Expand All @@ -540,43 +550,32 @@ def varvamp_plot(path, alignment_cleaned, primer_regions, all_primers=None, ampl
plt.close()


def get_SINGLE_TILED_primers_for_plot(amplicon_scheme):
def get_primers_for_plot(amplicon_scheme, scheme_name, mode):
"""
get the primers for per base pair plot (single, tiled)
"""
amplicon_primers = []

for counter, amp in enumerate(amplicon_scheme):
for type in ["LEFT", "RIGHT"]:
primer_name = f"AMPLICON_{counter}_{type}"
amplicon_primers.append((primer_name, amp[type]))

return amplicon_primers


def get_QPCR_primers_for_plot(amplicon_schemes):
"""
get the primers for per base pair plot (qpcr)
"""
amplicon_primers = []
if mode == "SINGLE/TILED":
oligo_types = ["LEFT", "RIGHT"]
else:
oligo_types = ["PROBE", "LEFT", "RIGHT"]

for counter, amp in enumerate(amplicon_schemes):
for type in ["PROBE", "LEFT", "RIGHT"]:
primer_name = f"QPCR_SCHEME_{counter}_{type}"
amplicon_primers.append((primer_name, amp[type]))
for counter, amp in enumerate(amplicon_scheme):
for oligo_type in oligo_types:
primer_name = f"{scheme_name}_{counter}_{oligo_type}"
amplicon_primers.append((primer_name, amp[oligo_type]))

return amplicon_primers


def per_base_mismatch_plot(path, amplicon_scheme, threshold, mode="SINGLE/TILED"):
def per_base_mismatch_plot(path, amplicon_scheme, threshold, scheme_name, mode="SINGLE/TILED"):
"""
per base pair mismatch multiplot
"""
out = os.path.join(path, "per_base_mismatches.pdf")
if mode == "SINGLE/TILED":
amplicon_primers = get_SINGLE_TILED_primers_for_plot(amplicon_scheme)
elif mode == "QPCR":
amplicon_primers = get_QPCR_primers_for_plot(amplicon_scheme)

amplicon_primers = get_primers_for_plot(amplicon_scheme, scheme_name, mode)
# ini multi pdf
with PdfPages(out) as pdf:
# always print 4 primers to one page
Expand Down

0 comments on commit 74dd62d

Please sign in to comment.