From bf4ed5f1492bfc357fe3d64c175c2f7a55e595ee Mon Sep 17 00:00:00 2001 From: Florian Zwagemaker Date: Tue, 6 Dec 2022 12:22:03 +0100 Subject: [PATCH] fix: replace gaps in nucleotide-seq with "N" characters to ensure valid translation when `--keep-gaps` flag is given. (forces ambiguous AA call) --- AminoExtract/args.py | 2 +- AminoExtract/sequences.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AminoExtract/args.py b/AminoExtract/args.py index 220198b..a2f6e07 100644 --- a/AminoExtract/args.py +++ b/AminoExtract/args.py @@ -153,7 +153,7 @@ def get_args(givenargs: list[str] | None = None) -> argparse.Namespace: "-kg", action="store_true", default=False, - help='If this flag is set then the amino acid translation will be done including gaps in the nucleotide sequence.\nThis results in an "X" on gap positions in the aminoacid sequence.\n [underline]By default, gaps are removed before translation.[/underline]', + help='If this flag is set then the amino acid translation will be done including gaps in the nucleotide sequence.\n This results in an "X" on gapped positions in the aminoacid sequence as gap characters ("-") will be replaced by "N" in the nucleotide sequence.\n [underline]By default, gaps are removed before translation.[/underline]', required=False, ) diff --git a/AminoExtract/sequences.py b/AminoExtract/sequences.py index e10b5c2..cf19a90 100644 --- a/AminoExtract/sequences.py +++ b/AminoExtract/sequences.py @@ -82,7 +82,7 @@ def Extract_AminoAcids( # get the sequence slice from the start to the end position seq_slice = ( - NucSequence[start:end] + NucSequence[start:end].replace("-", "N") if keep_gaps else NucSequence[start:end].replace("-", "") )