diff --git a/bamdash/__init__.py b/bamdash/__init__.py index 08276cd..0fe8615 100644 --- a/bamdash/__init__.py +++ b/bamdash/__init__.py @@ -1,3 +1,3 @@ """interactively visualize coverage and tracks""" _program = "bamdash" -__version__ = "0.1" +__version__ = "0.1.1" diff --git a/bamdash/__pycache__/__init__.cpython-39.pyc b/bamdash/__pycache__/__init__.cpython-39.pyc index 31f17f6..1c2c555 100644 Binary files a/bamdash/__pycache__/__init__.cpython-39.pyc and b/bamdash/__pycache__/__init__.cpython-39.pyc differ diff --git a/bamdash/scripts/data.py b/bamdash/scripts/data.py index 4b48b8c..844e5b0 100644 --- a/bamdash/scripts/data.py +++ b/bamdash/scripts/data.py @@ -121,14 +121,14 @@ def vcf_to_df(vcf_file, ref): variant_dict["reference"].append(rec.ref) variant_dict["mutation"].append(rec.alts[0]) # annotate the point mutation type - base_exchange = rec.ref + rec.alts[0] + base_exchange = f"{rec.ref}{rec.alts[0]}" if len(base_exchange) == 2: if base_exchange in ["AG", "GA", "CT", "TC"]: - variant_dict["point mutation type"] = "transition" + variant_dict["point mutation type"].append("TRANSITION") else: - variant_dict["point mutation type"] = "transversion" + variant_dict["point mutation type"].append("TRANSVERSION") else: - variant_dict["point mutation type"] = "none" + variant_dict["point mutation type"].append("NONE") # get mutation type if len(rec.alts[0]) > len(rec.ref): @@ -289,6 +289,9 @@ def annotate_vcf_df(vcf_df, cds_dict, seq): :return: annotated df """ + # define the potential identifiers to look for + potential_cds_identifiers = ["protein_id", "gene", "locus_tag", "product"] + proteins, as_exchange, as_effect = [], [], [] for variant in vcf_df.iterrows(): @@ -297,22 +300,29 @@ def annotate_vcf_df(vcf_df, cds_dict, seq): # check each cds for potential mutations for cds in cds_dict: # check if a protein identifier is present - if "protein_id" not in cds_dict[cds]: + if not any(identifier in potential_cds_identifiers for identifier in cds_dict[cds]): continue start, stop = cds_dict[cds]["start"], cds_dict[cds]["stop"] # check if pos is in range if pos not in range(start, stop): continue # now we know the protein - proteins_temp.append(cds_dict[cds]["protein_id"]) + for identifier in potential_cds_identifiers: + if identifier in cds_dict[cds]: + proteins_temp.append(cds_dict[cds][identifier]) + break # at the moment only check SNPs if mut_type != "SNP": as_exchange_temp.append("AMBIG"), as_effect_temp.append(variant[1]["type"]) continue # now we can analyse for a potential as exchange - strand, seq_mut, codon_start = cds_dict[cds]["strand"], str(seq), int(cds_dict[cds]["codon_start"]) + if "codon_start" in cds_dict[cds]: + codon_start = int(cds_dict[cds]["codon_start"]) + else: + codon_start = 1 + strand, seq_mut = cds_dict[cds]["strand"], str(seq) # mutate the sequence and get the CDS nt seq - seq_mut = Seq.Seq(seq_mut[:pos] + mut + seq_mut[pos+1:]) + seq_mut = Seq.Seq(seq_mut[:pos-1] + mut + seq_mut[pos:]) seq_cds, seq_mut_cds = seq[start+codon_start-2:stop], seq_mut[start+codon_start-2:stop] # translate strand depend if strand == "+": diff --git a/bamdash/scripts/plotting.py b/bamdash/scripts/plotting.py index 07c85ee..7090a2a 100644 --- a/bamdash/scripts/plotting.py +++ b/bamdash/scripts/plotting.py @@ -176,7 +176,7 @@ def create_vcf_plot(fig, row, vcf_df): if "AF" not in vcf_df: fig.update_yaxes(visible=False, row=row, col=1) else: - fig.update_yaxes(title_text="frequency", range=[0, 1], row=row, col=1) + fig.update_yaxes(title_text="frequency", range=[0, 1.15], row=row, col=1) def create_track_plot(fig, row, feature_dict, box_size, box_alpha):