diff --git a/README.md b/README.md index 913af12..290b684 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ + bamdash +[![language](https://img.shields.io/badge/python-%3E3.9-green)](https://www.python.org/) +[![License: GPL v3](https://img.shields.io/github/license/jonas-fuchs/varvamp)](https://www.gnu.org/licenses/gpl-3.0) +![Static Badge](https://img.shields.io/badge/platform-linux_osx-blue) + ## Overview **BAMdash lets you create interactive coverage plots from your bam file with [`plotly`](https://plotly.com/)** @@ -8,15 +13,19 @@ - **create** a interactive `html` for data exploration - **create** a static image (`jpg`, `png`, `pdf`, `svg`) ready for publication - **add** additional tracks (supported: `.vcf`, `.gb`, `.bed`) +- **annotate** tracks with coverage data and vcf with additional information if a `.gb` file is provided +- **export** annoated track data as tabular files (`.bed`, `.vcf`) or json (`.gb`) - **developed** for viral genomics - **customize** all plotting parameters +**Feel free to report any bugs or request new features as issues!** + ## Example -example +example ## Installation -### via pip (recommened, coming soon): +### via pip (recommened): ```shell pip install bamdash ``` @@ -52,7 +61,7 @@ full usage: -h, --help show this help message and exit -b , --bam bam file location - -r , --reference reference id + -r , --reference seq reference id -t [track_1 ...], --tracks [track_1 ...] file location of tracks -c 5, --coverage 5 minimum coverage @@ -62,6 +71,7 @@ full usage: export as png, jpg, pdf, svg -d px px, --dimensions px px width and height of the static image in px + --dump, --no-dump dump annotated track data (default: False) -v, --version show program's version number and exit ``` @@ -96,7 +106,7 @@ average_line_width = 1 # track customize track_color_scheme = "agsunset" # for mutiple annotations tracks (genebank) -track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no name colors) +track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no named colors) strand_types = ["triangle-right", "triangle-left", "diamond-wide"] # +, -, undefined strand strand_marker_size = 8 strand_marker_line_width = 1 @@ -121,6 +131,9 @@ To apply these new settings just repeat the installation procedure in the BAMdas pip install . ``` +Buy Me A Coffee + + --- **Important disclaimer:** diff --git a/bamdash/__init__.py b/bamdash/__init__.py index e3121ea..08276cd 100644 --- a/bamdash/__init__.py +++ b/bamdash/__init__.py @@ -1,3 +1,3 @@ """interactively visualize coverage and tracks""" _program = "bamdash" -__version__ = "0.0" +__version__ = "0.1" diff --git a/bamdash/__pycache__/__init__.cpython-39.pyc b/bamdash/__pycache__/__init__.cpython-39.pyc index 8a4c7ad..31f17f6 100644 Binary files a/bamdash/__pycache__/__init__.cpython-39.pyc and b/bamdash/__pycache__/__init__.cpython-39.pyc differ diff --git a/bamdash/command.py b/bamdash/command.py index c30514a..58e54e1 100644 --- a/bamdash/command.py +++ b/bamdash/command.py @@ -6,8 +6,11 @@ import sys import argparse import math +import json # LIBS +import plotly.io as pio +import pandas as pd from plotly.subplots import make_subplots # BAMDASH @@ -39,7 +42,7 @@ def get_args(sysargs): required=True, type=str, metavar=" ", - help="reference id" + help="seq reference id" ) parser.add_argument( "-t", @@ -83,6 +86,12 @@ def get_args(sysargs): nargs=2, help="width and height of the static image in px" ) + parser.add_argument( + "--dump", + action=argparse.BooleanOptionalAction, + default=False, + help="dump annotated track data" + ) parser.add_argument( "-v", "--version", @@ -102,21 +111,47 @@ def main(sysargs=sys.argv[1:]): """ # parse args args = get_args(sysargs) - # define subplot number and track heights + + # define subplot number, track heights and parse data + coverage_df, title = data.bam_to_coverage_df(args.bam, args.reference, args.coverage) track_heights = [1] + track_data = [] + # extract data and check if ref was found if args.tracks is not None: number_of_tracks = len(args.tracks)+1 for track in args.tracks: if track.endswith("vcf"): - track_heights = track_heights + [config.vcf_track_proportion] + vcf_data = [data.vcf_to_df(track, args.reference), "vcf"] + if vcf_data[0].empty: + print("WARNING: vcf data does not contain the seq reference id") + number_of_tracks -= 1 + else: + track_heights = track_heights + [config.vcf_track_proportion] + track_data.append(vcf_data) elif track.endswith("gb"): - track_heights = track_heights + [config.gb_track_proportion] + gb_dict, seq = data.genbank_to_dict(track, coverage_df, args.reference, args.coverage) + if gb_dict: + track_heights = track_heights + [config.gb_track_proportion] + track_data.append([gb_dict, "gb", seq]) + else: + print("WARNING: gb data does not contain the seq reference id") + number_of_tracks -= 1 elif track.endswith("bed"): - track_heights = track_heights + [config.bed_track_proportion] + bed_data = [data.bed_to_dict(track, coverage_df, args.reference, args.coverage), "bed"] + if bed_data[0]["bed annotations"]: + track_heights = track_heights + [config.bed_track_proportion] + track_data.append(bed_data) + else: + print("WARNING: bed data does not contain the seq reference id") + number_of_tracks -= 1 else: - sys.exit("one of the track types is not supported") + sys.exit("one of the track types is not supported (supported are *.vcf, *.bed and *.gb") else: number_of_tracks = 1 + + # annotate if one gb and vcfs are in tracks + track_data = data.annotate_vcfs_in_tracks(track_data) + # define layout fig = make_subplots( rows=number_of_tracks, @@ -126,54 +161,96 @@ def main(sysargs=sys.argv[1:]): vertical_spacing=config.plot_spacing, ) # create coverage plot - coverage_df, title = data.bam_to_coverage_df(args.bam, args.reference, args.coverage) plotting.create_coverage_plot(fig, 1, coverage_df) # create track plots - if args.tracks is not None: - for index, track in enumerate(args.tracks): + if track_data: + for index, track in enumerate(track_data): row = index+2 - if track.endswith("vcf"): - vcf_df = data.vcf_to_df(track, args.reference) - plotting.create_vcf_plot(fig, row, vcf_df) - elif track.endswith("gb"): - gb_dict = data.genbank_to_dict(track, coverage_df, args.reference, args.coverage) - plotting.create_track_plot(fig, row, gb_dict, config.box_gb_size, config.box_gb_alpha) - elif track.endswith("bed"): - bed_dict = data.bed_to_dict(track, coverage_df, args.reference, args.coverage) - plotting.create_track_plot(fig, row, bed_dict, config.box_bed_size, config.box_bed_alpha) + if track[1] == "vcf": + plotting.create_vcf_plot(fig, row, track[0]) + elif track[1] == "gb": + plotting.create_track_plot(fig, row, track[0], config.box_gb_size, config.box_gb_alpha) + elif track[1] == "bed": + plotting.create_track_plot(fig, row, track[0], config.box_bed_size, config.box_bed_alpha) + + # define own templates + pio.templates["plotly_dark_custom"], pio.templates["plotly_white_custom"] = pio.templates["plotly_dark"], pio.templates["plotly_white"] + # change params + pio.templates["plotly_dark_custom"].update( + layout=dict(yaxis=dict(linecolor="white", tickcolor="white", zerolinecolor="rgb(17,17,17)"), + xaxis=dict(linecolor="white", tickcolor="white", zerolinecolor="rgb(17,17,17)"), + updatemenudefaults=dict(bgcolor="rgb(115, 115, 115)") + ) + ) + pio.templates["plotly_white_custom"].update( + layout=dict(yaxis=dict(linecolor="black", tickcolor="black", zerolinecolor="white"), + xaxis=dict(linecolor="black", tickcolor="black", zerolinecolor="white"), + updatemenudefaults=dict(bgcolor="rgb(204, 204, 204)") + ) + ) # global formatting fig.update_layout( - plot_bgcolor="white", + template="plotly_white_custom", hovermode="x unified", - title=dict( - text=title, - x=1, - font=dict( - family="Arial", - size=16, - color='#000000' - ) - ), font=dict( - family="Arial", - size=16, - ) + family=config.font, + size=config.font_size, + ), + # Add buttons + updatemenus=[ + dict( + type="buttons", + direction="left", + buttons=[ + dict( + args=["yaxis.type", "linear"], + label="linear", + method="relayout" + ), + dict( + args=["yaxis.type", "log"], + label="log", + method="relayout" + ), + dict(args=[{"template": pio.templates["plotly_dark_custom"], "visible": True}], + label="dark", + method="relayout"), + dict(args=[{"template": pio.templates["plotly_white_custom"], "visible": True}], + label="light", + method="relayout"), + ], + pad={"r": 10, "t": 1}, + showactive=False, + xanchor="left", + y=1.15, + yanchor="top" + ) + ], + # add global stats as annotation + annotations=[ + dict(text=title, y=1.14, yref="paper", + align="center", showarrow=False) + ] ) # global x axes fig.update_xaxes( mirror=False, - ticks="outside", showline=True, - linecolor="black", - range=[0, max(coverage_df["position"])] + linewidth=1, + ticks="outside", + minor_ticks="outside", + range=[0, max(coverage_df["position"])], + showgrid=False, ) # global y axis fig.update_yaxes( mirror=False, - ticks="outside", showline=True, - linecolor="black" + linewidth=1, + ticks="outside", + minor_ticks="outside", + showgrid=False ) # if a range slider is shown, do not display the xaxis title # (will be shown underneath) @@ -189,14 +266,34 @@ def main(sysargs=sys.argv[1:]): else: # last x axis fig.update_xaxes(title_text="genome position", row=number_of_tracks, col=1) - + # html export fig.write_html(f"{args.reference}_plot.html") + # static image export if args.export_static is not None: # static image specific options - if config.show_log: + if config.show_log: # correct log layout fig["layout"]["yaxis"]["type"] = "log" fig["layout"]["yaxis"]["range"] = (0, math.log(fig["layout"]["yaxis"]["range"][1], 10)) fig.update_yaxes(dtick=1, row=1) - fig.update_layout(updatemenus=[dict(visible=False)]) + fig.update_layout(updatemenus=[dict(visible=False)]) # no buttons + fig.update_layout(annotations=[dict(visible=False)]) # no annotations # write static image + pio.kaleido.scope.mathjax = None # fix so no weird box is shown fig.write_image(f"{args.reference}_plot.{args.export_static}", width=args.dimensions[0], height=args.dimensions[1]) + + # dump track data + vcf_track_count, bed_track_count, gb_track_count = 0, 0, 0 + if args.dump and track_data: + for track in track_data: + if track[1] == "vcf": + track[0].to_csv(f"{args.reference}_vcf_data_{vcf_track_count}.tabular", sep="\t", header=True, index=False) + vcf_track_count += 1 + elif track[1] == "bed": + bed_df = pd.DataFrame.from_dict(track[0]["bed annotations"], orient="index") + bed_df.drop("track", axis=1, inplace=True) + bed_df.to_csv(f"{args.reference}_bed_data_{bed_track_count}.tabular", sep="\t", header=True, index=False) + bed_track_count += 1 + elif track[1] == "gb": + with open(f"{args.reference}_gb_data_{gb_track_count}.json", "w") as fp: + json.dump(track[0], fp) + gb_track_count += 1 diff --git a/bamdash/scripts/config.py b/bamdash/scripts/config.py index 71e8b5f..76748ff 100644 --- a/bamdash/scripts/config.py +++ b/bamdash/scripts/config.py @@ -10,23 +10,25 @@ gb_track_proportion = 0.5 bed_track_proportion = 0.2 plot_spacing = 0.05 +font = "Arial" +font_size = 16 # coverage customize -coverage_fill_color = "rgba(255, 212, 135, 0.2)" +coverage_fill_color = "rgba(255, 212, 135, 0.4)" coverage_line_color = "rgba(224, 168, 68, 1)" average_line_color = "grey" average_line_width = 1 # track customize track_color_scheme = "agsunset" # for mutiple annotations tracks (genebank) -track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no name colors) +track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no named colors) strand_types = ["triangle-right", "triangle-left", "diamond-wide"] # +, -, undefined strand strand_marker_size = 8 strand_marker_line_width = 1 strand_marker_line_color = "rgba(0, 0, 0, 0.2)" -box_bed_alpha = [0.6, 0.6] # alpha values for boxes (bed) +box_bed_alpha = [0.7, 0.7] # alpha values for boxes (bed) box_bed_size = [0.4, 0.4] # size values for boxes (bed) -box_gb_alpha = [0.6, 0.8] # alpha values for boxes (gb) +box_gb_alpha = [0.7, 0.8] # alpha values for boxes (gb) box_gb_size = [0.4, 0.3] # size values for boxes (gb) # variant customize diff --git a/bamdash/scripts/data.py b/bamdash/scripts/data.py index 1b3a099..4b48b8c 100644 --- a/bamdash/scripts/data.py +++ b/bamdash/scripts/data.py @@ -6,6 +6,7 @@ # LIBS import pandas as pd +from Bio import Seq from Bio import SeqIO import pysam from pysam import VariantFile @@ -33,7 +34,7 @@ def get_coverage_stats(coverage_df, start, stop, min_cov): mean_coverage = statistics.mean(df_subset["coverage"]) recovery = len(df_subset["coverage"])/(stop-start+1)*100 - return f"{round(mean_coverage)}x", f"{round(recovery, 2)} %" + return round(mean_coverage), round(recovery, 2) def make_title_string(parsed_bam, coverage_df, reference, min_cov): @@ -52,9 +53,9 @@ def make_title_string(parsed_bam, coverage_df, reference, min_cov): stat_string = make_stat_substring(stat_string, "reference", bam_stats[0]) stat_string = make_stat_substring(stat_string, "reference length", f"{parsed_bam.get_reference_length(reference)} bp") gc_content = round((sum(coverage_df["C"])+sum(coverage_df["G"]))/len(coverage_df), 2) - for bam_stat, stat_type in zip(bam_stats[1:], ["mapped", "unmapped", "total reads"]): + for bam_stat, stat_type in zip(bam_stats[1:], ["mapped", "unmapped", "total"]): stat_string = make_stat_substring(stat_string, stat_type, bam_stat) - stat_string = make_stat_substring(stat_string, "mean coverage", mean) + stat_string = make_stat_substring(stat_string, "
mean coverage", mean) stat_string = make_stat_substring(stat_string, "recovery", rec) stat_string = make_stat_substring(stat_string, "gc content", f"{gc_content}%") @@ -111,7 +112,7 @@ def vcf_to_df(vcf_file, ref): # extract vcf info vcf_info = list(vcf.header.info) - variant_dict = {x: [] for x in ["position", "reference", "mutation", "type"] + vcf_info} + variant_dict = {x: [] for x in ["position", "reference", "mutation", "type", "point mutation type"] + vcf_info} # create vcf dictionary for rec in vcf.fetch(): if rec.chrom != ref: @@ -119,6 +120,16 @@ def vcf_to_df(vcf_file, ref): variant_dict["position"].append(rec.pos) variant_dict["reference"].append(rec.ref) variant_dict["mutation"].append(rec.alts[0]) + # annotate the point mutation type + base_exchange = rec.ref + rec.alts[0] + if len(base_exchange) == 2: + if base_exchange in ["AG", "GA", "CT", "TC"]: + variant_dict["point mutation type"] = "transition" + else: + variant_dict["point mutation type"] = "transversion" + else: + variant_dict["point mutation type"] = "none" + # get mutation type if len(rec.alts[0]) > len(rec.ref): variant_dict["type"].append("INS") @@ -136,7 +147,7 @@ def vcf_to_df(vcf_file, ref): # clear all keys that have None appended empty_keys = [] for key in variant_dict: - if all([x != None for x in variant_dict[key]]): + if all([x is not None for x in variant_dict[key]]): continue empty_keys.append(key) if empty_keys: @@ -189,18 +200,22 @@ def genbank_to_dict(gb_file, coverage_df, ref, min_cov): """ feature_dict = {} + seq = "" for gb_record in SeqIO.parse(open(gb_file, "r"), "genbank"): if gb_record.id != ref and gb_record.name != ref: - return feature_dict + break + seq = gb_record.seq for feature in gb_record.features: if feature.type not in feature_dict: feature_dict[feature.type] = {} start, stop = feature.location.start + 1, feature.location.end feature_dict[feature.type][f"{start} {stop}"] = {} mean_cov, rec = get_coverage_stats(coverage_df, start, stop, min_cov) + feature_dict[feature.type][f"{start} {stop}"]["start"] = start + feature_dict[feature.type][f"{start} {stop}"]["stop"] = stop feature_dict[feature.type][f"{start} {stop}"]["mean coverage"] = mean_cov - feature_dict[feature.type][f"{start} {stop}"]["recovery"] = rec + feature_dict[feature.type][f"{start} {stop}"]["% recovery"] = rec # define strand info if feature.strand == 1: feature_dict[feature.type][f"{start} {stop}"]["strand"] = "+" @@ -212,7 +227,10 @@ def genbank_to_dict(gb_file, coverage_df, ref, min_cov): for qualifier in feature.qualifiers: feature_dict[feature.type][f"{start} {stop}"][qualifier] = feature.qualifiers[qualifier][0] - return define_track_position(feature_dict) + if feature_dict: + return define_track_position(feature_dict), seq + else: + return feature_dict, seq def bed_to_dict(bed_file, coverage_df, ref, min_cov): @@ -248,6 +266,8 @@ def bed_to_dict(bed_file, coverage_df, ref, min_cov): for line in bed_line_list: start, stop = int(line[1]), int(line[2]) bed_dict["bed annotations"][f"{start} {stop}"] = {} + bed_dict["bed annotations"][f"{start} {stop}"]["start"] = start + bed_dict["bed annotations"][f"{start} {stop}"]["stop"] = stop # check for additional info if len(line) > 3: for element, classifier in zip(line[3:], possible_classifiers): @@ -259,3 +279,100 @@ def bed_to_dict(bed_file, coverage_df, ref, min_cov): return define_track_position(bed_dict) + +def annotate_vcf_df(vcf_df, cds_dict, seq): + """ + annotate mutations for their as effect + :param vcf_df: dataframe with vcf data + :param cds_dict: dictionary with all coding sequences + :param seq: sequence of the reference + :return: annotated df + """ + + proteins, as_exchange, as_effect = [], [], [] + + for variant in vcf_df.iterrows(): + proteins_temp, as_exchange_temp, as_effect_temp = [], [], [] + pos, mut_type, mut = variant[1]["position"], variant[1]["type"], variant[1]["mutation"] + # check each cds for potential mutations + for cds in cds_dict: + # check if a protein identifier is present + if "protein_id" not in cds_dict[cds]: + continue + start, stop = cds_dict[cds]["start"], cds_dict[cds]["stop"] + # check if pos is in range + if pos not in range(start, stop): + continue + # now we know the protein + proteins_temp.append(cds_dict[cds]["protein_id"]) + # at the moment only check SNPs + if mut_type != "SNP": + as_exchange_temp.append("AMBIG"), as_effect_temp.append(variant[1]["type"]) + continue + # now we can analyse for a potential as exchange + strand, seq_mut, codon_start = cds_dict[cds]["strand"], str(seq), int(cds_dict[cds]["codon_start"]) + # mutate the sequence and get the CDS nt seq + seq_mut = Seq.Seq(seq_mut[:pos] + mut + seq_mut[pos+1:]) + seq_cds, seq_mut_cds = seq[start+codon_start-2:stop], seq_mut[start+codon_start-2:stop] + # translate strand depend + if strand == "+": + cds_trans, cds_mut_trans = seq_cds.translate(), seq_mut_cds.translate() + else: + cds_trans, cds_mut_trans = seq_cds.reverse_complement().translate(), seq_mut_cds.reverse_complement().translate() + # get the mutation by searching for a diff between string - prop a bit slow + mut_string = [f"{x}{i+1}{y}" for i, (x, y) in enumerate(zip(cds_trans, cds_mut_trans)) if x != y] + # now append to list + if not mut_string: + as_exchange_temp.append("NONE"), as_effect_temp.append("SYN") + else: + as_exchange_temp.append(mut_string[0]), as_effect_temp.append("NON-SYN") + # check if we did not find a protein + if not proteins_temp: + proteins.append("NONE"), as_exchange.append("NONE"), as_effect.append("NONE") + # else append all mutations found in all cds + elif len(proteins_temp) == 1: + proteins.append(proteins_temp[0]), as_exchange.append(as_exchange_temp[0]), as_effect.append(as_effect_temp[0]) + else: + proteins.append(" | ".join(proteins_temp)), as_exchange.append(" | ".join(as_exchange_temp)), as_effect.append(" | ".join(as_effect_temp)) + + # annotate and return df + vcf_df["protein"], vcf_df["effect"], vcf_df["as mutation"] = proteins, as_effect, as_exchange + return vcf_df + + +def annotate_vcfs_in_tracks(track_data): + """ + annotates all vcf in the tracks + :param track_data + :return: track data with annotated vcfs + """ + # annotate mutation effects in vcf df if gb is present + index_positions = [[], []] # index of gb and vcf + for index, track in enumerate(track_data): + if "gb" in track[1]: + index_positions[0].append(index) + elif "vcf" in track[1]: + index_positions[1].append(index) + # check if there is a gb file and a vcf file + if index_positions[0] and index_positions[1]: + # and only one gb file + gb_indices = index_positions[0] + if len(gb_indices) > 1: + print("WARNING: cannot annotate from multiple *.gb files!") + return track_data + # annotate each vcf df + for vcf_track in index_positions[1]: + # check if CDS is present + if "CDS" not in track_data[gb_indices[0]][0]: + continue + # check again that there is no empty data + if not track_data[vcf_track][0].empty: + # and then annotate the vcf dict + track_data[vcf_track][0] = annotate_vcf_df( + track_data[vcf_track][0], # vcf df + track_data[gb_indices[0]][0]["CDS"], # CDS dict + track_data[gb_indices[0]][2] # seq + ) + + return track_data + diff --git a/bamdash/scripts/plotting.py b/bamdash/scripts/plotting.py index 31062be..07c85ee 100644 --- a/bamdash/scripts/plotting.py +++ b/bamdash/scripts/plotting.py @@ -36,7 +36,9 @@ def create_coverage_plot(fig, row, coverage_df): fillcolor=config.coverage_fill_color, line=dict(color=config.coverage_line_color), hovertemplate=h_template, - name="coverage", + legendgroup="coverage", + legendgrouptitle_text="coverage", + name="", showlegend=True ), row=row, @@ -53,39 +55,16 @@ def create_coverage_plot(fig, row, coverage_df): mode="lines+text", line=dict(color=config.average_line_color, width=config.average_line_width, dash="dash"), showlegend=True, - name="average" + legendgroup="average", + name="", + legendgrouptitle_text="average", ), row=row, col=1 ) # y axis title - fig.update_yaxes(title_text="genome coverage", range=[0, max(coverage_df["coverage"])], row=row, col=1) - # Add dropdown - fig.update_layout( - updatemenus=[ - dict( - type="buttons", - direction="left", - buttons=list([ - dict( - args=[f"yaxis{row}.type" if row > 1 else "yaxis.type", "linear"], - label="linear", - method="relayout" - ), - dict( - args=[f"yaxis{row}.type" if row > 1 else "yaxis.type", "log"], - label="log", - method="relayout" - ) - ]), - pad={"r": 10, "t": 10}, - showactive=True, - xanchor="left", - y=1.1, - yanchor="top" - ) - ], - ) + fig.update_yaxes(range=[0, max(coverage_df["coverage"])], row=row, col=1) + def split_vcf_df(df): @@ -149,8 +128,9 @@ def create_vcf_plot(fig, row, vcf_df): go.Scatter( x=vcf_subset["position"], y=y_data, - name=mut, + name=f"plot {row}", legendgroup=mut, + legendgrouptitle_text=mut, mode="markers", customdata=vcf_subset, showlegend=show_legend, @@ -173,8 +153,7 @@ def create_vcf_plot(fig, row, vcf_df): else: y_data = [1] * len(vcf_df["position"]) # add lines - fig.update_layout( - shapes=[dict( + shapes = [dict( type="line", xref=f"x{row}", yref=f"y{row}", @@ -187,7 +166,12 @@ def create_vcf_plot(fig, row, vcf_df): width=config.stem_width ) ) for x, y in zip(vcf_df["position"], y_data)] - ) + # plot shape in each subplot + if fig["layout"]["shapes"]: + for shape in shapes: + fig.add_shape(shape) + else: + fig.update_layout(shapes=shapes) # not need to show yaxis if af is not in vcf if "AF" not in vcf_df: fig.update_yaxes(visible=False, row=row, col=1) @@ -202,6 +186,7 @@ def create_track_plot(fig, row, feature_dict, box_size, box_alpha): :param feature_dict: all infos for tracks as dictionary :param box_size: list of box sizes :param box_alpha: list of box alpha values + :param subplot: subplot index of the plot :return: updated figure """ # define colors @@ -220,7 +205,7 @@ def create_track_plot(fig, row, feature_dict, box_size, box_alpha): if cycle == 2: cycle = 0 # get various plot info - positions = [int(x) for x in annotation.split(" ")] + positions = [feature_dict[feature][annotation]["start"], feature_dict[feature][annotation]["stop"]] track = feature_dict[feature][annotation]["track"] # define strand marker if feature_dict[feature][annotation]["strand"] == "+": @@ -272,7 +257,7 @@ def create_track_plot(fig, row, feature_dict, box_size, box_alpha): line=dict(color=color_thes[cycle]), showlegend=legend_vis, hoverinfo='skip', - name="", + name=f"plot {row}", legendgroup=feature, legendgrouptitle_text=feature, ), diff --git a/example.gif b/example.gif new file mode 100644 index 0000000..ed3a8ff Binary files /dev/null and b/example.gif differ diff --git a/example.png b/example.png deleted file mode 100644 index f86fb26..0000000 Binary files a/example.png and /dev/null differ diff --git a/setup.py b/setup.py index 662bde4..4fdb4dd 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ "pysam>=0.21.0", "biopython>=1.79" ], - description='bamdash creates a interactive coverage dashboard optionally with vcf, bed, gb tracks', + description='create a interactive coverage plot dashboard from bam files and add gb, vcf and bed tracks', url='https://github.com/jonas-fuchs/BAMdash', author='Dr. Jonas Fuchs', author_email='jonas.fuchs@uniklinik-freiburg.de',