diff --git a/README.md b/README.md
index 913af12..290b684 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,10 @@
+
+[![language](https://img.shields.io/badge/python-%3E3.9-green)](https://www.python.org/)
+[![License: GPL v3](https://img.shields.io/github/license/jonas-fuchs/varvamp)](https://www.gnu.org/licenses/gpl-3.0)
+![Static Badge](https://img.shields.io/badge/platform-linux_osx-blue)
+
## Overview
**BAMdash lets you create interactive coverage plots from your bam file with [`plotly`](https://plotly.com/)**
@@ -8,15 +13,19 @@
- **create** a interactive `html` for data exploration
- **create** a static image (`jpg`, `png`, `pdf`, `svg`) ready for publication
- **add** additional tracks (supported: `.vcf`, `.gb`, `.bed`)
+- **annotate** tracks with coverage data and vcf with additional information if a `.gb` file is provided
+- **export** annoated track data as tabular files (`.bed`, `.vcf`) or json (`.gb`)
- **developed** for viral genomics
- **customize** all plotting parameters
+**Feel free to report any bugs or request new features as issues!**
+
## Example
-
+
## Installation
-### via pip (recommened, coming soon):
+### via pip (recommened):
```shell
pip install bamdash
```
@@ -52,7 +61,7 @@ full usage:
-h, --help show this help message and exit
-b , --bam bam file location
- -r , --reference reference id
+ -r , --reference seq reference id
-t [track_1 ...], --tracks [track_1 ...]
file location of tracks
-c 5, --coverage 5 minimum coverage
@@ -62,6 +71,7 @@ full usage:
export as png, jpg, pdf, svg
-d px px, --dimensions px px
width and height of the static image in px
+ --dump, --no-dump dump annotated track data (default: False)
-v, --version show program's version number and exit
```
@@ -96,7 +106,7 @@ average_line_width = 1
# track customize
track_color_scheme = "agsunset" # for mutiple annotations tracks (genebank)
-track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no name colors)
+track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no named colors)
strand_types = ["triangle-right", "triangle-left", "diamond-wide"] # +, -, undefined strand
strand_marker_size = 8
strand_marker_line_width = 1
@@ -121,6 +131,9 @@ To apply these new settings just repeat the installation procedure in the BAMdas
pip install .
```
+
+
+
---
**Important disclaimer:**
diff --git a/bamdash/__init__.py b/bamdash/__init__.py
index e3121ea..08276cd 100644
--- a/bamdash/__init__.py
+++ b/bamdash/__init__.py
@@ -1,3 +1,3 @@
"""interactively visualize coverage and tracks"""
_program = "bamdash"
-__version__ = "0.0"
+__version__ = "0.1"
diff --git a/bamdash/__pycache__/__init__.cpython-39.pyc b/bamdash/__pycache__/__init__.cpython-39.pyc
index 8a4c7ad..31f17f6 100644
Binary files a/bamdash/__pycache__/__init__.cpython-39.pyc and b/bamdash/__pycache__/__init__.cpython-39.pyc differ
diff --git a/bamdash/command.py b/bamdash/command.py
index c30514a..58e54e1 100644
--- a/bamdash/command.py
+++ b/bamdash/command.py
@@ -6,8 +6,11 @@
import sys
import argparse
import math
+import json
# LIBS
+import plotly.io as pio
+import pandas as pd
from plotly.subplots import make_subplots
# BAMDASH
@@ -39,7 +42,7 @@ def get_args(sysargs):
required=True,
type=str,
metavar=" ",
- help="reference id"
+ help="seq reference id"
)
parser.add_argument(
"-t",
@@ -83,6 +86,12 @@ def get_args(sysargs):
nargs=2,
help="width and height of the static image in px"
)
+ parser.add_argument(
+ "--dump",
+ action=argparse.BooleanOptionalAction,
+ default=False,
+ help="dump annotated track data"
+ )
parser.add_argument(
"-v",
"--version",
@@ -102,21 +111,47 @@ def main(sysargs=sys.argv[1:]):
"""
# parse args
args = get_args(sysargs)
- # define subplot number and track heights
+
+ # define subplot number, track heights and parse data
+ coverage_df, title = data.bam_to_coverage_df(args.bam, args.reference, args.coverage)
track_heights = [1]
+ track_data = []
+ # extract data and check if ref was found
if args.tracks is not None:
number_of_tracks = len(args.tracks)+1
for track in args.tracks:
if track.endswith("vcf"):
- track_heights = track_heights + [config.vcf_track_proportion]
+ vcf_data = [data.vcf_to_df(track, args.reference), "vcf"]
+ if vcf_data[0].empty:
+ print("WARNING: vcf data does not contain the seq reference id")
+ number_of_tracks -= 1
+ else:
+ track_heights = track_heights + [config.vcf_track_proportion]
+ track_data.append(vcf_data)
elif track.endswith("gb"):
- track_heights = track_heights + [config.gb_track_proportion]
+ gb_dict, seq = data.genbank_to_dict(track, coverage_df, args.reference, args.coverage)
+ if gb_dict:
+ track_heights = track_heights + [config.gb_track_proportion]
+ track_data.append([gb_dict, "gb", seq])
+ else:
+ print("WARNING: gb data does not contain the seq reference id")
+ number_of_tracks -= 1
elif track.endswith("bed"):
- track_heights = track_heights + [config.bed_track_proportion]
+ bed_data = [data.bed_to_dict(track, coverage_df, args.reference, args.coverage), "bed"]
+ if bed_data[0]["bed annotations"]:
+ track_heights = track_heights + [config.bed_track_proportion]
+ track_data.append(bed_data)
+ else:
+ print("WARNING: bed data does not contain the seq reference id")
+ number_of_tracks -= 1
else:
- sys.exit("one of the track types is not supported")
+ sys.exit("one of the track types is not supported (supported are *.vcf, *.bed and *.gb")
else:
number_of_tracks = 1
+
+ # annotate if one gb and vcfs are in tracks
+ track_data = data.annotate_vcfs_in_tracks(track_data)
+
# define layout
fig = make_subplots(
rows=number_of_tracks,
@@ -126,54 +161,96 @@ def main(sysargs=sys.argv[1:]):
vertical_spacing=config.plot_spacing,
)
# create coverage plot
- coverage_df, title = data.bam_to_coverage_df(args.bam, args.reference, args.coverage)
plotting.create_coverage_plot(fig, 1, coverage_df)
# create track plots
- if args.tracks is not None:
- for index, track in enumerate(args.tracks):
+ if track_data:
+ for index, track in enumerate(track_data):
row = index+2
- if track.endswith("vcf"):
- vcf_df = data.vcf_to_df(track, args.reference)
- plotting.create_vcf_plot(fig, row, vcf_df)
- elif track.endswith("gb"):
- gb_dict = data.genbank_to_dict(track, coverage_df, args.reference, args.coverage)
- plotting.create_track_plot(fig, row, gb_dict, config.box_gb_size, config.box_gb_alpha)
- elif track.endswith("bed"):
- bed_dict = data.bed_to_dict(track, coverage_df, args.reference, args.coverage)
- plotting.create_track_plot(fig, row, bed_dict, config.box_bed_size, config.box_bed_alpha)
+ if track[1] == "vcf":
+ plotting.create_vcf_plot(fig, row, track[0])
+ elif track[1] == "gb":
+ plotting.create_track_plot(fig, row, track[0], config.box_gb_size, config.box_gb_alpha)
+ elif track[1] == "bed":
+ plotting.create_track_plot(fig, row, track[0], config.box_bed_size, config.box_bed_alpha)
+
+ # define own templates
+ pio.templates["plotly_dark_custom"], pio.templates["plotly_white_custom"] = pio.templates["plotly_dark"], pio.templates["plotly_white"]
+ # change params
+ pio.templates["plotly_dark_custom"].update(
+ layout=dict(yaxis=dict(linecolor="white", tickcolor="white", zerolinecolor="rgb(17,17,17)"),
+ xaxis=dict(linecolor="white", tickcolor="white", zerolinecolor="rgb(17,17,17)"),
+ updatemenudefaults=dict(bgcolor="rgb(115, 115, 115)")
+ )
+ )
+ pio.templates["plotly_white_custom"].update(
+ layout=dict(yaxis=dict(linecolor="black", tickcolor="black", zerolinecolor="white"),
+ xaxis=dict(linecolor="black", tickcolor="black", zerolinecolor="white"),
+ updatemenudefaults=dict(bgcolor="rgb(204, 204, 204)")
+ )
+ )
# global formatting
fig.update_layout(
- plot_bgcolor="white",
+ template="plotly_white_custom",
hovermode="x unified",
- title=dict(
- text=title,
- x=1,
- font=dict(
- family="Arial",
- size=16,
- color='#000000'
- )
- ),
font=dict(
- family="Arial",
- size=16,
- )
+ family=config.font,
+ size=config.font_size,
+ ),
+ # Add buttons
+ updatemenus=[
+ dict(
+ type="buttons",
+ direction="left",
+ buttons=[
+ dict(
+ args=["yaxis.type", "linear"],
+ label="linear",
+ method="relayout"
+ ),
+ dict(
+ args=["yaxis.type", "log"],
+ label="log",
+ method="relayout"
+ ),
+ dict(args=[{"template": pio.templates["plotly_dark_custom"], "visible": True}],
+ label="dark",
+ method="relayout"),
+ dict(args=[{"template": pio.templates["plotly_white_custom"], "visible": True}],
+ label="light",
+ method="relayout"),
+ ],
+ pad={"r": 10, "t": 1},
+ showactive=False,
+ xanchor="left",
+ y=1.15,
+ yanchor="top"
+ )
+ ],
+ # add global stats as annotation
+ annotations=[
+ dict(text=title, y=1.14, yref="paper",
+ align="center", showarrow=False)
+ ]
)
# global x axes
fig.update_xaxes(
mirror=False,
- ticks="outside",
showline=True,
- linecolor="black",
- range=[0, max(coverage_df["position"])]
+ linewidth=1,
+ ticks="outside",
+ minor_ticks="outside",
+ range=[0, max(coverage_df["position"])],
+ showgrid=False,
)
# global y axis
fig.update_yaxes(
mirror=False,
- ticks="outside",
showline=True,
- linecolor="black"
+ linewidth=1,
+ ticks="outside",
+ minor_ticks="outside",
+ showgrid=False
)
# if a range slider is shown, do not display the xaxis title
# (will be shown underneath)
@@ -189,14 +266,34 @@ def main(sysargs=sys.argv[1:]):
else:
# last x axis
fig.update_xaxes(title_text="genome position", row=number_of_tracks, col=1)
-
+ # html export
fig.write_html(f"{args.reference}_plot.html")
+ # static image export
if args.export_static is not None:
# static image specific options
- if config.show_log:
+ if config.show_log: # correct log layout
fig["layout"]["yaxis"]["type"] = "log"
fig["layout"]["yaxis"]["range"] = (0, math.log(fig["layout"]["yaxis"]["range"][1], 10))
fig.update_yaxes(dtick=1, row=1)
- fig.update_layout(updatemenus=[dict(visible=False)])
+ fig.update_layout(updatemenus=[dict(visible=False)]) # no buttons
+ fig.update_layout(annotations=[dict(visible=False)]) # no annotations
# write static image
+ pio.kaleido.scope.mathjax = None # fix so no weird box is shown
fig.write_image(f"{args.reference}_plot.{args.export_static}", width=args.dimensions[0], height=args.dimensions[1])
+
+ # dump track data
+ vcf_track_count, bed_track_count, gb_track_count = 0, 0, 0
+ if args.dump and track_data:
+ for track in track_data:
+ if track[1] == "vcf":
+ track[0].to_csv(f"{args.reference}_vcf_data_{vcf_track_count}.tabular", sep="\t", header=True, index=False)
+ vcf_track_count += 1
+ elif track[1] == "bed":
+ bed_df = pd.DataFrame.from_dict(track[0]["bed annotations"], orient="index")
+ bed_df.drop("track", axis=1, inplace=True)
+ bed_df.to_csv(f"{args.reference}_bed_data_{bed_track_count}.tabular", sep="\t", header=True, index=False)
+ bed_track_count += 1
+ elif track[1] == "gb":
+ with open(f"{args.reference}_gb_data_{gb_track_count}.json", "w") as fp:
+ json.dump(track[0], fp)
+ gb_track_count += 1
diff --git a/bamdash/scripts/config.py b/bamdash/scripts/config.py
index 71e8b5f..76748ff 100644
--- a/bamdash/scripts/config.py
+++ b/bamdash/scripts/config.py
@@ -10,23 +10,25 @@
gb_track_proportion = 0.5
bed_track_proportion = 0.2
plot_spacing = 0.05
+font = "Arial"
+font_size = 16
# coverage customize
-coverage_fill_color = "rgba(255, 212, 135, 0.2)"
+coverage_fill_color = "rgba(255, 212, 135, 0.4)"
coverage_line_color = "rgba(224, 168, 68, 1)"
average_line_color = "grey"
average_line_width = 1
# track customize
track_color_scheme = "agsunset" # for mutiple annotations tracks (genebank)
-track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no name colors)
+track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no named colors)
strand_types = ["triangle-right", "triangle-left", "diamond-wide"] # +, -, undefined strand
strand_marker_size = 8
strand_marker_line_width = 1
strand_marker_line_color = "rgba(0, 0, 0, 0.2)"
-box_bed_alpha = [0.6, 0.6] # alpha values for boxes (bed)
+box_bed_alpha = [0.7, 0.7] # alpha values for boxes (bed)
box_bed_size = [0.4, 0.4] # size values for boxes (bed)
-box_gb_alpha = [0.6, 0.8] # alpha values for boxes (gb)
+box_gb_alpha = [0.7, 0.8] # alpha values for boxes (gb)
box_gb_size = [0.4, 0.3] # size values for boxes (gb)
# variant customize
diff --git a/bamdash/scripts/data.py b/bamdash/scripts/data.py
index 1b3a099..4b48b8c 100644
--- a/bamdash/scripts/data.py
+++ b/bamdash/scripts/data.py
@@ -6,6 +6,7 @@
# LIBS
import pandas as pd
+from Bio import Seq
from Bio import SeqIO
import pysam
from pysam import VariantFile
@@ -33,7 +34,7 @@ def get_coverage_stats(coverage_df, start, stop, min_cov):
mean_coverage = statistics.mean(df_subset["coverage"])
recovery = len(df_subset["coverage"])/(stop-start+1)*100
- return f"{round(mean_coverage)}x", f"{round(recovery, 2)} %"
+ return round(mean_coverage), round(recovery, 2)
def make_title_string(parsed_bam, coverage_df, reference, min_cov):
@@ -52,9 +53,9 @@ def make_title_string(parsed_bam, coverage_df, reference, min_cov):
stat_string = make_stat_substring(stat_string, "reference", bam_stats[0])
stat_string = make_stat_substring(stat_string, "reference length", f"{parsed_bam.get_reference_length(reference)} bp")
gc_content = round((sum(coverage_df["C"])+sum(coverage_df["G"]))/len(coverage_df), 2)
- for bam_stat, stat_type in zip(bam_stats[1:], ["mapped", "unmapped", "total reads"]):
+ for bam_stat, stat_type in zip(bam_stats[1:], ["mapped", "unmapped", "total"]):
stat_string = make_stat_substring(stat_string, stat_type, bam_stat)
- stat_string = make_stat_substring(stat_string, "mean coverage", mean)
+ stat_string = make_stat_substring(stat_string, "
mean coverage", mean)
stat_string = make_stat_substring(stat_string, "recovery", rec)
stat_string = make_stat_substring(stat_string, "gc content", f"{gc_content}%")
@@ -111,7 +112,7 @@ def vcf_to_df(vcf_file, ref):
# extract vcf info
vcf_info = list(vcf.header.info)
- variant_dict = {x: [] for x in ["position", "reference", "mutation", "type"] + vcf_info}
+ variant_dict = {x: [] for x in ["position", "reference", "mutation", "type", "point mutation type"] + vcf_info}
# create vcf dictionary
for rec in vcf.fetch():
if rec.chrom != ref:
@@ -119,6 +120,16 @@ def vcf_to_df(vcf_file, ref):
variant_dict["position"].append(rec.pos)
variant_dict["reference"].append(rec.ref)
variant_dict["mutation"].append(rec.alts[0])
+ # annotate the point mutation type
+ base_exchange = rec.ref + rec.alts[0]
+ if len(base_exchange) == 2:
+ if base_exchange in ["AG", "GA", "CT", "TC"]:
+ variant_dict["point mutation type"] = "transition"
+ else:
+ variant_dict["point mutation type"] = "transversion"
+ else:
+ variant_dict["point mutation type"] = "none"
+
# get mutation type
if len(rec.alts[0]) > len(rec.ref):
variant_dict["type"].append("INS")
@@ -136,7 +147,7 @@ def vcf_to_df(vcf_file, ref):
# clear all keys that have None appended
empty_keys = []
for key in variant_dict:
- if all([x != None for x in variant_dict[key]]):
+ if all([x is not None for x in variant_dict[key]]):
continue
empty_keys.append(key)
if empty_keys:
@@ -189,18 +200,22 @@ def genbank_to_dict(gb_file, coverage_df, ref, min_cov):
"""
feature_dict = {}
+ seq = ""
for gb_record in SeqIO.parse(open(gb_file, "r"), "genbank"):
if gb_record.id != ref and gb_record.name != ref:
- return feature_dict
+ break
+ seq = gb_record.seq
for feature in gb_record.features:
if feature.type not in feature_dict:
feature_dict[feature.type] = {}
start, stop = feature.location.start + 1, feature.location.end
feature_dict[feature.type][f"{start} {stop}"] = {}
mean_cov, rec = get_coverage_stats(coverage_df, start, stop, min_cov)
+ feature_dict[feature.type][f"{start} {stop}"]["start"] = start
+ feature_dict[feature.type][f"{start} {stop}"]["stop"] = stop
feature_dict[feature.type][f"{start} {stop}"]["mean coverage"] = mean_cov
- feature_dict[feature.type][f"{start} {stop}"]["recovery"] = rec
+ feature_dict[feature.type][f"{start} {stop}"]["% recovery"] = rec
# define strand info
if feature.strand == 1:
feature_dict[feature.type][f"{start} {stop}"]["strand"] = "+"
@@ -212,7 +227,10 @@ def genbank_to_dict(gb_file, coverage_df, ref, min_cov):
for qualifier in feature.qualifiers:
feature_dict[feature.type][f"{start} {stop}"][qualifier] = feature.qualifiers[qualifier][0]
- return define_track_position(feature_dict)
+ if feature_dict:
+ return define_track_position(feature_dict), seq
+ else:
+ return feature_dict, seq
def bed_to_dict(bed_file, coverage_df, ref, min_cov):
@@ -248,6 +266,8 @@ def bed_to_dict(bed_file, coverage_df, ref, min_cov):
for line in bed_line_list:
start, stop = int(line[1]), int(line[2])
bed_dict["bed annotations"][f"{start} {stop}"] = {}
+ bed_dict["bed annotations"][f"{start} {stop}"]["start"] = start
+ bed_dict["bed annotations"][f"{start} {stop}"]["stop"] = stop
# check for additional info
if len(line) > 3:
for element, classifier in zip(line[3:], possible_classifiers):
@@ -259,3 +279,100 @@ def bed_to_dict(bed_file, coverage_df, ref, min_cov):
return define_track_position(bed_dict)
+
+def annotate_vcf_df(vcf_df, cds_dict, seq):
+ """
+ annotate mutations for their as effect
+ :param vcf_df: dataframe with vcf data
+ :param cds_dict: dictionary with all coding sequences
+ :param seq: sequence of the reference
+ :return: annotated df
+ """
+
+ proteins, as_exchange, as_effect = [], [], []
+
+ for variant in vcf_df.iterrows():
+ proteins_temp, as_exchange_temp, as_effect_temp = [], [], []
+ pos, mut_type, mut = variant[1]["position"], variant[1]["type"], variant[1]["mutation"]
+ # check each cds for potential mutations
+ for cds in cds_dict:
+ # check if a protein identifier is present
+ if "protein_id" not in cds_dict[cds]:
+ continue
+ start, stop = cds_dict[cds]["start"], cds_dict[cds]["stop"]
+ # check if pos is in range
+ if pos not in range(start, stop):
+ continue
+ # now we know the protein
+ proteins_temp.append(cds_dict[cds]["protein_id"])
+ # at the moment only check SNPs
+ if mut_type != "SNP":
+ as_exchange_temp.append("AMBIG"), as_effect_temp.append(variant[1]["type"])
+ continue
+ # now we can analyse for a potential as exchange
+ strand, seq_mut, codon_start = cds_dict[cds]["strand"], str(seq), int(cds_dict[cds]["codon_start"])
+ # mutate the sequence and get the CDS nt seq
+ seq_mut = Seq.Seq(seq_mut[:pos] + mut + seq_mut[pos+1:])
+ seq_cds, seq_mut_cds = seq[start+codon_start-2:stop], seq_mut[start+codon_start-2:stop]
+ # translate strand depend
+ if strand == "+":
+ cds_trans, cds_mut_trans = seq_cds.translate(), seq_mut_cds.translate()
+ else:
+ cds_trans, cds_mut_trans = seq_cds.reverse_complement().translate(), seq_mut_cds.reverse_complement().translate()
+ # get the mutation by searching for a diff between string - prop a bit slow
+ mut_string = [f"{x}{i+1}{y}" for i, (x, y) in enumerate(zip(cds_trans, cds_mut_trans)) if x != y]
+ # now append to list
+ if not mut_string:
+ as_exchange_temp.append("NONE"), as_effect_temp.append("SYN")
+ else:
+ as_exchange_temp.append(mut_string[0]), as_effect_temp.append("NON-SYN")
+ # check if we did not find a protein
+ if not proteins_temp:
+ proteins.append("NONE"), as_exchange.append("NONE"), as_effect.append("NONE")
+ # else append all mutations found in all cds
+ elif len(proteins_temp) == 1:
+ proteins.append(proteins_temp[0]), as_exchange.append(as_exchange_temp[0]), as_effect.append(as_effect_temp[0])
+ else:
+ proteins.append(" | ".join(proteins_temp)), as_exchange.append(" | ".join(as_exchange_temp)), as_effect.append(" | ".join(as_effect_temp))
+
+ # annotate and return df
+ vcf_df["protein"], vcf_df["effect"], vcf_df["as mutation"] = proteins, as_effect, as_exchange
+ return vcf_df
+
+
+def annotate_vcfs_in_tracks(track_data):
+ """
+ annotates all vcf in the tracks
+ :param track_data
+ :return: track data with annotated vcfs
+ """
+ # annotate mutation effects in vcf df if gb is present
+ index_positions = [[], []] # index of gb and vcf
+ for index, track in enumerate(track_data):
+ if "gb" in track[1]:
+ index_positions[0].append(index)
+ elif "vcf" in track[1]:
+ index_positions[1].append(index)
+ # check if there is a gb file and a vcf file
+ if index_positions[0] and index_positions[1]:
+ # and only one gb file
+ gb_indices = index_positions[0]
+ if len(gb_indices) > 1:
+ print("WARNING: cannot annotate from multiple *.gb files!")
+ return track_data
+ # annotate each vcf df
+ for vcf_track in index_positions[1]:
+ # check if CDS is present
+ if "CDS" not in track_data[gb_indices[0]][0]:
+ continue
+ # check again that there is no empty data
+ if not track_data[vcf_track][0].empty:
+ # and then annotate the vcf dict
+ track_data[vcf_track][0] = annotate_vcf_df(
+ track_data[vcf_track][0], # vcf df
+ track_data[gb_indices[0]][0]["CDS"], # CDS dict
+ track_data[gb_indices[0]][2] # seq
+ )
+
+ return track_data
+
diff --git a/bamdash/scripts/plotting.py b/bamdash/scripts/plotting.py
index 31062be..07c85ee 100644
--- a/bamdash/scripts/plotting.py
+++ b/bamdash/scripts/plotting.py
@@ -36,7 +36,9 @@ def create_coverage_plot(fig, row, coverage_df):
fillcolor=config.coverage_fill_color,
line=dict(color=config.coverage_line_color),
hovertemplate=h_template,
- name="coverage",
+ legendgroup="coverage",
+ legendgrouptitle_text="coverage",
+ name="",
showlegend=True
),
row=row,
@@ -53,39 +55,16 @@ def create_coverage_plot(fig, row, coverage_df):
mode="lines+text",
line=dict(color=config.average_line_color, width=config.average_line_width, dash="dash"),
showlegend=True,
- name="average"
+ legendgroup="average",
+ name="",
+ legendgrouptitle_text="average",
),
row=row,
col=1
)
# y axis title
- fig.update_yaxes(title_text="genome coverage", range=[0, max(coverage_df["coverage"])], row=row, col=1)
- # Add dropdown
- fig.update_layout(
- updatemenus=[
- dict(
- type="buttons",
- direction="left",
- buttons=list([
- dict(
- args=[f"yaxis{row}.type" if row > 1 else "yaxis.type", "linear"],
- label="linear",
- method="relayout"
- ),
- dict(
- args=[f"yaxis{row}.type" if row > 1 else "yaxis.type", "log"],
- label="log",
- method="relayout"
- )
- ]),
- pad={"r": 10, "t": 10},
- showactive=True,
- xanchor="left",
- y=1.1,
- yanchor="top"
- )
- ],
- )
+ fig.update_yaxes(range=[0, max(coverage_df["coverage"])], row=row, col=1)
+
def split_vcf_df(df):
@@ -149,8 +128,9 @@ def create_vcf_plot(fig, row, vcf_df):
go.Scatter(
x=vcf_subset["position"],
y=y_data,
- name=mut,
+ name=f"plot {row}",
legendgroup=mut,
+ legendgrouptitle_text=mut,
mode="markers",
customdata=vcf_subset,
showlegend=show_legend,
@@ -173,8 +153,7 @@ def create_vcf_plot(fig, row, vcf_df):
else:
y_data = [1] * len(vcf_df["position"])
# add lines
- fig.update_layout(
- shapes=[dict(
+ shapes = [dict(
type="line",
xref=f"x{row}",
yref=f"y{row}",
@@ -187,7 +166,12 @@ def create_vcf_plot(fig, row, vcf_df):
width=config.stem_width
)
) for x, y in zip(vcf_df["position"], y_data)]
- )
+ # plot shape in each subplot
+ if fig["layout"]["shapes"]:
+ for shape in shapes:
+ fig.add_shape(shape)
+ else:
+ fig.update_layout(shapes=shapes)
# not need to show yaxis if af is not in vcf
if "AF" not in vcf_df:
fig.update_yaxes(visible=False, row=row, col=1)
@@ -202,6 +186,7 @@ def create_track_plot(fig, row, feature_dict, box_size, box_alpha):
:param feature_dict: all infos for tracks as dictionary
:param box_size: list of box sizes
:param box_alpha: list of box alpha values
+ :param subplot: subplot index of the plot
:return: updated figure
"""
# define colors
@@ -220,7 +205,7 @@ def create_track_plot(fig, row, feature_dict, box_size, box_alpha):
if cycle == 2:
cycle = 0
# get various plot info
- positions = [int(x) for x in annotation.split(" ")]
+ positions = [feature_dict[feature][annotation]["start"], feature_dict[feature][annotation]["stop"]]
track = feature_dict[feature][annotation]["track"]
# define strand marker
if feature_dict[feature][annotation]["strand"] == "+":
@@ -272,7 +257,7 @@ def create_track_plot(fig, row, feature_dict, box_size, box_alpha):
line=dict(color=color_thes[cycle]),
showlegend=legend_vis,
hoverinfo='skip',
- name="",
+ name=f"plot {row}",
legendgroup=feature,
legendgrouptitle_text=feature,
),
diff --git a/example.gif b/example.gif
new file mode 100644
index 0000000..ed3a8ff
Binary files /dev/null and b/example.gif differ
diff --git a/example.png b/example.png
deleted file mode 100644
index f86fb26..0000000
Binary files a/example.png and /dev/null differ
diff --git a/setup.py b/setup.py
index 662bde4..4fdb4dd 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
"pysam>=0.21.0",
"biopython>=1.79"
],
- description='bamdash creates a interactive coverage dashboard optionally with vcf, bed, gb tracks',
+ description='create a interactive coverage plot dashboard from bam files and add gb, vcf and bed tracks',
url='https://github.com/jonas-fuchs/BAMdash',
author='Dr. Jonas Fuchs',
author_email='jonas.fuchs@uniklinik-freiburg.de',