Skip to content

Commit

Permalink
Version 0.1 (#1)
Browse files Browse the repository at this point in the history
* added buttons to command.py

* moved font and size to config

* white and dark theme buttons

* switched to toggle button

* some visual tweaks

* better gif

* added checks if ref id was found in data

* small fixes

* added file dump and show additional stats

* visual fixes and vcf annotation
  • Loading branch information
jonas-fuchs authored Oct 8, 2023
1 parent daf9c8c commit b9cc0b8
Show file tree
Hide file tree
Showing 10 changed files with 306 additions and 92 deletions.
21 changes: 17 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@

<img src="./bamdash.png" alt="bamdash" />

[![language](https://img.shields.io/badge/python-%3E3.9-green)](https://www.python.org/)
[![License: GPL v3](https://img.shields.io/github/license/jonas-fuchs/varvamp)](https://www.gnu.org/licenses/gpl-3.0)
![Static Badge](https://img.shields.io/badge/platform-linux_osx-blue)

## Overview

**BAMdash lets you create interactive coverage plots from your bam file with [`plotly`](https://plotly.com/)**
Expand All @@ -8,15 +13,19 @@
- **create** an interactive `html` for data exploration
- **create** a static image (`jpg`, `png`, `pdf`, `svg`) ready for publication
- **add** additional tracks (supported: `.vcf`, `.gb`, `.bed`)
- **annotate** tracks with coverage data and vcf with additional information if a `.gb` file is provided
- **export** annotated track data as tabular files (`.bed`, `.vcf`) or json (`.gb`)
- **developed** for viral genomics
- **customize** all plotting parameters

**Feel free to report any bugs or request new features as issues!**

## Example
<img src="./example.png" alt="example" />
<img src="./example.gif" alt="example" />

## Installation

### via pip (recommended, coming soon):
### via pip (recommended):
```shell
pip install bamdash
```
Expand Down Expand Up @@ -52,7 +61,7 @@ full usage:
-h, --help show this help message and exit
-b , --bam bam file location
-r , --reference reference id
-r , --reference seq reference id
-t [track_1 ...], --tracks [track_1 ...]
file location of tracks
-c 5, --coverage 5 minimum coverage
Expand All @@ -62,6 +71,7 @@ full usage:
export as png, jpg, pdf, svg
-d px px, --dimensions px px
width and height of the static image in px
--dump, --no-dump dump annotated track data (default: False)
-v, --version show program's version number and exit
```

Expand Down Expand Up @@ -96,7 +106,7 @@ average_line_width = 1

# track customize
track_color_scheme = "agsunset" # for multiple annotation tracks (GenBank)
track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no name colors)
track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no named colors)
strand_types = ["triangle-right", "triangle-left", "diamond-wide"] # +, -, undefined strand
strand_marker_size = 8
strand_marker_line_width = 1
Expand All @@ -121,6 +131,9 @@ To apply these new settings just repeat the installation procedure in the BAMdas
pip install .
```

<a href="https://www.buymeacoffee.com/jofox" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>


---

**Important disclaimer:**
Expand Down
2 changes: 1 addition & 1 deletion bamdash/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""interactively visualize coverage and tracks"""
_program = "bamdash"
__version__ = "0.0"
__version__ = "0.1"
Binary file modified bamdash/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
175 changes: 136 additions & 39 deletions bamdash/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
import sys
import argparse
import math
import json

# LIBS
import plotly.io as pio
import pandas as pd
from plotly.subplots import make_subplots

# BAMDASH
Expand Down Expand Up @@ -39,7 +42,7 @@ def get_args(sysargs):
required=True,
type=str,
metavar=" ",
help="reference id"
help="seq reference id"
)
parser.add_argument(
"-t",
Expand Down Expand Up @@ -83,6 +86,12 @@ def get_args(sysargs):
nargs=2,
help="width and height of the static image in px"
)
parser.add_argument(
"--dump",
action=argparse.BooleanOptionalAction,
default=False,
help="dump annotated track data"
)
parser.add_argument(
"-v",
"--version",
Expand All @@ -102,21 +111,47 @@ def main(sysargs=sys.argv[1:]):
"""
# parse args
args = get_args(sysargs)
# define subplot number and track heights

# define subplot number, track heights and parse data
coverage_df, title = data.bam_to_coverage_df(args.bam, args.reference, args.coverage)
track_heights = [1]
track_data = []
# extract data and check if ref was found
if args.tracks is not None:
number_of_tracks = len(args.tracks)+1
for track in args.tracks:
if track.endswith("vcf"):
track_heights = track_heights + [config.vcf_track_proportion]
vcf_data = [data.vcf_to_df(track, args.reference), "vcf"]
if vcf_data[0].empty:
print("WARNING: vcf data does not contain the seq reference id")
number_of_tracks -= 1
else:
track_heights = track_heights + [config.vcf_track_proportion]
track_data.append(vcf_data)
elif track.endswith("gb"):
track_heights = track_heights + [config.gb_track_proportion]
gb_dict, seq = data.genbank_to_dict(track, coverage_df, args.reference, args.coverage)
if gb_dict:
track_heights = track_heights + [config.gb_track_proportion]
track_data.append([gb_dict, "gb", seq])
else:
print("WARNING: gb data does not contain the seq reference id")
number_of_tracks -= 1
elif track.endswith("bed"):
track_heights = track_heights + [config.bed_track_proportion]
bed_data = [data.bed_to_dict(track, coverage_df, args.reference, args.coverage), "bed"]
if bed_data[0]["bed annotations"]:
track_heights = track_heights + [config.bed_track_proportion]
track_data.append(bed_data)
else:
print("WARNING: bed data does not contain the seq reference id")
number_of_tracks -= 1
else:
sys.exit("one of the track types is not supported")
sys.exit("one of the track types is not supported (supported are *.vcf, *.bed and *.gb")
else:
number_of_tracks = 1

# annotate if one gb and vcfs are in tracks
track_data = data.annotate_vcfs_in_tracks(track_data)

# define layout
fig = make_subplots(
rows=number_of_tracks,
Expand All @@ -126,54 +161,96 @@ def main(sysargs=sys.argv[1:]):
vertical_spacing=config.plot_spacing,
)
# create coverage plot
coverage_df, title = data.bam_to_coverage_df(args.bam, args.reference, args.coverage)
plotting.create_coverage_plot(fig, 1, coverage_df)
# create track plots
if args.tracks is not None:
for index, track in enumerate(args.tracks):
if track_data:
for index, track in enumerate(track_data):
row = index+2
if track.endswith("vcf"):
vcf_df = data.vcf_to_df(track, args.reference)
plotting.create_vcf_plot(fig, row, vcf_df)
elif track.endswith("gb"):
gb_dict = data.genbank_to_dict(track, coverage_df, args.reference, args.coverage)
plotting.create_track_plot(fig, row, gb_dict, config.box_gb_size, config.box_gb_alpha)
elif track.endswith("bed"):
bed_dict = data.bed_to_dict(track, coverage_df, args.reference, args.coverage)
plotting.create_track_plot(fig, row, bed_dict, config.box_bed_size, config.box_bed_alpha)
if track[1] == "vcf":
plotting.create_vcf_plot(fig, row, track[0])
elif track[1] == "gb":
plotting.create_track_plot(fig, row, track[0], config.box_gb_size, config.box_gb_alpha)
elif track[1] == "bed":
plotting.create_track_plot(fig, row, track[0], config.box_bed_size, config.box_bed_alpha)

# define own templates
pio.templates["plotly_dark_custom"], pio.templates["plotly_white_custom"] = pio.templates["plotly_dark"], pio.templates["plotly_white"]
# change params
pio.templates["plotly_dark_custom"].update(
layout=dict(yaxis=dict(linecolor="white", tickcolor="white", zerolinecolor="rgb(17,17,17)"),
xaxis=dict(linecolor="white", tickcolor="white", zerolinecolor="rgb(17,17,17)"),
updatemenudefaults=dict(bgcolor="rgb(115, 115, 115)")
)
)
pio.templates["plotly_white_custom"].update(
layout=dict(yaxis=dict(linecolor="black", tickcolor="black", zerolinecolor="white"),
xaxis=dict(linecolor="black", tickcolor="black", zerolinecolor="white"),
updatemenudefaults=dict(bgcolor="rgb(204, 204, 204)")
)
)

# global formatting
fig.update_layout(
plot_bgcolor="white",
template="plotly_white_custom",
hovermode="x unified",
title=dict(
text=title,
x=1,
font=dict(
family="Arial",
size=16,
color='#000000'
)
),
font=dict(
family="Arial",
size=16,
)
family=config.font,
size=config.font_size,
),
# Add buttons
updatemenus=[
dict(
type="buttons",
direction="left",
buttons=[
dict(
args=["yaxis.type", "linear"],
label="linear",
method="relayout"
),
dict(
args=["yaxis.type", "log"],
label="log",
method="relayout"
),
dict(args=[{"template": pio.templates["plotly_dark_custom"], "visible": True}],
label="dark",
method="relayout"),
dict(args=[{"template": pio.templates["plotly_white_custom"], "visible": True}],
label="light",
method="relayout"),
],
pad={"r": 10, "t": 1},
showactive=False,
xanchor="left",
y=1.15,
yanchor="top"
)
],
# add global stats as annotation
annotations=[
dict(text=title, y=1.14, yref="paper",
align="center", showarrow=False)
]
)
# global x axes
fig.update_xaxes(
mirror=False,
ticks="outside",
showline=True,
linecolor="black",
range=[0, max(coverage_df["position"])]
linewidth=1,
ticks="outside",
minor_ticks="outside",
range=[0, max(coverage_df["position"])],
showgrid=False,
)
# global y axis
fig.update_yaxes(
mirror=False,
ticks="outside",
showline=True,
linecolor="black"
linewidth=1,
ticks="outside",
minor_ticks="outside",
showgrid=False
)
# if a range slider is shown, do not display the xaxis title
# (will be shown underneath)
Expand All @@ -189,14 +266,34 @@ def main(sysargs=sys.argv[1:]):
else:
# last x axis
fig.update_xaxes(title_text="genome position", row=number_of_tracks, col=1)

# html export
fig.write_html(f"{args.reference}_plot.html")
# static image export
if args.export_static is not None:
# static image specific options
if config.show_log:
if config.show_log: # correct log layout
fig["layout"]["yaxis"]["type"] = "log"
fig["layout"]["yaxis"]["range"] = (0, math.log(fig["layout"]["yaxis"]["range"][1], 10))
fig.update_yaxes(dtick=1, row=1)
fig.update_layout(updatemenus=[dict(visible=False)])
fig.update_layout(updatemenus=[dict(visible=False)]) # no buttons
fig.update_layout(annotations=[dict(visible=False)]) # no annotations
# write static image
pio.kaleido.scope.mathjax = None # fix so no weird box is shown
fig.write_image(f"{args.reference}_plot.{args.export_static}", width=args.dimensions[0], height=args.dimensions[1])

# dump track data
vcf_track_count, bed_track_count, gb_track_count = 0, 0, 0
if args.dump and track_data:
for track in track_data:
if track[1] == "vcf":
track[0].to_csv(f"{args.reference}_vcf_data_{vcf_track_count}.tabular", sep="\t", header=True, index=False)
vcf_track_count += 1
elif track[1] == "bed":
bed_df = pd.DataFrame.from_dict(track[0]["bed annotations"], orient="index")
bed_df.drop("track", axis=1, inplace=True)
bed_df.to_csv(f"{args.reference}_bed_data_{bed_track_count}.tabular", sep="\t", header=True, index=False)
bed_track_count += 1
elif track[1] == "gb":
with open(f"{args.reference}_gb_data_{gb_track_count}.json", "w") as fp:
json.dump(track[0], fp)
gb_track_count += 1
10 changes: 6 additions & 4 deletions bamdash/scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,25 @@
gb_track_proportion = 0.5
bed_track_proportion = 0.2
plot_spacing = 0.05
font = "Arial"
font_size = 16

# coverage customize
coverage_fill_color = "rgba(255, 212, 135, 0.2)"
coverage_fill_color = "rgba(255, 212, 135, 0.4)"
coverage_line_color = "rgba(224, 168, 68, 1)"
average_line_color = "grey"
average_line_width = 1

# track customize
track_color_scheme = "agsunset" # for multiple annotation tracks (GenBank)
track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no name colors)
track_color_single = "rgb(145, 145, 145)" # for single tracks (any rgb value, but no named colors)
strand_types = ["triangle-right", "triangle-left", "diamond-wide"] # +, -, undefined strand
strand_marker_size = 8
strand_marker_line_width = 1
strand_marker_line_color = "rgba(0, 0, 0, 0.2)"
box_bed_alpha = [0.6, 0.6] # alpha values for boxes (bed)
box_bed_alpha = [0.7, 0.7] # alpha values for boxes (bed)
box_bed_size = [0.4, 0.4] # size values for boxes (bed)
box_gb_alpha = [0.6, 0.8] # alpha values for boxes (gb)
box_gb_alpha = [0.7, 0.8] # alpha values for boxes (gb)
box_gb_size = [0.4, 0.3] # size values for boxes (gb)

# variant customize
Expand Down
Loading

0 comments on commit b9cc0b8

Please sign in to comment.