Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: update parameters and callsets #34

Closed
wants to merge 12 commits into from
18 changes: 11 additions & 7 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Lint workflow
uses: snakemake/snakemake-github-action@v1.24.0
uses: snakemake/snakemake-github-action@v1.25.0
with:
directory: .
snakefile: workflow/Snakefile
Expand All @@ -40,42 +40,46 @@ jobs:
- uses: actions/checkout@v2

- name: Test workflow (tumor/normal)
uses: snakemake/snakemake-github-action@v1.24.0
uses: snakemake/snakemake-github-action@v1.25.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config-tumor_normal/config.yaml --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache"
show-disk-usage-on-error: true

- name: Test report
uses: snakemake/snakemake-github-action@v1.24.0
uses: snakemake/snakemake-github-action@v1.25.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config-tumor_normal/config.yaml --report report.zip"

- name: Test workflow (tumor/normal, ffpe)
uses: snakemake/snakemake-github-action@v1.24.0
uses: snakemake/snakemake-github-action@v1.25.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config-tumor_normal_ffpe/config.yaml --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp"
stagein: |
rm -rf .test/results
show-disk-usage-on-error: true

- name: Test workflow (tumor only)
uses: snakemake/snakemake-github-action@v1.24.0
uses: snakemake/snakemake-github-action@v1.25.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config-tumor_only/config.yaml --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp"
stagein: |
rm -rf .test/results
show-disk-usage-on-error: true

- name: Test workflow (tumor only, ffpe)
uses: snakemake/snakemake-github-action@v1.24.0
uses: snakemake/snakemake-github-action@v1.25.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config-tumor_only_ffpe/config.yaml --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp"
stagein: |
rm -rf .test/results
rm -rf .test/results
show-disk-usage-on-error: true
4 changes: 2 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ params:
# which might be misleading because all reads of an amplicon have the same start
# position, strand etc. (--omit-strand-bias, --omit-read-position-bias,
# --omit-softclip-bias, --omit-read-orientation-bias).
call: ""
call: "--omit-read-position-bias"
# Add extra arguments for varlociraptor preprocess. By default, we limit the depth to 200.
# Increase this value for panel sequencing!
preprocess: "--max-depth 200"
preprocess: "--max-depth 30000"
freebayes:
min_alternate_fraction: 0.01 # Reduce for calling variants with lower VAFs
2 changes: 1 addition & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module dna_seq_varlociraptor:
github(
"snakemake-workflows/dna-seq-varlociraptor",
path="workflow/Snakefile",
tag="v4.0.1",
tag="v5.1.0",
)
config:
config
Expand Down
28 changes: 18 additions & 10 deletions workflow/resources/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ ref:
# Ensembl species name
species: homo_sapiens
# Ensembl release
release: 109
release: 110
# Genome build
build: GRCh38

Expand Down Expand Up @@ -108,7 +108,7 @@ calling:
novel: >-
not (ID and ID.startswith('rs'))
pathogenic_risk_factor_drug_response: >-
(not {'risk_factor', 'pathogenic', 'drug_response'}.isdisjoint(ANN['CLIN_SIG'])) and
(not {'risk_factor', 'likely_pathogenic','pathogenic', 'drug_response'}.isdisjoint(ANN['CLIN_SIG'])) and
(ANN['IMPACT'] in {'LOW', 'MODERATE', 'HIGH'})
potentially_pathogenic: >-
(ANN['IMPACT'] in {'LOW', 'MODERATE', 'HIGH'}) and (
Expand Down Expand Up @@ -377,12 +377,20 @@ annotations:
vep:
# Consider removing --everything if VEP is slow for you (e.g. for WGS),
# and think carefully about which annotations you need.
params: --everything --check_existing
plugins:
# Add any plugin from https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
# Plugin args can be passed as well, e.g. "LoFtool,path/to/custom/scores.txt".
- LoFtool
- REVEL
candidate_calls:
params: --everything --check_existing
plugins:
# Add any plugin from https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
# Plugin args can be passed as well, e.g. "LoFtool,path/to/custom/scores.txt".
- LoFtool
- REVEL
final_calls:
params: --everything --check_existing
plugins:
# Add any plugin from https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
# Plugin args can be passed as well, e.g. "LoFtool,path/to/custom/scores.txt".
- LoFtool
- REVEL

params:
cutadapt: ""
Expand All @@ -397,9 +405,9 @@ params:
# which might be misleading because all reads of an amplicon have the same start
# position, strand etc. (--omit-strand-bias, --omit-read-position-bias,
# --omit-softclip-bias, --omit-read-orientation-bias).
call: ""
call: "--omit-read-position-bias"
# Add extra arguments for varlociraptor preprocess. By default, we limit the depth to 200.
# Increase this value for panel sequencing!
preprocess: "--max-depth 200"
preprocess: "--max-depth 30000"
freebayes:
min_alternate_fraction: 0.05 # Reduce for calling variants with lower VAFs
28 changes: 17 additions & 11 deletions workflow/resources/config/scenario.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,20 @@ __definitions__:
samples = params.samples.set_index("alias")
if "ffpe" not in samples.columns:
samples["ffpe"] = pd.NA
- sex = samples.loc["tumor", "sex"]
- sex = samples.loc[["tumor"], "sex"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the brackets?

Copy link
Contributor Author

@FelixMoelder FelixMoelder Nov 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For groups with just one entry, samples.loc["tumor", "sex"] returns sex as a plain string.
But if a group has multiple entries, sex becomes a Series.
In the previous implementation, rendering the scenario only worked for groups with a single entry.
Changing sex to samples.loc[["tumor"], "sex"] always returns a Series, so single and multiple entries both render correctly.

Edit: In your other comment you mentioned that each alias should only occur once. So if we handle multiple panels by prefix this change probably also becomes unnecessary.

- |
if pd.isna(sex) or sex not in ["male", "female"]:
raise ValueError(f"Unsupported sex in sample sheet (also ensure that sample sheet is entirely tab separated): {sex}")
- is_ffpe = samples.loc["tumor", "ffpe"]
if pd.isna(sex).any() or len(sex.unique()) != 1 or sex.iloc[0] not in ["male", "female"]:
raise ValueError(f"Unsupported sex in sample sheet (also ensure that sample sheet is entirely tab separated): {sex}")
- is_ffpe = samples.loc[["tumor"], "ffpe"].all()
- |
if len(samples.loc[["tumor"], "ffpe"].unique()) != 1:
raise ValueError(f"All samples within a group must to be either ffpe or not.")
- |
if len(samples.loc[["tumor"], "purity"].unique()) != 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each alias should occur only once in a group. We should also check for that when validating the sample sheet. If there are two panels for a patient, we could name the two tumors tumor_panelname1 and tumor_panelname2. The scenario could support that by looking for the prefix tumor.

raise ValueError(f"All samples within a group need to have the same purity.")
- |
def contamination():
return 1.0 - float(samples.loc["tumor", "purity"])
return 1.0 - float(samples.loc[["tumor"], "purity"].iloc[0])

species:
heterozygosity: 0.001
Expand All @@ -41,7 +47,7 @@ species:
?if "normal" in samples.index:
samples:
tumor:
sex: ?sex
sex: ?sex.iloc[0]
somatic-effective-mutation-rate: 1e-6
inheritance:
clonal:
Expand All @@ -51,7 +57,7 @@ species:
by: normal
fraction: ?contamination()
normal:
sex: ?sex
sex: ?sex.iloc[0]
somatic-effective-mutation-rate: 1e-6

events:
Expand All @@ -61,7 +67,7 @@ species:
loh_or_amplification: "normal:0.5 & tumor:[0.9,1.0["
germline: "(normal:0.5 & tumor:[0.0,0.9[) | (normal:1.0 & tumor:[0.0,1.0])"
?if is_ffpe:
ffpe_artifact: "($ffpe_subst) & tumor:]0.0,0.05["
ffpe_artifact: "normal:0.0 & (($ffpe_subst) & tumor:]0.0,0.05[)"
somatic_tumor_low: "normal:0.0 & ((($ffpe_subst) & tumor:]0.05,0.1[) | (!($ffpe_subst) & tumor:]0.0,0.1[))"
?else:
somatic_tumor_low: "normal:0.0 & tumor:]0.0,0.1["
Expand All @@ -71,19 +77,19 @@ species:
tumor:
resolution: 0.01
universe: "[0.0,1.0]"
sex: ?sex
sex: ?sex.iloc[0]
contamination:
by: normal
fraction: ?contamination()
normal:
universe: "0.0 | 0.5 | 1.0"
sex: ?sex
sex: ?sex.iloc[0]

events:
somatic_tumor_high: "normal:0.0 & tumor:[0.1,1.0]"
germline: "(normal:0.5 & tumor:0.5) | (normal:1.0 & tumor:1.0)"
?if is_ffpe:
ffpe_artifact: "($ffpe_subst) & tumor:]0.0,0.05["
ffpe_artifact: "normal:0.0 & (($ffpe_subst) & tumor:]0.0,0.05[)"
somatic_tumor_low: "normal:0.0 & ((($ffpe_subst) & tumor:]0.05,0.1[) | (!($ffpe_subst) & tumor:]0.0,0.1[))"
?else:
somatic_tumor_low: "normal:0.0 & tumor:]0.0,0.1["
Loading