From c675eb166b14cbecb8d88c01568ed53b6a2cf117 Mon Sep 17 00:00:00 2001
From: "Evan P. Troendle"
Date: Thu, 5 Oct 2023 11:54:30 +0100
Subject: [PATCH 1/4] Add files via upload

Update syntax compatible with nextflow v23.04.4 for illumina workflow:
every tuple element in process inputs/outputs now carries an explicit
qualifier (val(...) / path(...)).
---
 modules/illumina.nf | 28 ++++++++++++++--------------
 modules/qc.nf       |  2 +-
 modules/upload.nf   |  2 +-
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/modules/illumina.nf b/modules/illumina.nf
index 47727fc0..dc6b9593 100644
--- a/modules/illumina.nf
+++ b/modules/illumina.nf
@@ -12,10 +12,10 @@ process readTrimming {
     cpus 2
 
     input:
-    tuple(sampleName, path(forward), path(reverse))
+    tuple(val(sampleName), path(forward), path(reverse))
 
     output:
-    tuple(sampleName, path("*_val_1.fq.gz"), path("*_val_2.fq.gz")) optional true
+    tuple(val(sampleName), path("*_val_1.fq.gz"), path("*_val_2.fq.gz")) optional true
 
     script:
     """
@@ -38,7 +38,7 @@ process indexReference {
     path(ref)
 
     output:
-    tuple path('ref.fa'), path('ref.fa.*')
+    tuple(path('ref.fa'), path('ref.fa.*'))
 
     script:
     """
@@ -62,10 +62,10 @@ process readMapping {
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.sorted.bam", mode: 'copy'
 
     input:
-    tuple sampleName, path(forward), path(reverse), path(ref), path("*")
+    tuple(val(sampleName), path(forward), path(reverse), path(ref), path("*"))
 
     output:
-    tuple(sampleName, path("${sampleName}.sorted.bam"))
+    tuple(val(sampleName), path("${sampleName}.sorted.bam"))
 
     script:
     """
@@ -82,11 +82,11 @@ process trimPrimerSequences {
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.mapped.primertrimmed.sorted.bam", mode: 'copy'
 
     input:
-    tuple sampleName, path(bam), path(bedfile)
+    tuple(val(sampleName), path(bam), path(bedfile))
 
     output:
-    tuple sampleName, path("${sampleName}.mapped.bam"), emit: mapped
-    tuple sampleName, path("${sampleName}.mapped.primertrimmed.sorted.bam" ), emit: ptrim
+    tuple(val(sampleName), path("${sampleName}.mapped.bam"), emit: mapped)
+    tuple(val(sampleName), path("${sampleName}.mapped.primertrimmed.sorted.bam" ), emit: ptrim)
 
     script:
     if (params.allowNoprimer){
@@ -128,10 +128,10 @@ process callVariants {
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.variants.tsv", mode: 'copy'
 
     input:
-    tuple(sampleName, path(bam), path(ref))
+    tuple(val(sampleName), path(bam), path(ref))
 
     output:
-    tuple sampleName, path("${sampleName}.variants.tsv"), emit: variants
+    tuple(val(sampleName), path("${sampleName}.variants.tsv"), emit: variants)
 
     script:
     """
@@ -147,10 +147,10 @@ process makeConsensus {
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.primertrimmed.consensus.fa", mode: 'copy'
 
     input:
-    tuple(sampleName, path(bam))
+    tuple(val(sampleName), path(bam))
 
     output:
-    tuple(sampleName, path("${sampleName}.primertrimmed.consensus.fa"))
+    tuple(val(sampleName), path("${sampleName}.primertrimmed.consensus.fa"))
 
     script:
     """
@@ -169,10 +169,10 @@ process cramToFastq {
     */
 
     input:
-    tuple sampleName, file(cram)
+    tuple(val(sampleName), file(cram))
 
     output:
-    tuple sampleName, path("${sampleName}_1.fastq.gz"), path("${sampleName}_2.fastq.gz")
+    tuple(val(sampleName), path("${sampleName}_1.fastq.gz"), path("${sampleName}_2.fastq.gz"))
 
     script:
     """
diff --git a/modules/qc.nf b/modules/qc.nf
index 923d9809..95cee31b 100644
--- a/modules/qc.nf
+++ b/modules/qc.nf
@@ -4,7 +4,7 @@ process makeQCCSV {
     publishDir "${params.outdir}/qc_plots", pattern: "${sampleName}.depth.png", mode: 'copy'
 
     input:
-    tuple sampleName, path(bam), path(fasta), path(ref)
+    tuple(val(sampleName), path(bam), path(fasta), path(ref))
 
     output:
     path "${params.prefix}.${sampleName}.qc.csv", emit: csv
diff --git a/modules/upload.nf b/modules/upload.nf
index ae6a9c84..3aff534c 100644
--- a/modules/upload.nf
+++ b/modules/upload.nf
@@ -4,7 +4,7 @@ process collateSamples {
     publishDir "${params.outdir}/qc_pass_climb_upload/${params.prefix}", pattern: "${sampleName}", mode: 'copy'
 
     input:
-    tuple(sampleName, path(bam), path(fasta))
+    tuple(val(sampleName), path(bam), path(fasta))
 
     output:
     path("${sampleName}")

From faf8ad038ee3eaefbdbbb5399f6d892ca6aa61b1 Mon Sep 17 00:00:00 2001
From: "Evan P. Troendle"
Date: Thu, 5 Oct 2023 11:55:40 +0100
Subject: [PATCH 2/4] Update main.nf

Update syntax compatible with nextflow version 23.04.4
---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index 12d77382..da397967 100644
--- a/main.nf
+++ b/main.nf
@@ -1,7 +1,7 @@
 #!/usr/bin/env nextflow
 
 // enable dsl2
-nextflow.preview.dsl = 2
+nextflow.enable.dsl = 2
 
 // include modules
 include {printHelp} from './modules/help.nf'

From bf3381f0fd7d0b36f56ac70be53989f5ebe0a61b Mon Sep 17 00:00:00 2001
From: "Evan P. Troendle"
Date: Mon, 9 Oct 2023 17:34:02 +0100
Subject: [PATCH 3/4] Add pangolin and freyja into the illumina pipeline.

Adds three processes to modules/illumina.nf and wires them into the
sequenceAnalysis workflow:

  - getDepths:   per-position depth table (samtools mpileup | cut -f1-4)
  - callLineage: runs pangolin on the per-sample consensus FASTA
  - freyjaDemix: runs freyja demix on the iVar variants and depth tables

getDepths passes params.ivarMinVariantQuality to samtools mpileup as the
minimum base quality (-Q), which is how the parameter is documented in
conf/illumina.config; the mapping-quality filter (-q) is left at 0.

makeConsensus now names its output channel `consensus` so callLineage can
consume it. The conda environment gains freyja and pangolin and moves the
other tools to newer releases; the previous environment is kept alongside
as environment.yml.bak.

TODO: add aggregation of pangolin and freyja outputs.
---
 conf/illumina.config                      |  2 +-
 environments/illumina/environment.yml     | 20 +++++----
 environments/illumina/environment.yml.bak | 18 ++++++++
 modules/illumina.nf                       | 53 ++++++++++++++++++++++-
 workflows/illuminaNcov.nf                 | 11 ++++-
 5 files changed, 92 insertions(+), 12 deletions(-)
 create mode 100644 environments/illumina/environment.yml.bak

diff --git a/conf/illumina.config b/conf/illumina.config
index c88248bb..0abc206d 100644
--- a/conf/illumina.config
+++ b/conf/illumina.config
@@ -49,7 +49,7 @@ params {
     // iVar frequency threshold to call variant (ivar variants: -t )
     ivarMinFreqThreshold = 0.25
 
-    // iVar minimum mapQ to call variant (ivar variants: -q)
+    // iVar minimum baseQ to call variant (ivar variants: -q)
     ivarMinVariantQuality = 20
 
     // Typing frequency threshold to call aa consequences of variant. Set to ivarFreqThreshold for consistency with consensus
diff --git a/environments/illumina/environment.yml b/environments/illumina/environment.yml
index 5f7d7ef9..548dd66b 100644
--- a/environments/illumina/environment.yml
+++ b/environments/illumina/environment.yml
@@ -4,15 +4,17 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - python=3
+  - python
   - biopython=1.74
   - libxcb
-  - matplotlib=3.3.3
-  - pandas=0.23.0=py36_1
-  - bwa=0.7.17=pl5.22.0_2
-  - samtools=1.10
-  - bcftools=1.10
-  - trim-galore=0.6.5
-  - ivar=1.3
+  - matplotlib=3
+  - pandas=2
+  - bwa=0.7.17
+  - samtools=1.18
+  - bcftools=1.18
+  - trim-galore=0.6.10
+  - ivar=1.4.2
   - pyvcf=0.6.8
-  - pyyaml=5.3.1
+  - pyyaml=6.0.1
+  - freyja=1.4.7
+  - pangolin=4.3
diff --git a/environments/illumina/environment.yml.bak b/environments/illumina/environment.yml.bak
new file mode 100644
index 00000000..5f7d7ef9
--- /dev/null
+++ b/environments/illumina/environment.yml.bak
@@ -0,0 +1,18 @@
+name: artic-ncov2019-illumina
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - python=3
+  - biopython=1.74
+  - libxcb
+  - matplotlib=3.3.3
+  - pandas=0.23.0=py36_1
+  - bwa=0.7.17=pl5.22.0_2
+  - samtools=1.10
+  - bcftools=1.10
+  - trim-galore=0.6.5
+  - ivar=1.3
+  - pyvcf=0.6.8
+  - pyyaml=5.3.1
diff --git a/modules/illumina.nf b/modules/illumina.nf
index dc6b9593..8dde56f0 100644
--- a/modules/illumina.nf
+++ b/modules/illumina.nf
@@ -121,6 +121,22 @@ process trimPrimerSequences {
     """
 }
 
+process getDepths {
+    tag { sampleName }
+
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.depths.tsv", mode: 'copy'
+
+    input:
+    tuple(val(sampleName), path(bam), path(ref))
+
+    output:
+    tuple(val(sampleName), path("${sampleName}.depths.tsv"), emit: depths)
+
+    script:
+    """
+    samtools mpileup -aa -A -d 0 -Q ${params.ivarMinVariantQuality} -q 0 -B -f ${ref} ${bam} | cut -f1-4 > "${sampleName}.depths.tsv"
+    """
+}
 process callVariants {
 
     tag { sampleName }
@@ -150,7 +166,7 @@ process makeConsensus {
     tuple(val(sampleName), path(bam))
 
     output:
-    tuple(val(sampleName), path("${sampleName}.primertrimmed.consensus.fa"))
+    tuple(val(sampleName), path("${sampleName}.primertrimmed.consensus.fa"), emit: consensus)
 
     script:
     """
@@ -160,6 +176,41 @@ process makeConsensus {
     """
 }
 
+process callLineage {
+
+    tag { sampleName }
+
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.pangolin.csv", mode: 'copy'
+
+    input:
+    tuple(val(sampleName), path(consensus))
+
+    output:
+    tuple(val(sampleName), path("${sampleName}.pangolin.csv"))
+
+    script:
+    """
+    pangolin ${consensus} --outfile ${sampleName}.pangolin.csv --verbose
+    """
+}
+
+process freyjaDemix {
+    tag { sampleName }
+
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.freyja.demix.tsv", mode: 'copy'
+
+    input:
+    tuple(val(sampleName), path(variants), path(depths))
+
+    output:
+    tuple(val(sampleName), path("${sampleName}.freyja.demix.tsv"))
+
+    script:
+    """
+    freyja demix ${variants} ${depths} --output ${sampleName}.freyja.demix.tsv
+    """
+}
+
 process cramToFastq {
     /**
     * Converts CRAM to fastq (http://bio-bwa.sourceforge.net/)
diff --git a/workflows/illuminaNcov.nf b/workflows/illuminaNcov.nf
index dbc8fb5d..12fb2056 100644
--- a/workflows/illuminaNcov.nf
+++ b/workflows/illuminaNcov.nf
@@ -1,7 +1,7 @@
 #!/usr/bin/env nextflow
 
 // enable dsl2
-nextflow.preview.dsl = 2
+nextflow.enable.dsl = 2
 
 // import modules
 include {articDownloadScheme } from '../modules/artic.nf'
@@ -9,8 +9,11 @@ include {readTrimming} from '../modules/illumina.nf'
 include {indexReference} from '../modules/illumina.nf'
 include {readMapping} from '../modules/illumina.nf'
 include {trimPrimerSequences} from '../modules/illumina.nf'
+include {getDepths} from '../modules/illumina.nf'
 include {callVariants} from '../modules/illumina.nf'
+include {freyjaDemix} from '../modules/illumina.nf'
 include {makeConsensus} from '../modules/illumina.nf'
+include {callLineage} from '../modules/illumina.nf'
 include {cramToFastq} from '../modules/illumina.nf'
 
 include {makeQCCSV} from '../modules/qc.nf'
@@ -95,8 +98,14 @@ workflow sequenceAnalysis {
 
       callVariants(trimPrimerSequences.out.ptrim.combine(ch_preparedRef.map{ it[0] }))
 
+      getDepths(trimPrimerSequences.out.ptrim.combine(ch_preparedRef.map{ it[0] }))
+
+      freyjaDemix(callVariants.out.variants.join(getDepths.out.depths))
+
       makeConsensus(trimPrimerSequences.out.ptrim)
 
+      callLineage(makeConsensus.out.consensus)
+
       makeQCCSV(trimPrimerSequences.out.ptrim.join(makeConsensus.out, by: 0)
                                    .combine(ch_preparedRef.map{ it[0] }))
 

From c9f2986f6dd14f0f711ac246e4559a0eb3977ea9 Mon Sep 17 00:00:00 2001
From: "Evan P. Troendle"
Date: Wed, 11 Oct 2023 08:41:23 +0100
Subject: [PATCH 4/4] Add error-handling strategies into illumina pipeline

callVariants and callLineage retry a failed task after an exponential
back-off of 2^attempt * 200 ms and give up after 5 retries, so a task
that fails every time cannot keep the run retrying indefinitely.
freyjaDemix uses errorStrategy 'ignore'. pangolin is run with a single
thread (-t 1).
---
 modules/illumina.nf | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/modules/illumina.nf b/modules/illumina.nf
index 8dde56f0..87766949 100644
--- a/modules/illumina.nf
+++ b/modules/illumina.nf
@@ -143,6 +143,9 @@ process callVariants {
 
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.variants.tsv", mode: 'copy'
 
+    errorStrategy { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
+    maxRetries 5 // bounded so a task that always fails cannot retry forever
+
     input:
     tuple(val(sampleName), path(bam), path(ref))
 
@@ -182,6 +185,9 @@ process callLineage {
 
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.pangolin.csv", mode: 'copy'
 
+    errorStrategy { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
+    maxRetries 5 // bounded so a task that always fails cannot retry forever
+
     input:
     tuple(val(sampleName), path(consensus))
 
@@ -190,7 +196,7 @@ process callLineage {
 
     script:
     """
-    pangolin ${consensus} --outfile ${sampleName}.pangolin.csv --verbose
+    pangolin ${consensus} -t 1 --outfile ${sampleName}.pangolin.csv --verbose
     """
 }
 
@@ -199,6 +205,8 @@ process freyjaDemix {
 
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.freyja.demix.tsv", mode: 'copy'
 
+    errorStrategy 'ignore'
+
     input:
     tuple(val(sampleName), path(variants), path(depths))
 