diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml deleted file mode 100644 index bd9f7bf..0000000 --- a/.github/workflows/download_pipeline.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: Test successful pipeline download with 'nf-core download' - -# Run the workflow when: -# - dispatched manually -# - when a PR is opened or reopened to master branch -# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. -on: - workflow_dispatch: - inputs: - testbranch: - description: "The specific branch you wish to utilize for the test execution of nf-core download." - required: true - default: "dev" - pull_request: - types: - - opened - - edited - - synchronize - branches: - - main - - dev - pull_request_target: - branches: - - main - - dev - -env: - NXF_ANSI_LOG: false - -jobs: - download: - runs-on: ubuntu-latest - steps: - - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 - - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.12" - architecture: "x64" - - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 - with: - singularity-version: 3.8.3 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev - - - name: Get the repository name and current branch set as environment variable - run: | - echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} - echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - - - name: Download the pipeline - env: - NXF_SINGULARITY_CACHEDIR: ./ - run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ - --revision ${{ env.REPO_BRANCH }} \ - --outdir ./${{ 
env.REPOTITLE_LOWERCASE }} \ - --compress "none" \ - --container-system 'singularity' \ - --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ - --container-cache-utilisation 'amend' \ - --download-configuration - - - name: Inspect download - run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - - name: Run the downloaded pipeline (stub) - id: stub_run_pipeline - continue-on-error: true - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results - - name: Run the downloaded pipeline (stub run not supported) - id: run_pipeline - if: ${{ job.steps.stub_run_pipeline.status == failure() }} - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 177172b..19ddb83 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-node@v3 - name: Install editorconfig-checker - run: npm install -g editorconfig-checker + run: npm install -g editorconfig-checker@3.0.2 - name: Run ECLint check run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') diff --git a/CHANGELOG.md b/CHANGELOG.md index ca9294f..087b1b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [[1.3.1](https://github.com/sanger-tol/readmapping/releases/tag/1.3.1)] - Antipodean Opaleye (patch 1) - [2024-09-24] + +### Enhancements & fixes + +- Fixed bug in handling CRAM HiC inputs introduced in 1.1.0 +- Fixed bug in handling PacBio FASTQ inputs introduced in 1.3.0 + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `bbtools` | | 39.01 | +| `seqtk` | 1.4 | | + ## [[1.3.0](https://github.com/sanger-tol/readmapping/releases/tag/1.3.0)] - Antipodean Opaleye - [2024-08-23] ### Enhancements & fixes diff --git a/CITATIONS.md b/CITATIONS.md index 4a33c7c..c2313c7 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,6 +10,10 @@ ## Pipeline tools +- [BBTools](http://sourceforge.net/projects/bbmap/) + + > Bushnell B. BBTools software package. 2014. http://sourceforge.net/projects/bbmap/ + - [Blast](https://pubmed.ncbi.nlm.nih.gov/20003500/) > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PMID: 20003500; PMCID: PMC2803857. @@ -18,7 +22,7 @@ > Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium. 2019 May;314–24. doi: 10.1109/IPDPS.2019.00041. -- [CRUMBLE] +- [CRUMBLE](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6330002/) > Bonfield JK, McCarthy SA, Durbin R. Crumble: reference free lossy compression of sequence quality values. Bioinformatics. 2019 Jan;35(2):337-339. doi: 10.1093/bioinformatics/bty608. PubMed PMID: 29992288; PMCID: PMC6330002. @@ -30,14 +34,10 @@ > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819. 
-- [SeqKit] +- [SeqKit](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5051824/) > Shen W, Le S, Li Y, Hu F. SeqKit: A cross-platform and ultrafast toolkit for FASTA/Q file manipulation. PLoS One. 2016 Oct 5;11(10):e0163962. doi: 10.1371/journal.pone.0163962. PubMed PMID: 27706213; PMCID: PMC5051824. -- [Seqtk] - - > Li H. Toolkit for processing sequences in FASTA/Q formats. GitHub Repository. 2012. https://github.com/lh3/seqtk. Accessed August 2024. - ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/LICENSE b/LICENSE index e238724..257404f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,8 @@ MIT License -Copyright (c) @priyanka-surana +Copyright (c) 2022-2024 Genome Research Ltd. +except `bin/filter_five_end.pl`: +Copyright (c) 2017 Arima Genomics, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/conf/base.config b/conf/base.config index cd54c75..2f1cef5 100644 --- a/conf/base.config +++ b/conf/base.config @@ -16,7 +16,7 @@ process { // pipeline to self-heal from MEMLIMIT/RUNLIMIT. // Default - cpus = 1 + cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 50.MB * task.attempt, 'memory' ) } time = { check_max( 30.min * task.attempt, 'time' ) } @@ -41,6 +41,11 @@ process { memory = { check_max( ((meta.datatype == "pacbio_clr" || meta.datatype == "ont") ? 
2.GB : 1.GB) * task.attempt, 'memory' ) } } + // minimum 1GB memory + withName: 'BBMAP_FILTERBYNAME' { + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + } + withName: 'SAMTOOLS_COLLATETOFASTA' { cpus = { log_increase_cpus(4, 2*task.attempt, 1, 2) } memory = { check_max( 1.GB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) } @@ -58,6 +63,12 @@ process { time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) } } + withName: SAMTOOLS_ADDREPLACERG { + cpus = { log_increase_cpus(2, 6*task.attempt, 1, 2) } + memory = { check_max( 4.GB + 850.MB * log_increase_cpus(2, 6*task.attempt, 1, 2) * task.attempt + 0.6.GB * Math.ceil( meta.read_count / 100000000 ), 'memory' ) } + time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) } + } + withName: BLAST_BLASTN { time = { check_max( 2.hour * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } memory = { check_max( 100.MB + 20.MB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) } @@ -109,6 +120,11 @@ process { memory = { check_max( 1.GB * Math.ceil( 30 * fasta.size() / 1e+9 ) * task.attempt, 'memory' ) } } + withName: GENERATE_CRAM_CSV { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + withName: CRUMBLE { // No correlation between memory usage and the number of reads or the genome size. // Most genomes seem happy with 1 GB, then some with 2 GB, then some with 5 GB. 
diff --git a/conf/modules.config b/conf/modules.config index 4f84f9d..6244108 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -16,6 +16,10 @@ process { ext.args = '-F 0x200 -nt' } + withName: BBMAP_FILTERBYNAME { + ext.args = 'include=f' + } + withName: SAMTOOLS_MERGE { beforeScript = { "export REF_PATH=spoof"} ext.args = { "-c -p" } @@ -46,14 +50,6 @@ process { ext.args = "--output-fmt cram" } - withName: '.*:.*:ALIGN_HIC:BWAMEM2_MEM' { - ext.args = { "-5SPCp -R ${meta.read_group}" } - } - - withName: '.*:.*:ALIGN_ILLUMINA:BWAMEM2_MEM' { - ext.args = { "-p -R ${meta.read_group}" } - } - withName: ".*:ALIGN_ILLUMINA:.*:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" { ext.args = "" ext.args1 = { "-F 0x200 -nt" } @@ -95,16 +91,29 @@ process { // NOTE: minimap2 uses the decimal system ! 1G = 1,000,000,000 bp // NOTE: Math.ceil returns a double, but fortunately minimap2 accepts floating point values. // NOTE: minimap2 2.25 raises the default to 8G, which means higher memory savings on smaller genomes - withName: '.*:.*:ALIGN_HIFI:MINIMAP2_ALIGN' { - ext.args = { "-ax map-hifi --cs=short -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } + + withName: ".*:ALIGN_HIFI:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = { "-F 0x200 -nt" } + ext.args2 = { "-ax map-hifi --cs=short -I" + Math.ceil(meta.genome_size/1e9) + 'G' } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } } - withName: '.*:.*:ALIGN_CLR:MINIMAP2_ALIGN' { - ext.args = { "-ax map-pb -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } + withName: ".*:ALIGN_CLR:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = { "-F 0x200 -nt" } + ext.args2 = { "-ax map-pb -I" + Math.ceil(meta.genome_size/1e9) + 'G' } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } } - withName: '.*:.*:ALIGN_ONT:MINIMAP2_ALIGN' { - ext.args = { "-ax map-ont -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' 
} + withName: ".*:ALIGN_ONT:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = { "-F 0x200 -nt" } + ext.args2 = { "-ax map-ont -I" + Math.ceil(meta.genome_size/1e9) + 'G' } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } } withName: '.*:CONVERT_STATS:SAMTOOLS_CRAM' { diff --git a/modules.json b/modules.json index 8e24d3e..ea8b364 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/filterbyname": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "blast/blastn": { "branch": "master", "git_sha": "583edaf97c9373a20df05a3b7be5a6677f9cd719", @@ -91,11 +96,6 @@ "git_sha": "03fbf6c89e551bd8d77f3b751fb5c955f75b34c5", "installed_by": ["modules"] }, - "seqtk/subseq": { - "branch": "master", - "git_sha": "730f3aee80d5f8d0b5fc532202ac59361414d006", - "installed_by": ["modules"] - }, "untar": { "branch": "master", "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", diff --git a/modules/nf-core/bbmap/filterbyname/environment.yml b/modules/nf-core/bbmap/filterbyname/environment.yml new file mode 100644 index 0000000..dfd8936 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.01 diff --git a/modules/nf-core/bbmap/filterbyname/main.nf b/modules/nf-core/bbmap/filterbyname/main.nf new file mode 100644 index 0000000..7267908 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/main.nf @@ -0,0 +1,71 @@ +process BBMAP_FILTERBYNAME { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bbmap:39.01--h5c4e2a8_0': + 'biocontainers/bbmap:39.01--h5c4e2a8_0' }" + + input: + tuple val(meta), path(reads) + val(names_to_filter) + val(output_format) + val(interleaved_output) + + output: + tuple val(meta), path("*.${output_format}"), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}" + def output = (meta.single_end || interleaved_output) ? + "out=${prefix}.${output_format}" : + "out1=${prefix}_1.${output_format} out2=${prefix}_2.${output_format}" + def names_command = names_to_filter ? "names=${names_to_filter}": "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[filterbyname] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + + """ + filterbyname.sh \\ + -Xmx${avail_mem}g \\ + $input \\ + $output \\ + $names_command \\ + $args \\ + | tee ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def filtered = (meta.single_end || interleaved_output) ? 
+ "echo '' | gzip > ${prefix}.${output_format}" : + "echo '' | gzip >${prefix}_1.${output_format} ; echo '' | gzip >${prefix}_2.${output_format}" + + """ + $filtered + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bbmap/filterbyname/meta.yml b/modules/nf-core/bbmap/filterbyname/meta.yml new file mode 100644 index 0000000..b7b8641 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/meta.yml @@ -0,0 +1,70 @@ +name: bbmap_filterbyname +description: Filter out sequences by sequence header name(s) +keywords: + - fastq + - fasta + - filter +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic + tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/ + documentation: https://www.biostars.org/p/225338/ + licence: ["UC-LBL license (see package)"] + identifier: biotools:bbmap + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and + paired-end data, respectively. + - - names_to_filter: + type: string + description: | + String containing names of reads to filter out of the fastq files. + - - output_format: + type: string + description: | + String with the format of the output file, e.g. fastq.gz, fasta, fasta.bz2 + - - interleaved_output: + type: boolean + description: | + Whether to produce an interleaved fastq output file +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${output_format}": + type: file + description: The trimmed/modified fastq reads + pattern: "*${output_format}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: filterbyname.sh log file + pattern: "*.filterbyname.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tokarevvasily" + - "@sppearce" + +maintainers: + - "@sppearce" diff --git a/modules/nf-core/bbmap/filterbyname/tests/main.nf.test b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test new file mode 100644 index 0000000..17c7ea5 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test @@ -0,0 +1,218 @@ +nextflow_process { + + name "Test Process BBMAP_FILTERBYNAME" + script "../main.nf" + process "BBMAP_FILTERBYNAME" + + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/filterbyname" + + test("paired end fastq.bz2") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fastq.bz2" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("paired end fastq.bz2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fastq.bz2" + 
input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("single end fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fasta - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fastq.gz filter") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fastq.gz - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + 
test("paired end fastq.gz filter interleaved") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("paired end fastq.gz filter interleaved - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap new file mode 100644 index 0000000..e06845a --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap @@ -0,0 +1,145 @@ +{ + "single end fasta": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:54.50002639" + }, + "paired end fastq.bz2": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:31.368676493" + }, + "paired end fastq.bz2 
- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc", + "test_2.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc", + "test_2.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc" + ] + ] + ], + "versions": [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:42.854788269" + }, + "single end fastq.gz filter": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:24.280900344" + }, + "single end fastq.gz - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:43.274477064" + }, + "paired end fastq.gz filter interleaved - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:12:05.324554457" + }, + "single end fasta - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:13.161430777" + }, + "paired end fastq.gz filter interleaved": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:54.599067108" + } +} diff --git a/modules/nf-core/bbmap/filterbyname/tests/tags.yml b/modules/nf-core/bbmap/filterbyname/tests/tags.yml new file mode 100644 index 0000000..707f910 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/tags.yml @@ -0,0 +1,2 @@ +bbmap/filterbyname: + - "modules/nf-core/bbmap/filterbyname/**" diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml deleted file mode 100644 index 7abe364..0000000 --- a/modules/nf-core/seqtk/subseq/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: seqtk_subseq -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf deleted file mode 100644 index d5caebc..0000000 --- a/modules/nf-core/seqtk/subseq/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process SEQTK_SUBSEQ { - tag "$sequences" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : - 'biocontainers/seqtk:1.4--he4a0461_1' }" - - input: - tuple val(meta), path(sequences) - path filter_list - - output: - tuple val(meta), path("*.gz"), emit: sequences - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" - if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { - ext = "fq" - } - """ - seqtk \\ - subseq \\ - $args \\ - $sequences \\ - $filter_list | \\ - gzip --no-name > ${sequences}${prefix}.${ext}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" - if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { - ext = "fq" - } - """ - echo "" | gzip > ${sequences}${prefix}.${ext}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml deleted file mode 100644 index de4a841..0000000 --- a/modules/nf-core/seqtk/subseq/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: seqtk_subseq -description: Select only sequences that match the filtering condition -keywords: - - filtering - - selection - - fastx -tools: - - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format - homepage: https://github.com/lh3/seqtk - documentation: https://docs.csc.fi/apps/seqtk/ - tool_dev_url: https://github.com/lh3/seqtk - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq,fq.gz,fa,fa.gz}" - - filter_list: - type: file - description: BED file or a text file with a list of sequence names - pattern: "*.{bed,lst}" -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq.gz,fa.gz}" -authors: - - "@sidorov-si" -maintainers: - - "@sidorov-si" diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test deleted file mode 100644 index fa8fad6..0000000 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test +++ /dev/null @@ -1,59 +0,0 @@ -nextflow_process { - - name "Test Process SEQTK_SUBSEQ" - script "modules/nf-core/seqtk/subseq/main.nf" - process "SEQTK_SUBSEQ" - config "./standard.config" - - tag "modules" - tag "modules_nfcore" - tag "seqtk" - tag "seqtk/subseq" - - test("sarscov2_subseq_fa") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2_subseq_fa_stub") { - options "-stub" - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap 
b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap deleted file mode 100644 index 75b3793..0000000 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap +++ /dev/null @@ -1,60 +0,0 @@ -{ - "sarscov2_subseq_fa": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" - ] - ], - "1": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ], - "sequences": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" - ] - ], - "versions": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ] - } - ], - "timestamp": "2024-02-22T15:56:36.155954" - }, - "sarscov2_subseq_fa_stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ], - "sequences": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "versions": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ] - } - ], - "timestamp": "2024-02-22T15:56:44.222329" - } -} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/standard.config b/modules/nf-core/seqtk/subseq/tests/standard.config deleted file mode 100644 index e8d7dc3..0000000 --- a/modules/nf-core/seqtk/subseq/tests/standard.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: SEQTK_SUBSEQ { - ext.prefix = { ".filtered" } - } -} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/tags.yml b/modules/nf-core/seqtk/subseq/tests/tags.yml deleted file mode 100644 index 74056ba..0000000 --- a/modules/nf-core/seqtk/subseq/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -seqtk/subseq: - - "modules/nf-core/seqtk/subseq/**" diff --git a/nextflow.config b/nextflow.config index d143247..536987e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -236,7 +236,7 @@ manifest { 
description = 'Pipeline to map reads generated using different sequencing technologies against a genome assembly.' mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.3.0' + version = '1.3.1' doi = '10.5281/zenodo.6563577' } diff --git a/seq_cache_populate.pl b/seq_cache_populate.pl deleted file mode 100644 index e69de29..0000000 diff --git a/subworkflows/local/align_ont.nf b/subworkflows/local/align_ont.nf index ef1a021..751fcbc 100644 --- a/subworkflows/local/align_ont.nf +++ b/subworkflows/local/align_ont.nf @@ -2,7 +2,11 @@ // Align Nanopore read files against the genome // -include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' +include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv' +include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce' +include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup' include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' @@ -14,17 +18,54 @@ workflow ALIGN_ONT { main: ch_versions = Channel.empty() + ch_merged_bam = Channel.empty() + // Convert FASTQ to CRAM + CONVERT_CRAM ( reads, fasta ) + ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions ) - // Align Fastq to Genome with minimap2. 
bam_format is set to true, making the output a *sorted* BAM - MINIMAP2_ALIGN ( reads, fasta, true, "bai", false, false ) - ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() ) + SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam ) + ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions ) + SAMTOOLS_ADDREPLACERG.out.cram + | set { ch_reads_cram } - // Collect all alignment output by sample name - MINIMAP2_ALIGN.out.bam + // Index the CRAM file + SAMTOOLS_INDEX ( ch_reads_cram ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions ) + + ch_reads_cram + | join ( SAMTOOLS_INDEX.out.crai ) + | set { ch_reads_cram_crai } + + + // + // MODULE: generate a CRAM CSV file containing the required parametres for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT + // + GENERATE_CRAM_CSV( ch_reads_cram_crai ) + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) + + // + // SUBWORKFLOW: mapping hic reads using minimap2 or bwamem2 + // + MINIMAP2_MAPREDUCE ( + fasta, + GENERATE_CRAM_CSV.out.csv + ) + ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions ) + ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam) + + + ch_merged_bam + | combine( ch_reads_cram_crai ) + | map { meta_bam, bam, meta_cram, cram, crai -> [ meta_cram, bam ] } + | set { ch_merged_bam } + + + // Collect all BAM output by sample name + ch_merged_bam | map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] } - | groupTuple ( by: [0] ) + | groupTuple( by: [0] ) | map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] } | branch { meta, bams -> @@ -36,7 +77,7 @@ workflow ALIGN_ONT { // Merge, but only if there is more than 1 file SAMTOOLS_MERGE ( ch_bams.multi_bams, [ [], [] ], [ [], [] ] ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions ) // Convert merged BAM to CRAM and calculate indices 
and statistics diff --git a/subworkflows/local/align_pacbio.nf b/subworkflows/local/align_pacbio.nf index f472a6c..59e039c 100644 --- a/subworkflows/local/align_pacbio.nf +++ b/subworkflows/local/align_pacbio.nf @@ -3,7 +3,11 @@ // include { FILTER_PACBIO } from '../../subworkflows/local/filter_pacbio' -include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' +include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv' +include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce' +include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup' include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' @@ -16,21 +20,56 @@ workflow ALIGN_PACBIO { main: ch_versions = Channel.empty() - + ch_merged_bam = Channel.empty() // Filter BAM and output as FASTQ FILTER_PACBIO ( reads, db ) ch_versions = ch_versions.mix ( FILTER_PACBIO.out.versions ) + // Convert FASTQ to CRAM + CONVERT_CRAM ( FILTER_PACBIO.out.fastq, fasta ) + ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions ) + + SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam ) + ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions ) + + SAMTOOLS_ADDREPLACERG.out.cram + | set { ch_reads_cram } + + // Index the CRAM file + SAMTOOLS_INDEX ( ch_reads_cram ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions ) + + ch_reads_cram + | join ( SAMTOOLS_INDEX.out.crai ) + | set { ch_reads_cram_crai } + + + // + // MODULE: generate a CRAM CSV file containing the required parametres for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT + // + GENERATE_CRAM_CSV( ch_reads_cram_crai ) + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) + + // + // SUBWORKFLOW: mapping pacbio reads using minimap2 + // + MINIMAP2_MAPREDUCE ( + fasta, + 
GENERATE_CRAM_CSV.out.csv + ) + ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions ) + ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam) - // Align Fastq to Genome with minimap2. bam_format is set to true, making the output a *sorted* BAM - MINIMAP2_ALIGN ( FILTER_PACBIO.out.fastq, fasta, true, "bai", false, false ) - ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() ) + ch_merged_bam + | combine( ch_reads_cram_crai ) + | map { meta_bam, bam, meta_cram, cram, crai -> [ meta_cram, bam ] } + | set { ch_merged_bam } - // Collect all alignment output by sample name - MINIMAP2_ALIGN.out.bam + // Collect all BAM output by sample name + ch_merged_bam | map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] } - | groupTuple ( by: [0] ) + | groupTuple( by: [0] ) | map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] } | branch { meta, bams -> @@ -42,7 +81,7 @@ workflow ALIGN_PACBIO { // Merge, but only if there is more than 1 file SAMTOOLS_MERGE ( ch_bams.multi_bams, [ [], [] ], [ [], [] ] ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions ) // Convert merged BAM to CRAM and calculate indices and statistics diff --git a/subworkflows/local/bwamem2_mapreduce.nf b/subworkflows/local/bwamem2_mapreduce.nf index 13711fb..572c9d5 100644 --- a/subworkflows/local/bwamem2_mapreduce.nf +++ b/subworkflows/local/bwamem2_mapreduce.nf @@ -76,7 +76,7 @@ workflow BWAMEM2_MAPREDUCE { fasta, [ [], [] ] ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions ) emit: diff --git a/subworkflows/local/filter_pacbio.nf b/subworkflows/local/filter_pacbio.nf index acb21fa..5edb338 100644 --- a/subworkflows/local/filter_pacbio.nf +++ b/subworkflows/local/filter_pacbio.nf @@ -9,7 +9,7 @@ include { 
BLAST_BLASTN } from '../../modules/nf-core/blast/ include { PACBIO_FILTER } from '../../modules/local/pacbio_filter' include { SAMTOOLS_FILTERTOFASTQ } from '../../modules/local/samtools_filtertofastq' include { SEQKIT_FQ2FA } from '../../modules/nf-core/seqkit/fq2fa' -include { SEQTK_SUBSEQ } from '../../modules/nf-core/seqtk/subseq' +include { BBMAP_FILTERBYNAME } from '../../modules/nf-core/bbmap/filterbyname' workflow FILTER_PACBIO { @@ -67,7 +67,7 @@ workflow FILTER_PACBIO { ch_versions = ch_versions.mix ( PACBIO_FILTER.out.versions.first() ) - // Filter the BAM files and convert to FASTQ + // Filter the input BAM and output as interleaved FASTA SAMTOOLS_CONVERT.out.bam | join ( SAMTOOLS_CONVERT.out.csi ) | join ( PACBIO_FILTER.out.list ) @@ -81,7 +81,7 @@ workflow FILTER_PACBIO { ch_versions = ch_versions.mix ( SAMTOOLS_FILTERTOFASTQ.out.versions.first() ) - // Filter inputs provided as FASTQ + // Filter inputs provided as FASTQ and output as interleaved FASTQ ch_reads.fastq | join(PACBIO_FILTER.out.list) | multiMap { meta, fastq, list -> \ @@ -90,12 +90,12 @@ workflow FILTER_PACBIO { } | set { ch_reads_fastq } - SEQTK_SUBSEQ ( ch_reads_fastq.fastqs, ch_reads_fastq.lists ) - ch_versions = ch_versions.mix ( SEQTK_SUBSEQ.out.versions.first() ) + BBMAP_FILTERBYNAME ( ch_reads_fastq.fastqs, ch_reads_fastq.lists , "fastq", true) + ch_versions = ch_versions.mix ( BBMAP_FILTERBYNAME.out.versions.first() ) // Merge filtered outputs as ch_output_fastq - SEQTK_SUBSEQ.out.sequences + BBMAP_FILTERBYNAME.out.reads | concat ( SAMTOOLS_FILTERTOFASTQ.out.fastq ) | set { ch_filtered_fastq } diff --git a/subworkflows/local/minimap2_mapreduce.nf b/subworkflows/local/minimap2_mapreduce.nf index 35b5aae..7f6bb43 100644 --- a/subworkflows/local/minimap2_mapreduce.nf +++ b/subworkflows/local/minimap2_mapreduce.nf @@ -38,7 +38,8 @@ workflow MINIMAP2_MAPREDUCE { .map{ cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path-> tuple([ id: cram_id.id, - chunk_id: cram_id.id + "_" + 
cram_info[5] + chunk_id: cram_id.id + "_" + cram_info[5], + genome_size: ref_id.genome_size ], file(cram_info[0]), cram_info[1], @@ -92,7 +93,7 @@ workflow MINIMAP2_MAPREDUCE { fasta, [ [], [] ] ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions ) emit: diff --git a/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf index 92485e0..67a8254 100644 --- a/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf @@ -256,6 +256,7 @@ def toolCitationText() { // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", + "BBtools (Buschnell 2014),", "blastn (Camacho et al. 2009),", "bwa-mem2 (Vasimuddin et al. 2019),", "Crumble (Bonfield et al. 2019),", @@ -270,6 +271,7 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "