From e02722ac531076e92d6b5e7a9ccc71205f620764 Mon Sep 17 00:00:00 2001 From: Peter Kruczkiewicz Date: Fri, 1 Nov 2024 16:49:16 -0500 Subject: [PATCH] fix: Illumina PE read headers for IRMA --- CHANGELOG.md | 4 ++++ modules/local/cat_illumina_fastq.nf | 21 +++++++++++++++++---- nextflow.config | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd218e9..67ca6fe5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[3.5.3](https://github.com/CFIA-NCFAD/nf-flu/releases/tag/3.5.3)] - 2024-11-01 + +This patch release fixes an issue ([#22](https://github.com/peterk87/nf-flu/issues/22)) with Illumina paired-end read analysis by IRMA producing empty consensus sequences when the forward and reverse reads do not contain "1:N:0:." or "2:N:0:." in the FASTQ header lines. + ## [[3.5.2](https://github.com/CFIA-NCFAD/nf-flu/releases/tag/3.5.2)] - 2024-10-18 This patch release fixes a few issues when running the pipeline. diff --git a/modules/local/cat_illumina_fastq.nf b/modules/local/cat_illumina_fastq.nf index ff302283..9f93c406 100644 --- a/modules/local/cat_illumina_fastq.nf +++ b/modules/local/cat_illumina_fastq.nf @@ -59,34 +59,47 @@ process CAT_ILLUMINA_FASTQ { // append 1:N:0:. or 2:N:0:. to forward and reverse reads if "[12]:N:.*" // not present in the FASTQ header for compatability with IRMA assembly """ +function modify_fastq_header() { + local replacement="\$1" + awk -v repl="\$replacement" ' + NR % 4 == 1 { + # Only the first line of each 4-line FASTQ record (the header line) is rewritten + if (\$0 ~ /^@/ && \$0 !~ /[12]:N:.*/) { + sub(/[[:space:]]*\$/, " " repl ":N:0:."); # Append " 1:N:0:." or " 2:N:0:." (repl is the read number); POSIX class used instead of the gawk-only backslash-s
+ } + } + { print } + ' +} + touch ${prefix}_1.merged.fastq.gz touch ${prefix}_2.merged.fastq.gz if [[ ${read1.size} > 0 ]]; then cat ${read1.join(' ')} \\ - | perl -ne 'if (\$_ =~ /^@.* .*/ && !(\$_ =~ /^@.* [12]:N:.*/)){ chomp \$_; print "\$_ 1:N:0:.\n"; } else { print "\$_"; }' \\ + | modify_fastq_header 1 \\ | gzip -ck \\ >> ${prefix}_1.merged.fastq.gz fi if [[ ${read1gz.size} > 0 ]]; then zcat ${read1gz.join(' ')} \\ - | perl -ne 'if (\$_ =~ /^@.* .*/ && !(\$_ =~ /^@.* [12]:N:.*/)){ chomp \$_; print "\$_ 1:N:0:.\n"; } else { print "\$_"; }' \\ + | modify_fastq_header 1 \\ | gzip -ck \\ >> ${prefix}_1.merged.fastq.gz fi if [[ ${read2.size} > 0 ]]; then cat ${read2.join(' ')} \\ - | perl -ne 'if (\$_ =~ /^@.* .*/ && !(\$_ =~ /^@.* [12]:N:.*/)){ chomp \$_; print "\$_ 2:N:0:.\n"; } else { print "\$_"; }' \\ + | modify_fastq_header 2 \\ | gzip -ck \\ >> ${prefix}_2.merged.fastq.gz fi if [[ ${read2gz.size} > 0 ]]; then zcat ${read2gz.join(' ')} \\ - | perl -ne 'if (\$_ =~ /^@.* .*/ && !(\$_ =~ /^@.* [12]:N:.*/)){ chomp \$_; print "\$_ 2:N:0:.\n"; } else { print "\$_"; }' \\ + | modify_fastq_header 2 \\ | gzip -ck \\ >> ${prefix}_2.merged.fastq.gz fi diff --git a/nextflow.config b/nextflow.config index f0f540da..86a83143 100644 --- a/nextflow.config +++ b/nextflow.config @@ -155,7 +155,7 @@ manifest { description = 'Influenza A virus genome assembly pipeline' homePage = 'https://github.com/CFIA-NCFAD/nf-flu' author = 'Peter Kruczkiewicz, Hai Nguyen' - version = '3.5.2' + version = '3.5.3' nextflowVersion = '!>=22.10.1' mainScript = 'main.nf' doi = '10.5281/zenodo.13892044'