Add support for generating taxprofiler/funcscan input samplesheets for preprocessed FASTQs/FASTAs #688

Draft: wants to merge 19 commits into base: `dev` (changes shown from 6 commits)
4 changes: 4 additions & 0 deletions conf/test_hybrid.config
@@ -27,4 +27,8 @@ params {
skip_gtdbtk = true
gtdbtk_min_completeness = 0
skip_concoct = true

// Generate downstream samplesheets
generate_downstream_samplesheets = true
generate_pipeline_samplesheets = "funcscan,taxprofiler"
}
25 changes: 25 additions & 0 deletions docs/output.md
@@ -707,6 +707,9 @@ Because of aDNA damage, _de novo_ assemblers sometimes struggle to call a correc

</details>

The pipeline can also generate downstream pipeline input samplesheets.
These are stored in `<outdir>/downstream_samplesheets`.

### MultiQC

<details markdown="1">
@@ -751,3 +754,25 @@ Summary tool-specific plots and tables of following tools are currently displaye
</details>

[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.

### Downstream samplesheets

The pipeline can also generate input files for the following downstream pipelines:

- [nf-core/funcscan](https://nf-co.re/funcscan)
- [nf-core/taxprofiler](https://nf-co.re/taxprofiler)

<details markdown="1">
<summary>Output files</summary>

- `downstream_samplesheets/`
- `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembly FASTA files produced by MAG (MEGAHIT, SPAdes, SPAdesHybrid)
- `taxprofiler.csv`: Partially filled out nf-core/taxprofiler `--input` csv with paths to the preprocessed short-read `.fastq.gz` files relative to the results directory

</details>

:::warning
Any generated downstream samplesheet is provided as 'best effort' and is not guaranteed to work straight out of the box!
It may not be complete (e.g. some columns may need to be manually filled in).
:::
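Since some columns are left for the user to complete, a small post-processing step can finish the job. A minimal Python sketch, assuming the taxprofiler column layout the subworkflow emits (`sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta`) and a hypothetical all-Illumina run:

```python
import csv
import io

def fill_missing_platform(csv_text: str, platform: str = "ILLUMINA") -> str:
    """Fill the empty instrument_platform column of a generated taxprofiler samplesheet."""
    rows = list(csv.DictReader(io.StringIO(csv_text)))
    for row in rows:
        if not row.get("instrument_platform"):
            row["instrument_platform"] = platform
    out = io.StringIO()
    writer = csv.DictWriter(out, fieldnames=rows[0].keys())
    writer.writeheader()
    writer.writerows(rows)
    return out.getvalue()

# Illustrative input resembling what the subworkflow would write
sheet = (
    "sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta\n"
    "s1,s1,,/results/QC_shortreads/fastp/s1/s1_1.fastq.gz,/results/QC_shortreads/fastp/s1/s1_2.fastq.gz,\n"
)
print(fill_missing_platform(sheet))
```

The sample name, paths, and `ILLUMINA` default here are illustrative; check your actual samplesheet against the taxprofiler usage docs before launching.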
3 changes: 3 additions & 0 deletions nextflow.config
@@ -194,6 +194,9 @@ params {
validationShowHiddenParams = false
validate_params = true

// Generate downstream samplesheets
generate_downstream_samplesheets = false
generate_pipeline_samplesheets = "funcscan,taxprofiler"
Contributor: Should this default to `null` so that users have to opt in to samplesheet generation?

Member: Yes, that's a good idea! I will set that in createtaxdb too.

Reply: What's the difference? In both cases (null/false as default) it would be an opt-in by the user, no?

Reply: Ah, maybe I misunderstood Carson... not sure :D

}

// Load base.config by default for all pipelines
22 changes: 22 additions & 0 deletions nextflow_schema.json
@@ -83,6 +83,25 @@
}
}
},
"generate_samplesheet_options": {
"title": "Downstream pipeline samplesheet generation options",
"type": "object",
"fa_icon": "fas fa-align-justify",
"description": "Options for generating input samplesheets for complementary downstream pipelines.",
"properties": {
"generate_downstream_samplesheets": {
"type": "boolean",
"description": "Turn on generation of samplesheets for downstream pipelines.",
"fa_icon": "fas fa-toggle-on"
},
"generate_pipeline_samplesheets": {
"type": "string",
"default": "funcscan,taxprofiler",
"description": "Specify which pipeline(s) to generate samplesheets for, as a comma-separated list.",
"fa_icon": "fas fa-toolbox"
}
}
},
"institutional_config_options": {
"title": "Institutional config options",
"type": "object",
@@ -914,6 +933,9 @@
{
"$ref": "#/definitions/reference_genome_options"
},
{
"$ref": "#/definitions/generate_samplesheet_options"
},
{
"$ref": "#/definitions/institutional_config_options"
},
86 changes: 86 additions & 0 deletions subworkflows/local/generate_downstream_samplesheets/main.nf
Contributor: It looks like @jfy133 used only one workflow, which will selectively generate samplesheets based on `params.generate_pipeline_samplesheets`. Do you think it would be best to keep that consistent?

Contributor: Also, since FastQ files are being pulled from the publishDir, it might be a good idea to include options that override user inputs for `params.publish_dir_mode` (so that it is always 'copy' if a samplesheet is generated) and `params.save_clipped_reads`, `params.save_phixremoved_reads` ...etc, so that the preprocessed FastQ files are published to `params.outdir` if a downstream samplesheet is generated.
@@ -0,0 +1,86 @@
//
// Subworkflow with functionality specific to the nf-core/mag pipeline
//

workflow SAMPLESHEET_TAXPROFILER {
take:
ch_reads

main:
def fastq_rel_path = '/'
if (params.bbnorm) {
    fastq_rel_path = '/bbmap/bbnorm/'
} else if (!params.keep_phix) {
    fastq_rel_path = '/QC_shortreads/remove_phix/'
} else if (params.host_fasta) {
    fastq_rel_path = '/QC_shortreads/remove_host/'
} else if (!params.skip_clipping) {
    fastq_rel_path = '/QC_shortreads/fastp/'
}
ch_list_for_samplesheet = ch_reads
.map {
meta, fastq ->
def sample = meta.id
def run_accession = meta.id
def instrument_platform = ""
def fastq_1 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName()
def fastq_2 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName()
def fasta = ""
[ sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ]
}
.tap{ ch_header }

ch_header
.first()
.map{ it.keySet().join(",") }
.concat( ch_list_for_samplesheet.map{ it.values().join(",") })
.collectFile(
name:"${params.outdir}/downstream_samplesheets/taxprofiler.csv",
newLine: true,
sort: false
)
}
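The header-then-rows trick above (header from the first map's `keySet()`, remaining lines from comma-joined values, written via `collectFile`) can be sketched in plain Python. Row contents here are illustrative, and, like the Groovy version, this assumes no field contains a comma:

```python
def rows_to_csv(rows):
    """Header from the first row's keys, then one comma-joined line per row."""
    header = ",".join(rows[0].keys())
    body = [",".join(str(v) for v in row.values()) for row in rows]
    return "\n".join([header] + body) + "\n"

# Illustrative row matching the taxprofiler column layout used in the workflow
rows = [
    {
        "sample": "s1",
        "run_accession": "s1",
        "instrument_platform": "",
        "fastq_1": "/out/QC_shortreads/fastp/s1/s1_1.fastq.gz",
        "fastq_2": "/out/QC_shortreads/fastp/s1/s1_2.fastq.gz",
        "fasta": "",
    },
]
print(rows_to_csv(rows))
```

A proper CSV writer would quote fields containing commas; the `keySet()`/`values()` approach trades that robustness for brevity.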

workflow SAMPLESHEET_FUNCSCAN {
take:
ch_assemblies

main:
ch_list_for_samplesheet = ch_assemblies
Member: Next thing, which I don't think will be so complicated, is to add another input channel for bins, and make an if/else statement here depending on whether they want to send just the raw assemblies (all contigs) or binned contigs to the samplesheet. It will need another pipeline-level parameter too though, `--generate_samplesheet_funcscan_seqtype` or something.

.map {
meta, filename ->
def sample = meta.id
def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName()
[ sample: sample, fasta: fasta ]
}
.tap{ ch_header }

ch_header
.first()
.map{ it.keySet().join(",") }
.concat( ch_list_for_samplesheet.map{ it.values().join(",") })
.collectFile(
name:"${params.outdir}/downstream_samplesheets/funcscan.csv",
newLine: true,
sort: false
)
}

workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
take:
ch_reads
ch_assemblies

main:
def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",")
Member: I've also implemented the same system in createtaxdb now, but with an additional input validation step that you should also adopt here (i.e., to check that someone doesn't add an unsupported pipeline, or makes a typo). Check the utils_nfcore_createtaxdb_pipeline file there.

Author: Done

if ( downstreampipeline_names.contains('taxprofiler') && params.save_clipped_reads ) { // save_clipped_reads must be true
SAMPLESHEET_TAXPROFILER(ch_reads)
}

if ( downstreampipeline_names.contains('funcscan') ) {
SAMPLESHEET_FUNCSCAN(ch_assemblies)
}
}
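The input validation the reviewer suggests (rejecting unsupported pipeline names or typos in `params.generate_pipeline_samplesheets`) can be sketched as follows; this is a Python stand-in for the Groovy check, and the error wording is illustrative:

```python
# Pipelines this subworkflow knows how to generate samplesheets for
SUPPORTED_PIPELINES = {"funcscan", "taxprofiler"}

def validate_pipeline_samplesheets(param: str):
    """Split the comma-separated parameter and fail on unsupported names."""
    requested = [p.strip() for p in param.split(",") if p.strip()]
    unsupported = sorted(set(requested) - SUPPORTED_PIPELINES)
    if unsupported:
        raise ValueError(
            "Unsupported pipeline(s) in --generate_pipeline_samplesheets: "
            + ", ".join(unsupported)
        )
    return requested

print(validate_pipeline_samplesheets("funcscan,taxprofiler"))
```

Running the check before any samplesheet workflow is invoked turns a silent no-op (a typo that matches nothing) into an immediate, actionable error.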
30 changes: 19 additions & 11 deletions workflows/mag.nf
@@ -13,17 +13,18 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation'
include { BINNING } from '../subworkflows/local/binning'
include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement'
include { BUSCO_QC } from '../subworkflows/local/busco_qc'
include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification'
include { CHECKM_QC } from '../subworkflows/local/checkm_qc'
include { GUNC_QC } from '../subworkflows/local/gunc_qc'
include { GTDBTK } from '../subworkflows/local/gtdbtk'
include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna'
include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification'
include { DEPTHS } from '../subworkflows/local/depths'
include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation'
include { BINNING } from '../subworkflows/local/binning'
include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement'
include { BUSCO_QC } from '../subworkflows/local/busco_qc'
include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification'
include { CHECKM_QC } from '../subworkflows/local/checkm_qc'
include { GUNC_QC } from '../subworkflows/local/gunc_qc'
include { GTDBTK } from '../subworkflows/local/gtdbtk'
include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna'
include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification'
include { DEPTHS } from '../subworkflows/local/depths'
include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets/main.nf'

//
// MODULE: Installed directly from nf-core/modules
@@ -1002,6 +1003,13 @@
}
}

//
// Samplesheet generation
//
if ( params.generate_downstream_samplesheets ) {
GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_short_reads_assembly, ch_assemblies )
}

//
// Collate and save software versions
//