From 1d7ff9345b5573fe9dd8080cba5bbd8e3d1c2dfa Mon Sep 17 00:00:00 2001 From: Delphine-L Date: Wed, 3 Jan 2024 14:02:20 +0100 Subject: [PATCH 1/7] addition of workflow and tests for mitogenome assembly --- .../Mitogenome-assembly-VGP0/.dockstore.yml | 12 + .../Mitogenome-Assembly-VGP0-tests.yml | 29 ++ .../Mitogenome-Assembly-VGP0.ga | 348 ++++++++++++++++++ .../test-data/contigs_stats.tsv | 3 + 4 files changed, 392 insertions(+) create mode 100644 workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/.dockstore.yml create mode 100644 workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml create mode 100644 workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga create mode 100644 workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/.dockstore.yml b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/.dockstore.yml new file mode 100644 index 000000000..5a461b516 --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/.dockstore.yml @@ -0,0 +1,12 @@ +version: 1.2 +workflows: +- name: main + subclass: Galaxy + publish: true + primaryDescriptorPath: /Mitogenome-Assembly-VGP0.ga + testParameterFiles: + - /Mitogenome-Assembly-VGP0-tests.yml + authors: + - name: VGP + url: https://vertebrategenomeproject.org + - name: Galaxy diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml new file mode 100644 index 000000000..d0c54faee --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml @@ -0,0 +1,29 @@ +- doc: Test outline for Mitogenome-Assembly-VGP0 + job: + Collection of Pacbio Data: + class: Collection + collection_type: list + elements: + - class: File + identifier: pacbio_01.fasta.gz + location: 
https://www.dropbox.com/scl/fi/jm4g9koi4wek8vj76xzfx/pacbio_01.fasta.gz?rlkey=993pysom6vndt0jx7kfx7lqu0&dl=1 + Species name (latin name): Theretra latreillii lucasii + Email adress: lariviere.delphine@gmail.com + outputs: + contigs_stats: + path: test-data/contigs_stats.tsv + compare: diff + mitogenome_coverage: + asserts: + has_size: + value: 19000 + delta: 2000 + mitogenome_annotation: + asserts: + has_size: + value: 68000 + delta: 5000 + mitogenome_genbank: + asserts: + has_n_lines: + n: 480 diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga new file mode 100644 index 000000000..bcbc24da7 --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga @@ -0,0 +1,348 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "comments": [], + "creator": [ + { + "class": "Organization", + "name": "VGP", + "url": "https://vertebrategenomeproject.org" + }, + { + "class": "Organization", + "name": "Galaxy" + } + ], + "format-version": "0.1", + "license": "CC-BY-4.0", + "version":"0.1", + "name": "Mitogenome-Assembly-VGP0", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Collection of Pacbio Data" + } + ], + "label": "Collection of Pacbio Data", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0.0, + "top": 0.0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": \"\", \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "861c3a49-1055-4030-9a91-e53cbf1ac436", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "Latin name of species", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "Latin name of species", 
"name": "Species name (latin name)" + } + ], + "label": "Species name (latin name)", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 131.640625, + "top": 108.3515625 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "27257feb-04b0-48f9-8bd3-a3a74536eab2", + "when": null, + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "65196658-7776-4165-944b-c7876483edf4" + } + ] + }, + "2": { + "annotation": "Required for NCBI database query", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "Required for NCBI database query", + "name": "Email adress" + } + ], + "label": "Email adress", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 200.78125, + "top": 290.66015625 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "4ddb385f-ecd7-4d48-bc7b-e9c7971a1e62", + "when": null, + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "28bbbdb1-62b7-4d3a-9076-09cc835464cc" + } + ] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/mitohifi/mitohifi/3+galaxy0", + "errors": null, + "id": 3, + "input_connections": { + "operation_mode|email": { + "id": 2, + "output_name": "output" + }, + "operation_mode|species": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool MitoHiFi", + "name": "operation_mode" + }, + { + "description": "runtime parameter for tool MitoHiFi", + "name": "operation_mode" + } + ], + "label": null, + "name": "MitoHiFi", + "outputs": [ + { + "name": "fasta_reference", + "type": "fasta" + }, + { + "name": "genbank_reference", + "type": "genbank" + } + ], + "position": { + "left": 463.26953125, + 
"top": 242.234375 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/mitohifi/mitohifi/3+galaxy0", + "tool_shed_repository": { + "changeset_revision": "99ddbf037d98", + "name": "mitohifi", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"operation_mode\": {\"command\": \"find_reference\", \"__current_case__\": 0, \"species\": {\"__class__\": \"ConnectedValue\"}, \"email\": {\"__class__\": \"ConnectedValue\"}, \"min_length\": \"15000\", \"exact_species\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3+galaxy0", + "type": "tool", + "uuid": "e59aea2d-9bc3-4e2e-9af2-711f0cdea373", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/mitohifi/mitohifi/3+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "operation_mode|input_option|input_reads": { + "id": 0, + "output_name": "output" + }, + "operation_mode|reference_fasta": { + "id": 3, + "output_name": "fasta_reference" + }, + "operation_mode|reference_genbank": { + "id": 3, + "output_name": "genbank_reference" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool MitoHiFi", + "name": "operation_mode" + }, + { + "description": "runtime parameter for tool MitoHiFi", + "name": "operation_mode" + } + ], + "label": null, + "name": "MitoHiFi", + "outputs": [ + { + "name": "mitogenome_fasta", + "type": "fasta" + }, + { + "name": "mitogenome_genbank", + "type": "genbank" + }, + { + "name": "mitogenome_annotation", + "type": "png" + }, + { + "name": "mitogenome_coverage", + "type": "png" + }, + { + "name": "contigs_stats", + "type": "tsv" + }, + { + "name": "hifiasm", + "type": "fasta" + }, + { + "name": "hifiasm_filtered", + "type": "fasta" + }, + { + "name": "hifiasm_contigs", + "type": "fasta" + } + ], + "position": { + "left": 767.6689707083681, + "top": 134.09873055610626 + }, + "post_job_actions": { + 
"HideDatasetActionhifiasm": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "hifiasm" + }, + "HideDatasetActionhifiasm_contigs": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "hifiasm_contigs" + }, + "HideDatasetActionhifiasm_filtered": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "hifiasm_filtered" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/mitohifi/mitohifi/3+galaxy0", + "tool_shed_repository": { + "changeset_revision": "99ddbf037d98", + "name": "mitohifi", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"operation_mode\": {\"command\": \"mitohifi\", \"__current_case__\": 1, \"input_option\": {\"input\": \"pacbio\", \"__current_case__\": 0, \"input_reads\": {\"__class__\": \"ConnectedValue\"}, \"bloom_filter\": \"0\"}, \"reference_fasta\": {\"__class__\": \"ConnectedValue\"}, \"reference_genbank\": {\"__class__\": \"ConnectedValue\"}, \"genetic_code\": \"2\", \"advanced_options\": {\"query_blast\": \"70\", \"circular_size\": null, \"circular_offset\": null, \"outputs\": null}, \"output_zip\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3+galaxy0", + "type": "tool", + "uuid": "ccd83410-608a-4b9d-a43a-11cecd6c2d42", + "when": null, + "workflow_outputs": [ + { + "label": "mitogenome_annotation", + "output_name": "mitogenome_annotation", + "uuid": "85061673-df5e-480d-920a-a11c72bf8f56" + }, + { + "label": "mitogenome_genbank", + "output_name": "mitogenome_genbank", + "uuid": "fe4d1d3d-9caa-45f6-944e-c4b101105dd1" + }, + { + "label": "contigs_stats", + "output_name": "contigs_stats", + "uuid": "31807e4b-e616-4742-92e0-934e6c3c09ae" + }, + { + "label": "mitogenome_coverage", + "output_name": "mitogenome_coverage", + "uuid": "8393c665-a328-4698-93e5-198c6fea52dd" + } + ] + }, + "5": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/compress_file/compress_file/0.1.0", + "errors": null, + "id": 5, + "input_connections": { + "input": { + "id": 4, + "output_name": "mitogenome_fasta" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Compress file(s)", + "name": "input" + } + ], + "label": null, + "name": "Compress file(s)", + "outputs": [ + { + "name": "output_file", + "type": "gz" + } + ], + "position": { + "left": 1065.5131364872464, + "top": 265.5261773629027 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/compress_file/compress_file/0.1.0", + "tool_shed_repository": { + "changeset_revision": "09ea79f9f260", + "name": "compress_file", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.0", + "type": "tool", + "uuid": "5207adc9-f36a-42e9-8631-e01da77ac446", + "when": null, + "workflow_outputs": [ + { + "label": "output_file", + "output_name": "output_file", + "uuid": "6f918a08-1ca1-4d6e-a4d8-7e65e13a65ed" + } + ] + } + }, + "tags": [ + "Reviewed", + "VGP" + ], + "uuid": "57920bf9-1741-4170-badf-39fdfcdaa2e9", + "version": 4 +} \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv new file mode 100644 index 000000000..4d9feeaf9 --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv @@ -0,0 +1,3 @@ +# Related mitogenome is 15354 bp long and has 37 genes +contig_id frameshifts_found annotation_file length(bp) number_of_genes was_circular +final_mitogenome ND5;ATP6;ND3;ND4;COX2;ND1;ND4L;ND2;COX1;COX3;CYTB final_mitogenome.gb 15316 36 True From add30b8814b0d9823522cc6183f54a4998ebdf6e Mon Sep 17 00:00:00 2001 From: Delphine-L Date: Wed, 3 Jan 2024 14:19:11 +0100 Subject: 
[PATCH 2/7] replace tests on contigs stats --- .../Mitogenome-Assembly-VGP0-tests.yml | 5 +++-- .../Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) delete mode 100644 workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml index d0c54faee..bb2a17005 100644 --- a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml @@ -11,8 +11,9 @@ Email adress: lariviere.delphine@gmail.com outputs: contigs_stats: - path: test-data/contigs_stats.tsv - compare: diff + asserts: + has_text: + text: "15316 36 True" mitogenome_coverage: asserts: has_size: diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv deleted file mode 100644 index 4d9feeaf9..000000000 --- a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/test-data/contigs_stats.tsv +++ /dev/null @@ -1,3 +0,0 @@ -# Related mitogenome is 15354 bp long and has 37 genes -contig_id frameshifts_found annotation_file length(bp) number_of_genes was_circular -final_mitogenome ND5;ATP6;ND3;ND4;COX2;ND1;ND4L;ND2;COX1;COX3;CYTB final_mitogenome.gb 15316 36 True From 06c6a2fbbd7c4c6e8bbd28b5530b4c41c7648613 Mon Sep 17 00:00:00 2001 From: Delphine-L Date: Wed, 3 Jan 2024 14:48:09 +0100 Subject: [PATCH 3/7] addition of changelog and README --- .../Mitogenome-assembly-VGP0/CHANGELOG.MD | 6 ++++++ .../Mitogenome-assembly-VGP0/README.MD | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD create mode 100644 
workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/README.MD diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD new file mode 100644 index 000000000..4f536dc9c --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD @@ -0,0 +1,6 @@ +# Changelog + + +## [0.1] - 2024-01-03 + +- Creation of workflow for Mitochondrial DNA assebly with MitoHifi. \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/README.MD b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/README.MD new file mode 100644 index 000000000..ac0a1e232 --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/README.MD @@ -0,0 +1,18 @@ +# Assembly of Mitochondrial DNA from PacBio HiFi reads + +Generate mitochondrial assembly based on PacBio HiFi reads. + +## Inputs + +1. HiFi long reads [fastq] +2. Latin name of a related Species +3. Email address required for NCBI database query + +## Outputs + +1. Contigs Statistics +2. Images : + 1. Mitogenome Coverage + 2. Mitogenome Annotation +3. Genbank file of the assembled mitogenome +4. 
Fasta file of the assembled mitogenome \ No newline at end of file From 1516a2b338769ef47e6fae3e283bfd0a53f11bdd Mon Sep 17 00:00:00 2001 From: Delphine-L Date: Wed, 3 Jan 2024 15:12:10 +0100 Subject: [PATCH 4/7] Replacing dropbox link with zenodo link --- .../Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml index bb2a17005..972ed0da3 100644 --- a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml @@ -6,7 +6,7 @@ elements: - class: File identifier: pacbio_01.fasta.gz - location: https://www.dropbox.com/scl/fi/jm4g9koi4wek8vj76xzfx/pacbio_01.fasta.gz?rlkey=993pysom6vndt0jx7kfx7lqu0&dl=1 + location: https://zenodo.org/records/10454765/files/pacbio_01.fasta.gz?download=1 Species name (latin name): Theretra latreillii lucasii Email adress: lariviere.delphine@gmail.com outputs: From c868539fa2d29ec9a85c78f790a22cddde17aca7 Mon Sep 17 00:00:00 2001 From: Delphine Lariviere Date: Thu, 25 Jan 2024 10:09:41 -0500 Subject: [PATCH 5/7] Update workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD Co-authored-by: Marius van den Beek --- workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD index 4f536dc9c..04c1d03ae 100644 --- a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/CHANGELOG.MD @@ -3,4 +3,4 @@ ## [0.1] - 2024-01-03 -- Creation of workflow for Mitochondrial DNA assebly with MitoHifi. 
\ No newline at end of file +- Creation of workflow for Mitochondrial DNA assembly with MitoHifi. \ No newline at end of file From 57bdb5e6e10a1e3e3e42228b25273f07002e405a Mon Sep 17 00:00:00 2001 From: Delphine Lariviere Date: Thu, 25 Jan 2024 11:46:37 -0500 Subject: [PATCH 6/7] using a galaxyproject email adress --- .../Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml index 972ed0da3..9ed2a53dd 100644 --- a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0-tests.yml @@ -8,7 +8,7 @@ identifier: pacbio_01.fasta.gz location: https://zenodo.org/records/10454765/files/pacbio_01.fasta.gz?download=1 Species name (latin name): Theretra latreillii lucasii - Email adress: lariviere.delphine@gmail.com + Email adress: iwc@galaxyproject.org outputs: contigs_stats: asserts: From 942c350e29f53180d5ead14a9317fabc74215624 Mon Sep 17 00:00:00 2001 From: Marius van den Beek Date: Wed, 7 Feb 2024 16:09:03 +0100 Subject: [PATCH 7/7] Set the release version --- .../Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga index bcbc24da7..2830bcef7 100644 --- a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0.ga @@ -15,8 +15,8 @@ ], "format-version": "0.1", "license": "CC-BY-4.0", - "version":"0.1", "name": "Mitogenome-Assembly-VGP0", + "release": "0.1", "steps": { "0": { "annotation": "",