-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipeline.xml
executable file
·98 lines (92 loc) · 7.48 KB
/
pipeline.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
<?xml version="1.0" encoding="UTF-8"?>
<pipeline xmlns="http://www.sing-group.org/compi/pipeline-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<version>2.0.0-SNAPSHOT</version>
<params>
  <!--
    Pipeline parameters. All of them are declared global. Most path-like
    values are joined onto working_dir by the tasks, so they are documented
    as relative to the working directory.
  -->
  <param name="working_dir" shortName="working_dir" global="true">The working directory of the pipeline; input and output paths are resolved relative to it.</param>
  <!-- Tool executables and Docker images -->
  <param name="hisat2_index" shortName="hisat2_index" defaultValue="/opt/hisat2-2.0.5/hisat2-build" global="true">The executable of the HISAT2 index command.</param>
  <param name="genome_index_image" shortName="genome_index_image" defaultValue="singgroup/dewe:1.3.0" global="true">The Docker image for the genome index task.</param>
  <param name="hisat2" shortName="hisat2" defaultValue="/opt/hisat2-2.0.5/hisat2" global="true">The executable of the HISAT2 command.</param>
  <param name="alignment_image" shortName="alignment_image" defaultValue="singgroup/dewe:1.3.0" global="true">The Docker image for the alignment task.</param>
  <param name="samtools" shortName="samtools" defaultValue="/opt/samtools-1.3.1/samtools" global="true">The executable of the samtools command.</param>
  <param name="samtools_image" shortName="samtools_image" defaultValue="singgroup/dewe:1.3.0" global="true">The Docker image for the samtools task.</param>
  <param name="htseq_count" shortName="htseq_count" defaultValue="htseq-count" global="true">The executable of the htseq-count command.</param>
  <param name="htseq_count_image" shortName="htseq_count_image" defaultValue="pegi3s/htseq:0.12.3" global="true">The HTSeq Docker image</param>
  <param name="delite_image" shortName="delite_image" defaultValue="bioinfolabcro/delite:v.1.2" global="true">The DElite Docker image</param>
  <param name="sva_image" shortName="sva_image" defaultValue="pegi3s/r_sva:3.54.0" global="true">The R SVA Docker image</param>
  <!-- User input files and directories -->
  <param name="genome_fasta" shortName="genome_fasta" global="true">The reference genome FASTA file (relative to the working directory).</param>
  <param name="reference_annotation" shortName="reference_annotation" global="true">The path to the reference GTF file for the analysis (relative to the working directory).</param>
  <param name="genome_index_dir" shortName="genome_index_dir" defaultValue="compi/hisat2-indexes" global="true">The directory where genome indexes should be created (relative to the working directory).</param>
  <param name="genome_index" shortName="genome_index" defaultValue="genome_index" global="true">The name for the genome index.</param>
  <param name="user_contrasts_file" shortName="user_contrasts_file" defaultValue="config/contrasts.tsv" global="true">The path to the contrasts file provided by the user (relative to the working directory).</param>
  <param name="batch_correction" shortName="batch_correction" defaultValue="none" global="true">The factor (column in metadata) for batch correction (or "interaction" to use a combination of all of them). If "none", no batch correction is applied.</param>
  <param name="samples_dir" shortName="samples_dir" defaultValue="samples" global="true">The directory containing the samples reads (relative to the working directory).</param>
  <param name="metadata_file" shortName="metadata_file" defaultValue="metadata.tsv" global="true">The metadata file name (relative to the samples directory).</param>
  <!-- Internal files and directories -->
  <param name="samples_files_list" shortName="samples_files_list" defaultValue="compi/samples.txt" global="true">The file where the list of samples is stored (relative to the working directory).</param>
  <param name="contrasts_file" shortName="contrasts_file" defaultValue="compi/contrasts.tsv" global="true">The path to the contrasts file used internally by the pipeline (relative to the working directory).</param>
  <param name="samples_alignment_dir" shortName="samples_alignment_dir" defaultValue="compi/aligned-reads" global="true">The directory where aligned samples reads should be placed (relative to the working directory).</param>
  <param name="samples_htseqcount_dir" shortName="samples_htseqcount_dir" defaultValue="compi/htseq-count" global="true">The directory where htseq-count results should be placed (relative to the working directory).</param>
  <!-- NOTE(review): shortName is "deas_dir" while name is "dea_dir"; kept unchanged so existing command lines keep working. -->
  <param name="dea_dir" shortName="deas_dir" defaultValue="compi/dea" global="true">The directory where DEA results should be placed (relative to the working directory).</param>
  <param name="scripts_dir" shortName="sd" defaultValue="/scripts" global="true">Path of the directory containing the pipeline scripts.</param>
</params>
<tasks>
<!-- First task of the pipeline; its code lives in tasks/initialization.sh. -->
<task id="initialization" src="tasks/initialization.sh"/>
<!--
  Runs the HISAT2 index command on the reference genome and writes the index
  files using the genome_index_dir/genome_index prefix.
  Path arguments are double-quoted so that a working directory or file name
  containing spaces or glob characters reaches the command as one argument.
-->
<task id="genome-index" after="initialization">
  ${hisat2_index} \
    "${working_dir}/${genome_fasta}" \
    "${working_dir}/${genome_index_dir}/${genome_index}"
</task>
<!--
  Aligns the paired reads of every sample with HISAT2. The sample names are
  produced by list_samples.sh; for each one the files NAME_1.fastq.gz and
  NAME_2.fastq.gz in samples_dir are aligned and written as NAME.sam in
  samples_alignment_dir.
  Path arguments are double-quoted to avoid word splitting and globbing.
  The "in" command is left unchanged so that every loop iterating over the
  samples keeps the same iteration list.
-->
<foreach id="alignment" after="genome-index"
         of="command"
         in="${scripts_dir}/list_samples.sh ${working_dir}/${samples_dir}/${metadata_file} ${working_dir}/${samples_files_list}"
         as="sample_name">
  ${hisat2} --dta \
    -x "${working_dir}/${genome_index_dir}/${genome_index}" \
    -1 "${working_dir}/${samples_dir}/${sample_name}_1.fastq.gz" \
    -2 "${working_dir}/${samples_dir}/${sample_name}_2.fastq.gz" \
    -S "${working_dir}/${samples_alignment_dir}/${sample_name}.sam"
</foreach>
<!--
  Sorts the SAM file of every sample with samtools and writes the result as
  NAME.bam next to it in samples_alignment_dir.
  The "*" prefix in the after attribute is Compi's iteration binding: each
  iteration depends on the matching iteration of the alignment loop rather
  than on the whole loop (see the Compi documentation).
  Path arguments are double-quoted to avoid word splitting and globbing.
-->
<foreach id="samtools" after="*alignment"
         of="command"
         in="${scripts_dir}/list_samples.sh ${working_dir}/${samples_dir}/${metadata_file} ${working_dir}/${samples_files_list}"
         as="sample_name">
  ${samtools} sort \
    -o "${working_dir}/${samples_alignment_dir}/${sample_name}.bam" \
    "${working_dir}/${samples_alignment_dir}/${sample_name}.sam"
</foreach>
<!--
  Counts reads per gene for every sample with htseq-count, reading NAME.bam
  from samples_alignment_dir together with the reference annotation, and
  redirecting the standard output to NAME.tsv in samples_htseqcount_dir.
  The "*" prefix in the after attribute is Compi's iteration binding to the
  matching iteration of the samtools loop (see the Compi documentation).
  Path arguments and the redirect target are double-quoted to avoid word
  splitting and globbing.
-->
<foreach id="htseq-count" after="*samtools"
         of="command"
         in="${scripts_dir}/list_samples.sh ${working_dir}/${samples_dir}/${metadata_file} ${working_dir}/${samples_files_list}"
         as="sample_name">
  ${htseq_count} \
    --format bam --order pos --mode intersection-strict --stranded reverse \
    --minaqual 1 --type exon --idattr gene_id \
    "${working_dir}/${samples_alignment_dir}/${sample_name}.bam" \
    "${working_dir}/${reference_annotation}" \
    > "${working_dir}/${samples_htseqcount_dir}/${sample_name}.tsv"
</foreach>
<!-- Runs after the htseq-count loop; its code lives in tasks/prepare_deas.sh. -->
<task id="prepare-deas" after="htseq-count" src="tasks/prepare_deas.sh"/>
<!--
  Runs tasks/batch_correction.sh once per entry listed by "ls" in the DEA
  directory, exposing the entry name as contrast_dir. The "if" command
  guards the task on the batch_correction parameter being different
  from 'none'.
-->
<foreach id="batch-correction"
         after="prepare-deas"
         src="tasks/batch_correction.sh"
         of="command"
         in="ls ${working_dir}/${dea_dir}"
         as="contrast_dir"
         if="[[ ${batch_correction} != 'none' ]]"/>
<!--
  Runs the DElite Docker image once per entry listed by "ls" in the DEA
  directory. The entry is mounted at /home/DElite in the container, and the
  reference condition is read from its reference.txt file.
  "reference" is a plain shell variable set inside the task, not a pipeline
  parameter. The path given to cat, the command substitution, the image name
  and the reference value are double-quoted to avoid word splitting and
  globbing.
-->
<foreach id="delite" after="batch-correction"
         of="command"
         in="ls ${working_dir}/${dea_dir}"
         as="contrast_dir">
  reference="$(cat "${working_dir}/${dea_dir}/${contrast_dir}/reference.txt")"
  docker run --rm -v "${working_dir}/${dea_dir}/${contrast_dir}:/home/DElite" "${delite_image}" \
    counts_file=counts.tsv \
    metadata_file=metadata.tsv \
    condition=class \
    reference_condition="${reference}"
</foreach>
</tasks>
<metadata>
  <!-- Human-readable task descriptions, one per task id, in execution order. -->
  <task-description id="initialization">Initializes the directories for the pipeline outputs and pulls the Docker images.</task-description>
  <task-description id="genome-index">Creates the genome index for the reference genome.</task-description>
  <task-description id="alignment">Aligns each pair of FASTQ files using HISAT2.</task-description>
  <task-description id="samtools">Sorts the HISAT2 alignments and converts them into BAM format using samtools.</task-description>
  <task-description id="htseq-count">Runs htseq-count to obtain the raw counts for subsequent analyses.</task-description>
  <task-description id="prepare-deas">Creates one DEA folder for each specified contrast.</task-description>
  <task-description id="batch-correction">Performs batch correction in each DEA folder (skipped when batch_correction is "none").</task-description>
  <task-description id="delite">Runs DElite in each DEA folder.</task-description>
</metadata>
</pipeline>