diff --git a/.gitignore b/.gitignore
index 528813e..b1aba65 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,11 @@ example_command.sh
 *html
 todo.txt
 bin/__pycache__/
+.venv/
+
+# Ignore all .pyc files
+*.pyc
+*/__pycache__/
+__pycache__
+
+
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 38dece7..5540005 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -6,19 +6,15 @@ version: 2
 # Set the version of Python and other tools you might need
-# build:
-#   os: ubuntu-22.04
-#   tools:
-#     python: "3.10"
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
 
 mkdocs:
   configuration: mkdocs.yml
 
 # Optionally declare the Python requirements required to build your docs
 python:
-  install:
-    - requirements: docs/requirements.txt
-    # system_packages: true
-
-# Build all formats
-# formats: all
\ No newline at end of file
+  install:
+    - requirements: docs/requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index 5919940..2c50186 100644
--- a/README.md
+++ b/README.md
@@ -10,18 +10,59 @@ Additionally, the generated documentation also includes a table of contents for
 
 Overall, this repo helps improve the documentation process for pipelines and tools, making it easier for others to understand and use them.
 
+## Requirements
 
-# To build the container
+Python 3.11.0 or higher is required to run this program.
 
-Go to the env folder and ruh the script,
-`bash get_containers.sh`
+## Installation
 
-*Note: Creating containers requires sudo permission*
+To install the required packages, run the following command:
 
-# To test
+`pip install -r env/requirements.txt`
 
-- Clone the repo to your local system.
-- run the command like this,
-`bash bin/runMe.sh -e`
-- run the example command like given below,
-`bash example_command.sh`
+## Usage
+
+To see the available options for generating the pipeline documentation, run the following command:
+
+`python main.py -h`
+
+To create an example command, use the following command:
+
+`python main.py -e`
+
+## Pipeline YAML Structure
+
+The pipeline.yaml file contains all the relevant information about the pipeline or tool, including the description, inputs, outputs, parameters, and usage. This information is used to generate the MD documents, which provide clear and detailed information about the pipeline or tool.
+
+The pipeline.yaml file has the following structure:
+
+```
+sections: # list of sections
+  - name:
+    headings: # list of headings (currently only two levels of headings are supported)
+      - name: # name of the heading
+        type: text # data type
+        content: | # content of the heading
+          The program aims to simplify the process of creating and maintaining documentation for software projects using MkDocs and Read the Docs.
+```
+
+Each section has a name and a list of headings. Each heading has a name, type, and content. The name is the name of the heading, the type is the data type, and the content is the content of the heading. The content can be plain text or a list of items.
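To make the structure above concrete, the following is a minimal, hypothetical sketch of how such a file could be walked and rendered to Markdown. It assumes PyYAML and covers only the `text`, `list`, and `dict`/`dictionary` data types listed below; the function names are illustrative and are not this repo's actual API.

```python
# Hypothetical renderer sketch -- not the repo's actual implementation.
# Assumes PyYAML (pip install pyyaml); handles only text/list/dict types.
import yaml


def render_heading(heading):
    """Render one heading dict from pipeline.yaml to a Markdown fragment."""
    parts = [f"## {heading['name']}"]
    content = heading.get("content", "")
    dtype = heading.get("type", "text")
    if dtype == "text":
        parts.append(str(content))  # printed as is
    elif dtype == "list":
        parts.append("\n".join(f"- {item}" for item in content))
    elif dtype in ("dict", "dictionary"):
        parts.append("\n".join(f"- **{k}:** {v}" for k, v in content.items()))
    return "\n\n".join(parts)


def render_pipeline_yaml(path):
    """Render every section of a pipeline.yaml file into one Markdown string."""
    with open(path) as fh:
        data = yaml.safe_load(fh)
    parts = []
    for section in data["sections"]:
        parts.append(f"# {section['name']}")
        parts.extend(render_heading(h) for h in section.get("headings", []))
    return "\n\n".join(parts)


if __name__ == "__main__":
    print(render_pipeline_yaml("static/templates/pipeline.yaml"))
```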
+
+The following data types are supported:
+
+- text: printed as is
+- list: printed as a list
+- dictionary: printed as a key value pair
+- dict: printed as a key value pair
+- image: printed as an image
+- table: printed as a table (can be used to display dict as a table)
+- file: printed as a table (can be used for text files)
+- versions: printed as a table (can be used for versions.yaml file)
+
+## Examples
+
+Example pipeline.yaml can be found at
+
+```
+static/templates/pipeline.yaml
+```
diff --git a/bin/formatted_tool_desc.txt b/bin/formatted_tool_desc.txt
deleted file mode 100644
index 1682bbd..0000000
--- a/bin/formatted_tool_desc.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Process Tool Version Type Container_Name Description Contact_Person
-bwa_umi Sentieon BWA v202010.01 Commercial SomaticPanelPipeline_2021-06-24.sif "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." -
- Sentieon UMI v202010.01 Commercial SomaticPanelPipeline_2021-06-24.sif "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." -
- Sentieon UTIL v202010.01 Commercial SomaticPanelPipeline_2021-06-24.sif "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." -
- SAMtools v1.9 Open-Source SomaticPanelPipeline_2021-06-24.sif "Tools (written in C using htslib) for manipulating next-generation sequencing data" https://github.com/samtools/samtools/issues
-bwa_align Sentieon BWA-MEM v202010.01 Commercial SomaticPanelPipeline_2021-06-24.sif "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." -
- BWA-MEM v0.7.17-r1188 Open-Source SomaticPanelPipeline_2021-06-24.sif "Read alignment and mapping" https://github.com/lh3/bwa/issues
-markdup Sentieon DRIVER v202010.01 Commercial SomaticPanelPipeline_2021-06-24.sif "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." -
-lowcov Sambamba v0.8.0 Open-Source SomaticPanelPipeline_2021-06-24.sif "Tools for working with SAM/BAM data" https://github.com/biod/sambamba/issues
-freebayes freebayes v1.6.0 Open-Source SomaticPanelPipeline_2021-06-24.sif "Bayesian haplotype-based polymorphism discovery." https://github.com/freebayes/freebayes/issues
-vardict VarDict v Open-Source SomaticPanelPipeline_2021-06-24.sif "VarDict is a variant calling program for SNV, MNV, indels (<50 bp), and complex variants." https://github.com/AstraZeneca-NGS/VarDict/issues
-tnscope Sentieon DRIVER v202010.01 Commercial SomaticPanelPipeline_2021-06-24.sif "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency."
- -pindel Pindel v0.2.5b9, 20160729 Open-Source SomaticPanelPipeline_2021-06-24.sif "Pindel can detect breakpoints of large deletions, medium sized insertions, inversions, tandem duplications and other structural variants at single-based resolution from next-gen sequence data. It uses a pattern growth approach to identify the breakpoints of these variants from paired-end short reads." kaiye@xjtu.edu.cn -concatenate_vcfs VCFtools v0.1.16 Open-Source SomaticPanelPipeline_2021-06-24.sif "A set of tools written in Perl and C++ for working with VCF files, such as those generated by the 1000 Genomes Project." https://github.com/vcftools/vcftools/issues - Vt v0.5 Open-Source SomaticPanelPipeline_2021-06-24.sif "vt is a variant tool set that discovers short variants from Next Generation Sequencing data." Adiran (atks@umich.edu) -cnvkit CNVkit v0.9.9 Open-Source SomaticPanelPipeline_2021-06-24.sif "Genome-wide copy number from high-throughput sequencing" https://github.com/etal/cnvkit/issues -gene_plot CNVkit v0.9.5 Open-Source SomaticPanelPipeline_2021-06-24.sif "Genome-wide copy number from high-throughput sequencing" https://github.com/etal/cnvkit/issues -annotate_vep VEP v103.1 Open-Source ensembl-vep_release_103.sif "The Ensembl Variant Effect Predictor predicts the functional effects of genomic variants" https://github.com/Ensembl/ensembl-vep/issues diff --git a/bin/functions.R b/bin/functions.R deleted file mode 100644 index e696f98..0000000 --- a/bin/functions.R +++ /dev/null @@ -1,27 +0,0 @@ -#camel <- function(x){ #function for camel case -# str_to_title((paste(unlist(str_split(x, '_')), collapse=' '))) -#} - - -camel <- function(x) { - words <- unlist(str_split(x, '_')) - new_words <- str_to_title(paste(words, collapse = ' ')) -} - -round_df <- function(df, digits) { - nums <- vapply(df, is.numeric, FUN.VALUE = logical(1)) - - df[,nums] <- round(df[,nums], digits = digits) - - return(df) -} - -highlight_text <- function(x, color,weight="normal",type="normal") { - sprintf("%s", color,weight,type,x) -} - -get_unique_keys <- function(data) {#function to get unique keys from the data in yaml dict - unique_keys <- unique(gsub("\\..*","",colnames(as.data.frame(data)))) - return(unique_keys) -} - diff --git a/bin/info.Rmd b/bin/info.Rmd deleted file mode 100644 index 4382839..0000000 --- a/bin/info.Rmd +++ /dev/null @@ -1,16 +0,0 @@ - -# Pipeline Info - -```{r info, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -info_df = as.data.frame(t(data.frame(params$pipeline$info))) -#new_variables=c() -#for (i in rownames(info_df)) { -# new_variables=c(new_variables,camel(i)) -#} -info_df = cbind(rownames(info_df),info_df) -colnames(info_df) = c('Info','Description') -info_df$Info <- sapply(info_df$Info, camel) -info_df$Info = paste('**',info_df$Info,'**',sep='') -kable(info_df,booktabs=TRUE, escape=FALSE,row.names=FALSE) %>% - kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),full_width = F, position = "center") -``` diff --git a/bin/input_data.Rmd b/bin/input_data.Rmd deleted file mode 100644 index 8151b04..0000000 --- a/bin/input_data.Rmd +++ /dev/null @@ -1,38 +0,0 @@ -# Input Data - -```{r Input Data, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -id_keys=get_unique_keys(params$pipeline$input_data) - -for (id_element in id_keys) { - - if (id_element == 'input_desc') { - - cat(params$pipeline$input_data$input_desc, sep='\n') - - } else if(id_element == 'input_csv') { - - cat("Table: Example input data csv file format", sep='\n') - cat('\n') - 
input_csv_df = read.table(file = params$pipeline$input_data$input_csv, sep = ',', header = TRUE, stringsAsFactors = FALSE, check.names=F) - print(kable(input_csv_df,booktabs=TRUE, escape=FALSE,row.names=FALSE) %>% - kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),full_width = F, position = "center")) - - } else if(id_element == 'column_descriptions'){ - - cd_df = data.frame(t(data.frame(params$pipeline$input_data$column_descriptions))) - cd_df = cbind(rownames(cd_df),cd_df) - colnames(cd_df) = c('Column Name','Description') - cat("Table: Column description for input data csv file", sep='\n') - cat('\n') - print(kable(cd_df,booktabs=TRUE, escape=FALSE,row.names=FALSE) %>% - kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),full_width = F, position = "center")) - - } else { - - new_id_element_name = camel(id_element) - cat(paste('## ', new_id_element_name, sep=''), sep = '\n') - cat('\n') - cat(params$pipeline$input_data[[id_element]], sep = '\n') - } -} -``` \ No newline at end of file diff --git a/bin/introduction.Rmd b/bin/introduction.Rmd deleted file mode 100644 index 2a6a383..0000000 --- a/bin/introduction.Rmd +++ /dev/null @@ -1,3 +0,0 @@ -# Introduction - -`r params$pipeline$introduction` diff --git a/bin/main.Rmd b/bin/main.Rmd deleted file mode 100644 index c5b9084..0000000 --- a/bin/main.Rmd +++ /dev/null @@ -1,76 +0,0 @@ ---- -title: | - ![](`r params$pipeline$logo`){width=0.5in} - `r params$pipeline$info$name` -output: - rmdformats::readthedown: - toc_depth: 5 - toc_float: true - highlight: tango - fig_width: 7 - fig_height: 6 - fig_caption: true - keep_md: true - thumbnails: true - lightbox: false - gallery: false - url: blue - number_sections: true -author: '`r params$pipeline$info$author`' -link-citations: true -date: '`r Sys.Date()`' ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -knitr::opts_knit$set(root.dir = paste(params$pipeline$info$server_location,'tmp',sep='/')) -library(knitr) -library(rmarkdown) -library(kableExtra) -library(DT) -library(dplyr) -library(htmltools) -library(stringr) -source(paste(root,'bin/functions.R', sep='/')) -``` - - -```{r,echo = FALSE} -table_no = 1 -figure_no = 1 -``` - -```{r var, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -params_keys=get_unique_keys(params$pipeline) -child_rmds = c() - -for (key in params_keys) { - - if (key == 'logo') { - next - } else { - if (params$pipeline[key] != '') { - child_rmds = append(child_rmds, paste(key,'.Rmd', sep='')) - } - } -} - -``` - -```{r child documents, message=FALSE, warning=FALSE, echo=FALSE, results='asis'} - -for (i in 1:length(child_rmds)) { - - child_env <- new.env() - - child_res <- knitr::knit_child(child_rmds[i], envir = child_env, quiet = TRUE) - - cat(child_res, sep = '\n') - child_md_name=gsub('.*/', '', child_rmds[i]) - child_md_name=gsub('Rmd$', 'md', child_md_name) - child_md_out = paste(params$pipeline$info$server_location,'/tmp/',child_md_name,sep='') - cat(child_res, file=child_md_out, sep = '\n') -} -``` - - diff --git a/bin/map_software_stack.py b/bin/map_software_stack.py deleted file mode 100644 index 436e9e3..0000000 --- a/bin/map_software_stack.py +++ /dev/null @@ -1,108 +0,0 @@ -import sys -import os -import yaml -import argparse - -# Add arguments -parser = argparse.ArgumentParser(description='Description of your script') - -parser.add_argument('-i', '--input_versions_yaml', type=str, help='Tool versions file in yaml format') -parser.add_argument('-td', 
'--tool_descriptons', type=str, help='Tool Description Mapping file') -parser.add_argument('-o', '--output', type=str, help='output file name') - -args = parser.parse_args() - - -# Load the YAML data -def load_yaml(version_yaml): - with open(version_yaml, 'r') as versions_data: - versions_dict = yaml.load(versions_data, Loader=yaml.SafeLoader) - - return versions_dict - -#prepare the mapping dict -def read_tool_descriptions(description_file): - # Initialize an empty dictionary to store the column data - data_dict = {} - - # Open the file in read mode - with open(description_file, 'r') as file: - # Read the first line of the file (the header) - header = file.readline().strip() - - # Split the header into a list of column names - columns = header.split('\t') - - # Initialize an empty list for each column in the header - for column in columns: - data_dict[column] = [] - - # Loop over the remaining lines in the file - for line in file: - # Split the line into a list of values - values = line.strip().split('\t') - - # Loop over each column in the header and append the corresponding value - for i, column in enumerate(columns): - data_dict[column].append(values[i]) - - # Return the dictionary of column data - return data_dict - - -versions_dict = load_yaml(args.input_versions_yaml) -descriptions_dict = read_tool_descriptions(args.tool_descriptons) - - -""" -{ - 'bwa_umi': - { - 'Sentieon BWA': - { - 'version': 'v202010.01', - 'container': 'SomaticPanelPipeline_2021-06-24.sif' - }, - 'Sentieon UMI': - { - 'version': 'v202010.01', - 'container': 'SomaticPanelPipeline_2021-06-24.sif' - } - }, - 'annotate_vep': - { - 'VEP': - { - 'version': 'v103.1', - 'container': 'ensembl-vep_release_103.sif' - }, - } -} - -""" - -if os.path.isfile(args.output): - os.remove(args.output) - -def map_tool_descriptions(version_dict,mapping_dict,output_file): - - with open(output_file, 'a+') as out_handle: - out_handle.write(f"Process\tTool\tVersion\tType\tContainer_Name\tDescription\tContact_Person\n") - - for process in version_dict: - out_var = process - for tool in version_dict[process]: - - v = version_dict[process][tool]['version'] - if str(v).startswith('v') or str(v).startswith('V'): - version = v - else: - version = f"v{v}" - - mapping_index = mapping_dict['Tool'].index(tool) - out_var += f"\t{tool}\t{version}\t{mapping_dict['Type'][mapping_index]}\t{version_dict[process][tool]['container']}\t{mapping_dict['Short_Description'][mapping_index]}\t{mapping_dict['External_Contact_Person'][mapping_index]}\n" - - out_handle.write(f"{out_var}") - - -map_tool_descriptions(versions_dict,descriptions_dict,args.output) diff --git a/bin/min_requirements.Rmd b/bin/min_requirements.Rmd deleted file mode 100644 index d1c1c12..0000000 --- a/bin/min_requirements.Rmd +++ /dev/null @@ -1,12 +0,0 @@ -# Minimum Requirements - -```{r Minimum Requirements, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} - -mr_keys=get_unique_keys(params$pipeline$min_requirements) - -for (mr_element in mr_keys) { - new_mr_element_name = camel(mr_element) - cat(paste('**', new_mr_element_name, '**: ', params$pipeline$min_requirements[[mr_element]], sep=''), sep = '\n') - cat('\n') -} -``` diff --git a/bin/output_data.Rmd b/bin/output_data.Rmd deleted file mode 100644 index 68ac09e..0000000 --- a/bin/output_data.Rmd +++ /dev/null @@ -1,30 +0,0 @@ -# Output Data - -```{r Output Data, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -od_keys=get_unique_keys(params$pipeline$output_data) - -for (od_element in od_keys) { - - if (od_element == 
'output_desc') { - - cat(params$pipeline$output_data$output_desc, sep='\n') - - } else if(od_element == 'output_files'){ - - cd_df = data.frame(t(data.frame(params$pipeline$output_data$output_files))) - cd_df = cbind(rownames(cd_df),cd_df) - colnames(cd_df) = c('Output File Type','Description') - cat('Table: Output files and their description', sep='\n') - cat('\n') - print(kable(cd_df,booktabs=TRUE, escape=FALSE,row.names=FALSE) %>% - kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),full_width = F, position = "center")) - - } else { - - new_od_element_name = camel(od_element) - cat(paste('## ', new_od_element_name, sep=''), sep = '\n') - cat('\n') - cat(params$pipeline$output_data[[od_element]], sep = '\n') - } -} -``` \ No newline at end of file diff --git a/bin/pipeline_components.Rmd b/bin/pipeline_components.Rmd deleted file mode 100644 index 6121bfd..0000000 --- a/bin/pipeline_components.Rmd +++ /dev/null @@ -1,22 +0,0 @@ -# Pipeline Components - -```{r Pipeline Components, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -pc_keys=get_unique_keys(params$pipeline$pipeline_components) - -for (pc_element in pc_keys) { - - if (pc_element == 'pipeline_components_desc') { - - cat(params$pipeline$pipeline_components$pipeline_components_desc, sep='\n') - cat('\n') - - } else { - - new_pc_element_name = camel(pc_element) - cat(paste('## ', new_pc_element_name, sep=''), sep = '\n') - cat('\n') - cat(params$pipeline$pipeline_components[[pc_element]], sep = '\n') - - } -} -``` \ No newline at end of file diff --git a/bin/profiles.Rmd b/bin/profiles.Rmd deleted file mode 100644 index b2f9e1b..0000000 --- a/bin/profiles.Rmd +++ /dev/null @@ -1,14 +0,0 @@ -# Profiles - -```{r profiles, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -profiles=get_unique_keys(params$pipeline$profiles) -res <- vector(mode = "list", length = length(profiles)) - - -for (i in 1:length(profiles)) { - profile = profiles[i] - res[i] <- knitr::knit_child('profiles_child.Rmd', quiet = TRUE, envir = environment()) -} - -cat(unlist(res), sep = '\n') -``` \ No newline at end of file diff --git a/bin/profiles_child.Rmd b/bin/profiles_child.Rmd deleted file mode 100644 index 5203e3d..0000000 --- a/bin/profiles_child.Rmd +++ /dev/null @@ -1,24 +0,0 @@ -## `r profile` - -**Profile Name:** `r params$pipeline$profiles[[profile]]$profile_name` - -### Description - -`r params$pipeline$profiles[[profile]]$profile_description` - -### Usage - -`r params$pipeline$profiles[[profile]]$profile_usage` - - -### Validation Data - -To validate the pipeline, please access the validation data stored on the server using the path provided below, - -**`r params$pipeline$profiles[[profile]]$profile_validatation_data_path`** - -### Test Data - -To test the pipeline, please access the downsampled test data stored on the server using the path provided below, - -**`r params$pipeline$profiles[[profile]]$profile_test_data_path`** \ No newline at end of file diff --git a/bin/runMe.sh b/bin/runMe.sh deleted file mode 100644 index a122665..0000000 --- a/bin/runMe.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/bash - -################################################################################ -version=$(cat $(dirname $0)/version.txt) -author='Ram Sai Nanduri' -git_repo='https://github.com/ramsainanduri/pipeline_documentation.git' -pd_path=$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P |sed 's/\/bin$//g') -example_command="bash $(realpath $0) -g ${git_repo} -o ${pd_path}/pipeline_documentation.html -y 
${pd_path}/templates/pipeline.yaml -s ${pd_path}/envs/pipeline_documentation_v1.0.sif" -################################################################################ - - -################################################################################ -# Help # -################################################################################ - -Help() -{ - # Display Help - echo -e "Description:\n\tThis script produces documentation in HTML format and creates a \"docs\" folder containing the necessary files to display the \"readme\" in the ReadTheDocs format.\n" - echo -e "This script is developed and maintained by:\n\tAuthor:\t\t${author}\n\tVersion:\t${version}\n" - echo -e "USAGE:\n\t${example_command}" - echo - echo "parameters: $(basename $0) [-e|h|g|o|s|v|y]" - echo " -e create example_command.sh" - echo " -h print this help." - echo " -g github url to your project/pipeline" - echo " -o full path to the output file" - echo " -s full path to the singularity container" - echo " -v print version" - echo " -y full path to the pipeline yaml file" - echo -} - -# Get the flags -while getopts ':g:o:s:y:evh' flag -do - case "${flag}" in - g) github_project_URL=${OPTARG};; - o) output_file=${OPTARG};; - s) singularity_container=${OPTARG};; - y) yaml_file=${OPTARG};; - e) #create example_command.sh - echo -e "${example_command}\n" > ${pd_path}/example_command.sh - exit;; - v) #print version - echo -e "${version}" - exit;; - h) # display Help - Help - exit;; - - \?) # incorrect flag - echo -e "Error: Invalid flag\n" - Help - exit;; - esac -done - - -#pipeline documentation repo path and files -date=$(date '+%Y%m%d') -rmd_file=${pd_path}"/bin/main.Rmd" -template_mkdocs_yaml=${pd_path}"/templates/template.mkdocs.yaml" -local_project_location=$(grep 'location:' ${yaml_file} | sed 's/server_location:.* //g' | sed 's/ //g' | sed "s/\'//g" |sed 's/\"//g') -github_project_name=$(basename ${github_project_URL} | sed 's/.git//g') -tmp_dir=${local_project_location}"/tmp" -md_output=$(echo ${output_file} |sed 's/.html$/.md/g') -md_destination=${local_project_location}'/docs/index.md' - -#singularity specific -binds=${pd_path}','${local_project_location} -singularity_cmd='singularity run --bind' - -#main Rmd cmd - -versions_yaml=$(grep 'software_stack:' ${yaml_file} | rev |cut -f1 -d' ' |rev) -tool_description_file=${pd_path}"/bin/tool_descriptions.tsv" -mapped_tools_versions=${tmp_dir}"/tool_versions.mapped.tsv" -tools_mapping_cmd="python3 "${pd_path}"/bin/map_software_stack.py -i "${versions_yaml}" -td "${tool_description_file}" -o "${mapped_tools_versions} - -#Generate script file -rm ${local_project_location}/run.sh -echo -e "mkdir -p ${tmp_dir}" >> ${local_project_location}/run.sh -echo -e "cp ${pd_path}/bin/*Rmd ${tmp_dir}" >> ${local_project_location}/run.sh -echo -e "\n${rmd_cmd}\n" >> ${local_project_location}/run.sh - -rmd_cmd='R -e "library(yaml);params=yaml::read_yaml('\'${yaml_file}\'');root='\'${pd_path}/\'';mapped_tools_versions='\'${mapped_tools_versions}\'';params;mapped_tools_versions;rmarkdown::render('\'$rmd_file\'', output_file='\'${output_file}\'', intermediates_dir='\'${tmp_dir}\'', knit_root_dir='\'${tmp_dir}\'', clean=F, envir = parent.frame())"' - -#Generate script file -rm ${local_project_location}/run.sh -echo -e "mkdir -p ${tmp_dir}" >> ${local_project_location}/run.sh -echo -e "${tools_mapping_cmd}" >> ${local_project_location}/run.sh -echo -e "cp ${pd_path}/bin/*Rmd ${tmp_dir}" >> ${local_project_location}/run.sh -echo -e "\n${rmd_cmd}\n" >> ${local_project_location}/run.sh 
- -#Copying the markdown file to the projects docs folder -echo -e "rm -rf ${local_project_location}/docs" >> ${local_project_location}/run.sh -echo -e "rm ${local_project_location}/mkdocs.yml" >> ${local_project_location}/run.sh -echo -e "mkdocs new ${local_project_location}" >> ${local_project_location}/run.sh - -echo -e "echo -e \"site_name: ${github_project_name}\\\nsite_url: ${github_project_URL}\\\n\" > ${local_project_location}/mkdocs.yml " >> ${local_project_location}/run.sh - -echo -e "grep -v \"site_name\|site_url\" ${template_mkdocs_yaml} >> ${local_project_location}/mkdocs.yml" >> ${local_project_location}/run.sh - -echo -e "cp ${pd_path}/templates/requirements.txt ${local_project_location}/docs/" >> ${local_project_location}/run.sh -echo -e "cp ${pd_path}/templates/readthedocs.yaml ${local_project_location}/.readthedocs.yaml" >> ${local_project_location}/run.sh - -#echo -e "cp ${md_output} ${md_destination}" >> ${local_project_location}/run.sh - -echo -e "cp ${pd_path}/templates/github_pages.template.yaml ${local_project_location}/docs/_config.yaml" >> ${local_project_location}/run.sh -echo -e "cp ${tmp_dir}/*.md ${local_project_location}/docs/" >> ${local_project_location}/run.sh -echo -e "mv ${local_project_location}/docs/info.md ${local_project_location}/docs/index.md" >> ${local_project_location}/run.sh -echo -e "rm ${local_project_location}/docs/main.knit.md" >> ${local_project_location}/run.sh - -echo -e "rm -rf ${tmp_dir}" >> ${local_project_location}/run.sh -echo -e "rm ${md_output}" >> ${local_project_location}/run.sh - -#eval ${i}='something' - -#Running the script in a container -main_cmd=${singularity_cmd}' '${binds}' '${singularity_container}' bash '${local_project_location}'/run.sh' -echo -e "\n${main_cmd}\n" -$main_cmd -rm ${local_project_location}/run.sh \ No newline at end of file diff --git a/bin/scope.Rmd b/bin/scope.Rmd deleted file mode 100644 index 947fc37..0000000 --- a/bin/scope.Rmd +++ /dev/null @@ -1,12 +0,0 @@ -# Scope - -```{r Scope, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -scope_keys=get_unique_keys(params$pipeline$scope) - -for (scope_element in scope_keys) { - new_scope_element_name = camel(scope_element) - cat(paste('## ', new_scope_element_name, sep=''), sep = '\n') - cat('\n') - cat(params$pipeline$scope[[scope_element]], sep = '\n') -} -``` diff --git a/bin/software_stack.Rmd b/bin/software_stack.Rmd deleted file mode 100644 index ae0938e..0000000 --- a/bin/software_stack.Rmd +++ /dev/null @@ -1,7 +0,0 @@ -# Software Stack - -```{r software stack, echo=FALSE,warning=FALSE, message=FALSE, results='asis'} -software_list = read.table(file = mapped_tools_versions, sep = '\t', header = TRUE, stringsAsFactors = FALSE, check.names=F) -kable(software_list,booktabs=TRUE, escape=FALSE,row.names=FALSE) %>% - kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),full_width = F, position = "center") -``` \ No newline at end of file diff --git a/bin/tool_descriptions.tsv b/bin/tool_descriptions.tsv deleted file mode 100644 index 9429c96..0000000 --- a/bin/tool_descriptions.tsv +++ /dev/null @@ -1,26 +0,0 @@ -Tool Type URL External_Contact_Person Short_Description Reference -BWA-MEM Open-Source https://github.com/lh3/bwa https://github.com/lh3/bwa/issues "Read alignment and mapping" https://doi.org/10.48550/arXiv.1303.3997 -CNVkit Open-Source https://cnvkit.readthedocs.io/en/stable/ https://github.com/etal/cnvkit/issues "Genome-wide copy number from high-throughput sequencing" 
https://doi.org/10.1371/journal.pcbi.1004873 -DELLY Open-Source https://github.com/dellytools/delly https://github.com/dellytools/delly/issues "Structural variant discovery by integrated paired-end and split-read analysis" https://doi.org/10.1093/bioinformatics/bts378 -freebayes Open-Source https://github.com/freebayes/freebayes https://github.com/freebayes/freebayes/issues "Bayesian haplotype-based polymorphism discovery." https://doi.org/10.48550/arXiv.1207.390 -GATK Open-Source https://gatk.broadinstitute.org/hc/en-us https://github.com/broadinstitute/gatk/issues "Variant Discovery in High-Throughput Sequencing Data" http://dx.doi.org/10.1101/gr.107524.110 -Manta Open-Source https://github.com/Illumina/manta https://github.com/Illumina/manta/issues "Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads."  https://doi.org/10.1093/bioinformatics/btv710 -MELT Open-Source https://melt.igs.umaryland.edu/index.php - "The Mobile Element Locator Tool (MELT): Perform transposon analysis" https://doi.org/10.1101%2Fgr.218032.116 -Pindel Open-Source https://github.com/genome/pindel kaiye@xjtu.edu.cn "Pindel can detect breakpoints of large deletions, medium sized insertions, inversions, tandem duplications and other structural variants at single-based resolution from next-gen sequence data. It uses a pattern growth approach to identify the breakpoints of these variants from paired-end short reads." https://doi.org/10.1093%2Fbioinformatics%2Fbtp394 -Sambamba Open-Source https://github.com/biod/sambamba https://github.com/biod/sambamba/issues "Tools for working with SAM/BAM data" https://doi.org/10.1093/bioinformatics/btv098 -SAMtools Open-Source https://github.com/samtools/samtools https://github.com/samtools/samtools/issues "Tools (written in C using htslib) for manipulating next-generation sequencing data" https://doi.org/10.1093/bioinformatics/btp352 -Sentieon Commercial https://support.sentieon.com/manual/ - "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." https://doi.org/10.1101/115717 -SVDB Open-Source https://github.com/J35P312/SVDB https://github.com/J35P312/SVDB/issues "SVDB is a toolkit for constructing and querying structural variant databases. The databases are constructed using the output vcf files from structural variant callers such as TIDDIT, Manta, Fermikit or Delly. SVDB may also be used to merge SV vcf files from multiple callers or individuals." - -VarDict Open-Source https://github.com/AstraZeneca-NGS/VarDict https://github.com/AstraZeneca-NGS/VarDict/issues "VarDict is a variant calling program for SNV, MNV, indels (<50 bp), and complex variants." https://doi.org/10.1093/nar/gkw227 -VEP Open-Source https://github.com/Ensembl/ensembl-vep https://github.com/Ensembl/ensembl-vep/issues "The Ensembl Variant Effect Predictor predicts the functional effects of genomic variants" https://doi.org/10.1186/s13059-016-0974-4 -VCFtools Open-Source https://github.com/vcftools/vcftools https://github.com/vcftools/vcftools/issues "A set of tools written in Perl and C++ for working with VCF files, such as those generated by the 1000 Genomes Project." http://dx.doi.org/10.1093/bioinformatics/btr330 -Vt Open-Source https://genome.sph.umich.edu/wiki/Vt Adiran (atks@umich.edu) "vt is a variant tool set that discovers short variants from Next Generation Sequencing data." 
https://doi.org/10.1093/bioinformatics/btv112 -R Open-Source https://www.r-project.org/ https://www.r-project.org/ "R is a free software environment for statistical computing and graphics." - -SnpEff Open-Source https://github.com/pcingola/SnpEff https://github.com/pcingola/SnpEff/issues "SnpEff is a variant annotation and effect prediction tool." https://pcingola.github.io/SnpEff/adds/SnpEff_paper.pdf -CDM Open-Soource https://github.com/Clinical-Genomics-Lund/cmd-data-management - "Quality control reporting for CMD lab" - -Coyote Open-Soource https://github.com/Clinical-Genomics-Lund/coyote - "Presentation and intreprtation of variants" - -Sentieon BWA Commercial https://support.sentieon.com/manual/ - "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." https://doi.org/10.1101/115717 -Sentieon UMI Commercial https://support.sentieon.com/manual/ - "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." https://doi.org/10.1101/115717 -Sentieon UTIL Commercial https://support.sentieon.com/manual/ - "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." https://doi.org/10.1101/115717 -Sentieon BWA-MEM Commercial https://support.sentieon.com/manual/ - "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." https://doi.org/10.1101/115717 -Sentieon DRIVER Commercial https://support.sentieon.com/manual/ - "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." https://doi.org/10.1101/115717 diff --git a/bin/usage.Rmd b/bin/usage.Rmd deleted file mode 100644 index 1826fcd..0000000 --- a/bin/usage.Rmd +++ /dev/null @@ -1,3 +0,0 @@ -# Usage - -`r params$pipeline$usage` \ No newline at end of file diff --git a/bin/version.txt b/bin/version.txt deleted file mode 100644 index 992977a..0000000 --- a/bin/version.txt +++ /dev/null @@ -1 +0,0 @@ -v1.1.0 \ No newline at end of file diff --git a/bin/workflow.Rmd b/bin/workflow.Rmd deleted file mode 100644 index bb27c05..0000000 --- a/bin/workflow.Rmd +++ /dev/null @@ -1,5 +0,0 @@ -# Analysis Workflow - -Schematic diagram showing the main steps of the analysis method followed to perform the data analysis. 
- -![workflow](`r params$pipeline$workflow`) \ No newline at end of file diff --git a/docs/_config.yaml b/configs/default.github.pages.yaml similarity index 100% rename from docs/_config.yaml rename to configs/default.github.pages.yaml diff --git a/templates/template.mkdocs.yaml b/configs/default.mkdocs.yml similarity index 53% rename from templates/template.mkdocs.yaml rename to configs/default.mkdocs.yml index 5eb5139..1d10096 100644 --- a/templates/template.mkdocs.yaml +++ b/configs/default.mkdocs.yml @@ -1,7 +1,3 @@ -site_name: pipeline_documentation -site_url: https://github.com/ramsainanduri/pipeline_documentation.git -github_url: https://github.com/ramsainanduri/pipeline_documentation.git - theme: name: readthedocs logo: https://raw.githubusercontent.com/ramsainanduri/pipeline_documentation/dev/templates/rs-logo-color.svg @@ -19,6 +15,9 @@ plugins: options: docstring_style: sphinx markdown_extensions: + - abbr + - attr_list + - pymdownx.details - toc: permalink: True - markdown_include.include: @@ -28,17 +27,7 @@ markdown_extensions: extra: generator: false -nav: - - 'Pipeline Information': 'index.md' - - Introduction: 'introduction.md' - - Scope: 'scope.md' - - 'Input Data': 'input_data.md' - - 'Output Data': 'output_data.md' - - 'Minimum Requirements': 'min_requirements.md' - - Usage: 'usage.md' - - 'Pipeline Components': 'pipeline_components.md' - - 'Software Stack': 'software_stack.md' - - Profiles: 'profiles.md' - - Workflow: 'workflow.md' +extra_css: + - extra.css copyright: Copyright © 2023 | Center for Molecular Diagnostics (CMD) | Region Skåne | Sölvegatan 23B, Lund. diff --git a/templates/readthedocs.yaml b/configs/default.readthedocs.yaml similarity index 64% rename from templates/readthedocs.yaml rename to configs/default.readthedocs.yaml index 38dece7..5540005 100644 --- a/templates/readthedocs.yaml +++ b/configs/default.readthedocs.yaml @@ -6,19 +6,15 @@ version: 2 # Set the version of Python and other tools you might need -# build: -# os: ubuntu-22.04 -# tools: -# python: "3.10" +build: + os: ubuntu-22.04 + tools: + python: "3.11" mkdocs: configuration: mkdocs.yml # Optionally declare the Python requirements required to build your docs python: - install: - - requirements: docs/requirements.txt - # system_packages: true - -# Build all formats -# formats: all \ No newline at end of file + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/docs/introduction.md b/docs/Introduction.md similarity index 97% rename from docs/introduction.md rename to docs/Introduction.md index c8f1526..4678b3d 100644 --- a/docs/introduction.md +++ b/docs/Introduction.md @@ -1,5 +1,7 @@ +# Introduction -# Introduction + +## Introduction This automatic documentation generation is a time-saving tool for developers and teams, as it eliminates the need to manually create and maintain documentation. It also helps ensure that documentation is up-to-date and consistent, as changes made to the pipeline.yaml file used for the document updation in a simple and easy way. @@ -10,4 +12,5 @@ The HTML document is visually appealing and easy to navigate, with links to diff Additionally, the generated documentation also includes a table of contents for easy navigation, and sections for examples. Overall, this repo helps improve the documentation process for pipelines and tools, making it easier for others to understand and use them. 
- + + diff --git a/docs/Min_requirements.md b/docs/Min_requirements.md new file mode 100644 index 0000000..8cb8838 --- /dev/null +++ b/docs/Min_requirements.md @@ -0,0 +1,38 @@ +# Min_requirements + + +## Operating System + +The pipeline is designed to run on Linux operating systems, such as Ubuntu and CentOS... + + + +## Number of CPUs + +The pipeline requires at least 4 CPUs to run efficiently... + + + +## Memory + +The pipeline requires at least 16 GB of memory to run efficiently... + + + +## Disk Space + +The pipeline requires at least 100 GB of disk space to run efficiently... + + + +## Singularity + +The pipeline requires Singularity version 3.0 or higher to run efficiently... + + + +## Python + +The pipeline requires Python version 3.6 or higher to run efficiently... + + diff --git a/docs/pipeline_components.md b/docs/Pipeline Components.md similarity index 78% rename from docs/pipeline_components.md rename to docs/Pipeline Components.md index 0469693..e7f154f 100644 --- a/docs/pipeline_components.md +++ b/docs/Pipeline Components.md @@ -1,33 +1,50 @@ - # Pipeline Components -To automate the documentation for a pipeline, the following pipeline components are included, #Can have specific keys based on the pipeline, free free to modify, add, delete keys in this section depending on the pipeline + +## Pipeline Components Description + +To automate the documentation for a pipeline, the following pipeline components are included. + ## Data Retrieval This step involves retrieving data related to the pipeline, such as the code, input data, and output data. + + ## Parsing This step involves parsing the code and the input and output data to extract relevant information, such as the pipeline components, their parameters, and their inputs and outputs. + + ## Template Generation This step involves generating a template for the documentation based on the parsed information. The template should include sections for the pipeline components, their descriptions, their parameters, and their inputs and outputs. + + ## Documentation Generation -This step involves generating the actual documentation by populating the template with the parsed information. The documentation should be generated in a format that is easily readable and accessible, such as HTML or PDF. +This step involves generating the actual documentation by populating the parsed information in respective markdown files. The documentation should be generated in a format that is easily readable and accessible. + + ## Version Control This step involves using version control software, such as Git, to track changes to the documentation over time, and to maintain a history of the documentation. + + ## Workflow Management This step involves using workflow management software, such as Snakemake or Nextflow, to automate the pipeline components and manage dependencies between the components. -## Maintenance And Support + + +## Maintenance and Support This step involves maintaining and supporting the documentation over time, including regular updates and bug fixes. + + diff --git a/docs/scope.md b/docs/Scope.md similarity index 70% rename from docs/scope.md rename to docs/Scope.md index 241c801..07ed304 100644 --- a/docs/scope.md +++ b/docs/Scope.md @@ -1,39 +1,53 @@ - # Scope + ## Purpose The program aims to simplify the process of creating and maintaining documentation for software projects using MkDocs and Read the Docs. 
+
+
 ## Audience
 
-The program is designed for developers, technical writers, and project managers who want to create high-quality documentation with minimal effort.
+The program is designed for developers, technical writers, and project managers who want to create high-quality documentation...
+
+
 ## Functionality
 
-The program will automatically generate documentation for a software project using the MkDocs framework, which allows for the creation of user-friendly, responsive documentation sites. The documentation site will be hosted on Read the Docs, which provides a robust platform for hosting and managing documentation.
+The program will automatically generate documentation for a software project using the MkDocs framework...
+
+
 ## Features
 
-The program will include the following features:
 - Automated creation of documentation using MkDocs
 - Integration with Read the Docs for hosting and management of documentation sites
 - Customizable templates for documentation sites
 - Support for multiple documentation versions and languages
 - Integration with source control systems such as Git
+
 ## Technology Stack
 
-The program will be developed using Python and will use the MkDocs and Read the Docs APIs to generate and host documentation sites.
+The program will be developed using Python and will use the MkDocs and Read the Docs APIs to generate and host documentation sites...
+
+
 ## Limitations
 
-The program may be limited by the capabilities of MkDocs and Read the Docs, and may not be suitable for projects with complex documentation requirements.
+The program may be limited by the capabilities of MkDocs and Read the Docs, and may not be suitable for projects with complex documentation requirements...
+
+
+
+## Maintenance and Support
+
+The program will be maintained and supported by the development team, who will provide regular updates and bug fixes. Documentation and user support will also be provided...
-## Maintenance And Support
-The program will be maintained and supported by the development team, who will provide regular updates and bug fixes. Documentation and user support will also be provided.
 
 ## Expected Outcomes
 
-The program is expected to simplify the process of creating and maintaining documentation, reduce the amount of time and effort required to create documentation sites, and improve the overall quality of documentation for software projects.
+The program is expected to simplify the process of creating and maintaining documentation, reduce the amount of time and effort required to create documentation sites, and improve the overall quality of documentation for software projects...
+
+
diff --git a/docs/Usage.md b/docs/Usage.md
new file mode 100644
index 0000000..b6f5808
--- /dev/null
+++ b/docs/Usage.md
@@ -0,0 +1,16 @@
+# Usage
+
+
+## How to run the pipeline
+
+To generate the online Markdown (MD) files in your project folder, clone the repo and execute the python script src/main.py.
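For orientation, the command-line interface this page describes might be wired roughly as in the sketch below. This is a hypothetical reconstruction using argparse, not the actual contents of src/main.py; only the `-e` flag and the implicit `-h` are taken from this page, and everything else is illustrative. The flags themselves are described next.

```python
# Hypothetical sketch of the src/main.py CLI -- illustrative only.
import argparse


def build_parser():
    parser = argparse.ArgumentParser(
        description="Generate Markdown documentation from a pipeline.yaml file."
    )
    # -h/--help is added automatically by argparse.
    parser.add_argument(
        "-e", "--example",
        action="store_true",
        help="create an example script from the bundled template files",
    )
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    if args.example:
        print("Would write an example script using the repo's templates.")
```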
+
+To learn how to pass parameters to the script, run,
+
+`python3 src/main.py -h`
+
+To create an example script using the provided template files in the repo location, run,
+
+`python3 src/main.py -e`
+
+
diff --git a/docs/Workflow Diagram.md b/docs/Workflow Diagram.md
new file mode 100644
index 0000000..74ae86e
--- /dev/null
+++ b/docs/Workflow Diagram.md
@@ -0,0 +1,7 @@
+# Workflow Diagram
+
+
+## Workflow Flowchart
+
+![template_workflow.png](https://raw.githubusercontent.com/ramsainanduri/pipeline_documentation/dev/templates/template_workflow.png)
+
diff --git a/docs/extra.css b/docs/extra.css
new file mode 100644
index 0000000..af60c18
--- /dev/null
+++ b/docs/extra.css
@@ -0,0 +1,63 @@
+.wy-nav-content {
+    max-width: 80% !important;
+}
+
+h1,h2 {
+    margin-top: 50px;
+    margin-bottom: 10px !important;
+}
+
+h3 {
+    font-size: 18px;
+    margin-top: 30px;
+    margin-bottom: 10px !important;
+
+}
+
+h4 {
+    font-size: 17px;
+    font-style: italic;
+    margin-top: 30px;
+    margin-bottom: 10px !important;
+
+}
+
+h5 {
+    font-size: 16px;
+    font-style: italic;
+    margin-top: 30px;
+    margin-bottom: 10px !important;
+
+}
+
+p {
+    margin-top: 5px !important;
+}
+
+* {
+    margin-bottom: 0px !important;
+}
+
+table tr {
+    text-align: left !important;
+}
+
+table td {
+    min-width: 10px;
+    max-width: 800px;
+    text-align: left !important;
+}
+
+table td:last-child {
+    width: 100%;
+}
+
+hr {
+    margin-top: 50px;
+    border: 1px solid black;
+}
+
+.twemoji {
+    width: 20px;
+    color: black;
+}
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index e4bb1f0..85691ed 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,38 +1,34 @@
+## Pipeline Description
-
-# Pipeline Info
-
-<table>
-<tr><td>Info</td><td>Description</td></tr>
-<tr><td>**Name**</td><td>Pipeline Documentation</td></tr>
-<tr><td>**Version**</td><td>v1.1.0</td></tr>
-<tr><td>**Author**</td><td>Ram Sai Nanduri</td></tr>
-<tr><td>**Author Email**</td><td>Ram.Nanduri@skane.se</td></tr>
-<tr><td>**Git Repo**</td><td>https://github.com/ramsainanduri/pipeline_documentation</td></tr>
-<tr><td>**Server Location**</td><td>/data/bnf/dev/ram/Pipelines/validation_reports/pipeline_documentation</td></tr>
-</table>
+<table>
+<tr><td>Info</td><td>Description</td></tr>
+<tr><td>name</td><td>Pipeline Documentation</td></tr>
+<tr><td>version</td><td>v1.1.0</td></tr>
+<tr><td>author</td><td>Ram Sai Nanduri</td></tr>
+<tr><td>author_email</td><td>Ram.Nanduri@skane.se</td></tr>
+<tr><td>git_repo</td><td>https://github.com/ramsainanduri/pipeline_documentation</td></tr>
+</table>
diff --git a/docs/input_data.md b/docs/input_data.md
deleted file mode 100644
index 55b08a9..0000000
--- a/docs/input_data.md
+++ /dev/null
@@ -1,65 +0,0 @@
-
-# Input Data
-
-The input data for the pipeline consists of fastq files. However, for the pipeline to consume the data, it needs to be provided in the form of a CSV
-file that includes metadata. Below is an example of the CSV file format that is expected, along with a detailed description of each column.
-
-Table: Example input data csv file format
-
-<table>
-<tr><th>sample_id</th><th>type</th><th>assay</th><th>platform</th><th>read1</th><th>read2</th></tr>
-<tr><td>example</td><td>template</td><td>documentation</td><td>mkdocs</td><td>example.1.fastq.gz</td><td>example.2.fastq.gz</td></tr>
-</table>
-
-Table: Column description for input data csv file
-
-<table>
-<tr><th>Column Name</th><th>Description</th></tr>
-<tr><td>sample_id</td><td>Text representating the name or id of the sample being analysed</td></tr>
-<tr><td>type</td><td>Type of the sample, eg. tumor or normal</td></tr>
-<tr><td>assay</td><td>Assay of the sample, eg. tumorWGS, myeloid, solidtumor etc</td></tr>
-<tr><td>platform</td><td>Name of the paltform used for sequencing, eg. illumina</td></tr>
-<tr><td>read1</td><td>Full path to the read 1 fastq file</td></tr>
-<tr><td>read2</td><td>Full path to the read 2 fastq file</td></tr>
-</table>
diff --git a/docs/min_requirements.md b/docs/min_requirements.md
deleted file mode 100644
index e81155b..0000000
--- a/docs/min_requirements.md
+++ /dev/null
@@ -1,11 +0,0 @@
-
-# Minimum Requirements
-
-**Configs**: pipeline.yaml, list_of_softwares.tsv
-
-**Os**: POSIX based
-
-**Cpus**: 1
-
-**Singularity**: >= 3.8.0
diff --git a/docs/output_data.md b/docs/output_data.md
deleted file mode 100644
index dc399db..0000000
--- a/docs/output_data.md
+++ /dev/null
@@ -1,33 +0,0 @@
-
-# Output Data
-
-This pipeline spits out various files from different process, the important once are given below with a brief descripton
-
-Table: Output files and their description
-
-<table>
-<tr><th>Output File Type</th><th>Description</th></tr>
-<tr><td>BAM</td><td>A BAM file is a compressed binary file format used to store and index high-throughput sequencing data, such as DNA sequence reads aligned to a reference genome</td></tr>
-<tr><td>VCF</td><td>A VCF file is a text file format used to store and annotate genetic variation data, such as single nucleotide polymorphisms (SNPs) and small insertions/deletions (indels), identified from sequencing data.</td></tr>
-<tr><td>HTML_Report</td><td>An HTML documentation report is a text-based file format used to present information in a web browser, including text, images, and hyperlinks, typically used for displaying project documentation and results</td></tr>
-<tr><td>Markdown_Files</td><td>A Markdown file is a lightweight markup language used to format and structure plain text documents, often used for creating documentation, README files, and notes</td></tr>
-</table>
diff --git a/docs/profiles.md b/docs/profiles.md
deleted file mode 100644
index 9735521..0000000
--- a/docs/profiles.md
+++ /dev/null
@@ -1,53 +0,0 @@
-
-# Profiles
-
-
-## profile1
-
-**Profile Name:** Profile 1
-
-### Description
-
-Profile 1 is used for Solid Panel
-
-### Usage
-
-add --profile "solid" to the nextflow command
-
-
-### Validation Data
-
-To validate the pipeline, please access the validation data stored on the server using the path provided below,
-
-**/full/path/to/profile1/validation/data**
-
-### Test Data
-
-To test the pipeline, please access the downsampled test data stored on the server using the path provided below,
-
-**/full/path/to/profile1/test/data**
-
-## profile2
-
-**Profile Name:** Profile 2
-
-### Description
-
-Profile 1 is used for AML Panel
-
-### Usage
-
-add --profile "myeloid" to the nextflow command
-
-
-### Validation Data
-
-To validate the pipeline, please access the validation data stored on the server using the path provided below,
-
-**/full/path/to/profile2/validation/data**
-
-### Test Data
-
-To test the pipeline, please access the downsampled test data stored on the server using the path provided below,
-
-**/full/path/to/profile2/test/data**
diff --git a/docs/profiles/Myeloid.md b/docs/profiles/Myeloid.md
new file mode 100644
index 0000000..3d524bb
--- /dev/null
+++ b/docs/profiles/Myeloid.md
@@ -0,0 +1,302 @@
+## Myeloid
+
+
+### Description
+
+The input data for the pipeline consists of fastq files. However, for the pipeline to consume the data, it needs to be provided in the form of a CSV file that includes metadata...
+
+
+### Input CSV
+
+<table>
+<tr><th>sample_id</th><th>type</th><th>assay</th><th>platform</th><th>read1</th><th>read2</th></tr>
+<tr><td>example</td><td>template</td><td>documentation</td><td>mkdocs</td><td>example.1.fastq.gz</td><td>example.2.fastq.gz</td></tr>
+</table>
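As a quick sanity check of the format shown above, the hypothetical snippet below (Python's built-in csv module; the column names are the ones this table documents, and their meanings are described just below) verifies that an input file carries the required header:

```python
# Hypothetical validation snippet -- column names from the table above.
import csv

REQUIRED = {"sample_id", "type", "assay", "platform", "read1", "read2"}


def check_input_csv(path):
    """Raise ValueError if the input csv lacks any required column."""
    with open(path, newline="") as fh:
        header = set(csv.DictReader(fh).fieldnames or [])
    missing = REQUIRED - header
    if missing:
        raise ValueError(f"input csv is missing columns: {sorted(missing)}")


check_input_csv("input.csv")  # path is illustrative
```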
+
+### Column Descriptions
+
+- **sample_id:** Text representing the name or id of the sample being analyzed
+- **type:** Type of the sample, e.g., tumor or normal
+- **assay:** Assay of the sample, e.g., tumorWGS, myeloid, solidtumor, etc.
+- **platform:** Name of the platform used for sequencing, e.g., illumina
+- **read1:** Full path to the read 1 fastq file
+- **read2:** Full path to the read 2 fastq file
+
+### Output Files
+
+- **BAM:** A BAM file is a compressed binary file format used to store and index high-throughput sequencing data, such as DNA sequence reads aligned to a reference genome
+- **VCF:** A VCF file is a text file format used to store and annotate genetic variation data, such as single nucleotide polymorphisms (SNPs) and small insertions/deletions (indels), identified from sequencing data.
+
+### Software Versions
+
+<table>
+<tr><th>Process</th><th>Tool</th><th>Version</th><th>External_Contact_Person</th></tr>
+<tr><td>AGGREGATE_VCFS</td><td>perl</td><td>v5.26.2</td><td>-</td></tr>
+<tr><td>ANNOTATE_VEP</td><td>perl</td><td>v5.26.1</td><td>-</td></tr>
+<tr><td>BQSR_UMI</td><td>sentieon</td><td>v202112</td><td>-</td></tr>
+<tr><td>BWA_UMI</td><td>bwa</td><td>v0.7.17-r1188</td><td>https://github.com/lh3/bwa/issues</td></tr>
+<tr><td></td><td>sentieon</td><td>v202112</td><td>-</td></tr>
+<tr><td>CNVKIT_BATCH</td><td>cnvkit</td><td>v0.9.9</td><td>https://github.com/etal/cnvkit/issues</td></tr>
+<tr><td></td><td>python</td><td>v3.7.1</td><td>-</td></tr>
+<tr><td>CNVKIT_CALL</td><td>cnvkit</td><td>v0.9.9</td><td>https://github.com/etal/cnvkit/issues</td></tr>
+<tr><td></td><td>python</td><td>v3.7.1</td><td>-</td></tr>
+<tr><td>CNVKIT_GENS</td><td>cnvkit</td><td>v0.9.9</td><td>https://github.com/etal/cnvkit/issues</td></tr>
+<tr><td></td><td>python</td><td>v3.7.1</td><td>-</td></tr>
+<tr><td>CNVKIT_PLOT</td><td>cnvkit</td><td>v0.9.9</td><td>https://github.com/etal/cnvkit/issues</td></tr>
+<tr><td></td><td>python</td><td>v3.7.1</td><td>-</td></tr>
+<tr><td>CONCATENATE_VCFS</td><td>vcftools</td><td>v0.1.16</td><td>https://github.com/vcftools/vcftools/issues</td></tr>
+<tr><td></td><td>vt-decompose</td><td>v0.5</td><td>Adiran (atks@umich.edu)</td></tr>
+<tr><td></td><td>vt-normalize</td><td>v0.5</td><td>Adiran (atks@umich.edu)</td></tr>
+<tr><td>CONTAMINATION</td><td>perl</td><td>v5.28.1</td><td>-</td></tr>
+<tr><td>CUSTOM_DUMPSOFTWAREVERSIONS</td><td>python</td><td>v3.11.0</td><td>-</td></tr>
+<tr><td></td><td>yaml</td><td>v6.0</td><td>-</td></tr>
+<tr><td>FILTER_FOR_CNV</td><td>bedtools</td><td>v2.30.0</td><td>https://github.com/arq5x/bedtools2/issues</td></tr>
+<tr><td></td><td>bgzip</td><td>v1.12</td><td>https://github.com/samtools/htslib/issues</td></tr>
+<tr><td></td><td>perl</td><td>v5.26.2</td><td>-</td></tr>
+<tr><td></td><td>tabix</td><td>v1.12</td><td>https://github.com/samtools/htslib/issues</td></tr>
+<tr><td>FREEBAYES</td><td>freebayes</td><td>v1.3.5</td><td>https://github.com/freebayes/freebayes/issues</td></tr>
+<tr><td></td><td>perl</td><td>v5.26.2</td><td>-</td></tr>
+<tr><td></td><td>vcffilter</td><td>v1.0.2</td><td>https://github.com/biopet/vcffilter/issues</td></tr>
+<tr><td>LOWCOV</td><td>sambamba</td><td>v0.8.0</td><td>https://github.com/biod/sambamba/issues</td></tr>
+<tr><td>MARKDUP</td><td>sentieon</td><td>v202112</td><td>-</td></tr>
+<tr><td>MARK_GERMLINES</td><td>perl</td><td>v5.26.2</td><td>-</td></tr>
+<tr><td>MERGE_GENS</td><td>bedtools</td><td>v2.30.0</td><td>https://github.com/arq5x/bedtools2/issues</td></tr>
+<tr><td></td><td>bgzip</td><td>v1.12</td><td>https://github.com/samtools/htslib/issues</td></tr>
+<tr><td></td><td>tabix</td><td>v1.12</td><td>https://github.com/samtools/htslib/issues</td></tr>
+<tr><td>PON_FILTER</td><td>perl</td><td>v5.26.2</td><td>-</td></tr>
+<tr><td>SENTIEON_QC</td><td>sentieon</td><td>v202112</td><td>-</td></tr>
+<tr><td>TNSCOPE</td><td>sentieon</td><td>v202010.01</td><td>-</td></tr>
+<tr><td>VARDICT</td><td>perl</td><td>v5.26.2</td><td>-</td></tr>
+<tr><td></td><td>vardict</td><td>v1.8.2</td><td>https://github.com/AstraZeneca-NGS/VarDict/issues</td></tr>
+<tr><td>Workflow</td><td>Nextflow</td><td>v23.04.2</td><td>https://github.com/nextflow-io/nextflow/issues</td></tr>
+<tr><td></td><td>SomaticPanelPipeline</td><td>v1.0dev</td><td>https://github.com/Clinical-Genomics-Lund/SomaticPanelPipeline/issues</td></tr>
+</table>
diff --git a/docs/profiles/Solid.md b/docs/profiles/Solid.md
new file mode 100644
index 0000000..25d4364
--- /dev/null
+++ b/docs/profiles/Solid.md
@@ -0,0 +1,472 @@
+## Solid
+
+
+### Description
+
+The input data for the pipeline consists of fastq files. However, for the pipeline to consume the data, it needs to be provided in the form of a CSV file that includes metadata...
+
+
+### Input CSV
+
+<table>
+<tr><th>sample_id</th><th>type</th><th>assay</th><th>platform</th><th>read1</th><th>read2</th></tr>
+<tr><td>example</td><td>template</td><td>documentation</td><td>mkdocs</td><td>example.1.fastq.gz</td><td>example.2.fastq.gz</td></tr>
+</table>
+
+### Column Descriptions
+
+- **sample_id:** Text representing the name or id of the sample being analyzed
+- **type:** Type of the sample, e.g., tumor or normal
+- **assay:** Assay of the sample, e.g., tumorWGS, myeloid, solidtumor, etc.
+- **platform:** Name of the platform used for sequencing, e.g., illumina
+- **read1:** Full path to the read 1 fastq file
+- **read2:** Full path to the read 2 fastq file
+
+### Output Files
+
+- **BAM:** A BAM file is a compressed binary file format used to store and index high-throughput sequencing data, such as DNA sequence reads aligned to a reference genome
+- **VCF:** A VCF file is a text file format used to store and annotate genetic variation data, such as single nucleotide polymorphisms (SNPs) and small insertions/deletions (indels), identified from sequencing data.
+- **HTML_Report:** An HTML documentation report is a text-based file format used to present information in a web browser, including text, images, and hyperlinks, typically used for displaying project documentation and results
+- **Markdown_Files:** A Markdown file is a lightweight markup language used to format and structure plain text documents, often used for creating documentation, README files, and notes
+
+### Software Versions
+
| Process | Tool | Version | External_Contact_Person |
| --- | --- | --- | --- |
| AGGREGATE_VCFS | perl | v5.26.2 | - |
| ANNOTATE_VEP | perl | v5.26.1 | - |
| BIOMARKERS_TO_JSON | python | v3.9.2 | - |
| BQSR_UMI | sentieon | v202112 | - |
| BWA_UMI | bwa | v0.7.17-r1188 | https://github.com/lh3/bwa/issues |
| | sentieon | v202112 | - |
| CNVKIT2SCARHRD | perl | v5.26.2 | - |
| CNVKIT_BACKBONE | cnvkit | v0.9.9 | https://github.com/etal/cnvkit/issues |
| | python | v3.7.1 | - |
| CNVKIT_BATCH | cnvkit | v0.9.9 | https://github.com/etal/cnvkit/issues |
| | python | v3.7.1 | - |
| CNVKIT_CALL | cnvkit | v0.9.9 | https://github.com/etal/cnvkit/issues |
| | python | v3.7.1 | - |
| CNVKIT_CALL_TC | cnvkit | v0.9.9 | https://github.com/etal/cnvkit/issues |
| | python | v3.7.1 | - |
| CNVKIT_EXONS | cnvkit | v0.9.9 | https://github.com/etal/cnvkit/issues |
| | python | v3.7.1 | - |
| CNVKIT_GENS | cnvkit | v0.9.9 | https://github.com/etal/cnvkit/issues |
| | python | v3.7.1 | - |
| CNVKIT_PLOT | cnvkit | v0.9.9 | https://github.com/etal/cnvkit/issues |
| | python | v3.7.1 | - |
| CONCATENATE_VCFS | vcftools | v0.1.16 | https://github.com/vcftools/vcftools/issues |
| | vt-decompose | v0.5 | Adiran (atks@umich.edu) |
| | vt-normalize | v0.5 | Adiran (atks@umich.edu) |
| CONTAMINATION | perl | v5.28.1 | - |
| COYOTE_SEGMENTS | perl | v5.26.2 | - |
| CUSTOM_DUMPSOFTWAREVERSIONS | python | v3.11.0 | - |
| | yaml | v6.0 | - |
| FFPE_PON_FILTER | perl | v5.26.2 | - |
| FILTER_FOR_CNV | bedtools | v2.30.0 | https://github.com/arq5x/bedtools2/issues |
| | bgzip | v1.12 | https://github.com/samtools/htslib/issues |
| | perl | v5.26.2 | - |
| | tabix | v1.12 | https://github.com/samtools/htslib/issues |
| FILTER_MANTA | perl | v5.26.2 | - |
| FILTER_MANTA_TUMOR | perl | v5.26.2 | - |
| FREEBAYES | freebayes | v1.3.5 | https://github.com/freebayes/freebayes/issues |
| | perl | v5.26.2 | - |
| | vcffilter | v1.0.2 | https://github.com/biopet/vcffilter/issues |
| GATK2VCF | python | v3.9.2 | - |
| GATKCOV_BAF | gatk4 | v4.1.9.0-SNAPSHOT | https://github.com/broadinstitute/gatk/issues |
| GATKCOV_CALL | gatk4 | v4.1.9.0-SNAPSHOT | https://github.com/broadinstitute/gatk/issues |
| GATKCOV_COUNT | gatk4 | v4.1.9.0-SNAPSHOT | https://github.com/broadinstitute/gatk/issues |
| GENEFUSE | genefuse | v0.8.0 | - |
| GENEFUSE_JSON_TO_VCF | python | v3.9.2 | - |
| JOIN_FUSIONS | svdb | v2.2.0 | https://github.com/J35P312/SVDB/issues |
| JOIN_TUMOR | svdb | v2.2.0 | https://github.com/J35P312/SVDB/issues |
| LOWCOV | sambamba | v0.8.0 | https://github.com/biod/sambamba/issues |
| MANTA | manta | v1.6.0 | https://github.com/Illumina/manta/issues |
| | python | v2.7.15 | - |
| MANTA_FUSIONS | manta | v1.6.0 | https://github.com/Illumina/manta/issues |
| | python | v2.7.15 | - |
| MARKDUP | sentieon | v202112 | - |
| MARK_GERMLINES | perl | v5.26.2 | - |
| MERGE_GATK_TUMOR | perl | v5.26.2 | - |
| MERGE_GENS | bedtools | v2.30.0 | https://github.com/arq5x/bedtools2/issues |
| | bgzip | v1.12 | https://github.com/samtools/htslib/issues |
| | tabix | v1.12 | https://github.com/samtools/htslib/issues |
| MSISENSOR | msisensor-pro | v1.2.0 | - |
| PON_FILTER | perl | v5.26.2 | - |
| SCARHRD | Rscript | v4.1.0 | - |
| SENTIEON_QC | sentieon | v202112 | - |
| SEQTK | seqtk | v1.3-r106 | https://github.com/lh3/seqtk/issues |
| SNPEFF | snpEff | v4.3t | https://github.com/pcingola/SnpEff/issues |
| VARDICT | perl | v5.26.2 | - |
| | vardict | v1.8.2 | https://github.com/AstraZeneca-NGS/VarDict/issues |
| Workflow | Nextflow | v23.04.2 | https://github.com/nextflow-io/nextflow/issues |
| | SomaticPanelPipeline | v1.0dev | https://github.com/Clinical-Genomics-Lund/SomaticPanelPipeline/issues |
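This table appears to be rendered from the profile's versions file (added later in this diff as static/profiles/solid/versions.yml), joined against static/files/tool_descriptions.tsv by the generator. The raw YAML simply groups tools under each process, for example:

```
AGGREGATE_VCFS:
  perl: 5.26.2
BWA_UMI:
  bwa: 0.7.17-r1188
  sentieon: '202112'
Workflow:
  Nextflow: 23.04.2
  SomaticPanelPipeline: 1.0dev
```

Versions without a leading "v" are prefixed with one by map_tool_descriptions in src/classes.py before the table is written.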
diff --git a/docs/requirements.txt b/docs/requirements.txt index 8f0f216..2a728c4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,66 +1,25 @@ -# -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: -# -# pip-compile docs/requirements.in -# click==8.1.3 - # via mkdocs ghp-import==2.1.0 - # via mkdocs griffe==0.22.0 - # via mkdocstrings-python importlib-metadata==4.12.0 - # via mkdocs -jinja2==3.1.2 - # via - # mkdocs - # mkdocstrings +jinja2<3.1.0 markdown==3.3.7 - # via - # markdown-include - # mkdocs - # mkdocs-autorefs - # mkdocstrings - # pymdown-extensions markdown-include==0.6.0 - # via -r docs/requirements.in markupsafe==2.1.1 - # via - # jinja2 - # mkdocstrings mergedeep==1.3.4 - # via mkdocs -mkdocs==1.3.0 - # via - # -r docs/requirements.in - # mkdocs-autorefs - # mkdocstrings +mkdocs==1.4.2 mkdocs-autorefs==0.4.1 - # via mkdocstrings mkdocstrings[python]==0.19.0 - # via - # -r docs/requirements.in - # mkdocstrings-python mkdocstrings-python==0.7.1 - # via mkdocstrings packaging==21.3 - # via mkdocs -pymdown-extensions==9.5 - # via mkdocstrings pyparsing==3.0.9 - # via packaging python-dateutil==2.8.2 - # via ghp-import pyyaml==6.0 - # via - # mkdocs - # pyyaml-env-tag pyyaml-env-tag==0.1 - # via mkdocs six==1.16.0 - # via python-dateutil watchdog==2.1.9 - # via mkdocs -zipp==3.8.0 - # via importlib-metadata \ No newline at end of file +pymdown-extensions==9.10 +mkdocs-schema-reader==0.11.1 +mkdocs-simple-hooks==0.1.5 +mdx_spanner==0.0.5 +mkdocs-yaml-schema-plugin==0.2.3 \ No newline at end of file diff --git a/docs/software_stack.md b/docs/software_stack.md deleted file mode 100644 index 3535a1a..0000000 --- a/docs/software_stack.md +++ /dev/null @@ -1,177 +0,0 @@ - -# Software Stack - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Process | Tool | Version | Type | Container_Name | Description | Contact_Person |
| --- | --- | --- | --- | --- | --- | --- |
| bwa_umi | Sentieon BWA | v202010.01 | Commercial | SomaticPanelPipeline_2021-06-24.sif | Sentieon develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. | |
| | Sentieon UMI | v202010.01 | Commercial | SomaticPanelPipeline_2021-06-24.sif | Sentieon develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. | |
| | Sentieon UTIL | v202010.01 | Commercial | SomaticPanelPipeline_2021-06-24.sif | Sentieon develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. | |
| | SAMtools | v1.9 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | Tools (written in C using htslib) for manipulating next-generation sequencing data | https://github.com/samtools/samtools/issues |
| bwa_align | Sentieon BWA-MEM | v202010.01 | Commercial | SomaticPanelPipeline_2021-06-24.sif | Sentieon develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. | |
| | BWA-MEM | v0.7.17-r1188 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | Read alignment and mapping | https://github.com/lh3/bwa/issues |
| markdup | Sentieon DRIVER | v202010.01 | Commercial | SomaticPanelPipeline_2021-06-24.sif | Sentieon develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. | |
| lowcov | Sambamba | v0.8.0 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | Tools for working with SAM/BAM data | https://github.com/biod/sambamba/issues |
| freebayes | freebayes | v1.6.0 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | Bayesian haplotype-based polymorphism discovery. | https://github.com/freebayes/freebayes/issues |
| vardict | VarDict | v | Open-Source | SomaticPanelPipeline_2021-06-24.sif | VarDict is a variant calling program for SNV, MNV, indels (<50 bp), and complex variants. | https://github.com/AstraZeneca-NGS/VarDict/issues |
| tnscope | Sentieon DRIVER | v202010.01 | Commercial | SomaticPanelPipeline_2021-06-24.sif | Sentieon develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. | |
| pindel | Pindel | v0.2.5b9, 20160729 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | Pindel can detect breakpoints of large deletions, medium sized insertions, inversions, tandem duplications and other structural variants at single-based resolution from next-gen sequence data. It uses a pattern growth approach to identify the breakpoints of these variants from paired-end short reads. | kaiye@xjtu.edu.cn |
| concatenate_vcfs | VCFtools | v0.1.16 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | A set of tools written in Perl and C++ for working with VCF files, such as those generated by the 1000 Genomes Project. | https://github.com/vcftools/vcftools/issues |
| | Vt | v0.5 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | vt is a variant tool set that discovers short variants from Next Generation Sequencing data. | Adiran (atks@umich.edu) |
| cnvkit | CNVkit | v0.9.9 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | Genome-wide copy number from high-throughput sequencing | https://github.com/etal/cnvkit/issues |
| gene_plot | CNVkit | v0.9.5 | Open-Source | SomaticPanelPipeline_2021-06-24.sif | Genome-wide copy number from high-throughput sequencing | https://github.com/etal/cnvkit/issues |
| annotate_vep | VEP | v103.1 | Open-Source | ensembl-vep_release_103.sif | The Ensembl Variant Effect Predictor predicts the functional effects of genomic variants | https://github.com/Ensembl/ensembl-vep/issues |
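This hand-maintained stack table appears to be superseded in this diff by static/files/tool_descriptions.tsv plus per-profile versions.yml files; a row of the new tab-separated mapping carries the same information in a form the generator can join on tool name, e.g. the sambamba entry:

```
Tool	Type	URL	External_Contact_Person	Short_Description	Reference
sambamba	Open-Source	https://github.com/biod/sambamba	https://github.com/biod/sambamba/issues	Tools for working with SAM/BAM data	https://doi.org/10.1093/bioinformatics/btv098
```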
diff --git a/docs/usage.md b/docs/usage.md deleted file mode 100644 index ba373ed..0000000 --- a/docs/usage.md +++ /dev/null @@ -1,13 +0,0 @@ - -# Usage - -To generate offline HTML documentation and an online Markdown(MD) file in your project folder, clone the repo and execute the bash script bin/runMe.sh. - -To learn how to parse parameters for the script, run, - -`bin/runMe.sh -h` - -To create an example script using the provided template files in the repo location, run, - -`bin/runMe.sh -e` - diff --git a/docs/workflow.md b/docs/workflow.md deleted file mode 100644 index 54cc757..0000000 --- a/docs/workflow.md +++ /dev/null @@ -1,6 +0,0 @@ - -# Analysis Workflow - -Schematic diagram showing the main steps of the analysis method followed to perform the data analysis. - -![workflow](https://raw.githubusercontent.com/ramsainanduri/pipeline_documentation/dev/templates/template_workflow.png) diff --git a/envs/get_containers.sh b/envs/get_containers.sh deleted file mode 100644 index ac8cb4e..0000000 --- a/envs/get_containers.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# Run me in envs folder to get sinuglarities needed -# Please adjust paths of sif location in appropriate config_file -# requires sudo permissions - -sudo singularity build pipeline_documentation_v1.0.sif docker://ramsainanduri/ubuntu_pipeline_documentation:v1.0 diff --git a/envs/recipies/pipeline_documentation b/envs/recipies/pipeline_documentation deleted file mode 100644 index f389d31..0000000 --- a/envs/recipies/pipeline_documentation +++ /dev/null @@ -1,56 +0,0 @@ -Bootstrap: docker -From: ubuntu:20.04 - -%post - export DEBIAN_FRONTEND=noninteractive - - apt-get -y update - apt-get -y install language-pack-sv-base - echo 'export LANG=sv_SE.UTF-8' >>$SINGULARITY_ENVIRONMENT - - apt-get -y install gnupg - apt-get -y install software-properties-common apt-utils - apt-get -y update - - apt-get -y upgrade - apt-get install -y vim nano less - - apt-get -y install libcurl4-openssl-dev - apt-get -y install libssl-dev - apt-get -y install libxml2-dev - apt-get -y install pandoc - apt-get -y install texlive-latex-base - apt-get -y install texlive-fonts-recommended - apt-get -y install texlive-fonts-extra - - apt-get -y install texlive-latex-extra - apt-get -y install curl wget - apt-get -y install libfontconfig1-dev - apt-get -y install pip - pip install mkdocs - pip install pandas - pip install markdown-include - pip install mkdocstrings - - apt-get -y install r-base - - R --slave -e 'install.packages("rmarkdown", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("kableExtra", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("knitr", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("forcats", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("stringr", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("dplyr", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("purrr", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("readr", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("tidyr", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("rlang", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("tibble", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R --slave -e 'install.packages("tidyverse", repos="https://ftp.acc.umu.se/mirror/CRAN/")' - R 
--slave -e 'install.packages("ggplot2", repos="https://ftp.acc.umu.se/mirror/CRAN/")'
- R --slave -e 'install.packages("DT", repos="https://ftp.acc.umu.se/mirror/CRAN/")'
- R --slave -e 'install.packages("readxl", repos="https://ftp.acc.umu.se/mirror/CRAN/")'
- R --slave -e 'install.packages("rmdformats", repos="https://ftp.acc.umu.se/mirror/CRAN/")'
- R --slave -e 'install.packages("plotly", repos="https://ftp.acc.umu.se/mirror/CRAN/")'
- R --slave -e 'install.packages("reshape2", repos="https://ftp.acc.umu.se/mirror/CRAN/")'
- R --slave -e 'install.packages("cowplot", repos="https://ftp.acc.umu.se/mirror/CRAN/")'
- R --slave -e 'tinytex::install_tinytex(force = TRUE)' \ No newline at end of file

diff --git a/envs/requirements.txt b/envs/requirements.txt new file mode 100644 index 0000000..434d10b --- /dev/null +++ b/envs/requirements.txt @@ -0,0 +1,4 @@
+## Requirements
+pyyaml==6.0.1
+pandas==2.1.2
+tabulate==0.9.0

diff --git a/main.py b/main.py new file mode 100644 index 0000000..3b6d960 --- /dev/null +++ b/main.py @@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import shutil
+
+from src.classes import MkDocsProfileGenerator
+from src.config import Config
+
+class ArgumentParser:
+    """
+    A command line argument parser for a pipeline documentation tool.
+
+    Args:
+        config (object): An object containing configuration values for the pipeline documentation tool.
+
+    Attributes:
+        config (object): An object containing configuration values for the pipeline documentation tool.
+        parser (argparse.ArgumentParser): An instance of the argparse.ArgumentParser class.
+
+    Methods:
+        setup_arguments(): Adds command line arguments to the parser.
+        parse_args(): Parses command line arguments and returns the result.
+    """
+
+    def __init__(self, config=None):
+        self.config = config
+        self.parser = argparse.ArgumentParser(description="Generate MkDocs/Read the Docs documentation for a pipeline from a pipeline YAML file")
+        self.setup_arguments()
+
+    def setup_arguments(self):
+        self.parser.add_argument("-e", "--example", action="store_true", help="Create example_command.sh")
+        self.parser.add_argument("-g", "--github", default=self.config.GITHUB_URL, help="GitHub URL to your project/pipeline")
+        self.parser.add_argument("-m", "--mkdocs_config", default=self.config.MKDOCS_YAML, help="Full path to the mkdocs config YAML file")
+        self.parser.add_argument("-p", "--project_dir", default=os.getcwd(), help="Project directory")
+        self.parser.add_argument("-r", "--readthedocs_config", default=self.config.READTHEDOCS_YAML, help="Full path to the readthedocs config YAML file")
+        self.parser.add_argument("-t", "--tool_descriptions", default=self.config.TOOL_DESCRIPTIONS, help="Tool description mapping file")
+        self.parser.add_argument("-y", "--yaml", default=self.config.PIPELINE_YAML, help="Full path to the pipeline YAML file")
+        self.parser.add_argument("-v", "--version", action="store_true", help="Print version")
+
+    def parse_args(self):
+        return self.parser.parse_args()
+
+def main():
+    """
+    Entry point of the script. Parses command line arguments and generates documentation or other tasks based on the arguments.
+ + Args: + None + + Returns: + None + """ + cfg = Config() + arg_parser = ArgumentParser(cfg) + args = arg_parser.parse_args() + + # Update the config with the arguments + cfg.mkdocs = args.mkdocs_config + cfg.readthedocs = args.readthedocs_config + mkdocs_config = cfg.load_mkdocs() + # readthedocs_config = cfg.load_readthedocs() + + if args.example: + # Create example_command.sh + example_command = f"python {__file__} -g {args.github} -y {args.yaml}" + with open("example_command.sh", "w") as example_file: + example_file.write(f"{example_command}\n") + print(f"Created example_command.sh: {example_command}") + elif args.version: + print("Your script version") + else: + # Implement the logic for generating documentation or other tasks based on the arguments + # Create the MkDocs profile + mkdocs_profile = MkDocsProfileGenerator(args.project_dir, args.yaml, args.tool_descriptions, mkdocs_config) + mkdocs_profile.generate() + + shutil.copyfile(cfg.readthedocs, os.path.join(args.project_dir, ".readthedocs.yaml")) + shutil.copyfile(cfg.PYTHON_REQUIREMENTS, os.path.join(args.project_dir, "docs/requirements.txt")) + shutil.copyfile(cfg.EXTRA_CSS, os.path.join(args.project_dir, "docs/extra.css")) + + +if __name__ == "__main__": + main() diff --git a/mkdocs.yml b/mkdocs.yml index 4472586..26d814e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,45 +1,44 @@ -site_name: pipeline_documentation -site_url: https://github.com/ramsainanduri/pipeline_documentation.git - -github_url: https://github.com/ramsainanduri/pipeline_documentation.git - +site_name: Pipeline Documentation +site_url: https://github.com/ramsainanduri/pipeline_documentation +github_url: https://github.com/ramsainanduri/pipeline_documentation +nav: +- Home: index.md +- Introduction: Introduction.md +- Scope: Scope.md +- Min_requirements: Min_requirements.md +- Usage: Usage.md +- Pipeline Components: Pipeline Components.md +- Profiles: + - Myeloid: profiles/Myeloid.md + - Solid: profiles/Solid.md +- Workflow Diagram: Workflow Diagram.md theme: name: readthedocs logo: https://raw.githubusercontent.com/ramsainanduri/pipeline_documentation/dev/templates/rs-logo-color.svg highlightjs: true hljs_languages: - - yaml - - rust + - yaml + - rust plugins: - - search: - indexing: 'full' - - mkdocstrings: - handlers: - # See: https://mkdocstrings.github.io/python/usage/ - python: - options: - docstring_style: sphinx +- search: + indexing: full +- mkdocstrings: + handlers: + python: + options: + docstring_style: sphinx markdown_extensions: - - toc: - permalink: True - - markdown_include.include: - base_path: . - - admonition - +- abbr +- attr_list +- pymdownx.details +- toc: + permalink: true +- markdown_include.include: + base_path: . +- admonition extra: generator: false - -nav: - - 'Pipeline Information': 'index.md' - - Introduction: 'introduction.md' - - Scope: 'scope.md' - - 'Input Data': 'input_data.md' - - 'Output Data': 'output_data.md' - - 'Minimum Requirements': 'min_requirements.md' - - Usage: 'usage.md' - - 'Pipeline Components': 'pipeline_components.md' - - 'Software Stack': 'software_stack.md' - - Profiles: 'profiles.md' - - Workflow: 'workflow.md' - -copyright: Copyright © 2023 | Center for Molecular Diagnostics (CMD) | Region Skåne | Sölvegatan 23B, Lund. +extra_css: +- extra.css +copyright: "Copyright © 2023 | Center for Molecular Diagnostics (CMD) | Region\ + \ Sk\xE5ne | S\xF6lvegatan 23B, Lund." 
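As a usage sketch (the flags and defaults come from the argument parser above and src/config.py below; the project path is a placeholder), generating documentation for a project and building the site locally might look like:

```
# Generate docs/ and mkdocs.yml inside the target project
python main.py \
    -p /path/to/my_pipeline_repo \
    -y static/templates/pipeline.yaml \
    -t static/files/tool_descriptions.tsv \
    -g https://github.com/ramsainanduri/pipeline_documentation

# Build the generated site with the pinned requirements
cd /path/to/my_pipeline_repo
pip install -r docs/requirements.txt
mkdocs build    # or: mkdocs serve, to preview locally
```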
diff --git a/pipeline_documentation.html b/pipeline_documentation.html deleted file mode 100644 index a0b1d1a..0000000 --- a/pipeline_documentation.html +++ /dev/null @@ -1,3314 +0,0 @@

[3,314 deleted lines of rendered HTML omitted: the offline "GMS-Myeloid Pipeline" report, whose Pipeline Details, Introduction, Scope, Input/Output Data, Minimum Requirements, Usage, Pipeline Components, and Software Stack sections duplicate the Markdown content generated elsewhere in this diff.]
diff --git a/src/classes.py b/src/classes.py new file mode 100644 index 0000000..0afd6b1 --- /dev/null +++ b/src/classes.py @@ -0,0 +1,305 @@
+#!/usr/bin/env python3
+
+import os
+import yaml
+import shutil
+import pandas as pd
+from tabulate import tabulate
+
+class FileParser:
+    """
+    A class that provides methods to parse different types of files and convert them to various formats.
+
+    Attributes:
+    - file_extension_map (dict): A dictionary that maps file extensions to corresponding file reading and formatting functions.
+    - tool_desc_columns (list): A list of column names for tool descriptions.
+    - tool_col_headers (list): A list of column names for tool versions.
+
+    Methods:
+    - parse(file_path, tool_descriptions=False): Parses the file at the given file path and returns a DataFrame or a subset of it based on the file extension.
+    - yaml_file(file_path): Reads a YAML file and returns its contents as a dictionary.
+    - read_file(file_path, sep='\t', tool_descriptions=False): Reads a CSV or TSV file and returns its contents as a DataFrame or a subset of it based on the tool_descriptions flag.
+    - create_df_from_dict(dict_data, columns=None): Converts a dictionary to a DataFrame.
+    - convert_df_to_markdown(df): Converts a DataFrame to a Markdown table.
+    - convert_df_to_html(df): Converts a DataFrame to an HTML table.
+    - read_image_file(file_path): Reads an image file and returns a Markdown image link.
+    - markdown_table(header, body): Creates a Markdown table from a header and a body.
+    - write_html_table_to_markdown(md_file, html_table, css_class='table table-striped table-bordered table-hover table-condensed table-responsive'): Writes an HTML table to a Markdown file with custom CSS styles.
+    - map_tool_descriptions(versions_dict, mapping_df): Maps tool descriptions to tool versions and returns a DataFrame.
+ """ + def __init__(self): + self.file_extension_map = { + '.txt': self.read_file, + '.tsv': self.read_file, + '.tab': self.read_file, + '.csv': self.read_file, + '.png': self.read_image_file, + '.jpg': self.read_image_file, + '.jpeg': self.read_image_file, + '.gif': self.read_image_file, + '.svg': self.read_image_file, + '.yaml': self.yaml_file, + '.yml': self.yaml_file + } + self.tool_desc_columns = ['Tool', 'URL', 'External_Contact_Person'] + self.tool_col_headers = ['Process', 'Tool', 'Version', 'External_Contact_Person'] + + def parse(self, file_path, tool_descriptions=False): + # Determine the file extension + file_extension = os.path.splitext(file_path)[-1].lower() + + # Check if the file extension is supported + if file_extension in self.file_extension_map: + if file_extension == '.csv': + # Call the corresponding file reading and formatting function + return self.file_extension_map[file_extension](file_path, sep=',', tool_descriptions=tool_descriptions) + elif file_extension in ['.yaml', '.yml']: + # Call the corresponding file reading and formatting function + return self.file_extension_map[file_extension](file_path) + else: + # Call the corresponding file reading and formatting function + return self.file_extension_map[file_extension](file_path, tool_descriptions=tool_descriptions) + else: + # Handle unsupported file types or extensions + return f"Unsupported file type: {file_extension}" + + def yaml_file(self, file_path): + with open(file_path, 'r') as yaml_data: + return yaml.load(yaml_data, Loader=yaml.SafeLoader) + + def read_file(self, file_path, sep='\t', tool_descriptions=False): + # Read the file content + df = pd.read_csv(file_path, sep=sep, header=0) + + if tool_descriptions: + subset_df = df.loc[:, self.tool_desc_columns] + return subset_df + else: + return df + + def create_df_from_dict(self, dict_data, columns=None): + if not columns: + columns = ['Key', 'Value'] + df = pd.DataFrame(dict_data.items(), columns=columns) + return df + + def convert_df_to_markdown(self, df): + # Convert the DataFrame to a Markdown table + markdown_table = tabulate(df, tablefmt='pipe', headers='keys', showindex=False) + return markdown_table + + def convert_df_to_html(self, df): + # Convert the DataFrame to a Markdown table + html_table = df.to_html(escape=False, index=False) + return html_table + + def read_image_file(self, file_path): + # Return a markdown image link for an image file + base_name = os.path.basename(file_path) + return f'![{base_name}]({file_path})\n\n' + + def markdown_table(self, header, body): + table = f"| {' | '.join(header)} |\n" + table += f"| {' | '.join(['---'] * len(header))} |\n" + for row in body: + table += f"| {' | '.join(row)} |\n" + return table + + def write_html_table_to_markdown(self, md_file, html_table, css_class='table table-striped table-bordered table-hover table-condensed table-responsive'): + # Write the HTML code with custom CSS styles + md_file.write(f'
\n') + md_file.write(f"{html_table}\n") + md_file.write('
\n') + + + def map_tool_descriptions(self, versions_dict, mapping_df): + # Convert the dictionary into a list of dictionaries + versions_list = [] + for process, tools in versions_dict.items(): + for tool, version in tools.items(): + if str(version).startswith('v') or str(version).startswith('V'): + continue + else: + version = f"v{version}" + versions_list.append({'Process': process, 'Tool': tool, 'Version': version}) + df_versions = pd.DataFrame(versions_list) + + # Merge the two DataFrames based on the "Tool" column + result_df = pd.merge(df_versions, mapping_df, on='Tool', how='left') + + # Fill any NaN values in the "Process" column with an empty string + result_df['Process'].fillna('', inplace=True) + + # Format the "Tool" column as a link + #result_df['Tool'] = result_df.apply(lambda row: f'[{row["Tool"]}]({row["URL"]})', axis=1) + result_df['Tool'] = result_df.apply(lambda row: f'{row.Tool}', axis=1) + + # Remove duplicate names in the "Process" column and replace them with empty values + result_df['Process'] = result_df['Process'].where(~result_df['Process'].duplicated(), '') + + # Replace NaN values with "-" + result_df.fillna('-', inplace=True) + + # Reorder the columns as per your desired output + result_df = result_df[self.tool_col_headers] + + return result_df + + +class MkDocsProfileGenerator(FileParser): + """ + A class for generating a MkDocs project structure and configuration based on a YAML input file. + + Args: + project_dir (str): The path to the directory where the MkDocs project will be created. + input_yaml_file (str): The path to the YAML input file. + tool_descriptions (str, optional): The path to a CSV file containing tool descriptions. Defaults to None. + mkdocs_cfg (dict, optional): A dictionary containing additional configuration settings for the MkDocs project. Defaults to None. + + Attributes: + project_dir (str): The path to the directory where the MkDocs project will be created. + content_dir (str): The path to the directory where the MkDocs project content will be stored. + input_yaml_file (str): The path to the YAML input file. + tool_descriptions (str): The path to a CSV file containing tool descriptions. + tools_df (pandas.DataFrame): A pandas DataFrame containing the tool descriptions. + mkdocs_cfg (dict): A dictionary containing additional configuration settings for the MkDocs project. + nav (list): A list containing the navigation structure for the MkDocs project. + + Methods: + generate_section_files(section_type, section_data): Generates the markdown files for a given section type and section data. + generate(): Generates the MkDocs project structure and configuration. 
+ """ + def __init__(self, project_dir, input_yaml_file, tool_descriptions=None, mkdocs_cfg=None): + super().__init__() + self.project_dir = project_dir + self.content_dir = os.path.join(project_dir, 'docs') + self.input_yaml_file = input_yaml_file + self.tool_descriptions = tool_descriptions + self.tools_df = self.parse(tool_descriptions, tool_descriptions=True) + self.mkdocs_cfg = mkdocs_cfg + # Initialize the nav structure + self.nav = [{'Home': 'index.md'}] + + def generate_section_files(self, section_type, section_data): + + section_nav = [] + # Create the markdown files for each section + if section_type == 'profiles': + self.content_dir = os.path.join(self.project_dir, 'docs/profiles') + if not os.path.exists(self.content_dir): + os.makedirs(self.content_dir) + heading_level = 2 + else: + self.content_dir = os.path.join(self.project_dir, 'docs') + heading_level = 1 + + for section in section_data: + section_name = section.get('name') + section_path = os.path.join(self.content_dir, f"{section_name}.md") + if section_type == 'profiles': + section_nav.append({section_name: f"profiles/{section_name}.md"}) + else: + section_nav.append({section_name: f"{section_name}.md"}) + + with open(section_path, 'w') as section_file: + section_file.write(f"{'#' * heading_level} {section_name}\n\n") + headings = section.get('headings', []) + for heading in headings: + heading_name = heading.get('name') + heading_type = heading.get('type') + heading_content = heading.get('content') + + if heading_name and heading_type: + section_file.write(f"\n{'#' * (heading_level + 1)} {heading_name}\n\n") + + if heading_type == 'text': + section_file.write(f'{heading_content}\n\n') + + elif heading_type == 'list': + for item in heading_content: + section_file.write(f'- {item}\n') + section_file.write('\n') + + elif heading_type in ['dictionary', 'dict']: + for key, value in heading_content.items(): + section_file.write(f'- **{key}:** {value}\n') + section_file.write('\n') + + elif heading_type == 'table': + dict_df = self.create_df_from_dict(heading_content) + dict_table = self.convert_df_to_html(dict_df) + self.write_html_table_to_markdown(section_file, dict_table) + + elif heading_type == 'image': + section_file.write(self.read_image_file(heading_content)) + + elif heading_type == 'file': + file_df = self.parse(heading_content) + file_table = self.convert_df_to_html(file_df) + self.write_html_table_to_markdown(section_file, file_table) + + elif heading_type == 'versions': + versions_df = self.parse(heading_content) + if section_type == 'profiles': + versions_mapped_df = self.map_tool_descriptions(versions_df, self.tools_df) + else: + versions_mapped_df = versions_df + versions_table = self.convert_df_to_html(versions_mapped_df) + self.write_html_table_to_markdown(section_file, versions_table) + + if section_type == 'profiles': + self.nav.append({'Profiles': section_nav}) + else: + for elem in section_nav: + self.nav.append(elem) + + print(f"Created {section_type} section files.") + + + def generate(self): + if not os.path.exists(self.project_dir): + os.makedirs(self.project_dir) + + if os.path.exists(self.content_dir): + shutil.rmtree(self.content_dir) + + os.makedirs(self.content_dir) + + + pipeline_data = self.parse(self.input_yaml_file) + + for id in pipeline_data.keys(): + if id == 'pipeline': + # Create the index.md file + with open(os.path.join(self.content_dir, 'index.md'), 'w') as index_file: + index_file.write(f"## Pipeline Description\n\n") + info = pipeline_data.get('pipeline', {}).get('info', {}) + + 
if info: + info_df = self.create_df_from_dict(info, ['Info', 'Description']) + info_table = self.convert_df_to_html(info_df) + self.write_html_table_to_markdown(index_file, info_table) + else: + index_file.write(f"No description provided.\n\n") + else: + section_data = pipeline_data.get(id, []) + self.generate_section_files(id, section_data) + + # Create the mkdocs.yml file and append the nav structure with the config settings + + mkdocs_final = { + 'site_name': pipeline_data.get('pipeline', {}).get('info', {}).get('name', 'Documentation Site'), + 'site_url': pipeline_data.get('pipeline', {}).get('info', {}).get('git_repo', ''), + 'github_url': pipeline_data.get('pipeline', {}).get('info', {}).get('git_repo', ''), + 'nav': self.nav + } + + if self.mkdocs_cfg: + mkdocs_final.update(self.mkdocs_cfg) + + mkdocs_yml_path = os.path.join(self.project_dir, 'mkdocs.yml') + with open(mkdocs_yml_path, 'w') as mkdocs_yml_file: + yaml.dump(mkdocs_final, mkdocs_yml_file, default_flow_style=False, sort_keys=False, indent=2) + + print("MkDocs project structure and configuration created.") + diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..ffe0ff8 --- /dev/null +++ b/src/config.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +import yaml + +class Config: + """ + A class for managing configuration settings for the pipeline documentation project. + + Attributes: + MKDOCS_YAML (str): The file path for the default mkdocs.yaml configuration file. + READTHEDOCS_YAML (str): The file path for the default readthedocs.yaml configuration file. + TOOL_DESCRIPTIONS (str): The file path for the tool descriptions TSV file. + PIPELINE_YAML (str): The file path for the pipeline YAML template file. + GITHUB_URL (str): The URL for the GitHub repository for the pipeline documentation project. + GITHUB_PAGES_YAML (str): The file path for the default GitHub Pages configuration file. + PYTHON_REQUIREMENTS (str): The file path for the Python requirements file. + EXTRA_CSS (str): The file path for the extra CSS file. + + Methods: + __init__(self, mkdocs=None, readthedocs=None): Initializes a new Config instance with optional mkdocs and readthedocs file paths. + load_mkdocs(self): Loads the mkdocs configuration file and returns its contents as a dictionary. + load_readthedocs(self): Loads the readthedocs configuration file and returns its contents as a dictionary. + load_yaml(self, file_path): Loads a YAML file at the specified file path and returns its contents as a dictionary. + """ + + MKDOCS_YAML = 'configs/default.mkdocs.yml' + READTHEDOCS_YAML = 'configs/default.readthedocs.yaml' + TOOL_DESCRIPTIONS = 'static/files/tool_descriptions.tsv' + PIPELINE_YAML = 'static/templates/pipeline.yaml' + GITHUB_URL = 'https://github.com/ramsainanduri/pipeline_documentation' + GITHUB_PAGES_YAML = 'configs/default.github.pages.yaml' + PYTHON_REQUIREMENTS = 'static/files/requirements.txt' + EXTRA_CSS = 'static/css/extra.css' + + def __init__(self, mkdocs=None, readthedocs=None): + """ + Initializes a new Config instance with optional mkdocs and readthedocs file paths. + + Args: + mkdocs (str, optional): The file path for the mkdocs configuration file. Defaults to None. + readthedocs (str, optional): The file path for the readthedocs configuration file. Defaults to None. + """ + self.mkdocs = mkdocs + self.readthedocs = readthedocs + + def load_mkdocs(self): + """ + Loads the mkdocs configuration file and returns its contents as a dictionary. 
+ + Returns: + dict: The contents of the mkdocs configuration file as a dictionary. + """ + if self.mkdocs: + return self.load_yaml(self.mkdocs) + else: + return None + + def load_readthedocs(self): + """ + Loads the readthedocs configuration file and returns its contents as a dictionary. + + Returns: + dict: The contents of the readthedocs configuration file as a dictionary. + """ + if self.readthedocs: + return self.load_yaml(self.readthedocs) + else: + return None + + def load_yaml(self, file_path): + """ + Loads a YAML file at the specified file path and returns its contents as a dictionary. + + Args: + file_path (str): The file path for the YAML file to load. + + Returns: + dict: The contents of the YAML file as a dictionary. + """ + with open(file_path, 'r') as config_file: + return yaml.load(config_file, Loader=yaml.FullLoader) + diff --git a/static/css/extra.css b/static/css/extra.css new file mode 100644 index 0000000..af60c18 --- /dev/null +++ b/static/css/extra.css @@ -0,0 +1,63 @@ +.wy-nav-content { + max-width: 80% !important; +} + +h1,h2 { + margin-top: 50px; + margin-bottom: 10px !important; +} + +h3 { + font-size: 18px; + margin-top: 30px; + margin-bottom: 10px !important; + +} + +h4 { + font-size: 17px; + font-style: italic; + margin-top: 30px; + margin-bottom: 10px !important; + +} + +h5 { + font-size: 16px; + font-style: italic; + margin-top: 30px; + margin-bottom: 10px !important; + +} + +p { + margin-top: 5px !important; +} + +* { + margin-bottom: 0px !important; +} + +table tr { + text-align: left !important; +} + +table td { + min-width: 10px; + max-width: 800px; + text-align: left !important; +} + +table td:last-child { + width: 100%; +} + +hr { + margin-top: 50px; + border: 1px solid black; +} + +.twemoji { + width: 20px; + color: black; +} \ No newline at end of file diff --git a/static/files/requirements.txt b/static/files/requirements.txt new file mode 100755 index 0000000..2a728c4 --- /dev/null +++ b/static/files/requirements.txt @@ -0,0 +1,25 @@ +click==8.1.3 +ghp-import==2.1.0 +griffe==0.22.0 +importlib-metadata==4.12.0 +jinja2<3.1.0 +markdown==3.3.7 +markdown-include==0.6.0 +markupsafe==2.1.1 +mergedeep==1.3.4 +mkdocs==1.4.2 +mkdocs-autorefs==0.4.1 +mkdocstrings[python]==0.19.0 +mkdocstrings-python==0.7.1 +packaging==21.3 +pyparsing==3.0.9 +python-dateutil==2.8.2 +pyyaml==6.0 +pyyaml-env-tag==0.1 +six==1.16.0 +watchdog==2.1.9 +pymdown-extensions==9.10 +mkdocs-schema-reader==0.11.1 +mkdocs-simple-hooks==0.1.5 +mdx_spanner==0.0.5 +mkdocs-yaml-schema-plugin==0.2.3 \ No newline at end of file diff --git a/static/files/tool_descriptions.tsv b/static/files/tool_descriptions.tsv new file mode 100755 index 0000000..0101d62 --- /dev/null +++ b/static/files/tool_descriptions.tsv @@ -0,0 +1,60 @@ +Tool Type URL External_Contact_Person Short_Description Reference +annotsv Open-Source https://github.com/lgmgeo/AnnotSV https://github.com/lgmgeo/AnnotSV/issues Annotation and Ranking of Structural Variation 10.1093/bioinformatics/bty304 +bcftools Open-Source https://github.com/samtools/bcftools https://github.com/samtools/bcftools/issues BCFtools is a program for variant calling and manipulating files in the Variant Call Format (VCF) and its binary counterpart BCF. 
- +bedtools Open-Soource https://github.com/arq5x/bedtools2 https://github.com/arq5x/bedtools2/issues - - +bgzip Open-Soource https://github.com/samtools/htslib https://github.com/samtools/htslib/issues - - +bwa Open-Source https://github.com/lh3/bwa https://github.com/lh3/bwa/issues Read alignment and mapping https://doi.org/10.48550/arXiv.1303.3997 +bwa-mem Open-Source https://github.com/lh3/bwa https://github.com/lh3/bwa/issues Read alignment and mapping https://doi.org/10.48550/arXiv.1303.3997 +cadd Open-Source https://github.com/kircherlab/CADD-scripts https://github.com/kircherlab/CADD-scripts/issues CADD is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome. 10.1038/ng.2892 +cdm Open-Soource https://github.com/Clinical-Genomics-Lund/cmd-data-management - Quality control reporting for CMD lab - +cnvkit Open-Source https://cnvkit.readthedocs.io/en/stable/ https://github.com/etal/cnvkit/issues Genome-wide copy number from high-throughput sequencing https://doi.org/10.1371/journal.pcbi.1004873 +coyote Open-Soource https://github.com/Clinical-Genomics-Lund/coyote - Presentation and intreprtation of variants - +delly Open-Source https://github.com/dellytools/delly https://github.com/dellytools/delly/issues Structural variant discovery by integrated paired-end and split-read analysis https://doi.org/10.1093/bioinformatics/bts378 +eklipse Open-Source https://github.com/dooguypapua/eKLIPse https://github.com/dooguypapua/eKLIPse/issues Sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data https://doi.org/10.1038/s41436-018-0350-8 +fastp Open-Source https://github.com/OpenGene/fastp https://github.com/OpenGene/fastp/issues - - +freebayes Open-Source https://github.com/freebayes/freebayes https://github.com/freebayes/freebayes/issues Bayesian haplotype-based polymorphism discovery. https://doi.org/10.48550/arXiv.1207.390 +gatk Open-Source https://gatk.broadinstitute.org/hc/en-us https://github.com/broadinstitute/gatk/issues Variant Discovery in High-Throughput Sequencing Data http://dx.doi.org/10.1101/gr.107524.110 +gatk4 Open-Source https://gatk.broadinstitute.org/hc/en-us https://github.com/broadinstitute/gatk/issues Variant Discovery in High-Throughput Sequencing Data http://dx.doi.org/10.1101/gr.107524.110 +genmod Open-Source https://github.com/Clinical-Genomics/genmod https://github.com/Clinical-Genomics/genmod/issues Annotate models of genetic inheritance patterns in variant files 10.5281/zenodo.591885 +gunzip Open-Source https://www.gnu.org/software/gzip/ bug-gzip@gnu.org GNU Gzip is a popular data compression program - +haplogrep Open-Source https://github.com/seppinho/haplogrep-cmd https://github.com/seppinho/haplogrep-cmd/issues HaploGrep - mtDNA haplogroup classification. Supporting rCRS and RSRS. https://doi.org/10.1093/nar/gkad284 +hmtnote Open-Source https://github.com/robertopreste/HmtNote https://github.com/robertopreste/HmtNote/issues Human mitochondrial variants annotation using HmtVar https://doi.org/10.1101/600619 +madeleine Open-Source https://github.com/piratical/Madeline_2.0_PDE https://github.com/piratical/Madeline_2.0_PDE/issues The Madeline 2.0 Pedigree Drawing Engine (PDE) is a pedigree drawing program designed to handle large and complex pedigrees with an emphasis on readability and aesthetics. 
- +manta Open-Source https://github.com/Illumina/manta https://github.com/Illumina/manta/issues Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.  https://doi.org/10.1093/bioinformatics/btv710 +melt Open-Source https://melt.igs.umaryland.edu/index.php - The Mobile Element Locator Tool (MELT): Perform transposon analysis https://doi.org/10.1101%2Fgr.218032.116 +merge-vcfs Open-Source https://github.com/broadinstitute/gatk https://github.com/broadinstitute/gatk/issues Combines multiple variant files into a single variant file https://gatk.broadinstitute.org/hc/en-us/articles/360037226612-MergeVcfs-Picard- +montage Open-Source https://github.com/CAG-CNV/MONTAGE https://github.com/CAG-CNV/MONTAGE/issues Mosaic CNV Detection Tool https://doi.org/10.1186/s12864-021-07395-7 +multiqc Open-Source https://multiqc.info/ https://github.com/ewels/MultiQC/issues Aggregate results from bioinformatics analyses across many samples into a single report. - +Nextflow Open-Soource https://www.nextflow.io/ https://github.com/nextflow-io/nextflow/issues - - +peddy Open-Source https://github.com/brentp/peddy https://github.com/brentp/peddy/issues "genotype :: ped correspondence check, ancestry check, sex check. directly, quickly on VCF" https://doi.org/10.1016/j.ajhg.2017.01.017 +ped-parser Open-Source https://github.com/moonso/ped_parser https://github.com/moonso/ped_parser/issues A python tool for parsing pedigree files - +perl Open-Soource https://www.perl.org/ - - - +pindel Open-Source https://github.com/genome/pindel kaiye@xjtu.edu.cn "Pindel can detect breakpoints of large deletions, medium sized insertions, inversions, tandem duplications and other structural variants at single-based resolution from next-gen sequence data. It uses a pattern growth approach to identify the breakpoints of these variants from paired-end short reads." https://doi.org/10.1093%2Fbioinformatics%2Fbtp394 +python Open-Soource https://www.python.org/ - - - +python3 Open-Soource https://www.python.org/ - - - +R Open-Source https://www.r-project.org/ https://www.r-project.org/ R is a free software environment for statistical computing and graphics. - +rename-sample-in-vcf Open-Source https://github.com/broadinstitute/gatk https://github.com/broadinstitute/gatk/issues This tool enables the user to rename a sample in either a VCF or BCF file https://gatk.broadinstitute.org/hc/en-us/articles/360040510011-RenameSampleInVcf-Picard- +reviewer Open-Source https://github.com/Illumina/REViewer https://github.com/Illumina/REViewer/issues A tool for visualizing alignments of reads in regions containing tandem repeats - +sambamba Open-Source https://github.com/biod/sambamba https://github.com/biod/sambamba/issues Tools for working with SAM/BAM data https://doi.org/10.1093/bioinformatics/btv098 +samtools Open-Source https://github.com/samtools/samtools https://github.com/samtools/samtools/issues Tools (written in C using htslib) for manipulating next-generation sequencing data https://doi.org/10.1093/bioinformatics/btp352 +sentieon Commercial https://support.sentieon.com/manual/ - "Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency." 
https://doi.org/10.1101/115717 +seqtk Open-Source https://github.com/lh3/seqtk https://github.com/lh3/seqtk/issues - - +smn-copy-number-caller Open-Source https://github.com/Illumina/SMNCopyNumberCaller https://github.com/Illumina/SMNCopyNumberCaller/issues A copy number caller for SMN1 and SMN2 to enable SMA diagnosis and carrier screening with WGS https://doi.org/10.1186/s13073-022-01085-z +snpEff Open-Source https://github.com/pcingola/SnpEff https://github.com/pcingola/SnpEff/issues SnpEff is a variant annotation and effect prediction tool. https://pcingola.github.io/SnpEff/adds/SnpEff_paper.pdf +SomaticPanelPipeline Open-Soource https://github.com/Clinical-Genomics-Lund/SomaticPanelPipeline https://github.com/Clinical-Genomics-Lund/SomaticPanelPipeline/issues - - +stranger Open-Source https://github.com/Clinical-Genomics/stranger https://github.com/Clinical-Genomics/stranger/issues Tool to annotate outfiles from ExpansionHunter with the pathologic implications of the repeat https://doi.org/10.5281/zenodo.4548873 +svdb Open-Source https://github.com/J35P312/SVDB https://github.com/J35P312/SVDB/issues "SVDB is a toolkit for constructing and querying structural variant databases. The databases are constructed using the output vcf files from structural variant callers such as TIDDIT, Manta, Fermikit or Delly. SVDB may also be used to merge SV vcf files from multiple callers or individuals." - +tabix Open-Soource https://github.com/samtools/htslib https://github.com/samtools/htslib/issues - - +tiddit Open-Source https://github.com/SciLifeLab/TIDDIT https://github.com/SciLifeLab/TIDDIT/issues TIDDIT - structural variant calling 10.12688/f1000research.11168.2 +upd Open-Source https://github.com/bjhall/upd https://github.com/bjhall/upd/issues Basic UPD caller - +vardict Open-Source https://github.com/AstraZeneca-NGS/VarDict https://github.com/AstraZeneca-NGS/VarDict/issues "VarDict is a variant calling program for SNV, MNV, indels (<50 bp), and complex variants." https://doi.org/10.1093/nar/gkw227 +vcfanno Open-Source https://github.com/brentp/vcfanno https://github.com/brentp/vcfanno/issues annotate a VCF with other VCFs/BEDs/tabixed files https://doi.org/10.1186/s13059-016-0973-5 +vcffilter Open-Soource https://github.com/biopet/vcffilter https://github.com/biopet/vcffilter/issues - - +vcflib Open-Source https://github.com/vcflib/vcflib https://github.com/vcflib/vcflib/issues C++ library and cmdline tools for parsing and manipulating VCF files with python and zig bindings https://doi.org/10.1371/journal.pcbi.1009123 +vcftools Open-Source https://github.com/vcftools/vcftools https://github.com/vcftools/vcftools/issues "A set of tools written in Perl and C++ for working with VCF files, such as those generated by the 1000 Genomes Project." http://dx.doi.org/10.1093/bioinformatics/btr330 +vep Open-Source https://github.com/Ensembl/ensembl-vep https://github.com/Ensembl/ensembl-vep/issues The Ensembl Variant Effect Predictor predicts the functional effects of genomic variants https://doi.org/10.1186/s13059-016-0974-4 +vt Open-Source https://genome.sph.umich.edu/wiki/Vt Adiran (atks@umich.edu) vt is a variant tool set that discovers short variants from Next Generation Sequencing data. https://doi.org/10.1093/bioinformatics/btv112 +vt-decompose Open-Soource https://genome.sph.umich.edu/wiki/Vt Adiran (atks@umich.edu) vt is a variant tool set that discovers short variants from Next Generation Sequencing data. 
https://doi.org/10.1093/bioinformatics/btv112 +vt-normalize Open-Source https://genome.sph.umich.edu/wiki/Vt Adrian (atks@umich.edu) vt is a variant tool set that discovers short variants from Next Generation Sequencing data. https://doi.org/10.1093/bioinformatics/btv112 +yaml Open-Source https://yaml.org/ - - - +yml Open-Source https://yaml.org/ - - - diff --git a/templates/rs-logo-color.svg b/static/images/rs-logo-color.svg similarity index 100% rename from templates/rs-logo-color.svg rename to static/images/rs-logo-color.svg diff --git a/templates/template_logo.png b/static/images/template_logo.png similarity index 100% rename from templates/template_logo.png rename to static/images/template_logo.png diff --git a/templates/template_workflow.png b/static/images/template_workflow.png similarity index 100% rename from templates/template_workflow.png rename to static/images/template_workflow.png diff --git a/templates/template_input.csv b/static/profiles/myeloid/input.csv similarity index 100% rename from templates/template_input.csv rename to static/profiles/myeloid/input.csv diff --git a/static/profiles/myeloid/versions.yml b/static/profiles/myeloid/versions.yml new file mode 100644 index 0000000..41c68c2 --- /dev/null +++ b/static/profiles/myeloid/versions.yml @@ -0,0 +1,61 @@ +AGGREGATE_VCFS: + perl: 5.26.2 +ANNOTATE_VEP: + perl: 5.26.1 +BQSR_UMI: + sentieon: '202112' +BWA_UMI: + bwa: 0.7.17-r1188 + sentieon: '202112' +CNVKIT_BATCH: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_CALL: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_GENS: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_PLOT: + cnvkit: 0.9.9 + python: 3.7.1 +CONCATENATE_VCFS: + vcftools: 0.1.16 + vt-decompose: '0.5' + vt-normalize: '0.5' +CONTAMINATION: + perl: 5.28.1 +CUSTOM_DUMPSOFTWAREVERSIONS: + python: 3.11.0 + yaml: '6.0' +FILTER_FOR_CNV: + bedtools: 2.30.0 + bgzip: '1.12' + perl: 5.26.2 + tabix: '1.12' +FREEBAYES: + freebayes: 1.3.5 + perl: 5.26.2 + vcffilter: 1.0.2 +LOWCOV: + sambamba: 0.8.0 +MARKDUP: + sentieon: '202112' +MARK_GERMLINES: + perl: 5.26.2 +MERGE_GENS: + bedtools: 2.30.0 + bgzip: '1.12' + tabix: '1.12' +PON_FILTER: + perl: 5.26.2 +SENTIEON_QC: + sentieon: '202112' +TNSCOPE: + sentieon: '202010.01' +VARDICT: + perl: 5.26.2 + vardict: 1.8.2 +Workflow: + Nextflow: 23.04.2 + SomaticPanelPipeline: 1.0dev diff --git a/static/profiles/solid/input.csv b/static/profiles/solid/input.csv new file mode 100644 index 0000000..8e249c7 --- /dev/null +++ b/static/profiles/solid/input.csv @@ -0,0 +1,2 @@ +sample_id,type,assay,platform,read1,read2 +example,template,documentation,mkdocs,example.1.fastq.gz,example.2.fastq.gz \ No newline at end of file diff --git a/static/profiles/solid/versions.yml b/static/profiles/solid/versions.yml new file mode 100644 index 0000000..b675e02 --- /dev/null +++ b/static/profiles/solid/versions.yml @@ -0,0 +1,112 @@ +AGGREGATE_VCFS: + perl: 5.26.2 +ANNOTATE_VEP: + perl: 5.26.1 +BIOMARKERS_TO_JSON: + python: 3.9.2 +BQSR_UMI: + sentieon: '202112' +BWA_UMI: + bwa: 0.7.17-r1188 + sentieon: '202112' +CNVKIT2SCARHRD: + perl: 5.26.2 +CNVKIT_BACKBONE: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_BATCH: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_CALL: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_CALL_TC: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_EXONS: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_GENS: + cnvkit: 0.9.9 + python: 3.7.1 +CNVKIT_PLOT: + cnvkit: 0.9.9 + python: 3.7.1 +CONCATENATE_VCFS: + vcftools: 0.1.16 + vt-decompose: '0.5' + vt-normalize: '0.5' +CONTAMINATION: + perl: 5.28.1 +COYOTE_SEGMENTS: + perl: 5.26.2
+CUSTOM_DUMPSOFTWAREVERSIONS: + python: 3.11.0 + yaml: '6.0' +FFPE_PON_FILTER: + perl: 5.26.2 +FILTER_FOR_CNV: + bedtools: 2.30.0 + bgzip: '1.12' + perl: 5.26.2 + tabix: '1.12' +FILTER_MANTA: + perl: 5.26.2 +FILTER_MANTA_TUMOR: + perl: 5.26.2 +FREEBAYES: + freebayes: 1.3.5 + perl: 5.26.2 + vcffilter: 1.0.2 +GATK2VCF: + python: 3.9.2 +GATKCOV_BAF: + gatk4: 4.1.9.0-SNAPSHOT +GATKCOV_CALL: + gatk4: 4.1.9.0-SNAPSHOT +GATKCOV_COUNT: + gatk4: 4.1.9.0-SNAPSHOT +GENEFUSE: + genefuse: 0.8.0 +GENEFUSE_JSON_TO_VCF: + python: 3.9.2 +JOIN_FUSIONS: + svdb: 2.2.0 +JOIN_TUMOR: + svdb: 2.2.0 +LOWCOV: + sambamba: 0.8.0 +MANTA: + manta: 1.6.0 + python: 2.7.15 +MANTA_FUSIONS: + manta: 1.6.0 + python: 2.7.15 +MARKDUP: + sentieon: '202112' +MARK_GERMLINES: + perl: 5.26.2 +MERGE_GATK_TUMOR: + perl: 5.26.2 +MERGE_GENS: + bedtools: 2.30.0 + bgzip: '1.12' + tabix: '1.12' +MSISENSOR: + msisensor-pro: 1.2.0 +PON_FILTER: + perl: 5.26.2 +SCARHRD: + Rscript: 4.1.0 +SENTIEON_QC: + sentieon: '202112' +SEQTK: + seqtk: 1.3-r106 +SNPEFF: + snpEff: 4.3t +VARDICT: + perl: 5.26.2 + vardict: 1.8.2 +Workflow: + Nextflow: 23.04.2 + SomaticPanelPipeline: 1.0dev diff --git a/static/templates/pipeline.yaml b/static/templates/pipeline.yaml new file mode 100644 index 0000000..d877073 --- /dev/null +++ b/static/templates/pipeline.yaml @@ -0,0 +1,210 @@ +# This file uses version 1.2 of the YAML specification. +# YAML is a human-readable data serialization format that is often used for configuration files. +# For more information, see https://yaml.org/spec/1.2/spec.html + +# The pipeline documentation template consists of the following sections: +# - pipeline: Information about the pipeline, such as the name, version, and author +# - sections: A list of sections for the documentation, such as Introduction, Scope, Min_requirements, Usage, Pipeline Components, etc. +# - profiles: A list of profiles for the documentation, such as solid and myeloid +# - workflows: A link to the workflow diagram for the pipeline + +pipeline: + logo: static/images/rs-logo-color.svg + info: + name: Pipeline Documentation + version: v1.1.0 + author: Ram Sai Nanduri + author_email: Ram.Nanduri@skane.se + git_repo: https://github.com/ramsainanduri/pipeline_documentation + +# The sections section consists of a list of sections for the documentation, such as Introduction, Scope, Input Data, Pipeline Components, etc. +sections: + - name: Introduction + headings: + - name: Introduction + type: text + content: | + This automatic documentation generation is a time-saving tool for developers and teams, as it eliminates the need to manually create and maintain documentation. It also helps ensure that documentation is up-to-date and consistent, as changes made to the pipeline.yaml file are reflected in the generated documents in a simple and easy way. + + The pipeline.yaml file contains all the relevant information about the pipeline or tool, including the description, inputs, outputs, parameters, and usage. This information is used to generate the HTML and MD documents, which provide clear and detailed information about the pipeline or tool. + + The HTML document is visually appealing and easy to navigate, with links to different sections and a search bar for quickly finding specific information. The MD document is plain text, but can be formatted with Markdown syntax for a more readable and structured format. The MD document can be uploaded to a readthedocs server for online documentation.
It uses the mkdocs format, with the required "docs" folder and related files in the project root folder. + + Additionally, the generated documentation also includes a table of contents for easy navigation, and sections for examples. + + Overall, this repo helps improve the documentation process for pipelines and tools, making it easier for others to understand and use them. + - name: Scope + headings: + - name: Purpose + type: text + content: | + The program aims to simplify the process of creating and maintaining documentation for software projects using MkDocs and Read the Docs. + - name: Audience + type: text + content: | + The program is designed for developers, technical writers, and project managers who want to create high-quality documentation... + - name: Functionality + type: text + content: | + The program will automatically generate documentation for a software project using the MkDocs framework... + - name: Features + type: list + content: + - Automated creation of documentation using MkDocs + - Integration with Read the Docs for hosting and management of documentation sites + - Customizable templates for documentation sites + - Support for multiple documentation versions and languages + - Integration with source control systems such as Git + - name: Technology Stack + type: text + content: | + The program will be developed using Python and will use the MkDocs and Read the Docs APIs to generate and host documentation sites... + - name: Limitations + type: text + content: | + The program may be limited by the capabilities of MkDocs and Read the Docs, and may not be suitable for projects with complex documentation requirements... + - name: Maintenance and Support + type: text + content: | + The program will be maintained and supported by the development team, who will provide regular updates and bug fixes. Documentation and user support will also be provided... + - name: Expected Outcomes + type: text + content: | + The program is expected to simplify the process of creating and maintaining documentation, reduce the amount of time and effort required to create documentation sites, and improve the overall quality of documentation for software projects... + - name: Min_requirements + headings: + - name: Operating System + type: text + content: | + The pipeline is designed to run on Linux operating systems, such as Ubuntu and CentOS... + - name: Number of CPUs + type: text + content: | + The pipeline requires at least 4 CPUs to run efficiently... + - name: Memory + type: text + content: | + The pipeline requires at least 16 GB of memory to run efficiently... + - name: Disk Space + type: text + content: | + The pipeline requires at least 100 GB of disk space to run efficiently... + - name: Singularity + type: text + content: | + The pipeline requires Singularity version 3.0 or higher to run efficiently... + - name: Python + type: text + content: | + The pipeline requires Python version 3.6 or higher to run efficiently... + - name: Usage + headings: + - name: How to run the pipeline + type: text + content: | + To generate an online Markdown (MD) file in your project folder, clone the repo and execute the Python script src/main.py.
+ + To learn how to pass parameters to the script, run, + + `python3 src/main.py -h` + + To create an example script using the template files provided in the repo, run, + + `python3 src/main.py -e` + - name: Pipeline Components + headings: + - name: Pipeline Components Description + type: text + content: | + To automate the documentation for a pipeline, the following pipeline components are included. + - name: Data Retrieval + type: text + content: | + This step involves retrieving data related to the pipeline, such as the code, input data, and output data. + - name: Parsing + type: text + content: | + This step involves parsing the code and the input and output data to extract relevant information, such as the pipeline components, their parameters, and their inputs and outputs. + - name: Template Generation + type: text + content: | + This step involves generating a template for the documentation based on the parsed information. The template should include sections for the pipeline components, their descriptions, their parameters, and their inputs and outputs. + - name: Documentation Generation + type: text + content: | + This step involves generating the actual documentation by populating the parsed information into the respective markdown files. The documentation should be generated in a format that is easily readable and accessible. + - name: Version Control + type: text + content: | + This step involves using version control software, such as Git, to track changes to the documentation over time, and to maintain a history of the documentation. + - name: Workflow Management + type: text + content: | + This step involves using workflow management software, such as Snakemake or Nextflow, to automate the pipeline components and manage dependencies between the components. + - name: Maintenance and Support + type: text + content: | + This step involves maintaining and supporting the documentation over time, including regular updates and bug fixes. + +# This section consists of all the profiles for this pipeline +profiles: + - name: Myeloid + headings: + - name: Description + type: text + content: | + The input data for the pipeline consists of fastq files. However, for the pipeline to consume the data, it needs to be provided in the form of a CSV file that includes metadata... + - name: Input CSV + type: file + content: 'static/profiles/myeloid/input.csv' + - name: Column Descriptions + type: dictionary + content: + sample_id: Text representing the name or id of the sample being analyzed + type: Type of the sample, e.g., tumor or normal + assay: Assay of the sample, e.g., tumorWGS, myeloid, solidtumor, etc. + platform: Name of the platform used for sequencing, e.g., illumina + read1: Full path to the read 1 fastq file + read2: Full path to the read 2 fastq file + - name: Output Files + type: dictionary + content: + BAM: A BAM file is a compressed binary file format used to store and index high-throughput sequencing data, such as DNA sequence reads aligned to a reference genome + VCF: A VCF file is a text file format used to store and annotate genetic variation data, such as single nucleotide polymorphisms (SNPs) and small insertions/deletions (indels), identified from sequencing data. + - name: Software Versions + type: versions + content: 'static/profiles/myeloid/versions.yml' + - name: Solid + headings: + - name: Description + type: text + content: | + The input data for the pipeline consists of fastq files.
However, for the pipeline to consume the data, it needs to be provided in the form of a CSV file that includes metadata... + - name: Input CSV + type: file + content: 'static/profiles/solid/input.csv' + - name: Column Descriptions + type: dictionary + content: + sample_id: Text representing the name or id of the sample being analyzed + type: Type of the sample, e.g., tumor or normal + assay: Assay of the sample, e.g., tumorWGS, myeloid, solidtumor, etc. + platform: Name of the platform used for sequencing, e.g., illumina + read1: Full path to the read 1 fastq file + read2: Full path to the read 2 fastq file + - name: Output Files + type: dictionary + content: + BAM: A BAM file is a compressed binary file format used to store and index high-throughput sequencing data, such as DNA sequence reads aligned to a reference genome + VCF: A VCF file is a text file format used to store and annotate genetic variation data, such as single nucleotide polymorphisms (SNPs) and small insertions/deletions (indels), identified from sequencing data. + HTML_Report: An HTML documentation report is a text-based file format used to present information in a web browser, including text, images, and hyperlinks, typically used for displaying project documentation and results + Markdown_Files: A Markdown file is a lightweight markup language used to format and structure plain text documents, often used for creating documentation, README files, and notes + - name: Software Versions + type: versions + content: 'static/profiles/solid/versions.yml' +workflows: + - name: Workflow Diagram + headings: + - name: Workflow Flowchart + type: image + content: https://raw.githubusercontent.com/ramsainanduri/pipeline_documentation/dev/static/images/template_workflow.png diff --git a/static/templates/template.tool.versions.yaml b/static/templates/template.tool.versions.yaml new file mode 100644 index 0000000..804f88a --- /dev/null +++ b/static/templates/template.tool.versions.yaml @@ -0,0 +1,3 @@ +bwa_umi: + sentieon: v202010.01 + bwa: v202010.01 diff --git a/static/templates/template_input.csv b/static/templates/template_input.csv new file mode 100644 index 0000000..8e249c7 --- /dev/null +++ b/static/templates/template_input.csv @@ -0,0 +1,2 @@ +sample_id,type,assay,platform,read1,read2 +example,template,documentation,mkdocs,example.1.fastq.gz,example.2.fastq.gz \ No newline at end of file diff --git a/templates/github_pages.template.yaml b/templates/github_pages.template.yaml deleted file mode 100644 index fb8cd1c..0000000 --- a/templates/github_pages.template.yaml +++ /dev/null @@ -1,2 +0,0 @@ -theme: jekyll-theme-cayman -show_downloads: true \ No newline at end of file diff --git a/templates/list_of_softwares.tsv b/templates/list_of_softwares.tsv deleted file mode 100644 index 4ad2572..0000000 --- a/templates/list_of_softwares.tsv +++ /dev/null @@ -1,20 +0,0 @@ -Tool/Software Version Build Type Availability Container Name URL External Contact Person If Any Short Description Reference -BWA v0.7.17-r1188 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/lh3/bwa https://github.com/lh3/bwa/issues Read alignment and mapping https://doi.org/10.48550/arXiv.1303.3997 -CNVkit v0.9.5 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://cnvkit.readthedocs.io/en/stable/ https://github.com/etal/cnvkit/issues Genome-wide copy number from high-throughput sequencing https://doi.org/10.1371/journal.pcbi.1004873 -CNVkit v0.9.9 HG19/HG38 Open-Source Container/Image
SomaticPanelPipeline_2021-06-24.sif https://cnvkit.readthedocs.io/en/stable/ https://github.com/etal/cnvkit/issues Genome-wide copy number from high-throughput sequencing https://doi.org/10.1371/journal.pcbi.1004873 -delly v0.8.7 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/dellytools/delly https://github.com/dellytools/delly/issues Structural variant discovery by integrated paired-end and split-read analysis https://doi.org/10.1093/bioinformatics/bts378 -freebayes v1.3.5 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/freebayes/freebayes https://github.com/freebayes/freebayes/issues Bayesian haplotype-based polymorphism discovery. https://doi.org/10.48550/arXiv.1207.390 -GATK v4.1.9.0 HG19/HG38 Open-Source Container/Image gatk_4.1.9.0.sif https://gatk.broadinstitute.org/hc/en-us https://github.com/broadinstitute/gatk/issues Variant Discovery in High-Throughput Sequencing Data http://dx.doi.org/10.1101/gr.107524.110 -Manta v1.6.0 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/Illumina/manta https://github.com/Illumina/manta/issues Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.  https://doi.org/10.1093/bioinformatics/btv710 -MELT v2.1.5 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://melt.igs.umaryland.edu/index.php - The Mobile Element Locator Tool (MELT): Perform transposon analysis https://doi.org/10.1101%2Fgr.218032.116 -pindel v0.2.5b9, 20160729 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/genome/pindel https://github.com/genome/pindel/issues (kaiye@xjtu.edu.cn) Pindel can detect breakpoints of large deletions, medium sized insertions, inversions, tandem duplications and other structural variants at single-based resolution from next-gen sequence data. It uses a pattern growth approach to identify the breakpoints of these variants from paired-end short reads. https://doi.org/10.1093%2Fbioinformatics%2Fbtp394 -Sambamba v0.8.0 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/biod/sambamba https://github.com/biod/sambamba/issues Tools for working with SAM/BAM data https://doi.org/10.1093/bioinformatics/btv098 -Samtools v1.12 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/samtools/samtools https://github.com/samtools/samtools/issues Tools (written in C using htslib) for manipulating next-generation sequencing data https://doi.org/10.1093/bioinformatics/btp352 -Sentieon v202010.01 HG19/HG38 Commercial Container/Image SomaticPanelPipeline_2021-06-24.sif https://support.sentieon.com/manual/ - Sentieon® develops and supplies a suite of bioinformatics secondary analysis tools that process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. https://doi.org/10.1101/115717 -svdb v2.2.0 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/J35P312/SVDB https://github.com/J35P312/SVDB/issues SVDB is a toolkit for constructing and querying structural variant databases. The databases are constructed using the output vcf files from structural variant callers such as TIDDIT, Manta, Fermikit or Delly. SVDB may also be used to merge SV vcf files from multiple callers or individuals. 
- -Vardict HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/AstraZeneca-NGS/VarDict https://github.com/AstraZeneca-NGS/VarDict/issues VarDict is a variant calling program for SNV, MNV, indels (<50 bp), and complex variants. https://doi.org/10.1093/nar/gkw227 -VEP v95.3 HG19/GRCh37 Open-Source Container/Image container_VEP.sif https://github.com/Ensembl/ensembl-vep https://github.com/Ensembl/ensembl-vep/issues The Ensembl Variant Effect Predictor predicts the functional effects of genomic variants https://doi.org/10.1186/s13059-016-0974-4 -VCFtools v0.1.16 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://github.com/vcftools/vcftools https://github.com/vcftools/vcftools/issues A set of tools written in Perl and C++ for working with VCF files, such as those generated by the 1000 Genomes Project. http://dx.doi.org/10.1093/bioinformatics/btr330 -Vt v0.5 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://genome.sph.umich.edu/wiki/Vt Adiran (atks@umich.edu) vt is a variant tool set that discovers short variants from Next Generation Sequencing data. https://doi.org/10.1093/bioinformatics/btv112 -R v4.1.0 HG19/HG38 Open-Source Container/Image SomaticPanelPipeline_2021-06-24.sif https://www.r-project.org/ https://www.r-project.org/ R is a free software environment for statistical computing and graphics. - -VEP v103.1 HG38/GrCh38 Open-Source Container/Image ensembl-vep_release_103.sif https://github.com/Ensembl/ensembl-vep https://github.com/Ensembl/ensembl-vep/issues The Ensembl Variant Effect Predictor predicts the functional effects of genomic variants https://doi.org/10.1186/s13059-016-0974-4 diff --git a/templates/pipeline.yaml b/templates/pipeline.yaml deleted file mode 100644 index 463cee7..0000000 --- a/templates/pipeline.yaml +++ /dev/null @@ -1,121 +0,0 @@ -#Basic Details of the pipeline -pipeline: - logo: /data/bnf/dev/ram/Pipelines/validation_reports/pipeline_documentation/templates/rs-logo-color.svg - info: - name: Pipeline Documentation - version: v1.1.0 - author: Ram Sai Nanduri - author_email: Ram.Nanduri@skane.se - git_repo: https://github.com/ramsainanduri/pipeline_documentation - server_location: /data/bnf/dev/ram/Pipelines/validation_reports/pipeline_documentation - introduction: | - This automatic documentation generation is a time-saving tool for developers and teams, as it eliminates the need to manually create and maintain documentation. It also helps ensure that documentation is up-to-date and consistent, as changes made to the pipeline.yaml file used for the document updation in a simple and easy way. - - The pipeline.yaml file contains all the relevant information about the pipeline or tool, including the description, inputs, outputs, parameters, and usage. This information is used to generate the HTML and MD documents, which provide clear and detailed information about the pipeline or tool. - - The HTML document is visually appealing and easy to navigate, with links to different sections and a search bar for quickly finding specific information. The MD document is plain text, but can be formatted with Markdown syntax for a more readable and structured format. The MD document can be uploaded to a readthedocs server for online documentation. It uses the mkdocs format, with the required "docs" folder and related files in the project root folder. 
- - Additionally, the generated documentation also includes a table of contents for easy navigation, and sections for examples. - - Overall, this repo helps improve the documentation process for pipelines and tools, making it easier for others to understand and use them. - scope: #Can have few or all the sections, better to have all for all the pipelines to maintain uniformity - purpose: | - The program aims to simplify the process of creating and maintaining documentation for software projects using MkDocs and Read the Docs. - audience: | - The program is designed for developers, technical writers, and project managers who want to create high-quality documentation with minimal effort. - functionality: | - The program will automatically generate documentation for a software project using the MkDocs framework, which allows for the creation of user-friendly, responsive documentation sites. The documentation site will be hosted on Read the Docs, which provides a robust platform for hosting and managing documentation. - features: | - The program will include the following features: - - Automated creation of documentation using MkDocs - - Integration with Read the Docs for hosting and management of documentation sites - - Customizable templates for documentation sites - - Support for multiple documentation versions and languages - - Integration with source control systems such as Git - technology_stack: | - The program will be developed using Python and will use the MkDocs and Read the Docs APIs to generate and host documentation sites. - limitations: | - The program may be limited by the capabilities of MkDocs and Read the Docs, and may not be suitable for projects with complex documentation requirements. - maintenance_and_support: | - The program will be maintained and supported by the development team, who will provide regular updates and bug fixes. Documentation and user support will also be provided. - expected_outcomes: | - The program is expected to simplify the process of creating and maintaining documentation, reduce the amount of time and effort required to create documentation sites, and improve the overall quality of documentation for software projects. - input_data: #mandatory keys - input_desc: | - The input data for the pipeline consists of fastq files. However, for the pipeline to consume the data, it needs to be provided in the form of a CSV - file that includes metadata. Below is an example of the CSV file format that is expected, along with a detailed description of each column. - input_csv: /data/bnf/dev/ram/Pipelines/validation_reports/pipeline_documentation/templates/template_input.csv - column_descriptions: #feel free to change the keys below depending the columns in your pipeline csv input file - sample_id: | - Text representating the name or id of the sample being analysed - type: | - Type of the sample, eg. tumor or normal - assay: | - Assay of the sample, eg. tumorWGS, myeloid, solidtumor etc - platform: | - Name of the paltform used for sequencing, eg. 
illumina - read1: | - Full path to the read 1 fastq file - read2: | - Full path to the read 2 fastq file - output_data: - output_desc: | - This pipeline spits out various files from different process, the important once are given below with a brief descripton - output_files: #feel free to change the keys below depending the output files for your pipeline - BAM: | - A BAM file is a compressed binary file format used to store and index high-throughput sequencing data, such as DNA sequence reads aligned to a reference genome - VCF: | - A VCF file is a text file format used to store and annotate genetic variation data, such as single nucleotide polymorphisms (SNPs) and small insertions/deletions (indels), identified from sequencing data. - HTML_Report: | - An HTML documentation report is a text-based file format used to present information in a web browser, including text, images, and hyperlinks, typically used for displaying project documentation and results - Markdown_Files: | - A Markdown file is a lightweight markup language used to format and structure plain text documents, often used for creating documentation, README files, and notes - min_requirements: # Can be multiple requirements but only text values are accepted - configs: | - pipeline.yaml, list_of_softwares.tsv - os: "POSIX based" - cpus: 1 - singularity: ">= 3.8.0" - usage: | - To generate offline HTML documentation and an online Markdown(MD) file in your project folder, clone the repo and execute the bash script bin/runMe.sh. - - To learn how to parse parameters for the script, run, - - `bin/runMe.sh -h` - - To create an example script using the provided template files in the repo location, run, - - `bin/runMe.sh -e` - pipeline_components: #feel free to change the keys below depending the analysis steps for your pipeline - pipeline_components_desc: | #Mandatory - To automate the documentation for a pipeline, the following pipeline components are included, #Can have specific keys based on the pipeline, free free to modify, add, delete keys in this section depending on the pipeline - data_retrieval: | - This step involves retrieving data related to the pipeline, such as the code, input data, and output data. - parsing: | - This step involves parsing the code and the input and output data to extract relevant information, such as the pipeline components, their parameters, and their inputs and outputs. - template_generation: | - This step involves generating a template for the documentation based on the parsed information. The template should include sections for the pipeline components, their descriptions, their parameters, and their inputs and outputs. - documentation_generation: | - This step involves generating the actual documentation by populating the template with the parsed information. The documentation should be generated in a format that is easily readable and accessible, such as HTML or PDF. - version_control: | - This step involves using version control software, such as Git, to track changes to the documentation over time, and to maintain a history of the documentation. - workflow_management: | - This step involves using workflow management software, such as Snakemake or Nextflow, to automate the pipeline components and manage dependencies between the components. - maintenance_and_support: | - This step involves maintaining and supporting the documentation over time, including regular updates and bug fixes. 
- software_stack: /data/bnf/dev/ram/Pipelines/validation_reports/pipeline_documentation/templates/template_tool_versions.yaml - profiles: #Can be multiple profiles with different names - profile1: - profile_name: 'Profile 1' - profile_description: 'Profile 1 is used for Solid Panel' - profile_usage: 'add --profile "solid" to the nextflow command' - profile_validatation_data_path: '/full/path/to/profile1/validation/data' - profile_test_data_path: '/full/path/to/profile1/test/data' - profile2: - profile_name: 'Profile 2' - profile_description: 'Profile 1 is used for AML Panel' - profile_usage: 'add --profile "myeloid" to the nextflow command' - profile_validatation_data_path: '/full/path/to/profile2/validation/data' - profile_test_data_path: '/full/path/to/profile2/test/data' - workflow: 'https://raw.githubusercontent.com/ramsainanduri/pipeline_documentation/dev/templates/template_workflow.png' - diff --git a/templates/requirements.txt b/templates/requirements.txt deleted file mode 100644 index 8f0f216..0000000 --- a/templates/requirements.txt +++ /dev/null @@ -1,66 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: -# -# pip-compile docs/requirements.in -# -click==8.1.3 - # via mkdocs -ghp-import==2.1.0 - # via mkdocs -griffe==0.22.0 - # via mkdocstrings-python -importlib-metadata==4.12.0 - # via mkdocs -jinja2==3.1.2 - # via - # mkdocs - # mkdocstrings -markdown==3.3.7 - # via - # markdown-include - # mkdocs - # mkdocs-autorefs - # mkdocstrings - # pymdown-extensions -markdown-include==0.6.0 - # via -r docs/requirements.in -markupsafe==2.1.1 - # via - # jinja2 - # mkdocstrings -mergedeep==1.3.4 - # via mkdocs -mkdocs==1.3.0 - # via - # -r docs/requirements.in - # mkdocs-autorefs - # mkdocstrings -mkdocs-autorefs==0.4.1 - # via mkdocstrings -mkdocstrings[python]==0.19.0 - # via - # -r docs/requirements.in - # mkdocstrings-python -mkdocstrings-python==0.7.1 - # via mkdocstrings -packaging==21.3 - # via mkdocs -pymdown-extensions==9.5 - # via mkdocstrings -pyparsing==3.0.9 - # via packaging -python-dateutil==2.8.2 - # via ghp-import -pyyaml==6.0 - # via - # mkdocs - # pyyaml-env-tag -pyyaml-env-tag==0.1 - # via mkdocs -six==1.16.0 - # via python-dateutil -watchdog==2.1.9 - # via mkdocs -zipp==3.8.0 - # via importlib-metadata \ No newline at end of file diff --git a/templates/template_tool_versions.yaml b/templates/template_tool_versions.yaml deleted file mode 100644 index 4f309c9..0000000 --- a/templates/template_tool_versions.yaml +++ /dev/null @@ -1,63 +0,0 @@ -bwa_umi: - Sentieon BWA: - version: v202010.01 - container: SomaticPanelPipeline_2021-06-24.sif - Sentieon UMI: - version: v202010.01 - container: SomaticPanelPipeline_2021-06-24.sif - Sentieon UTIL: - version: v202010.01 - container: SomaticPanelPipeline_2021-06-24.sif - SAMtools: - version: 1.9 - container: SomaticPanelPipeline_2021-06-24.sif -bwa_align: - Sentieon BWA-MEM: - version: v202010.01 - container: SomaticPanelPipeline_2021-06-24.sif - BWA-MEM: - version: v0.7.17-r1188 - container: SomaticPanelPipeline_2021-06-24.sif -markdup: - Sentieon DRIVER: - version: v202010.01 - container: SomaticPanelPipeline_2021-06-24.sif -lowcov: - Sambamba: - version: v0.8.0 - container: SomaticPanelPipeline_2021-06-24.sif -freebayes: - freebayes: - version: 1.6.0 - container: SomaticPanelPipeline_2021-06-24.sif -vardict: - VarDict: - version: '' - container: SomaticPanelPipeline_2021-06-24.sif -tnscope: - Sentieon DRIVER: - version: v202010.01 - container: SomaticPanelPipeline_2021-06-24.sif -pindel: - 
Pindel: - version: v0.2.5b9, 20160729 - container: SomaticPanelPipeline_2021-06-24.sif -concatenate_vcfs: - VCFtools: - version: v0.1.16 - container: SomaticPanelPipeline_2021-06-24.sif - Vt: - version: v0.5 - container: SomaticPanelPipeline_2021-06-24.sif -cnvkit: - CNVkit: - version: v0.9.9 - container: SomaticPanelPipeline_2021-06-24.sif -gene_plot: - CNVkit: - version: v0.9.5 - container: SomaticPanelPipeline_2021-06-24.sif -annotate_vep: - VEP: - version: v103.1 - container: ensembl-vep_release_103.sif \ No newline at end of file
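
The static/templates/pipeline.yaml added in this diff drives the whole generator: src/main.py reads the sections list and writes one Markdown file per section. The script's internals are not part of this diff, so the following is only a minimal sketch of that flow, assuming PyYAML (pinned as pyyaml==6.0 in the repo's requirements) and handling just the text, list, and dictionary content types; render_heading and render_sections are illustrative names, not the repo's actual API.

```python
import yaml  # PyYAML, pinned in the repo's requirements


def render_heading(heading: dict) -> str:
    """Render one {name, type, content} heading from pipeline.yaml as Markdown."""
    lines = [f"## {heading['name']}", ""]
    if heading["type"] == "text":
        lines.append(heading["content"].strip())
    elif heading["type"] == "list":
        lines.extend(f"- {item}" for item in heading["content"])
    elif heading["type"] in ("dictionary", "dict"):
        lines.extend(f"- **{key}:** {value}" for key, value in heading["content"].items())
    else:
        # file/versions/image/table types need file IO or image markup;
        # the real generator handles them, this sketch just notes the path.
        lines.append(f"*(content from {heading['content']})*")
    lines.append("")
    return "\n".join(lines)


def render_sections(config_path: str) -> dict:
    """Map each section name to the Markdown generated from its headings."""
    with open(config_path) as fh:
        config = yaml.safe_load(fh)
    return {
        section["name"]: "\n".join(
            [f"# {section['name']}", ""]
            + [render_heading(h) for h in section.get("headings", [])]
        )
        for section in config.get("sections", [])
    }


if __name__ == "__main__":
    for name, markdown in render_sections("static/templates/pipeline.yaml").items():
        print(f"--- {name}.md ---\n{markdown}\n")
```

Under these assumptions, each rendered section would be written to docs/ so that the mkdocs.yml configured in this repo can build and serve the pages on Read the Docs.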