diff --git a/ViroConstrictor/ViroConstrictor.py b/ViroConstrictor/ViroConstrictor.py index 9677a9e..a5d08da 100644 --- a/ViroConstrictor/ViroConstrictor.py +++ b/ViroConstrictor/ViroConstrictor.py @@ -1,10 +1,13 @@ -# pylint: disable=C0103 +""" +Starting point of the ViroConstrictor pipeline and wrapper +Copyright © 2021 RIVM -""" -#placeholder block +https://github.com/RIVM-bioinformatics/ViroConstrictor """ +# pylint: disable=C0103 + import argparse import multiprocessing import os @@ -17,6 +20,7 @@ from .functions import MyHelpFormatter, color from .runconfigs import WriteConfigs from .samplesheet import WriteSampleSheet +from .update import update from .userprofile import ReadConfig from .validatefasta import IsValidFasta from .version import __version__ @@ -192,6 +196,12 @@ def main(): --> Change working directories and make necessary local files for snakemake --> Run snakemake with appropriate settings """ + + ##> Check the default userprofile, make it if it doesn't exist + conf = ReadConfig(os.path.expanduser("~/.ViroConstrictor_defaultprofile.ini")) + + update(sys.argv, conf) + flags = get_args(sys.argv[1:]) inpath = os.path.abspath(flags.input) @@ -213,9 +223,6 @@ def main(): Snakefile = os.path.join(here, "workflow", "workflow.smk") - ##> Check the default userprofile, make it if it doesn't exist - conf = ReadConfig(os.path.expanduser("~/.ViroConstrictor_defaultprofile.ini")) - ##@ check if the input directory contains valid files if CheckInputFiles(inpath) is False: print( diff --git a/ViroConstrictor/runconfigs.py b/ViroConstrictor/runconfigs.py index e9309ab..5d7e056 100644 --- a/ViroConstrictor/runconfigs.py +++ b/ViroConstrictor/runconfigs.py @@ -9,6 +9,11 @@ import yaml +def get_max_local_mem(): + avl_mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") + return int(round(avl_mem_bytes / (1024.0 ** 2) - 2000, -3)) + + def SnakemakeConfig(conf, cores, dryrun): cores = cores - 2 compmode = conf["COMPUTING"]["compmode"] @@ -25,13 +30,14 @@ def SnakemakeConfig(conf, cores, dryrun): if compmode == "grid": queuename = conf["COMPUTING"]["queuename"] threads = "{threads}" + mem = "{resources.mem_mb}" config = { "cores": 300, "latency-wait": 60, "use-conda": True, "dryrun": dryrun, "jobname": "ViroConstrictor_{name}.jobid{jobid}", - "drmaa": f' -q {queuename} -n {threads} -R "span[hosts=1]"', + "drmaa": f' -q {queuename} -n {threads} -R "span[hosts=1]" -M {mem}', "drmaa-log-dir": "logs/drmaa", } @@ -50,6 +56,8 @@ def SnakemakeParams(conf, cores, ref, prim, feats, platform, samplesheet, amplic params = { "sample_sheet": samplesheet, + "computing_execution": conf["COMPUTING"]["compmode"], + "max_local_mem": get_max_local_mem(), "reference_file": ref, "primer_file": prim, "features_file": feats, @@ -60,7 +68,7 @@ def SnakemakeParams(conf, cores, ref, prim, feats, platform, samplesheet, amplic "QC": threads_midcpu, "AdapterRemoval": threads_lowcpu, "PrimerRemoval": threads_highcpu, - "Consensus": threads_lowcpu, + "Consensus": threads_midcpu, "Index": threads_lowcpu, "Typing": threads_lowcpu, }, diff --git a/ViroConstrictor/update.py b/ViroConstrictor/update.py new file mode 100644 index 0000000..77045b8 --- /dev/null +++ b/ViroConstrictor/update.py @@ -0,0 +1,123 @@ +import json +import subprocess +import sys +from distutils.version import LooseVersion +from urllib import request + +from .functions import color +from .userprofile import AskPrompts +from .version import __version__ + + +def update(args, conf): + + autocontinue = False + ask_prompt = False + + if conf["GENERAL"]["auto_update"] == "yes": + autocontinue = True + + if autocontinue is False: + if conf["GENERAL"]["ask_for_update"] == "yes": + ask_prompt = True + + if autocontinue is True: + try: + latest_release = request.urlopen( + "https://api.github.com/repos/RIVM-bioinformatics/Viroconstrictor/releases" + ) + except Exception as e: + sys.stderr.write("Unable to connect to GitHub API\n" f"{e}") + return + + latest_release = json.loads(latest_release.read().decode("utf-8"))[0] + + latest_release_tag = latest_release["tag_name"] + latest_release_tag_tidied = LooseVersion( + latest_release["tag_name"].lstrip("v").strip() + ) + + localversion = LooseVersion(__version__) + + if localversion < latest_release_tag_tidied: + subprocess.run( + [ + sys.executable, + "-m", + "pip", + "install", + "--upgrade", + f"git+https://github.com/RIVM-bioinformatics/ViroConstrictor@{latest_release_tag}", + ], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + print( + f"ViroConstrictor updated to {color.YELLOW + color.BOLD}{latest_release_tag}{color.END}" + ) + + subprocess.run(args) + sys.exit(0) + return + + if autocontinue is False and ask_prompt is False: + return + + if autocontinue is False and ask_prompt is True: + try: + latest_release = request.urlopen( + "https://api.github.com/repos/RIVM-bioinformatics/Viroconstrictor/releases" + ) + except Exception as e: + sys.stderr.write("Unable to connect to GitHub API\n" f"{e}") + return + + latest_release = json.loads(latest_release.read().decode("utf-8"))[0] + + latest_release_tag = latest_release["tag_name"] + latest_release_tag_tidied = LooseVersion( + latest_release["tag_name"].lstrip("v").strip() + ) + + localversion = LooseVersion(__version__) + + if localversion < latest_release_tag_tidied: + if ( + AskPrompts( + f""" +There's a new version of ViroConstrictor available. + +Current version: {color.RED + color.BOLD}{'v' + __version__}{color.END} +Latest version: {color.GREEN + color.BOLD}{latest_release_tag}{color.END}\n""", + f"""Do you want to update? [yes/no] """, + ["yes", "no"], + fixedchoices=True, + ) + == "yes" + ): + subprocess.run( + [ + sys.executable, + "-m", + "pip", + "install", + "--upgrade", + f"git+https://github.com/RIVM-bioinformatics/ViroConstrictor@{latest_release_tag}", + ], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + print( + f"ViroConstrictor updated to {color.YELLOW + color.BOLD}{latest_release_tag}{color.END}" + ) + + subprocess.run(args) + sys.exit(0) + print(f"Skipping update to version {latest_release_tag}") + print(f"Continuing...") + return + return diff --git a/ViroConstrictor/userprofile.py b/ViroConstrictor/userprofile.py index 4f9dcbf..d829997 100644 --- a/ViroConstrictor/userprofile.py +++ b/ViroConstrictor/userprofile.py @@ -77,16 +77,65 @@ def BuildConfig(file): if conf_object["COMPUTING"]["compmode"] == "grid": conf_object["COMPUTING"]["queuename"] = AskPrompts( - f"""Grid mode has been chosen. Please enter the name of computing-queue that you wish to use on your grid/HPC cluster.\nThis is necessary so ViroConstrictor will send all the various tasks to the correct (remote) computers.\n\n{color.BOLD + color.UNDERLINE + color.YELLOW}Please note that this is case-sensitive{color.END}\n""", + f""" +Grid mode has been chosen. Please enter the name of computing-queue that you wish to use on your grid/HPC cluster.\nThis is necessary so ViroConstrictor will send all the various tasks to the correct (remote) computers.\n\n{color.BOLD + color.UNDERLINE + color.YELLOW}Please note that this is case-sensitive{color.END}\n""", "Please specify the name of the Queue on your grid/HPC cluster that you wish to use. [free text] ", [], fixedchoices=False, ) + conf_object["GENERAL"] = { + "auto_update": AskPrompts( + f""" +ViroConstrictor can check and update itself everytime you run it. +Please specify whether you wish to enable the auto-update feature. + """, + f"""Do you wish to enable the auto-update feature? [yes/no] """, + ["yes", "no"], + fixedchoices=True, + ) + } + + if conf_object["GENERAL"]["auto_update"] == "no": + conf_object["GENERAL"]["ask_for_update"] = AskPrompts( + f""" +ViroConstrictor will not automatically update itself, but ViroConstrictor can still check for updates and ask you if you wish to update. + """, + f"""Do you want ViroConstrictor to {color.YELLOW}ask you{color.END} to update everytime a new update is available? [yes/no] """, + ["yes", "no"], + fixedchoices=True, + ) + with open(file, "w") as conffile: conf_object.write(conffile) +def AllOptionsGiven(config): + all_present = True + + if config.has_section("COMPUTING") is True: + if config.has_option("COMPUTING", "compmode") is True: + if config["COMPUTING"]["compmode"] == "grid": + if config.has_option("COMPUTING", "queuename") is False: + all_present = False + else: + all_present = False + else: + all_present = False + + if config.has_section("GENERAL") is True: + if config.has_option("GENERAL", "auto_update") is True: + if config["GENERAL"]["auto_update"] == "no": + if config.has_option("GENERAL", "ask_for_update") is False: + all_present = False + else: + all_present = False + else: + all_present = False + + return all_present + + def ReadConfig(file): if FileExists(file) is False: BuildConfig(file) @@ -95,4 +144,9 @@ def ReadConfig(file): config = configparser.ConfigParser() config.read(file) + + while AllOptionsGiven(config) is False: + BuildConfig(file) + config = configparser.ConfigParser() + config.read(file) return config diff --git a/ViroConstrictor/workflow/workflow.smk b/ViroConstrictor/workflow/workflow.smk index 2c10096..de59e3f 100644 --- a/ViroConstrictor/workflow/workflow.smk +++ b/ViroConstrictor/workflow/workflow.smk @@ -51,6 +51,21 @@ else: f"{res}Amplicon_coverage.csv" +def low_memory_job(wildcards, threads, attempt): + if config['computing_execution'] == 'local': + return min(attempt * threads * 1 * 1000, config['max_local_mem']) + return attempt * threads * 1 * 1000 + +def medium_memory_job(wildcards, threads, attempt): + if config['computing_execution'] == 'local': + return min(attempt * threads * 2 * 1000, config['max_local_mem']) + return attempt * threads * 2 * 1000 + +def high_memory_job(wildcards, threads, attempt): + if config['computing_execution'] == 'local': + return min(attempt * threads * 4 * 1000, config['max_local_mem']) + return attempt * threads * 4 * 1000 + #TODO: Check if this can be done more elegantly than by wrapping everything in an if/else statement if config["primer_file"] != "NONE": rule Prepare_ref_and_primers: @@ -64,6 +79,8 @@ if config["primer_file"] != "NONE": conda: f"{conda_envs}Alignment.yaml" threads: config['threads']['Index'] + resources: + mem_mb = low_memory_job shell: """ cat {input.ref} | seqkit replace -p "\-" -s -r "N" > {output.ref} @@ -82,6 +99,8 @@ else: conda: f"{conda_envs}Alignment.yaml" threads: config['threads']['Index'] + resources: + mem_mb = low_memory_job shell: """ cat {input.ref} | seqkit replace -p "\-" -s -r "N" > {output.ref} @@ -95,6 +114,8 @@ if config["features_file"] != "NONE": output: gff = temp(f"{datadir + features}reference_features.gff") threads: config['threads']['Index'] + resources: + mem_mb = low_memory_job shell: """ cat {input} >> {output} @@ -109,6 +130,8 @@ else: conda: f"{conda_envs}ORF_analysis.yaml" threads: config['threads']['Index'] + resources: + mem_mb = medium_memory_job log: f"{logdir}ORF_Analysis.log" params: @@ -139,6 +162,8 @@ if config["platform"] == "illumina": benchmark: f"{logdir + bench}" + "QC_raw_data_{sample}_{read}.txt" threads: config['threads']['QC'] + resources: + mem_mb = low_memory_job params: output_dir = f"{datadir + qc_pre}", script = srcdir("scripts/fastqc_wrapper.sh") @@ -163,6 +188,8 @@ if config["platform"] == "illumina": benchmark: f"{logdir + bench}"+ "RemoveAdapters_p1_{sample}.txt" threads: config['threads']['Alignments'] + resources: + mem_mb = medium_memory_job params: mapthreads = config['threads']['Alignments'] - 1, filters = config["runparams"]["alignmentfilters"] @@ -180,6 +207,8 @@ if config["platform"] == "illumina": conda: f"{conda_envs}Clean.yaml" threads: config['threads']['AdapterRemoval'] + resources: + mem_mb = low_memory_job params: script = srcdir('scripts/clipper.py') shell: @@ -200,6 +229,8 @@ if config["platform"] == "illumina": benchmark: f"{logdir + bench}" + "Cleanup_{sample}.txt" threads: config['threads']['QC'] + resources: + mem_mb = low_memory_job params: score = config['runparams']['qc_filter_illumina'], size = config['runparams']['qc_window_illumina'], @@ -224,6 +255,8 @@ if config["platform"] == "nanopore": benchmark: f"{logdir + bench}" + "QC_raw_data_{sample}.txt" threads: config['threads']['QC'] + resources: + mem_mb = low_memory_job params: output_dir = f"{datadir + qc_pre}", script = srcdir("scripts/fastqc_wrapper.sh") @@ -246,6 +279,8 @@ if config["platform"] == "nanopore": benchmark: f"{logdir + bench}"+ "RemoveAdapters_p1_{sample}.txt" threads: config['threads']['Alignments'] + resources: + mem_mb = medium_memory_job params: mapthreads = config['threads']['Alignments'] - 1, filters = config["runparams"]["alignmentfilters"] @@ -263,6 +298,8 @@ if config["platform"] == "nanopore": conda: f"{conda_envs}Clean.yaml" threads: config['threads']['AdapterRemoval'] + resources: + mem_mb = low_memory_job params: script = srcdir('scripts/clipper.py') shell: @@ -283,6 +320,8 @@ if config["platform"] == "nanopore": benchmark: f"{logdir + bench}" + "Cleanup_{sample}.txt" threads: config['threads']['QC'] + resources: + mem_mb = low_memory_job params: score = config['runparams']['qc_filter_nanopore'], size = config['runparams']['qc_window_nanopore'], @@ -307,6 +346,8 @@ if config["platform"] == "iontorrent": benchmark: f"{logdir + bench}" + "QC_raw_data_{sample}.txt" threads: config['threads']['QC'] + resources: + mem_mb = low_memory_job params: output_dir = f"{datadir + qc_pre}", script = srcdir("scripts/fastqc_wrapper.sh") @@ -329,6 +370,8 @@ if config["platform"] == "iontorrent": benchmark: f"{logdir + bench}"+ "RemoveAdapters_p1_{sample}.txt" threads: config['threads']['Alignments'] + resources: + mem_mb = medium_memory_job params: mapthreads = config['threads']['Alignments'] - 1, filters = config["runparams"]["alignmentfilters"] @@ -346,6 +389,8 @@ if config["platform"] == "iontorrent": conda: f"{conda_envs}Clean.yaml" threads: config['threads']['AdapterRemoval'] + resources: + mem_mb = low_memory_job params: script = srcdir('scripts/clipper.py') shell: @@ -366,6 +411,8 @@ if config["platform"] == "iontorrent": benchmark: f"{logdir + bench}" + "Cleanup_{sample}.txt" threads: config['threads']['QC'] + resources: + mem_mb = low_memory_job params: score = config['runparams']['qc_filter_iontorrent'], size = config['runparams']['qc_window_iontorrent'], @@ -393,6 +440,8 @@ if config["primer_file"] != "NONE": benchmark: f"{logdir + bench}" + "RemovePrimers_{sample}.txt" threads: config['threads']['PrimerRemoval'] + resources: + mem_mb = high_memory_job params: amplicontype = config["amplicon_type"] shell: @@ -408,6 +457,8 @@ if config["primer_file"] != "NONE": if config["primer_file"] == "NONE": rule RemovePrimers: input: rules.QC_filter.output.fq + resources: + mem_mb = 1 output: fq = f"{datadir + cln + prdir}" + "{sample}.fastq" shell: @@ -427,6 +478,8 @@ rule QC_clean: benchmark: f"{logdir + bench}" + "QC_clean_data_{sample}.txt" threads: config['threads']['QC'] + resources: + mem_mb = low_memory_job params: outdir = f"{datadir + qc_post}" shell: @@ -456,6 +509,8 @@ if config["platform"] == "illumina": benchmark: f"{logdir + bench}" + "Alignment_{sample}.txt" threads: config['threads']['Alignments'] + resources: + mem_mb = medium_memory_job params: mapthreads = config['threads']['Alignments'] - 1, filters = config["runparams"]["alignmentfilters"] @@ -482,6 +537,8 @@ if config["platform"] == "nanopore": benchmark: f"{logdir + bench}" + "Alignment_{sample}.txt" threads: config['threads']['Alignments'] + resources: + mem_mb = medium_memory_job params: mapthreads = config['threads']['Alignments'] - 1, filters = config["runparams"]["alignmentfilters"] @@ -508,6 +565,8 @@ if config["platform"] == "iontorrent": benchmark: f"{logdir + bench}" + "Alignment_{sample}.txt" threads: config['threads']['Alignments'] + resources: + mem_mb = medium_memory_job params: mapthreads = config['threads']['Alignments'] - 1, filters = config["runparams"]["alignmentfilters"] @@ -553,6 +612,8 @@ rule Consensus: benchmark: f"{logdir + bench}" + "Consensus_{sample}.txt" threads: config['threads']['Consensus'] + resources: + mem_mb = medium_memory_job shell: """ TrueConsense --input {input.bam} \ @@ -589,6 +650,9 @@ rule Concat_Seqs: cov10 = f"{res + seqs}" + "concat_cov_ge_10.fasta", cov50 = f"{res + seqs}" + "concat_cov_ge_50.fasta", cov100 = f"{res + seqs}" + "concat_cov_ge_100.fasta", + threads: 1 + resources: + mem_mb = low_memory_job shell: """ cat {input.cov1} >> {output.cov1} @@ -613,6 +677,9 @@ rule VCF_to_TSV: cov50 = temp(f"{datadir + aln + vf}" + "{sample}_cov_ge_50.tsv"), cov100 = temp(f"{datadir + aln + vf}" + "{sample}_cov_ge_100.tsv"), conda: f"{conda_envs}Mutations.yaml" + threads: config['threads']['Index'] + resources: + mem_mb = low_memory_job log: f"{logdir}" + "vcf_to_tsv_{sample}.log" shell: @@ -659,6 +726,9 @@ rule Concat_TSV_coverages: cov100 = f"{res + muts}concat_mutations_cov_ge_100.tsv", log: f"{logdir}concat_tsv.log" + threads: 1 + resources: + mem_mb = low_memory_job run: shell("echo -e 'Sample\tReference_Chromosome\tPosition\tReference\tAlternative\tDepth' > {output.cov1} 2> {log}") shell("echo -e 'Sample\tReference_Chromosome\tPosition\tReference\tAlternative\tDepth' > {output.cov5} 2> {log}") @@ -681,6 +751,8 @@ rule Get_Breadth_of_coverage: conda: f"{conda_envs}Consensus.yaml" threads: 1 + resources: + mem_mb = low_memory_job params: script = srcdir("scripts/boc.py") shell: @@ -695,6 +767,8 @@ rule concat_boc: sample = SAMPLES) output: f"{res}Width_of_coverage.tsv" threads: 1 + resources: + mem_mb = low_memory_job shell: """ echo -e "Sample_name\tWidth_at_mincov_1\tWidth_at_mincov_5\tWidth_at_mincov_10\tWidth_at_mincov_50\tWidth_at_mincov_100" > {output} @@ -708,6 +782,8 @@ if config["primer_file"] != "NONE": output: ampcov = f"{datadir + prim}" + "{sample}_ampliconcoverage.csv" threads: 1 + resources: + mem_mb = low_memory_job conda: f"{conda_envs}Clean.yaml" params: @@ -725,6 +801,8 @@ if config["primer_file"] != "NONE": input: expand(f"{datadir + prim}" + "{sample}_ampliconcoverage.csv", sample = SAMPLES) output: f"{res}Amplicon_coverage.csv" threads: 1 + resources: + mem_mb = low_memory_job conda: f"{conda_envs}Clean.yaml" params: @@ -758,6 +836,8 @@ if config['platform'] == "illumina": benchmark: f"{logdir + bench}" + "MultiQC_report.txt" threads: 1 + resources: + mem_mb = medium_memory_job params: conffile = srcdir('files/multiqc_config.yaml'), outdir = f"{res}" @@ -789,6 +869,8 @@ if config['platform'] == "nanopore" or config['platform'] == "iontorrent": benchmark: f"{logdir + bench}" + "MultiQC_report.txt" threads: 1 + resources: + mem_mb = medium_memory_job params: conffile = srcdir('files/multiqc_config.yaml'), outdir = f"{res}"