diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1a1fbf187..de8e31b14 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -17,4 +17,4 @@ for contributing to the repository :** ## Testing -* Please make sure that travis tests are passing +* Please make sure that GitHub Actions tests are passing diff --git a/.github/test_and_build.yml b/.github/test_and_build.yml index e2dcd08be..0d1c77b20 100644 --- a/.github/test_and_build.yml +++ b/.github/test_and_build.yml @@ -2,7 +2,7 @@ channels: - conda-forge - bioconda dependencies: - - python >= 3.7 + - python >= 3.8 - numpy - scipy - flake8 diff --git a/.github/workflows/planemo.yml b/.github/workflows/planemo.yml index bc7e4b0bb..067a303cc 100644 --- a/.github/workflows/planemo.yml +++ b/.github/workflows/planemo.yml @@ -49,7 +49,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.11'] + python-version: ['3.8', '3.11'] steps: - uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8a4789073..07ffd664d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -63,9 +63,9 @@ jobs: micromamba activate test_and_build rm -f dist/* python -m build - - uses: actions/upload-artifact@master + - uses: actions/upload-artifact@v3 with: - name: "Dist files" + name: "distfiles" path: "dist" test-wheels: name: test wheel @@ -73,12 +73,12 @@ jobs: needs: build-linux strategy: matrix: - python-version: ['3.7','3.8','3.9','3.10', '3.11'] + python-version: ['3.8','3.9','3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 with: - name: "Dist files" + name: "distfiles" path: ~/dist/ - uses: actions/setup-python@v4 with: diff --git a/.readthedocs.yaml b/.readthedocs.yaml index d87c2e6fc..d95161e3d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,11 +3,13 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.11" + python: "3.12" sphinx: 
configuration: docs/conf.py python: install: + - method: pip + path: . - requirements: docs/requirements.txt diff --git a/CHANGES.txt b/CHANGES.txt index 335dbc80b..5f0bf0f0c 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,19 @@ +3.5.5 +* drop support for python 3.7 +* doc fixes (argparse properly displayed, minor changes in installation instructions) +* deepblue support stops +* initiate deprecation of tight_layout in plotheatmap, in favor of constrained_layout. Minor changes in paddings, etc can occur (but for the better). +* documentation changes to improve ESS tab, table constraints have been lifted & sphinx_rtd_theme to v2.0.0 +* upload artifact in gh test runner pinned to 3 +* Try to get the number of processors from sched_getaffinity, to avoid using to many in job submissions for example. #1199 +* Fix typo in estimateScaleFactor that fixes broken argparsing. #1286 + +3.5.4 +* error handling and cases for bwAverage with >2 samples +* Tick.label deprecation for mpl 3.8 +* minimal mpl version is 3.5 +* cicd update for pypi push + 3.5.3 * requirement cap for matplotlib lifted (changes in plotting can occur) * nose has been deprecated in favor of pytests diff --git a/README.md b/README.md index 84d46be8d..f3f614c21 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/deeptools/README.html) [![European Galaxy 
server](https://img.shields.io/badge/usegalaxy-.eu-brightgreen?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAASCAYAAABB7B6eAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAACXBIWXMAAAsTAAALEwEAmpwYAAACC2lUWHRYTUw6Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNS40LjAiPgogICA8cmRmOlJERiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyI+CiAgICAgICAgIDx0aWZmOlJlc29sdXRpb25Vbml0PjI8L3RpZmY6UmVzb2x1dGlvblVuaXQ+CiAgICAgICAgIDx0aWZmOkNvbXByZXNzaW9uPjE8L3RpZmY6Q29tcHJlc3Npb24+CiAgICAgICAgIDx0aWZmOk9yaWVudGF0aW9uPjE8L3RpZmY6T3JpZW50YXRpb24+CiAgICAgICAgIDx0aWZmOlBob3RvbWV0cmljSW50ZXJwcmV0YXRpb24+MjwvdGlmZjpQaG90b21ldHJpY0ludGVycHJldGF0aW9uPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgPC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4KD0UqkwAAAn9JREFUOBGlVEuLE0EQruqZiftwDz4QYT1IYM8eFkHFw/4HYX+GB3/B4l/YP+CP8OBNTwpCwFMQXAQPKtnsg5nJZpKdni6/6kzHvAYDFtRUT71f3UwAEbkLch9ogQxcBwRKMfAnM1/CBwgrbxkgPAYqlBOy1jfovlaPsEiWPROZmqmZKKzOYCJb/AbdYLso9/9B6GppBRqCrjSYYaquZq20EUKAzVpjo1FzWRDVrNay6C/HDxT92wXrAVCH3ASqq5VqEtv1WZ13Mdwf8LFyyKECNbgHHAObWhScf4Wnj9CbQpPzWYU3UFoX3qkhlG8AY2BTQt5/EA7qaEPQsgGLWied0A8VKrHAsCC1eJ6EFoUd1v6GoPOaRAtDPViUr/wPzkIFV9AaAZGtYB568VyJfijV+ZBzlVZJ3W7XHB2RESGe4opXIGzRTdjcAupOK09RA6kzr1NTrTj7V1ugM4VgPGWEw+e39CxO6JUw5XhhKihmaDacU2GiR0Ohcc4cZ+Kq3AjlEnEeRSazLs6/9b/kh4eTC+hngE3QQD7Yyclxsrf3cpxsPXn+cFdenF9aqlBXMXaDiEyfyfawBz2RqC/O9WF1ysacOpytlUSoqNrtfbS642+4D4CS9V3xb4u8P/ACI4O810efRu6KsC0QnjHJGaq4IOGUjWTo/YDZDB3xSIxcGyNlWcTucb4T3in/3IaueNrZyX0lGOrWndstOr+w21UlVFokILjJLFhPukbVY8OmwNQ3nZgNJNmKDccusSb4UIe+gtkI+9/bSLJDjqn763f5CQ5TLApmICkqwR0QnUPKZFIUnoozWcQuRbC0Km02knj0tPYx63furGs3x/iPnz83zJDVNtdP3QAAAABJRU5ErkJggg==)](https://usegalaxy.eu/root?tool_id=deeptools_compute_matrix) ![test](https://github.com/deeptools/deepTools/actions/workflows/test.yml/badge.svg) 
-![planemo](https://github.com/deeptools/deepTools/actions/workflows/planemo.yml/badge.svg) ## User-friendly tools for exploring deep-sequencing data @@ -34,35 +33,26 @@ Our [Gallery](http://deeptools.readthedocs.org/en/latest/content/example_gallery deepTools are available for: -* Command line usage (via pip/anaconda/github) +* Command line usage (via pip / conda / github) * Integration into Galaxy servers (via toolshed/API/web-browser) -There are many easy ways to install deepTools. Details can be found [here](https://deeptools.readthedocs.io/en/latest/content/installation.html) +There are many easy ways to install deepTools. More details can be found [here](https://deeptools.readthedocs.io/en/latest/content/installation.html). -**Install by cloning this repository:** +In Brief: -You can install any one of the deepTools branches on command line (linux/mac) by cloning this git repository : +**Install through pypi** - $ git clone https://github.com/deeptools/deepTools - $ cd deepTools - $ python setup.py install - -By default, the script will install the python library and executable -codes globally, which means you need to be root or administrator of -the machine to complete the installation. If you need to -provide a nonstandard install prefix, or any other nonstandard -options, you can provide many command line options to the install -script. + $ pip install deeptools - $ python setup.py --help +**Install via conda** -For example, to install under a specific location use: + $ conda install -c bioconda deeptools - $ python setup.py install --prefix +**Install by cloning the repository** -To install into your home directory, use: - - $ python setup.py install --user + $ git clone https://github.com/deeptools/deepTools + $ cd deepTools + $ pip install . 
### Galaxy Installation diff --git a/deeptools/alignmentSieve.py b/deeptools/alignmentSieve.py index 4f2aa1879..73a247349 100644 --- a/deeptools/alignmentSieve.py +++ b/deeptools/alignmentSieve.py @@ -7,11 +7,8 @@ from deeptools import parserCommon from deeptools.bamHandler import openBam from deeptools.mapReduce import mapReduce -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version from deeptools.utilities import getTLen, smartLabels, getTempFileName +from importlib.metadata import version def parseArguments(): diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index ad63fa14f..913805171 100755 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -18,10 +18,7 @@ # own tools from deeptools.parserCommon import writableFile from deeptools.getFragmentAndReadSize import get_read_and_fragment_length -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version +from importlib.metadata import version def parse_arguments(): diff --git a/deeptools/bigwigAverage.py b/deeptools/bigwigAverage.py index 7153d98f4..9dd12acde 100644 --- a/deeptools/bigwigAverage.py +++ b/deeptools/bigwigAverage.py @@ -1,13 +1,10 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import argparse # to parse command line arguments +import argparse import sys -import multiprocessing -import os import numpy as np from deeptools import parserCommon from deeptools import writeBedGraph_bam_and_bw -import deeptools.deepBlue as db debug = 0 @@ -15,9 +12,8 @@ def parse_arguments(args=None): parentParser = parserCommon.getParentArgParse() outputParser = parserCommon.output() - dbParser = parserCommon.deepBlueOptionalArgs() parser = argparse.ArgumentParser( - parents=[parentParser, outputParser, dbParser], + parents=[parentParser, outputParser], 
formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='This tool average multiple bigWig files based on the number ' 'of mapped reads. To average the bigWig files, the genome is ' @@ -59,7 +55,7 @@ def parse_arguments(args=None): def getType(fname): """ - Tries to determine if a file is a wiggle file from deepBlue or a bigWig file. + Tries to determine if a file is a wiggle file or a bigWig file. Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig' """ if fname.endswith(".wig") or fname.endswith(".wiggle"): @@ -119,29 +115,6 @@ def main(args=None): FUNC = average function_args = {'scaleFactors': scaleFactors} - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.bigwigs): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeChromTiles(foo) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.bigwigs[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - writeBedGraph_bam_and_bw.writeBedGraph( [(b, getType(b)) for b in args.bigwigs], args.outFileName, 0, FUNC, @@ -154,12 +127,3 @@ def main(args=None): smoothLength=False, missingDataAsZero=not args.skipNonCoveredRegions, extendPairedEnds=False) - - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.bigwigs[v]) - else: - for k, v in 
deepBlueFiles: - foo = args.bigwigs[v] - print("{} is stored in {}".format(k, foo)) diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index 4e15c7df8..a4501d45c 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -1,13 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import argparse # to parse command line arguments -import sys -import multiprocessing -import os +import argparse from deeptools import parserCommon from deeptools.getRatio import getRatio from deeptools import writeBedGraph_bam_and_bw -import deeptools.deepBlue as db debug = 0 @@ -15,9 +11,8 @@ def parse_arguments(args=None): parentParser = parserCommon.getParentArgParse() outputParser = parserCommon.output() - dbParser = parserCommon.deepBlueOptionalArgs() parser = argparse.ArgumentParser( - parents=[parentParser, outputParser, dbParser], + parents=[parentParser, outputParser], formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='This tool compares two bigWig files based on the number ' 'of mapped reads. To compare the bigWig files, the genome is ' @@ -104,7 +99,7 @@ def parse_arguments(args=None): def getType(fname): """ - Tries to determine if a file is a wiggle file from deepBlue or a bigWig file. + Tries to determine if a file is a wiggle file or a bigWig file. 
Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig' """ if fname.endswith(".wig") or fname.endswith(".wiggle"): @@ -136,32 +131,6 @@ def main(args=None): 'scaleFactors': scaleFactors, 'pseudocount': args.pseudocount} - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate([args.bigwig1, args.bigwig2]): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeChromTiles(foo) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - if ftuple[1] == 0: - args.bigwig1 = r - else: - args.bigwig2 = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - writeBedGraph_bam_and_bw.writeBedGraph( [(args.bigwig1, getType(args.bigwig1)), (args.bigwig2, getType(args.bigwig2))], @@ -176,17 +145,3 @@ def main(args=None): missingDataAsZero=not args.skipNonCoveredRegions, extendPairedEnds=False, fixedStep=args.fixedStep) - - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - if v == 0: - os.remove(args.bigwig1) - else: - os.remove(args.bigwig2) - else: - for k, v in deepBlueFiles: - foo = args.bigwig1 - if v == 1: - foo = args.bigwig2 - print("{} is stored in {}".format(k, foo)) diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 440358c9b..62a95657c 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -3,18 
+3,11 @@ import argparse import sys -import os -import multiprocessing - from deeptools.parserCommon import writableFile, numberOfProcessors -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version from deeptools import parserCommon from deeptools import heatmapper import deeptools.computeMatrixOperations as cmo -import deeptools.deepBlue as db +from importlib.metadata import version def parse_arguments(args=None): @@ -47,17 +40,16 @@ def parse_arguments(args=None): dest='command', metavar='') - dbParser = parserCommon.deepBlueOptionalArgs() - # scale-regions mode options subparsers.add_parser( 'scale-regions', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[computeMatrixRequiredArgs(), - computeMatrixOutputArgs(), - computeMatrixOptArgs(case='scale-regions'), - parserCommon.gtf_options(), - dbParser], + parents=[ + computeMatrixRequiredArgs(), + computeMatrixOutputArgs(), + computeMatrixOptArgs(case='scale-regions'), + parserCommon.gtf_options() + ], help="In the scale-regions mode, all regions in the BED file are " "stretched or shrunken to the length (in bases) indicated by the user.", usage='An example usage is:\n computeMatrix scale-regions -S ' @@ -70,8 +62,8 @@ def parse_arguments(args=None): parents=[computeMatrixRequiredArgs(), computeMatrixOutputArgs(), computeMatrixOptArgs(case='reference-point'), - parserCommon.gtf_options(), - dbParser], + parserCommon.gtf_options() + ], help="Reference-point refers to a position within a BED region " "(e.g., the starting point). 
In this mode, only those genomic" "positions before (upstream) and/or after (downstream) of the " @@ -402,28 +394,6 @@ def main(args=None): hm = heatmapper.heatmapper() - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.scoreFileName): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - regs = db.makeRegions(args.regionsFileName, args) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.scoreFileName[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - scores_file_list = args.scoreFileName hm.computeMatrix(scores_file_list, args.regionsFileName, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose, allArgs=args) if args.sortRegions not in ['no', 'keep']: @@ -449,11 +419,3 @@ def main(args=None): if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.scoreFileName[v]) - else: - for k, v in deepBlueFiles: - print("{} is stored in {}".format(k, args.scoreFileName[v])) diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index b246b9ce0..6b3272d4b 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -6,10 +6,7 @@ import sys import os import csv -try: # keep python 3.7 support. 
- from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version +from importlib.metadata import version def parse_arguments(): diff --git a/deeptools/deepBlue.py b/deeptools/deepBlue.py deleted file mode 100644 index 864393391..000000000 --- a/deeptools/deepBlue.py +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env python -try: - # python 2 - import xmlrpclib -except: - # python 3 - import xmlrpc.client as xmlrpclib -import time -import tempfile -import os.path -import sys -import pyBigWig -from deeptools.utilities import mungeChromosome -from deeptoolsintervals import GTF -import datetime - - -def isDeepBlue(fname): - """ - Returns true if the file ends in .wig, .wiggle, or .bedgraph, since these indicate a file on the deepBlue server - """ - if fname.endswith(".wig"): - return True - if fname.endswith(".wiggle"): - return True - if fname.endswith(".bedgraph"): - return True - if fname.startswith("http") or fname.startswith("ftp"): - return False - # For ENCODE samples, the "Name" is just the ENCODE sample ID, so as a fallback check for files that aren't there. - if not os.path.exists(fname): - return True - return False - - -def mergeRegions(regions): - """ - Given a list of [(chrom, start, end), ...], merge all overlapping regions - - This returns a dict, where values are sorted lists of [start, end]. 
- """ - bar = sorted(regions) - out = dict() - last = [None, None, None] - for reg in bar: - if reg[0] == last[0] and reg[1] <= last[2]: - if reg[2] > last[2]: - last[2] = reg[2] - continue - else: - if last[0]: - if last[0] not in out: - out[last[0]] = list() - out[last[0]].append([last[1], last[2]]) - last = [reg[0], reg[1], reg[2]] - if last[0] not in out: - out[last[0]] = list() - out[last[0]].append([last[1], last[2]]) - return out - - -def makeTiles(db, args): - """ - Given a deepBlue object, return a list of regions that will be queried - """ - out = [] - for (k, v) in db.chromsTuple: - start = 0 - while start <= v: - end = start + args.binSize - if end > v: - end = v - out.append([k, start, end]) - start += end + args.distanceBetweenBins - return out - - -def makeChromTiles(db): - """ - Make a region for each chromosome - """ - out = [] - for (k, v) in db.chromsTuple: - out.append([k, 0, v]) - return out - - -def makeRegions(BED, args): - """ - Given a list of BED/GTF files, make a list of regions. - These are vaguely extended as appropriate. For simplicity, the maximum of --beforeRegionStartLength - and --afterRegionStartLength are tacked on to each end and transcripts are used for GTF files. 
- """ - itree = GTF(BED, transcriptID=args.transcriptID, transcript_id_designator=args.transcript_id_designator) - o = [] - extend = 0 - # The before/after stuff is specific to computeMatrix - if "beforeRegionStartLength" in args: - extend = max(args.beforeRegionStartLength, args.afterRegionStartLength) - for chrom in itree.chroms: - regs = itree.findOverlaps(chrom, 0, 4294967295) # bigWig files use 32 bit coordinates - for reg in regs: - o.append([chrom, max(0, reg[0] - extend), reg[1] + extend]) - del itree - return o - - -def preloadWrapper(foo): - """ - This is a wrapper around the preload function for multiprocessing - """ - args = foo[2] - regs = foo[3] - res = deepBlue(foo[0], url=args.deepBlueURL, userKey=args.userKey) - return res.preload(regs, tmpDir=args.deepBlueTempDir) - - -class deepBlue(object): - def __init__(self, sample, url="http://deepblue.mpi-inf.mpg.de/xmlrpc", userKey="anonymous_key"): - """ - Connect to the requested deepblue server with the given user key and request the specifed sample from it. - - >>> sample = "S002R5H1.ERX300721.H3K4me3.bwa.GRCh38.20150528.bedgraph" - >>> db = deepBlue(sample) # doctest: +SKIP - >>> assert db.chroms("chr1") == 248956422 # doctest: +SKIP - """ - self.sample = sample - self.url = url - self.userKey = userKey - self.server = xmlrpclib.Server(url, allow_none=True) - self.info = None - self.experimentID = None - self.genome = None - self.chromsDict = None - self.chromsTuple = None - - # Set self.experimentID - experimentID = self.getEID() - if not experimentID: - raise RuntimeError("The requested sample({}) has no associated experiment! 
If you did not intend to use samples on deepBlue, then it appears either you misspelled a file name or (if you're using BAM files for input) one of your BAM files is lacking a valid index.".format(sample)) - - # Set self.info - (status, resp) = self.server.info(self.experimentID, userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching information about '{}': {}".format(resp, sample)) - self.info = resp[0] - - # Set self.genome - genome = self.getGenome() - if not genome: - raise RuntimeError("Unable to determine an appropriate genome for '{}'".format(sample)) - - # Set self.chroms - chroms = self.getChroms() - if not chroms: - raise RuntimeError("Unable to determine chromosome names/sizes for '{}'".format(sample)) - - def getEID(self): - """ - Given a sample name, return its associated experiment ID (or None on error). - - self.experimentID is then the internal ID (e.g., e52525) - """ - (status, resps) = self.server.search(self.sample, "experiments", self.userKey) - if status != "okay": - raise RuntimeError("Received an error ({}) while searching for the experiment associated with '{}'".format(resps, self.sample)) - for resp in resps: - if resp[1] == self.sample: - self.experimentID = resp[0] - return resp[0] - return None - - def getGenome(self): - """ - Determines and sets the genome assigned to a given sample. On error, this raises a runtime exception. - - self.genome is then the internal genome ID. - """ - if "genome" in self.info.keys(): - self.genome = self.info["genome"] - return self.genome - - def getChroms(self): - """ - Determines and sets the chromosome names/sizes for a given sample. On error, this raises a runtime exception. 
- - self.chroms is then a dictionary of chromosome:length pairs - """ - (status, resp) = self.server.chromosomes(self.genome, self.userKey) - if status != "okay": - raise RuntimeError("Received an error while fetching chromosome information for '{}': {}".format(self.sample, resp)) - self.chromsDict = {k: v for k, v in resp} - self.chromsTuple = [(k, v) for k, v in resp] - return resp - - def chroms(self, chrom=None): - """ - Like the chroms() function in pyBigWig, returns either chromsDict (chrom is None) or the length of a given chromosome - """ - if chrom is None: - return self.chromsDict - elif chrom in self.chromsDict: - return self.chromsDict[chrom] - return None - - def close(self): - pass - - def preload(self, regions, tmpDir=None): - """ - Given a sample and a set of regions, write a bigWig file containing the underlying signal. - - This function returns the file name, which needs to be deleted by the calling function at some point. - - This sends queries one chromosome at a time, due to memory limits on deepBlue - """ - startTime = datetime.datetime.now() - regions2 = mergeRegions(regions) - - # Make a temporary file - f = tempfile.NamedTemporaryFile(delete=False, dir=tmpDir) - fname = f.name - f.close() - - # Start with the bigWig file - bw = pyBigWig.open(fname, "w") - bw.addHeader(self.chromsTuple, maxZooms=0) # This won't work in IGV! 
- - # Make a string out of everything in a resonable order - for k, v in self.chromsTuple: - # Munge chromosome names as appropriate - chrom = mungeChromosome(k, regions2.keys()) - if not chrom: - continue - if chrom not in regions2 or len(regions2) == 0: - continue - regionsStr = "\n".join(["{}\t{}\t{}".format(k, reg[0], reg[1]) for reg in regions2[chrom]]) - regionsStr += "\n" - - # Send the regions - (status, regionsID) = self.server.input_regions(self.genome, regionsStr, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while sending regions for '{}': {}".format(regionsID, self.sample)) - - # Get the experiment information - (status, queryID) = self.server.select_experiments(self.sample, k, None, None, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while running select_experiments on file '{}': {}".format(self.sample, queryID)) - if not queryID: - raise RuntimeError("Somehow, we received None as a query ID (file '{}')".format(self.sample)) - - # Intersect - (status, intersectID) = self.server.intersection(queryID, regionsID, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while running intersection on file '{}': {}".format(self.sample, intersectID)) - if not intersectID: - raise RuntimeError("Somehow, we received None as an intersect ID (file '{}')".format(self.sample)) - - # Query the regions - (status, reqID) = self.server.get_regions(intersectID, "START,END,VALUE", self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching regions in file '{}': {}".format(self.sample, reqID)) - - # Wait for the server to process the data - (status, info) = self.server.info(reqID, self.userKey) - request_status = info[0]["state"] - while request_status != "done" and request_status != "failed": - time.sleep(0.1) - (status, info) = self.server.info(reqID, self.userKey) - request_status = info[0]["state"] - - # Get the 
actual data - (status, resp) = self.server.get_request_data(reqID, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching data in file '{}': {}".format(self.sample, resp)) - - for intervals in resp.split("\n"): - interval = intervals.split("\t") - if interval[0] == '': - continue - bw.addEntries([k], [int(interval[0]) - 1], ends=[int(interval[1]) - 1], values=[float(interval[2])]) - bw.close() - sys.stderr.write("{} done (took {})\n".format(self.sample, datetime.datetime.now() - startTime)) - sys.stderr.flush() - - return fname diff --git a/deeptools/deeptools_list_tools.py b/deeptools/deeptools_list_tools.py index 0e4b6a387..32dcf7021 100644 --- a/deeptools/deeptools_list_tools.py +++ b/deeptools/deeptools_list_tools.py @@ -3,10 +3,7 @@ import argparse import sys -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version +from importlib.metadata import version def parse_arguments(args=None): diff --git a/deeptools/estimateReadFiltering.py b/deeptools/estimateReadFiltering.py index 52fded538..8c46a3841 100644 --- a/deeptools/estimateReadFiltering.py +++ b/deeptools/estimateReadFiltering.py @@ -5,10 +5,7 @@ from deeptools import parserCommon, bamHandler, utilities from deeptools.mapReduce import mapReduce from deeptools.utilities import smartLabels -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version +from importlib.metadata import version def parseArguments(): diff --git a/deeptools/estimateScaleFactor.py b/deeptools/estimateScaleFactor.py index 31acea3f5..97869a7bd 100644 --- a/deeptools/estimateScaleFactor.py +++ b/deeptools/estimateScaleFactor.py @@ -6,11 +6,7 @@ from deeptools.SES_scaleFactor import estimateScaleFactor from deeptools.parserCommon import numberOfProcessors -try: # keep python 3.7 support. 
- from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version debug = 0 @@ -102,7 +98,7 @@ def main(args=None): between to samples """ - args = parseArguments().parse_args(args) + args = parseArguments(args) if len(args.bamfiles) > 2: print("SES method to estimate scale factors only works for two samples") exit(0) diff --git a/deeptools/getFragmentAndReadSize.py b/deeptools/getFragmentAndReadSize.py index 427d5308c..0cf2dc8eb 100644 --- a/deeptools/getFragmentAndReadSize.py +++ b/deeptools/getFragmentAndReadSize.py @@ -76,7 +76,8 @@ def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFileNam ------- d : dict tuple of two dictionaries, one for the fragment length and the other - for the read length. The dictionaries summarise the mean, median etc. values +for the read length. The dictionaries summarise the mean, median etc. values + """ bam_handle = bamHandler.openBam(bamFile) diff --git a/deeptools/multiBamSummary.py b/deeptools/multiBamSummary.py index b010001ff..981a99e3c 100644 --- a/deeptools/multiBamSummary.py +++ b/deeptools/multiBamSummary.py @@ -9,11 +9,7 @@ import deeptools.countReadsPerBin as countR from deeptools import parserCommon from deeptools.utilities import smartLabels -try: # keep python 3.7 support. 
- from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version old_settings = np.seterr(all='ignore') diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 50f40beef..f7231921d 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -5,15 +5,10 @@ import argparse import os.path import numpy as np -import multiprocessing from deeptools import parserCommon from deeptools.utilities import smartLabels import deeptools.getScorePerBigWigBin as score_bw -import deeptools.deepBlue as db -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version +from importlib.metadata import version old_settings = np.seterr(all='ignore') @@ -53,17 +48,16 @@ def parse_arguments(args=None): metavar='') parent_parser = parserCommon.getParentArgParse(binSize=False) - dbParser = parserCommon.deepBlueOptionalArgs() # bins mode options subparsers.add_parser( 'bins', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[multiBigwigSummaryArgs(case='bins'), - parent_parser, - parserCommon.gtf_options(suppress=True), - dbParser - ], + parents=[ + multiBigwigSummaryArgs(case='bins'), + parent_parser, + parserCommon.gtf_options(suppress=True) + ], help="The average score is based on equally sized bins " "(10 kilobases by default), which consecutively cover the " "entire genome. 
The only exception is the last bin of a chromosome, which " @@ -79,11 +73,11 @@ def parse_arguments(args=None): subparsers.add_parser( 'BED-file', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[multiBigwigSummaryArgs(case='BED-file'), - parent_parser, - parserCommon.gtf_options(), - dbParser - ], + parents=[ + multiBigwigSummaryArgs(case='BED-file'), + parent_parser, + parserCommon.gtf_options() + ], help="The user provides a BED file that contains all regions " "that should be considered for the analysis. A " "common use is to compare scores (e.g. ChIP-seq scores) between " @@ -230,33 +224,6 @@ def main(args=None): "--outRawCounts. The resulting output will NOT be " "useful with any deepTools program!\n") - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.bwfiles): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - if 'BED' in args: - regs = db.makeRegions(args.BED, args) - else: - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeTiles(foo, args) - del foo - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.bwfiles[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, @@ -312,11 +279,3 @@ def main(args=None): args.outRawCounts.write(fmt.format(*tuple(row))) """ f.close() - - # Clean up temporary bigWig files, if applicable - if not 
args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.bwfiles[v]) - else: - for k, v in deepBlueFiles: - print("{} is stored in {}".format(k, args.bwfiles[v])) diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 37e9f359a..9849d9c43 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -1,9 +1,7 @@ import argparse import os -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version +from importlib.metadata import version +import multiprocessing def check_float_0_1(value): @@ -344,8 +342,12 @@ def getParentArgParse(args=None, binSize=True, blackList=True): def numberOfProcessors(string): - import multiprocessing - availProc = multiprocessing.cpu_count() + try: + # won't work on macOS or windows + # limit threads to what is available (e.g. grid submissions, issue #1199) + availProc = len(os.sched_getaffinity(0)) + except AttributeError: + availProc = multiprocessing.cpu_count() if string == "max/2": # default case # by default half of the available processors are used @@ -865,43 +867,6 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): return parser -def deepBlueOptionalArgs(): - - parser = argparse.ArgumentParser(add_help=False) - dbo = parser.add_argument_group('deepBlue arguments', 'Options used only for remote bedgraph/wig files hosted on deepBlue') - dbo.add_argument( - '--deepBlueURL', - help='For remote files bedgraph/wiggle files hosted on deepBlue, this ' - 'specifies the server URL. The default is ' - '"http://deepblue.mpi-inf.mpg.de/xmlrpc", which should not be ' - 'changed without good reason.', - default='http://deepblue.mpi-inf.mpg.de/xmlrpc') - dbo.add_argument( - '--userKey', - help='For remote files bedgraph/wiggle files hosted on deepBlue, this ' - 'specifies the user key to use for access. The default is ' - '"anonymous_key", which suffices for public datasets. 
If you need ' - 'access to a restricted access/private dataset, then request a ' - 'key from deepBlue and specify it here.', - default='anonymous_key') - dbo.add_argument( - '--deepBlueTempDir', - help='If specified, temporary files from preloading datasets from ' - 'deepBlue will be written here (note, this directory must exist). ' - 'If not specified, where ever temporary files would normally be written ' - 'on your system is used.', - default=None) - dbo.add_argument( - '--deepBlueKeepTemp', - action='store_true', - help='If specified, temporary bigWig files from preloading deepBlue ' - 'datasets are not deleted. A message will be printed noting where these ' - 'files are and what sample they correspond to. These can then be used ' - 'if you wish to analyse the same sample with the same regions again.') - - return parser - - def requiredLength(minL, maxL): """ This is an optional action that can be given to argparse.add_argument(..., nargs='+') diff --git a/deeptools/plotCorrelation.py b/deeptools/plotCorrelation.py index 2b8d9f790..988cf559e 100644 --- a/deeptools/plotCorrelation.py +++ b/deeptools/plotCorrelation.py @@ -10,13 +10,9 @@ matplotlib.rcParams['svg.fonttype'] = 'none' from deeptools import cm # noqa: F401 import matplotlib.pyplot as plt - +from importlib.metadata import version from deeptools.correlation import Correlation from deeptools.parserCommon import writableFile -try: # keep python 3.7 support. 
- from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version old_settings = np.seterr(all='ignore') diff --git a/deeptools/plotCoverage.py b/deeptools/plotCoverage.py index e233dcb71..a3235955c 100755 --- a/deeptools/plotCoverage.py +++ b/deeptools/plotCoverage.py @@ -14,14 +14,10 @@ import matplotlib.pyplot as plt import plotly.offline as py import plotly.graph_objs as go - +from importlib.metadata import version import deeptools.countReadsPerBin as countR from deeptools import parserCommon from deeptools.utilities import smartLabels -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version old_settings = np.seterr(all='ignore') diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index ad666998e..a2149f829 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -62,7 +62,7 @@ def process_args(args=None): return args -def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position): +def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position, fig): """ prepare the plot layout as a grid having as many rows @@ -113,7 +113,7 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro # numbers to heatmapheigt fractions height_ratio = np.concatenate([[sumplot_height, spacer_height], height_ratio]) - grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio) + grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio, figure=fig) return grids @@ -498,9 +498,6 @@ def plotMatrix(hm, outFileName, else: colorbar_position = 'side' - grids = prepare_layout(hm.matrix, (heatmapWidth, heatmapHeight), - showSummaryPlot, showColorbar, perGroup, colorbar_position) - # figsize: w,h tuple in inches figwidth = heatmapWidth / 2.54 figheight = 
heatmapHeight / 2.54 @@ -521,9 +518,19 @@ def plotMatrix(hm, outFileName, else: total_figwidth += 1 / 2.54 - fig = plt.figure(figsize=(total_figwidth, figheight)) + fig = plt.figure(figsize=(total_figwidth, figheight), constrained_layout=True) fig.suptitle(plotTitle, y=1 - (0.06 / figheight)) + grids = prepare_layout( + hm.matrix, + (heatmapWidth, heatmapHeight), + showSummaryPlot, + showColorbar, + perGroup, + colorbar_position, + fig + ) + # color map for the summary plot (profile) on top of the heatmap cmap_plot = plt.get_cmap('jet') numgroups = hm.matrix.get_num_groups() @@ -582,17 +589,6 @@ def plotMatrix(hm, outFileName, iterNum = hm.matrix.get_num_samples() iterNum2 = numgroups ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, None, None, colorbar_position, label_rotation) - if len(yMin) > 1 or len(yMax) > 1: - # replot with a tight layout - import matplotlib.tight_layout as tl - specList = tl.get_subplotspec_list(fig.axes, grid_spec=grids) - renderer = tl.get_renderer(fig) - kwargs = tl.get_tight_layout_figure(fig, fig.axes, specList, renderer, pad=1.08) - - for ax in ax_list: - fig.delaxes(ax) - - ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, kwargs['wspace'], kwargs['hspace'], colorbar_position, label_rotation) if legend_location != 'none': ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, @@ -764,10 +760,10 @@ def plotMatrix(hm, outFileName, fig.colorbar(img, cax=ax) if box_around_heatmaps: - plt.subplots_adjust(wspace=0.10, hspace=0.025, top=0.85, bottom=0, left=0.04, right=0.96) + fig.get_layout_engine().set(wspace=0.10, hspace=0.025, rect=(0.04, 0, 0.96, 0.85)) else: # When no box is plotted the space between heatmaps is reduced - plt.subplots_adjust(wspace=0.05, hspace=0.01, top=0.85, bottom=0, left=0.04, right=0.96) + 
fig.get_layout_engine().set(wspace=0.05, hspace=0.01, rect=(0.04, 0, 0.96, 0.85)) plt.savefig(outFileName, bbox_inches='tight', pad_inches=0.1, dpi=dpi, format=image_format) plt.close() diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py index c43942b85..bc17ed32b 100644 --- a/deeptools/plotPCA.py +++ b/deeptools/plotPCA.py @@ -8,13 +8,9 @@ matplotlib.rcParams['pdf.fonttype'] = 42 matplotlib.rcParams['svg.fonttype'] = 'none' from deeptools import cm # noqa: F401 - +from importlib.metadata import version from deeptools.correlation import Correlation from deeptools.parserCommon import writableFile -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version def parse_arguments(args=None): diff --git a/docs/_static/fix_tables.css b/docs/_static/fix_tables.css deleted file mode 100644 index 189a7c2a3..000000000 --- a/docs/_static/fix_tables.css +++ /dev/null @@ -1,7 +0,0 @@ -.wy-table-responsive table td { - white-space: normal !important; -} - -.wy-table-responsive { - overflow: visible !important; -} diff --git a/docs/content/advanced_features.rst b/docs/content/advanced_features.rst index db325a6e9..ea9143042 100644 --- a/docs/content/advanced_features.rst +++ b/docs/content/advanced_features.rst @@ -8,7 +8,6 @@ Some of the features of deepTools are not self-explanatory. 
Below, we provide li * :doc:`feature/read_extension` * :doc:`feature/unscaled_regions` * :doc:`feature/read_offsets` - * :doc:`feature/deepBlue` * :doc:`feature/plotFingerprint_QC_metrics` * :doc:`feature/plotly` * :doc:`feature/effectiveGenomeSize` diff --git a/docs/content/feature/deepBlue.rst b/docs/content/feature/deepBlue.rst deleted file mode 100644 index 1fd7230c7..000000000 --- a/docs/content/feature/deepBlue.rst +++ /dev/null @@ -1,16 +0,0 @@ -Accessing datasets hosted on deepBlue -===================================== - -`deepBlue `__ is an epigenome dataset server hosting many ENCODE, ROADMAP, BLUEPRINT, and DEEP samples. These are often hosted as normalized signal tracks that can be used with `bigwigCompare`, `bigwigAverage`, `multiBigwigSummary`, and `computeMatrix`. As of version 2.4.0, the aforementioned tools can now access signal files hosted on deepBlue. To do so, simply specify the "experiment name" from deepBlue, such as: - -.. code:: bash - - $ bigwigCompare -b1 S002R5H1.ERX300721.H3K4me3.bwa.GRCh38.20150528.bedgraph -b2 S002R5H1.ERX337057.Input.bwa.GRCh38.20150528.bedgraph -p 10 -o bwCompare.bw - -The file names given to the aforementioned commands are in the "Name" column in deepBlue. Any file ending in ".wig", ".wiggle", ".bedgraph" or otherwise not present on the file system (and not beginning with "http" or "ftp") is assumed to be hosted on deepBlue. This means that for ENCODE samples, one can simply use the ENCODE ID (e.g., "ENCFF721EKA"). - -Internally, deepTools queries deepBlue and creates a temporary bigWig file including signal in all of the regions that deepTools will use. By default, these temporary files are deleted after the command finishes. This can be prevented by specifying `--deepBlueKeepTemp`. The directory to which the temporary files are written can be specified by `--deepBlueTempDir`. 
If you intend to use the same sample multiple times with the same basic command (e.g., computeMatrix with the same regions or bigwigCompare with different samples), then considerable time can be saved by keeping the temporary bigWig file and simply specifying it in subsequent runs (i.e., deepTools won't magically find the previous file, you need to specify it). - -Note that some datasets may be restricted access. In such cases, you can request an account and will receive a "user key". You can then provide that to `bigwigCompare`, `multiBigwigSummary`, or `computeMatrix` using the `--userKey` option. In the off-chance that you have access to other deepBlue servers aside from the main one (http://deepblue.mpi-inf.mpg.de/xmlrpc), you can specify that with the `--deepBlueURL` option. - -.. warning:: bigwigCompare can be incredibly slow due to essentially downloading entire samples. It's faster to simply download bigWig files from the original source. diff --git a/docs/content/feature/effectiveGenomeSize.rst b/docs/content/feature/effectiveGenomeSize.rst index 4cbbb2dd5..e988b18e1 100644 --- a/docs/content/feature/effectiveGenomeSize.rst +++ b/docs/content/feature/effectiveGenomeSize.rst @@ -6,30 +6,56 @@ A number of tools can accept an "effective genome size". This is defined as the 1. The number of non-N bases in the genome. 2. The number of regions (of some size) in the genome that are uniquely mappable (possibly given some maximal edit distance). -Option 1 can be computed using ``faCount`` from `Kent's tools `__. The effective genome size for a number of genomes using this method is given below: - -======== ============== -Genome Effective size -======== ============== -GRCh37 2864785220 -GRCh38 2913022398 -GRCm37 2620345972 -GRCm38 2652783500 -dm3 162367812 -dm6 142573017 -GRCz10 1369631918 -WBcel235 100286401 -TAIR10 119481543 -======== ============== - -These values only appropriate if multimapping reads are included. 
If they are excluded (or there's any MAPQ filter applied), then values derived from option 2 are more appropriate. These are then based on the read length. We can approximate these values for various read lengths using the `khmer program `__ program and ``unique-kmers.py`` in particular. A table of effective genome sizes given a read length using this method is provided below: - -=========== ========== ========== ========== ========== ========= ========= ========== ======== -Read length GRCh37 GRCh38 GRCm37 GRCm38 dm3 dm6 GRCz10 WBcel235 -=========== ========== ========== ========== ========== ========= ========= ========== ======== -50 2685511504 2701495761 2304947926 2308125349 130428560 125464728 1195445591 95159452 -75 2736124973 2747877777 2404646224 2407883318 135004462 127324632 1251132686 96945445 -100 2776919808 2805636331 2462481010 2467481108 139647232 129789873 1280189044 98259998 -150 2827437033 2862010578 2489384235 2494787188 144307808 129941135 1312207169 98721253 -200 2855464000 2887553303 2513019276 2520869189 148524010 132509163 1321355241 98672758 -=========== ========== ========== ========== ========== ========= ========= ========== ======== +Option 1 can be computed using ``faCount`` from `Kents tools `__. 
+The effective genome size for a number of genomes using this method is given below: + + ++---------------+------------------+ +| Genome | Effective size | ++===============+==================+ +|GRCh37 | 2864785220 | ++---------------+------------------+ +|GRCh38 | 2913022398 | ++---------------+------------------+ +|T2T/CHM13CAT_v2| 3117292070 | ++---------------+------------------+ +|GRCm37 | 2620345972 | ++---------------+------------------+ +|GRCm38 | 2652783500 | ++---------------+------------------+ +|dm3 | 162367812 | ++---------------+------------------+ +|dm6 | 142573017 | ++---------------+------------------+ +|GRCz10 | 1369631918 | ++---------------+------------------+ +|GRCz11 | 1368780147 | ++---------------+------------------+ +|WBcel235 | 100286401 | ++---------------+------------------+ +|TAIR10 | 119482012 | ++---------------+------------------+ + + + +These values are only appropriate if multimapping reads are included. If they are excluded (or there's any MAPQ filter applied), +then values derived from option 2 are more appropriate. +These are then based on the read length. +We can approximate these values for various read lengths using the `khmer program `__ and ``unique-kmers.py`` in particular. 
+A table of effective genome sizes given a read length using this method is provided below: + ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|Read length | GRCh37 | GRCh38 | T2T/CHM13CAT_v2 | GRCm37 | GRCm38 | dm3 | dm6 | GRCz10 | GRCz11 | WBcel235 | TAIR10 | ++=================+=================+=================+=================+=================+=================+=================+=================+=================+=================+=================+=================+ +|50 | 2685511454 | 2701495711 | 2725240337 | 2304947876 | 2308125299 | 130428510 | 125464678 | 1195445541 | 1197575653 | 95159402 | 114339094 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|75 | 2736124898 | 2747877702 | 2786136059 | 2404646149 | 2407883243 | 135004387 | 127324557 | 1251132611 | 1250812288 | 96945370 | 115317469 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|100 | 2776919708 | 2805636231 | 2814334875 | 2462480910 | 2467481008 | 139647132 | 129789773 | 1280188944 | 1280354977 | 98259898 | 118459858 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|150 | 2827436883 | 2862010428 | 2931551487 | 2489384085 | 2494787038 | 144307658 | 129940985 | 1312207019 | 1311832909 | 98721103 | 118504138 | 
++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|200 | 2855463800 | 2887553103 | 2936403235 | 2513019076 | 2520868989 | 148523810 | 132508963 | 1321355041 | 1322366338 | 98672558 | 117723393 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|250 | 2855044784 | 2898802627 | 2960856300 | 2528988583 | 2538590322 | 151901455 | 132900923 | 1339205109 | 1342093482 | 101271756 | 119585546 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ diff --git a/docs/content/installation.rst b/docs/content/installation.rst index a8fcaaad6..f0ce0ef5c 100644 --- a/docs/content/installation.rst +++ b/docs/content/installation.rst @@ -2,7 +2,7 @@ Installation ============= Remember -- deepTools are available for **command line usage** as well as for -**integration into Galaxy servers**! +**integration into Galaxy servers** ! .. contents:: :local: @@ -10,68 +10,53 @@ Remember -- deepTools are available for **command line usage** as well as for Command line installation using ``conda`` ----------------------------------------- -DeepTools (including the requirements) can be installed with conda: +The recommended way to install deepTools (including its requirements) is via `miniconda `_ or `anaconda `_. .. 
code:: bash $ conda install -c bioconda deeptools -Command line installation using ``pip`` from pypi --------------------------------------------------- +Command line installation using ``pip`` +--------------------------------------- -Install deepTools using the following command: -:: +deepTools can also be installed using `pip `_. +You can either install the latest release from `pypi `_: - $ pip install deeptools +.. code:: bash -All python requirements should be automatically installed. + $ pip install deeptools -If you need to specify a specific path for the installation of the tools, make use of `pip install`'s numerous options: +or a specific version with: .. code:: bash - $ pip install --install-option="--prefix=/MyPath/Tools/deepTools2.0" git+https://github.com/deeptools/deepTools.git - + $ pip install deeptools==3.5.3 -Command line installation using ``pip`` from source ---------------------------------------------------- +In case you would like to install an unreleased or development version, deepTools can also be installed from the repository: -You are highly recommended to use the 'pypi installation' rather than these more complicated steps. - -1. Download source code -:: +.. code:: bash $ git clone https://github.com/deeptools/deepTools.git - -or if you want a particular release, choose one from https://github.com/deeptools/deepTools/releases: -:: - - $ wget https://github.com/deeptools/deepTools/archive/1.5.12.tar.gz - $ tar -xzvf - -3. install the source code -:: - - $ python -m build - $ pip install dist/*whl + $ cd deepTools + $ pip install . Galaxy installation -------------------- -deepTools can be easily integrated into a local `Galaxy `_. +deepTools can be easily integrated into a local `Galaxy `_. All wrappers and dependencies are available in the `Galaxy Tool -Shed `_. +Shed `_. 
Installation via Galaxy API (recommended) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -First generate an `API Key `_ +First generate an `API Key `_ for your admin user and run the the installation script: :: $ python ./scripts/api/install_tool_shed_repositories.py \ --api YOUR_API_KEY -l http://localhost/ \ - --url http://toolshed.g2.bx.psu.edu/ \ + --url https://toolshed.g2.bx.psu.edu/ \ -o bgruening -r --name suite_deeptools \ --tool-deps --repository-deps --panel-section-name deepTools @@ -80,7 +65,7 @@ latest revision number from the test tool shed or with the following command: :: - $ hg identify http://toolshed.g2.bx.psu.edu/repos/bgruening/suite_deeptools + $ hg identify https://toolshed.g2.bx.psu.edu/repos/bgruening/suite_deeptools You can watch the installation status under: Top Panel --> Admin --> Manage installed tool shed repositories @@ -92,15 +77,3 @@ Installation via web browser - select *Search and browse tool sheds* - Galaxy tool shed --> Sequence Analysis --> deeptools - install deeptools - -Installation with Docker -^^^^^^^^^^^^^^^^^^^^^^^^ - -The deepTools Galaxy instance is also available as a docker container, for those wishing to use the Galaxy framework but who also prefer a virtualized solution. This container is quite simple to install: -:: - - $ sudo docker pull quay.io/bgruening/galaxy-deeptools - -To start and otherwise modify this container, please see the instructions on `the docker-galaxy-stable github repository `__. Note that you must use `bgruening/galaxy-deeptools` in place of `bgruening/galaxy-stable` in the examples, as the deepTools Galaxy container is built on top of the galaxy-stable container. - -.. tip:: For support or questions please make a post on `Biostars `__. For feature requests or bug reports please open an issue `on github `__. 
diff --git a/docs/content/list_of_tools.rst b/docs/content/list_of_tools.rst index 4a8740991..2191f3c23 100644 --- a/docs/content/list_of_tools.rst +++ b/docs/content/list_of_tools.rst @@ -1,16 +1,6 @@ The tools ========= -.. note:: With the release of deepTools 2.0, we renamed a couple of tools: - - * **heatmapper** to :doc:`tools/plotHeatmap` - * **profiler** to :doc:`tools/plotProfile` - * **bamCorrelate** to :doc:`tools/multiBamSummary` - * **bigwigCorrelate** to :doc:`tools/multiBigwigSummary` - * **bamFingerprint** to :doc:`tools/plotFingerprint`. - - For more, see :doc:`changelog`. - .. contents:: :local: @@ -68,11 +58,18 @@ A typical deepTools command could look like this: --ignoreDuplicates \ --scaleFactor 0.5 -You can always see all available command-line options via --help: +You can always see all available command-line options via --help or -h: .. code:: bash $ bamCoverage --help + $ bamCoverage -h + +And a minimal usage example can be shown by running a command without any arguments: + +.. code:: bash + + $ bamCoverage - Output format of plots should be indicated by the file ending, e.g. ``MyPlot.pdf`` will return a pdf file, ``MyPlot.png`` a png-file - All tools that produce plots can also output the underlying data - this can be useful in cases where you don't like the deepTools visualization, as you can then use the data matrices produced by deepTools with your favorite plotting tool, such as R @@ -82,14 +79,15 @@ Parameters to decrease the run time """"""""""""""""""""""""""""""""""" - ``numberOfProcessors`` - Number of processors to be used - For example, setting ``--numberOfProcessors 10`` will split up the - workload internally into 10 chunks, which will be - processed in parallel. + +For example, setting ``--numberOfProcessors 10`` will split up the workload internally into 10 chunks, which will be processed in parallel. +Note that for highly fragmented assemblies (> 1000 contigs) the runtime increases drastically. 
Consider including only canonical chromosomes in cases like this. + - ``region`` - Process only a single genomic region. - This is particularly useful when you're still trying to figure out the best parameter setting. You can focus on a certain genomic region by setting, e.g., ``--region chr2`` or - ``--region chr2:100000-200000`` -These parameters are optional and available throughout almost all deepTools. +This is particularly useful when you're still trying to figure out the best parameter setting. You can focus on a certain genomic region by setting, e.g., ``--region chr2`` or ``--region chr2:100000-200000`` + +Both parameters are optional and available throughout almost all deepTools. Filtering BAMs while processing """"""""""""""""""""""""""""""" @@ -103,7 +101,7 @@ We offer several ways to filter those BAM files on the fly so that you don't nee Only reads with a mapping quality score of at least this are considered - ``samFlagInclude`` Include reads based on the SAM flag, e.g. ``--samFlagInclude 64`` gets reads that are first in a pair. For translating SAM flags into English, go to: `https://broadinstitute.github.io/picard/explain-flags.html `_ -- ``samFlagExclude`` +- ``samFlagExclude`` Exclude reads based on the SAM flags - see previous explanation. These parameters are optional and available throughout deepTools. diff --git a/docs/index.rst b/docs/index.rst index a512e8c11..1d739da73 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -56,7 +56,7 @@ that fulfills the following criteria: we can add more and more modules and make use of established methods) -.. tip:: For support or questions please post to `Biostars `__. For bug reports and feature requests please open an issue ``__. +.. tip:: For support or questions please post to `Biostars `__. For bug reports and feature requests please open an issue `on github `__. Please cite deepTools2 as follows: @@ -67,6 +67,6 @@ Steffen Heyne, Friederike Dündar, and Thomas Manke. .. 
image:: images/logo_mpi-ie.jpg -This tool suite is developed by the `Bioinformatics Facility `_ at the +This tool suite is developed by the `Bioinformatics Facility `_ at the `Max Planck Institute for Immunobiology and Epigenetics, -Freiburg `_. +Freiburg `_. diff --git a/docs/requirements.txt b/docs/requirements.txt index 0a9300aa9..f330fe4e6 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx -mock -sphinx_rtd_theme -sphinx-argparse \ No newline at end of file +sphinx==7.2.6 +mock==5.1.0 +sphinx_rtd_theme==2.0.0 +sphinx-argparse==0.4.0 \ No newline at end of file diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html index f6fc75ddd..32e730537 100644 --- a/docs/source/_templates/layout.html +++ b/docs/source/_templates/layout.html @@ -1,3 +1,3 @@ {% extends "!layout.html" %} {% set script_files = script_files + ["_static/welcome_owl.carousel.min.js"] %} -{% set css_files = css_files + ["_static/welcome_owl.carousel.css", "_static/welcome_owl.carousel.theme.css", "_static/fix_tables.css"] %} +{% set css_files = css_files + ["_static/welcome_owl.carousel.css", "_static/welcome_owl.carousel.theme.css"] %} diff --git a/docs/source/deeptools.rst b/docs/source/deeptools.rst index e85e7c75c..97d968994 100644 --- a/docs/source/deeptools.rst +++ b/docs/source/deeptools.rst @@ -44,14 +44,6 @@ deeptools.countReadsPerBin module :undoc-members: :show-inheritance: -deeptools.deepBlue ------------------- - -.. 
automodule:: deeptools.deepBlue - :members: - :undoc-members: - :show-inheritance: - deeptools.getFragmentAndReadSize module --------------------------------------- diff --git a/galaxy/wrapper/deepTools_macros.xml b/galaxy/wrapper/deepTools_macros.xml index 85cbcb2aa..49c8dd9ed 100755 --- a/galaxy/wrapper/deepTools_macros.xml +++ b/galaxy/wrapper/deepTools_macros.xml @@ -1,7 +1,7 @@ --numberOfProcessors "\${GALAXY_SLOTS:-4}" - 3.5.4 + 3.5.5 22.05 diff --git a/pyproject.toml b/pyproject.toml index 3b877aa5f..5a902d1db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = [ [project] name = "deepTools" -version = "3.5.4" +version = "3.5.5" authors = [ {name="Fidel Ramirez"}, {name="Devon P Ryan"}, @@ -19,7 +19,7 @@ authors = [ {name="Thomas Manke"}, {email="bioinfo-core@ie-freiburg.mpg.de"} ] -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ "numpy >= 1.9.0", "scipy >= 0.17.0", @@ -29,8 +29,7 @@ dependencies = [ "pyBigWig >= 0.2.1", "py2bit >= 0.2.0", "plotly >= 4.9", - "deeptoolsintervals >= 0.1.8", - "importlib-metadata" # python 3.7 support + "deeptoolsintervals >= 0.1.8" ] description = "Useful tools for exploring deep sequencing data." license = {file = "LICENSE.txt"}