Skip to content

Commit

Permalink
code format for passing pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Xu, Beisi authored and Xu, Beisi committed Jul 12, 2023
1 parent a7059c1 commit fb92d3c
Show file tree
Hide file tree
Showing 55 changed files with 9,543 additions and 5,934 deletions.
103 changes: 63 additions & 40 deletions deeptools/SES_scaleFactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,22 @@
from deeptools import bamHandler
import deeptools.countReadsPerBin as countR

old_settings = np.seterr(all='ignore')
old_settings = np.seterr(all="ignore")
debug = 0


def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
normalizationLength,
avg_method='median', blackListFileName=None, numberOfProcessors=1,
verbose=False, chrsToSkip=[], mappingStatsList=[]):
def estimateScaleFactor(
bamFilesList,
binLength,
numberOfSamples,
normalizationLength,
avg_method="median",
blackListFileName=None,
numberOfProcessors=1,
verbose=False,
chrsToSkip=[],
mappingStatsList=[],
):
r"""
Subdivides the genome into chunks to be analyzed in parallel
using several processors. The code handles the creation of
Expand Down Expand Up @@ -83,20 +91,28 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
else:
mappedReads = []
for fname in bamFilesList:
mappedReads.append(bamHandler.openBam(fname, returnStats=True, nThreads=numberOfProcessors)[1])

sizeFactorBasedOnMappedReads = np.array(mappedReads, dtype='float64')

sizeFactorBasedOnMappedReads = sizeFactorBasedOnMappedReads.min() / sizeFactorBasedOnMappedReads

cr = countR.CountReadsPerBin(bamFilesList,
binLength=binLength,
numberOfSamples=numberOfSamples,
extendReads=False,
blackListFileName=blackListFileName,
numberOfProcessors=numberOfProcessors,
verbose=verbose,
chrsToSkip=chrsToSkip)
mappedReads.append(
bamHandler.openBam(
fname, returnStats=True, nThreads=numberOfProcessors
)[1]
)

sizeFactorBasedOnMappedReads = np.array(mappedReads, dtype="float64")

sizeFactorBasedOnMappedReads = (
sizeFactorBasedOnMappedReads.min() / sizeFactorBasedOnMappedReads
)

cr = countR.CountReadsPerBin(
bamFilesList,
binLength=binLength,
numberOfSamples=numberOfSamples,
extendReads=False,
blackListFileName=blackListFileName,
numberOfProcessors=numberOfProcessors,
verbose=verbose,
chrsToSkip=chrsToSkip,
)

try:
num_reads_per_bin = cr.run()
Expand Down Expand Up @@ -127,7 +143,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
# Take a lower rank to move to a region with probably
# less peaks and more background.
maxIndex = int(maxIndex * 0.8)
while(maxIndex < len(p)):
while maxIndex < len(p):
# in rare cases the maxIndex maps to a zero value.
# In such cases, the next index is used until
# a non zero value appears.
Expand All @@ -136,8 +152,10 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
break
maxIndex += 1

meanSES = [np.mean(np.sort(num_reads_per_bin[0, :])[:maxIndex]),
np.mean(np.sort(num_reads_per_bin[1, :])[:maxIndex])]
meanSES = [
np.mean(np.sort(num_reads_per_bin[0, :])[:maxIndex]),
np.mean(np.sort(num_reads_per_bin[1, :])[:maxIndex]),
]

# the maxIndex may be too close to the the signal regions
# so i take a more conservative approach by taking a close number
Expand All @@ -151,9 +169,9 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
mean = []
std = []
for values in num_reads_per_bin:
maxNumReads = (np.percentile(values, 90))
maxNumReads = np.percentile(values, 90)
if maxNumReads == 0:
maxNumReads = (np.percentile(values, 99))
maxNumReads = np.percentile(values, 99)
if maxNumReads == 0:
print("all genomic regions sampled from one ")
"of the bam files have no reads.\n"
Expand All @@ -163,33 +181,38 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
std.append(np.std(values))

mean = np.array(mean)
readsPerBin = mean if avg_method == 'mean' else median
readsPerBin = mean if avg_method == "mean" else median

if min(median) == 0:
idx_zero = [ix + 1 for ix, value in enumerate(median) if value == 0]
exit("\n*ERROR*: The median coverage computed is zero for sample(s) #{}\n"
"Try selecting a larger sample size or a region with coverage\n".format(idx_zero))
exit(
"\n*ERROR*: The median coverage computed is zero for sample(s) #{}\n"
"Try selecting a larger sample size or a region with coverage\n".format(
idx_zero
)
)

sizeFactor = sizeFactorsSES
return {'size_factors': sizeFactor,
'size_factors_based_on_mapped_reads': sizeFactorBasedOnMappedReads,
'size_factors_SES': sizeFactorsSES,
'size_factors_based_on_mean': mean.min() / mean,
'size_factors_based_on_median': median.min() / median,
'mean': mean,
'meanSES': meanSES,
'median': median,
'reads_per_bin': readsPerBin,
'std': std,
'sites_sampled': sitesSampled}
return {
"size_factors": sizeFactor,
"size_factors_based_on_mapped_reads": sizeFactorBasedOnMappedReads,
"size_factors_SES": sizeFactorsSES,
"size_factors_based_on_mean": mean.min() / mean,
"size_factors_based_on_median": median.min() / median,
"mean": mean,
"meanSES": meanSES,
"median": median,
"reads_per_bin": readsPerBin,
"std": std,
"sites_sampled": sitesSampled,
}


class Tester(object):
    """Test fixture: holds paths to the bundled test BAM files and a test chromosome.

    NOTE(review): this text was extracted from a rendered GitHub diff page, so
    the duplicated ``self.chrom`` assignment below is a before/after diff pair
    (old single-quoted line, new double-quoted line), not intentional code.
    In the actual repository file only the second line exists.
    """

    def __init__(self):
        # Directory containing the test data shipped alongside the module.
        self.root = os.path.dirname(os.path.abspath(__file__)) + "/test/test_data/"
        self.bamFile1 = self.root + "testA.bam"
        self.bamFile2 = self.root + "testB.bam"
        # Reset the module-level debug flag for a clean test run.
        global debug
        debug = 0
        # Diff artifact: old (removed) line followed by its replacement.
        self.chrom = '3R'
        self.chrom = "3R"
3 changes: 1 addition & 2 deletions deeptools/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@

# This file is originally generated from Git information by running 'setup.py
# version'. Distribution tarballs contain a pre-generated copy of this file.

__version__ = '3.5.1'
__version__ = "3.5.1"
Loading

0 comments on commit fb92d3c

Please sign in to comment.