diff --git a/.circleci/config.yml b/.circleci/config.yml index d1ec5b25..5fbe3c8b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -21,6 +21,7 @@ jobs: # The executor is the environment in which the steps below will be executed docker: - image: condaforge/mambaforge + resource_class: large # 8GB of memory # CircleCI will report the results back to your VCS provider. steps: - checkout diff --git a/README.md b/README.md index c4a11ede..461f06a2 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -CircleCI [![CircleCI](https://dl.circleci.com/status-badge/img/gh/broadinstitute/ABC-Enhancer-Gene-Prediction/tree/dev.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/broadinstitute/ABC-Enhancer-Gene-Prediction/tree/dev) +CircleCI [![CircleCI](https://dl.circleci.com/status-badge/img/gh/broadinstitute/ABC-Enhancer-Gene-Prediction.svg?style=svg)](https://app.circleci.com/pipelines/github/broadinstitute/ABC-Enhancer-Gene-Prediction) > :memo: **Note:** This is a revamp of the ABC codebase presented in [1]. If you wish to access that version of the ABC repo, please check out https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction/tree/master. diff --git a/docs/usage/getting_started.rst b/docs/usage/getting_started.rst index b69cc938..ba74cfc2 100644 --- a/docs/usage/getting_started.rst +++ b/docs/usage/getting_started.rst @@ -15,6 +15,7 @@ Installation - Make sure you're not using strict channel priorities: ``conda config --set channel_priority flexible``. Otherwise, you may encounter package conflicts later when installing abc. - To install mamba: ``conda create -n mamba -c conda-forge mamba -y`` - We recommend mamba as using conda can take 1hr+ for setup + - See troubleshooting page if you run into issues Setup Conda Environment @@ -123,7 +124,7 @@ biosamples config is a tsv separated file with the following columns - If you dumped hic into a directory via JuicerTools, use ``juicebox`` - If you have a bedpe file for contact, it should be a tab delimited file containing 8 columns (chr1,start1,end1,chr2,start2,end2,name,score) #. HiC_resolution (int) - - Recommended to use 5KB (kilobases) + - Currently only 5KB (kilobases) is supported - 5KB means dna regions are bucketed into 5KB bins and we measure contact between those bins #. alt_TSS (optional; not recommended to fill) - Alternative TSS reference file diff --git a/docs/usage/troubleshooting.rst b/docs/usage/troubleshooting.rst index 6cdd211c..498b29c3 100644 --- a/docs/usage/troubleshooting.rst +++ b/docs/usage/troubleshooting.rst @@ -10,3 +10,8 @@ If you're on MacOSX, make sure to remove some of the requirements in abcenv.yml. If there are incompatibility issues, try building off the 'release.yml' conda environment. + +malloc: Heap corruption detected +-------------------------------- +We've seen this happen when running on MacOSX during the prediction rule. It's an error thrown by the hicstraw library and happens the first time you use it. +Re-running the pipeline should fix it. diff --git a/tests/config/test_biosamples.tsv b/tests/config/test_biosamples.tsv index 3025d423..dc5fda28 100644 --- a/tests/config/test_biosamples.tsv +++ b/tests/config/test_biosamples.tsv @@ -1,3 +1,3 @@ biosample DHS ATAC H3K27ac default_accessibility_feature HiC_file HiC_type HiC_resolution alt_TSS alt_genes -K562_chr22 example_chr/chr22/ENCFF860XAE.chr22.sorted.se.bam example_chr/chr22/ENCFF790GFL.chr22.sorted.se.bam DHS https://www.encodeproject.org/files/ENCFF621AIY/@@download/ENCFF621AIY.hic hic 5000 example_chr/chr22/RefSeqCurated.170308.bed.CollapsedGeneBounds.chr22.hg38.TSS500bp.bed example_chr/chr22/RefSeqCurated.170308.bed.CollapsedGeneBounds.chr22.hg38.bed +K562_chr22 example_chr/chr22/ENCFF860XAE.chr22.sorted.se.bam example_chr/chr22/ENCFF790GFL.chr22.sorted.se.bam DHS https://encode-public.s3.amazonaws.com/2022/05/15/0571c671-3645-4f92-beae-51dfd3f42c36/ENCFF621AIY.hic hic 5000 example_chr/chr22/RefSeqCurated.170308.bed.CollapsedGeneBounds.chr22.hg38.TSS500bp.bed example_chr/chr22/RefSeqCurated.170308.bed.CollapsedGeneBounds.chr22.hg38.bed K562_chr22_tagAlign example_chr/chr22/chr22.sorted.tagAlign.gz ATAC example_chr/chr22/RefSeqCurated.170308.bed.CollapsedGeneBounds.chr22.hg38.TSS500bp.bed example_chr/chr22/RefSeqCurated.170308.bed.CollapsedGeneBounds.chr22.hg38.bed diff --git a/tests/test_predictor.py b/tests/test_predictor.py index cbfb998e..ad9afcf1 100644 --- a/tests/test_predictor.py +++ b/tests/test_predictor.py @@ -7,7 +7,7 @@ from predictor import add_hic_from_hic_file import pandas as pd -HIC_FILE = "https://www.encodeproject.org/files/ENCFF621AIY/@@download/ENCFF621AIY.hic" +HIC_FILE = "https://encode-public.s3.amazonaws.com/2022/05/15/0571c671-3645-4f92-beae-51dfd3f42c36/ENCFF621AIY.hic" # this file has 3k rows of E-G pairs with valid contact values # contact values were generated from the original doubly stochastic method diff --git a/workflow/envs/abcenv.yml b/workflow/envs/abcenv.yml index f526f5ea..36f07c8f 100644 --- a/workflow/envs/abcenv.yml +++ b/workflow/envs/abcenv.yml @@ -5,34 +5,378 @@ channels: - defaults - anaconda dependencies: - - bedtools=2.26.0 - - black - - click - - macs2 - - matplotlib - - numpy - - pandas - - pybigwig - - pip - - pyarrow - - pyranges - - pysam - - pytest-xdist - - python>=3.6 - - samtools>=1.9 # to avoid open ssl issue: https://github.com/merenlab/anvio/issues/1479 - - scipy - - seaborn - # smart_open version >=7 leads to errors with Snakemake https://pastebin.com/uBmBwdnK - - smart_open<7 - - snakemake>=7,<8 - - sphinx - - tabix - - yaml - # The following 3 are needed to compile hic-straw in HPC - # You may need to remove them if working on macosx - - gcc - - gxx - - zlib + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - _r-mutex=1.0.1=anacondar_1 + - aioeasywebdav=2.4.0=pyha770c72_0 + - aiohttp=3.9.5=py310h2372a71_0 + - aiosignal=1.3.1=pyhd8ed1ab_0 + - alabaster=0.7.16=pyhd8ed1ab_0 + - alsa-lib=1.2.11=hd590300_1 + - amply=0.1.6=pyhd8ed1ab_0 + - appdirs=1.4.4=pyh9f0ad1d_0 + - async-timeout=4.0.3=pyhd8ed1ab_0 + - attmap=0.13.2=pyhd8ed1ab_0 + - attr=2.5.1=h166bdaf_1 + - attrs=23.2.0=pyh71513ae_0 + - aws-c-auth=0.7.22=h96bc93b_2 + - aws-c-cal=0.6.14=h88a6e22_1 + - aws-c-common=0.9.19=h4ab18f5_0 + - aws-c-compression=0.2.18=h83b837d_6 + - aws-c-event-stream=0.4.2=ha47c788_12 + - aws-c-http=0.8.1=h29d6fba_17 + - aws-c-io=0.14.8=h21d4f22_5 + - aws-c-mqtt=0.10.4=h759edc4_4 + - aws-c-s3=0.5.9=h594631b_3 + - aws-c-sdkutils=0.1.16=h83b837d_2 + - aws-checksums=0.1.18=h83b837d_6 + - aws-crt-cpp=0.26.9=he3a8b3b_0 + - aws-sdk-cpp=1.11.329=hba8bd5f_3 + - babel=2.14.0=pyhd8ed1ab_0 + - bcrypt=4.1.3=py310he421c4c_0 + - bedtools=2.26.0=0 + - binutils_impl_linux-64=2.40=ha1999f0_1 + - black=24.4.2=py310hff52083_0 + - boto3=1.34.114=pyhd8ed1ab_0 + - botocore=1.34.114=pyge310_1234567_0 + - brotli=1.1.0=hd590300_1 + - brotli-bin=1.1.0=hd590300_1 + - brotli-python=1.1.0=py310hc6cd4ac_1 + - bwidget=1.9.14=ha770c72_1 + - bzip2=1.0.8=hd590300_5 + - c-ares=1.28.1=hd590300_0 + - ca-certificates=2024.2.2=hbcca054_0 + - cachetools=5.3.3=pyhd8ed1ab_0 + - cairo=1.18.0=h3faef2a_0 + - certifi=2024.2.2=pyhd8ed1ab_0 + - cffi=1.16.0=py310h2fee648_0 + - charset-normalizer=3.3.2=pyhd8ed1ab_0 + - click=8.1.7=unix_pyh707e725_0 + - coin-or-cbc=2.10.10=h9002f0b_0 + - coin-or-cgl=0.60.7=h516709c_0 + - coin-or-clp=1.17.8=h1ee7a9c_0 + - coin-or-osi=0.108.10=haf5fa05_0 + - coin-or-utils=2.11.11=hee58242_0 + - coincbc=2.10.10=0_metapackage + - colorama=0.4.6=pyhd8ed1ab_0 + - configargparse=1.7=pyhd8ed1ab_0 + - connection_pool=0.0.3=pyhd3deb0d_0 + - contourpy=1.2.1=py310hd41b1e2_0 + - cryptography=42.0.7=py310hb1bd9d3_0 + - curl=8.8.0=he654da7_0 + - cycler=0.12.1=pyhd8ed1ab_0 + - datrie=0.8.2=py310h2372a71_7 + - dbus=1.13.6=h5008d03_3 + - defusedxml=0.7.1=pyhd8ed1ab_0 + - dpath=2.1.6=pyha770c72_0 + - dropbox=11.36.2=pyhd8ed1ab_0 + - eido=0.2.2=pyhd8ed1ab_0 + - exceptiongroup=1.2.0=pyhd8ed1ab_2 + - execnet=2.1.1=pyhd8ed1ab_0 + - expat=2.6.2=h59595ed_0 + - filechunkio=1.8=py_2 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=h77eed37_2 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - fonttools=4.52.4=py310hc51659f_0 + - freetype=2.12.1=h267a509_2 + - fribidi=1.0.10=h36c2ea0_0 + - frozenlist=1.4.1=py310h2372a71_0 + - ftputil=5.1.0=pyhd8ed1ab_0 + - gcc=13.2.0=hc7bed06_7 + - gcc_impl_linux-64=13.2.0=h9eb54c0_7 + - gettext=0.22.5=h59595ed_2 + - gettext-tools=0.22.5=h59595ed_2 + - gflags=2.2.2=he1b5a44_1004 + - gfortran_impl_linux-64=13.2.0=h738fc78_7 + - gitdb=4.0.11=pyhd8ed1ab_0 + - gitpython=3.1.43=pyhd8ed1ab_0 + - glib=2.80.2=hf974151_0 + - glib-tools=2.80.2=hb6ce0ca_0 + - glog=0.7.0=hed5481d_0 + - google-api-core=2.19.0=pyhd8ed1ab_0 + - google-api-python-client=2.131.0=pyhd8ed1ab_0 + - google-auth=2.29.0=pyhca7485f_0 + - google-auth-httplib2=0.2.0=pyhd8ed1ab_0 + - google-cloud-core=2.4.1=pyhd8ed1ab_0 + - google-cloud-storage=2.16.0=pyhca7485f_0 + - google-crc32c=1.1.2=py310hc5c09a0_5 + - google-resumable-media=2.7.0=pyhd8ed1ab_0 + - googleapis-common-protos=1.63.0=pyhd8ed1ab_0 + - graphite2=1.3.13=h59595ed_1003 + - grpcio=1.62.2=py310h1b8f574_0 + - gst-plugins-base=1.24.4=h9ad1361_0 + - gstreamer=1.24.4=haf2f30d_0 + - gxx=13.2.0=hc7bed06_7 + - gxx_impl_linux-64=13.2.0=h2a599c4_7 + - harfbuzz=8.5.0=hfac3d4d_0 + - htslib=1.20=h81da01d_0 + - httplib2=0.22.0=pyhd8ed1ab_0 + - humanfriendly=10.0=pyhd8ed1ab_6 + - icu=73.2=h59595ed_0 + - idna=3.7=pyhd8ed1ab_0 + - imagesize=1.4.1=pyhd8ed1ab_0 + - importlib-metadata=7.1.0=pyha770c72_0 + - importlib_resources=6.4.0=pyhd8ed1ab_0 + - iniconfig=2.0.0=pyhd8ed1ab_0 + - jinja2=3.1.4=pyhd8ed1ab_0 + - jmespath=1.0.1=pyhd8ed1ab_0 + - jsonschema=4.22.0=pyhd8ed1ab_0 + - jsonschema-specifications=2023.12.1=pyhd8ed1ab_0 + - jupyter_core=5.7.2=py310hff52083_0 + - kernel-headers_linux-64=2.6.32=he073ed8_17 + - keyutils=1.6.1=h166bdaf_0 + - kiwisolver=1.4.5=py310hd41b1e2_1 + - krb5=1.21.2=h659d440_0 + - lame=3.100=h166bdaf_1003 + - lcms2=2.16=hb7c19ff_0 + - ld_impl_linux-64=2.40=hf3520f5_1 + - lerc=4.0.0=h27087fc_0 + - libabseil=20240116.2=cxx17_h59595ed_0 + - libarrow=16.1.0=hcb6531f_6_cpu + - libarrow-acero=16.1.0=hac33072_6_cpu + - libarrow-dataset=16.1.0=hac33072_6_cpu + - libarrow-substrait=16.1.0=h7e0c224_6_cpu + - libasprintf=0.22.5=h661eb56_2 + - libasprintf-devel=0.22.5=h661eb56_2 + - libblas=3.9.0=22_linux64_openblas + - libbrotlicommon=1.1.0=hd590300_1 + - libbrotlidec=1.1.0=hd590300_1 + - libbrotlienc=1.1.0=hd590300_1 + - libcap=2.69=h0f662aa_0 + - libcblas=3.9.0=22_linux64_openblas + - libclang-cpp15=15.0.7=default_h127d8a8_5 + - libclang13=18.1.6=default_h5d6823c_0 + - libcrc32c=1.1.2=h9c3ff4c_0 + - libcups=2.3.3=h4637d8d_4 + - libcurl=8.8.0=hca28451_0 + - libdeflate=1.20=hd590300_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=hd590300_2 + - libevent=2.1.12=hf998b51_1 + - libexpat=2.6.2=h59595ed_0 + - libffi=3.4.2=h7f98852_5 + - libflac=1.4.3=h59595ed_0 + - libgcc=7.2.0=h69d50b8_2 + - libgcc-devel_linux-64=13.2.0=hceb6213_107 + - libgcc-ng=13.2.0=h77fa898_7 + - libgcrypt=1.10.3=hd590300_0 + - libgettextpo=0.22.5=h59595ed_2 + - libgettextpo-devel=0.22.5=h59595ed_2 + - libgfortran-ng=13.2.0=h69a702a_7 + - libgfortran5=13.2.0=hca663fb_7 + - libglib=2.80.2=hf974151_0 + - libgomp=13.2.0=h77fa898_7 + - libgoogle-cloud=2.24.0=h2736e30_0 + - libgoogle-cloud-storage=2.24.0=h3d9a0c8_0 + - libgpg-error=1.49=h4f305b6_0 + - libgrpc=1.62.2=h15f2491_0 + - libiconv=1.17=hd590300_2 + - libjpeg-turbo=3.0.0=hd590300_1 + - liblapack=3.9.0=22_linux64_openblas + - liblapacke=3.9.0=22_linux64_openblas + - libllvm15=15.0.7=hb3ce162_4 + - libllvm18=18.1.6=hb77312f_0 + - libnghttp2=1.58.0=h47da74e_1 + - libnsl=2.0.1=hd590300_0 + - libogg=1.3.4=h7f98852_1 + - libopenblas=0.3.27=pthreads_h413a1c8_0 + - libopus=1.3.1=h7f98852_1 + - libparquet=16.1.0=h6a7eafb_6_cpu + - libpng=1.6.43=h2797004_0 + - libpq=16.3=ha72fbe1_0 + - libprotobuf=4.25.3=h08a7969_0 + - libre2-11=2023.09.01=h5a48ba9_2 + - libsanitizer=13.2.0=h6ddb7a1_7 + - libsndfile=1.2.2=hc60ed4a_1 + - libsodium=1.0.18=h36c2ea0_1 + - libsqlite=3.45.3=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-devel_linux-64=13.2.0=hceb6213_107 + - libstdcxx-ng=13.2.0=hc0a3c3a_7 + - libsystemd0=255=h3516f8a_1 + - libthrift=0.19.0=hb90f79a_1 + - libtiff=4.6.0=h1dd3fc0_3 + - libutf8proc=2.8.0=h166bdaf_0 + - libuuid=2.38.1=h0b41bf4_0 + - libvorbis=1.3.7=h9c3ff4c_0 + - libwebp-base=1.4.0=hd590300_0 + - libxcb=1.15=h0b41bf4_0 + - libxcrypt=4.4.36=hd590300_1 + - libxkbcommon=1.7.0=h662e7e4_0 + - libxml2=2.12.7=hc051c1a_0 + - libzlib=1.2.13=h4ab18f5_6 + - logmuse=0.2.6=pyh8c360ce_0 + - lz4-c=1.9.4=hcb278e6_0 + - macs2=2.2.9.1=py310h4b81fae_0 + - make=4.3=hd18ef5c_1 + - markdown-it-py=3.0.0=pyhd8ed1ab_0 + - markupsafe=2.1.5=py310h2372a71_0 + - matplotlib=3.8.4=py310hff52083_2 + - matplotlib-base=3.8.4=py310hef631a5_2 + - mdurl=0.1.2=pyhd8ed1ab_0 + - mpg123=1.32.6=h59595ed_0 + - multidict=6.0.5=py310h2372a71_0 + - munkres=1.0.7=py_1 + - mypy_extensions=1.0.0=pyha770c72_0 + - mysql-common=8.3.0=hf1915f5_4 + - mysql-libs=8.3.0=hca2cd23_4 + - natsort=8.4.0=pyhd8ed1ab_0 + - nbformat=5.10.4=pyhd8ed1ab_0 + - ncls=0.0.68=py310h4b81fae_2 + - ncurses=6.5=h59595ed_0 + - nspr=4.35=h27087fc_0 + - nss=3.100=hca3bf56_0 + - numpy=1.26.4=py310hb13e2d6_0 + - oauth2client=4.1.3=py_0 + - openjpeg=2.5.2=h488ebb8_0 + - openssl=3.3.0=h4ab18f5_3 + - orc=2.0.1=h17fec99_1 + - packaging=24.0=pyhd8ed1ab_0 + - pandas=2.2.2=py310hf9f9076_1 + - pango=1.52.2=ha41ecd1_0 + - paramiko=3.4.0=pyhd8ed1ab_0 + - pathspec=0.12.1=pyhd8ed1ab_0 + - patsy=0.5.6=pyhd8ed1ab_0 + - pcre2=10.43=hcad00b1_0 + - peppy=0.40.2=pyhd8ed1ab_0 + - pillow=10.3.0=py310hf73ecf8_0 + - pip=24.0=pyhd8ed1ab_0 + - pixman=0.43.2=h59595ed_0 + - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1 + - plac=1.4.3=pyhd8ed1ab_0 + - platformdirs=4.2.2=pyhd8ed1ab_0 + - pluggy=1.5.0=pyhd8ed1ab_0 + - ply=3.11=pyhd8ed1ab_2 + - prettytable=3.10.0=pyhd8ed1ab_0 + - proto-plus=1.23.0=pyhd8ed1ab_0 + - protobuf=4.25.3=py310ha8c1f0e_0 + - psutil=5.9.8=py310h2372a71_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - pulp=2.7.0=py310hff52083_1 + - pulseaudio-client=17.0=hb77b528_0 + - pyarrow=16.1.0=py310h17c5347_1 + - pyarrow-core=16.1.0=py310h6f79a3a_1_cpu + - pyasn1=0.6.0=pyhd8ed1ab_0 + - pyasn1-modules=0.4.0=pyhd8ed1ab_0 + - pybigwig=0.3.22=py310h79000e5_2 + - pycparser=2.22=pyhd8ed1ab_0 + - pygments=2.18.0=pyhd8ed1ab_0 + - pynacl=1.5.0=py310h2372a71_3 + - pyopenssl=24.0.0=pyhd8ed1ab_0 + - pyparsing=3.1.2=pyhd8ed1ab_0 + - pyqt=5.15.9=py310h04931ad_5 + - pyqt5-sip=12.12.2=py310hc6cd4ac_5 + - pyranges=0.0.129=pyh7cba7a3_0 + - pyrle=0.0.40=py310h4b81fae_0 + - pysam=0.22.1=py310h41dec4a_0 + - pysftp=0.2.9=py_1 + - pysocks=1.7.1=pyha2e5f31_6 + - pytest=8.2.1=pyhd8ed1ab_0 + - pytest-xdist=3.5.0=pyhd8ed1ab_0 + - python=3.10.14=hd12c33a_0_cpython + - python-dateutil=2.9.0=pyhd8ed1ab_0 + - python-fastjsonschema=2.19.1=pyhd8ed1ab_0 + - python-irodsclient=2.0.1=pyhd8ed1ab_0 + - python-tzdata=2024.1=pyhd8ed1ab_0 + - python_abi=3.10=4_cp310 + - pytz=2024.1=pyhd8ed1ab_0 + - pyu2f=0.1.5=pyhd8ed1ab_0 + - pyyaml=6.0.1=py310h2372a71_1 + - qt-main=5.15.8=hc9dc06e_21 + - r-base=4.3.3=hf0d99cb_1 + - re2=2023.09.01=h7f4b329_2 + - readline=8.2=h8228510_1 + - referencing=0.35.1=pyhd8ed1ab_0 + - requests=2.32.2=pyhd8ed1ab_0 + - reretry=0.11.8=pyhd8ed1ab_0 + - rich=13.7.1=pyhd8ed1ab_0 + - rpds-py=0.18.1=py310he421c4c_0 + - rsa=4.9=pyhd8ed1ab_0 + - s2n=1.4.15=he19d79f_0 + - s3transfer=0.10.1=pyhd8ed1ab_0 + - samtools=1.20=h50ea8bc_0 + - scipy=1.13.1=py310h93e2701_0 + - seaborn=0.13.2=hd8ed1ab_2 + - seaborn-base=0.13.2=pyhd8ed1ab_2 + - sed=4.8=he412f7d_0 + - setuptools=70.0.0=pyhd8ed1ab_0 + - setuptools-scm=8.1.0=pyhd8ed1ab_0 + - sip=6.7.12=py310hc6cd4ac_0 + - six=1.16.0=pyh6c4a22f_0 + - slacker=0.14.0=py_0 + - smart_open=6.4.0=pyhd8ed1ab_0 + - smmap=5.0.0=pyhd8ed1ab_0 + - snakemake=7.32.4=hdfd78af_1 + - snakemake-minimal=7.32.4=pyhdfd78af_1 + - snappy=1.2.0=hdb0a2a9_1 + - snowballstemmer=2.2.0=pyhd8ed1ab_0 + - sorted_nearest=0.0.39=py310h4b81fae_1 + - sphinx=7.3.7=pyhd8ed1ab_0 + - sphinxcontrib-applehelp=1.0.8=pyhd8ed1ab_0 + - sphinxcontrib-devhelp=1.0.6=pyhd8ed1ab_0 + - sphinxcontrib-htmlhelp=2.0.5=pyhd8ed1ab_0 + - sphinxcontrib-jsmath=1.0.1=pyhd8ed1ab_0 + - sphinxcontrib-qthelp=1.0.7=pyhd8ed1ab_0 + - sphinxcontrib-serializinghtml=1.1.10=pyhd8ed1ab_0 + - statsmodels=0.14.2=py310h261611a_0 + - stone=3.3.6=pyhd8ed1ab_0 + - stopit=1.1.2=py_0 + - sysroot_linux-64=2.12=he073ed8_17 + - tabix=1.11=hdfd78af_0 + - tabulate=0.9.0=pyhd8ed1ab_1 + - throttler=1.2.2=pyhd8ed1ab_0 + - tk=8.6.13=noxft_h4845f30_101 + - tktable=2.10=h0c5db8f_5 + - toml=0.10.2=pyhd8ed1ab_0 + - tomli=2.0.1=pyhd8ed1ab_0 + - toposort=1.10=pyhd8ed1ab_0 + - tornado=6.4=py310h2372a71_0 + - traitlets=5.14.3=pyhd8ed1ab_0 + - typing-extensions=4.11.0=hd8ed1ab_0 + - typing_extensions=4.11.0=pyha770c72_0 + - tzdata=2024a=h0c530f3_0 + - ubiquerg=0.7.0=pyhd8ed1ab_0 + - unicodedata2=15.1.0=py310h2372a71_0 + - uritemplate=4.1.1=pyhd8ed1ab_0 + - urllib3=2.2.1=pyhd8ed1ab_0 + - veracitools=0.1.3=py_0 + - wcwidth=0.2.13=pyhd8ed1ab_0 + - wheel=0.43.0=pyhd8ed1ab_1 + - wrapt=1.16.0=py310h2372a71_0 + - xcb-util=0.4.0=hd590300_1 + - xcb-util-image=0.4.0=h8ee46fc_1 + - xcb-util-keysyms=0.4.0=h8ee46fc_1 + - xcb-util-renderutil=0.3.9=hd590300_1 + - xcb-util-wm=0.4.1=h8ee46fc_1 + - xkeyboard-config=2.41=hd590300_0 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.1.1=hd590300_0 + - xorg-libsm=1.2.4=h7391055_0 + - xorg-libx11=1.8.9=h8ee46fc_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxrender=0.9.11=hd590300_0 + - xorg-libxt=1.3.0=hd590300_1 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xf86vidmodeproto=2.3.1=h7f98852_1002 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - yaml=0.2.5=h7f98852_2 + - yarl=1.9.4=py310h2372a71_0 + - yte=1.5.4=pyha770c72_0 + - zipp=3.17.0=pyhd8ed1ab_0 + - zlib=1.2.13=h4ab18f5_6 + - zstd=1.5.6=ha6fb4c9_0 - pip: - - sphinx_rtd_theme - - hic-straw \ No newline at end of file + - docutils==0.20.1 + - hic-straw==1.3.1 + - pybind11==2.12.0 + - sphinx-rtd-theme==2.0.0 + - sphinxcontrib-jquery==4.1 diff --git a/workflow/envs/abcenv_dev.yml b/workflow/envs/abcenv_dev.yml new file mode 100644 index 00000000..13d93752 --- /dev/null +++ b/workflow/envs/abcenv_dev.yml @@ -0,0 +1,42 @@ +name: abc-env +# note: this environment does not support pipeline due to package conflicts +# for use to make major changes to the pipeline environment +channels: + - bioconda + - conda-forge + - defaults + - anaconda +dependencies: + - bedtools=2.26.0 + - black + - click + - cryptography<42 + - macs2 + - matplotlib + - numpy + - pandas + - pybigwig + - pulp<2.8 # Pin pulp <2.8 for snakemake: https://github.com/snakemake/snakemake/issues/2607 + - pip + - pyarrow + - pyranges + - pysam + - pytest-xdist + - python>=3.6 + - samtools>=1.9 # to avoid open ssl issue: https://github.com/merenlab/anvio/issues/1479 + - scipy + - seaborn + # smart_open version >=7 leads to errors with Snakemake https://pastebin.com/uBmBwdnK + - smart_open<7 + - snakemake>=7,<8 + - sphinx + - tabix + - yaml + # The following 3 are needed to compile hic-straw in HPC + # You may need to remove them if working on macosx + - gcc + - gxx + - zlib + - pip: + - sphinx_rtd_theme + - hic-straw diff --git a/workflow/envs/release.yml b/workflow/envs/release.yml deleted file mode 100644 index 959a66c2..00000000 --- a/workflow/envs/release.yml +++ /dev/null @@ -1,409 +0,0 @@ -name: abc-env -channels: - - bioconda - - conda-forge - - defaults - - anaconda -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - _r-mutex=1.0.1=anacondar_1 - - aioeasywebdav=2.4.0=pyha770c72_0 - - aiohttp=3.9.3=py310h2372a71_0 - - aiosignal=1.3.1=pyhd8ed1ab_0 - - alabaster=0.7.16=pyhd8ed1ab_0 - - alsa-lib=1.2.11=hd590300_1 - - amply=0.1.6=pyhd8ed1ab_0 - - appdirs=1.4.4=pyh9f0ad1d_0 - - archspec=0.2.3=pyhd8ed1ab_0 - - async-timeout=4.0.3=pyhd8ed1ab_0 - - attmap=0.13.2=pyhd8ed1ab_0 - - attr=2.5.1=h166bdaf_1 - - attrs=23.2.0=pyh71513ae_0 - - aws-c-auth=0.7.16=h79b3bcb_6 - - aws-c-cal=0.6.10=hb29e0c7_1 - - aws-c-common=0.9.13=hd590300_0 - - aws-c-compression=0.2.18=hecc5fa9_1 - - aws-c-event-stream=0.4.2=hf9b2f7b_4 - - aws-c-http=0.8.1=h5d7533a_5 - - aws-c-io=0.14.5=h50678d4_1 - - aws-c-mqtt=0.10.2=hf479d2b_4 - - aws-c-s3=0.5.2=h4ad9680_0 - - aws-c-sdkutils=0.1.15=hecc5fa9_1 - - aws-checksums=0.1.18=hecc5fa9_1 - - aws-crt-cpp=0.26.2=h19f5d62_7 - - aws-sdk-cpp=1.11.267=h5606698_1 - - babel=2.14.0=pyhd8ed1ab_0 - - bcrypt=4.1.2=py310hcb5633a_0 - - bedtools=2.26.0=0 - - binutils_impl_linux-64=2.40=hf600244_0 - - black=24.2.0=py310hff52083_0 - - boltons=23.1.1=pyhd8ed1ab_0 - - boto3=1.34.55=pyhd8ed1ab_0 - - botocore=1.34.56=pyge310_1234567_0 - - brotli=1.1.0=hd590300_1 - - brotli-bin=1.1.0=hd590300_1 - - brotli-python=1.1.0=py310hc6cd4ac_1 - - bwidget=1.9.14=ha770c72_1 - - bzip2=1.0.8=hd590300_5 - - c-ares=1.27.0=hd590300_0 - - ca-certificates=2024.2.2=hbcca054_0 - - cachetools=5.3.3=pyhd8ed1ab_0 - - cairo=1.18.0=h3faef2a_0 - - certifi=2024.2.2=pyhd8ed1ab_0 - - cffi=1.16.0=py310h2fee648_0 - - charset-normalizer=3.3.2=pyhd8ed1ab_0 - - click=8.1.7=unix_pyh707e725_0 - - coin-or-cbc=2.10.10=h9002f0b_0 - - coin-or-cgl=0.60.7=h516709c_0 - - coin-or-clp=1.17.8=h1ee7a9c_0 - - coin-or-osi=0.108.8=ha2443b9_0 - - coin-or-utils=2.11.9=hee58242_0 - - coincbc=2.10.10=0_metapackage - - colorama=0.4.6=pyhd8ed1ab_0 - - conda=24.1.2=py310hff52083_0 - - conda-libmamba-solver=24.1.0=pyhd8ed1ab_0 - - conda-package-handling=2.2.0=pyh38be061_0 - - conda-package-streaming=0.9.0=pyhd8ed1ab_0 - - configargparse=1.7=pyhd8ed1ab_0 - - connection_pool=0.0.3=pyhd3deb0d_0 - - contourpy=1.2.0=py310hd41b1e2_0 - - cryptography=42.0.5=py310h75e40e8_0 - - curl=8.5.0=hca28451_0 - - cycler=0.12.1=pyhd8ed1ab_0 - - datrie=0.8.2=py310h2372a71_7 - - dbus=1.13.6=h5008d03_3 - - defusedxml=0.7.1=pyhd8ed1ab_0 - - distro=1.9.0=pyhd8ed1ab_0 - - docutils=0.20.1=py310hff52083_3 - - dpath=2.1.6=pyha770c72_0 - - dropbox=11.36.2=pyhd8ed1ab_0 - - eido=0.2.2=pyhd8ed1ab_0 - - exceptiongroup=1.2.0=pyhd8ed1ab_2 - - execnet=2.0.2=pyhd8ed1ab_0 - - expat=2.5.0=hcb278e6_1 - - filechunkio=1.8=py_2 - - fmt=10.2.1=h00ab1b0_0 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=h77eed37_1 - - fontconfig=2.14.2=h14ed4e7_0 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - fonttools=4.49.0=py310h2372a71_0 - - freetype=2.12.1=h267a509_2 - - fribidi=1.0.10=h36c2ea0_0 - - frozenlist=1.4.1=py310h2372a71_0 - - ftputil=5.1.0=pyhd8ed1ab_0 - - gcc=13.2.0=h574f8da_2 - - gcc_impl_linux-64=13.2.0=h338b0a0_5 - - gettext=0.21.1=h27087fc_0 - - gflags=2.2.2=he1b5a44_1004 - - gfortran_impl_linux-64=13.2.0=h76e1118_5 - - gitdb=4.0.11=pyhd8ed1ab_0 - - gitpython=3.1.42=pyhd8ed1ab_0 - - glib=2.78.1=hfc55251_0 - - glib-tools=2.78.1=hfc55251_0 - - glog=0.7.0=hed5481d_0 - - google-api-core=2.17.1=pyhd8ed1ab_0 - - google-api-python-client=2.121.0=pyhd8ed1ab_0 - - google-auth=2.28.1=pyhca7485f_0 - - google-auth-httplib2=0.2.0=pyhd8ed1ab_0 - - google-cloud-core=2.4.1=pyhd8ed1ab_0 - - google-cloud-storage=2.14.0=pyhca7485f_0 - - google-crc32c=1.1.2=py310hc5c09a0_5 - - google-resumable-media=2.7.0=pyhd8ed1ab_0 - - googleapis-common-protos=1.62.0=pyhd8ed1ab_0 - - graphite2=1.3.13=h58526e2_1001 - - grpcio=1.61.1=py310h1b8f574_1 - - gst-plugins-base=1.22.7=h8e1006c_0 - - gstreamer=1.22.7=h98fc4e7_0 - - gxx=13.2.0=h574f8da_2 - - gxx_impl_linux-64=13.2.0=h338b0a0_5 - - harfbuzz=8.3.0=h3d44ed6_0 - - htslib=1.19.1=h81da01d_2 - - httplib2=0.22.0=pyhd8ed1ab_0 - - humanfriendly=10.0=pyhd8ed1ab_6 - - icu=73.2=h59595ed_0 - - idna=3.6=pyhd8ed1ab_0 - - imagesize=1.4.1=pyhd8ed1ab_0 - - importlib-metadata=7.0.1=pyha770c72_0 - - importlib_resources=6.1.2=pyhd8ed1ab_0 - - iniconfig=2.0.0=pyhd8ed1ab_0 - - jinja2=3.1.3=pyhd8ed1ab_0 - - jmespath=1.0.1=pyhd8ed1ab_0 - - jsonpatch=1.33=pyhd8ed1ab_0 - - jsonpointer=2.4=py310hff52083_3 - - jsonschema=4.21.1=pyhd8ed1ab_0 - - jsonschema-specifications=2023.12.1=pyhd8ed1ab_0 - - jupyter_core=5.7.1=py310hff52083_0 - - kernel-headers_linux-64=2.6.32=he073ed8_17 - - keyutils=1.6.1=h166bdaf_0 - - kiwisolver=1.4.5=py310hd41b1e2_1 - - krb5=1.21.2=h659d440_0 - - lame=3.100=h166bdaf_1003 - - lcms2=2.15=h7f713cb_2 - - ld_impl_linux-64=2.40=h41732ed_0 - - lerc=4.0.0=h27087fc_0 - - libabseil=20240116.1=cxx17_h59595ed_2 - - libarchive=3.7.2=h2aa1ff5_1 - - libarrow=15.0.0=h5001e6d_7_cpu - - libarrow-acero=15.0.0=h59595ed_7_cpu - - libarrow-dataset=15.0.0=h59595ed_7_cpu - - libarrow-flight=15.0.0=hf334d8d_7_cpu - - libarrow-flight-sql=15.0.0=h469e5c9_7_cpu - - libarrow-gandiva=15.0.0=h3f306ff_7_cpu - - libarrow-substrait=15.0.0=h469e5c9_7_cpu - - libblas=3.9.0=21_linux64_openblas - - libbrotlicommon=1.1.0=hd590300_1 - - libbrotlidec=1.1.0=hd590300_1 - - libbrotlienc=1.1.0=hd590300_1 - - libcap=2.69=h0f662aa_0 - - libcblas=3.9.0=21_linux64_openblas - - libclang=15.0.7=default_hb11cfb5_4 - - libclang13=15.0.7=default_ha2b6cf4_4 - - libcrc32c=1.1.2=h9c3ff4c_0 - - libcups=2.3.3=h4637d8d_4 - - libcurl=8.5.0=hca28451_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=hd590300_2 - - libevent=2.1.12=hf998b51_1 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.2=h7f98852_5 - - libflac=1.4.3=h59595ed_0 - - libgcc=7.2.0=h69d50b8_2 - - libgcc-devel_linux-64=13.2.0=ha9c7c90_105 - - libgcc-ng=13.2.0=h807b86a_5 - - libgcrypt=1.10.3=hd590300_0 - - libgfortran-ng=13.2.0=h69a702a_5 - - libgfortran5=13.2.0=ha4646dd_5 - - libglib=2.78.1=hebfc3b9_0 - - libgomp=13.2.0=h807b86a_5 - - libgoogle-cloud=2.21.0=h72bcb37_2 - - libgoogle-cloud-storage=2.21.0=hc7a4891_2 - - libgpg-error=1.48=h71f35ed_0 - - libgrpc=1.61.1=h42401df_1 - - libiconv=1.17=hd590300_2 - - libjpeg-turbo=2.1.5.1=hd590300_1 - - liblapack=3.9.0=21_linux64_openblas - - liblapacke=3.9.0=21_linux64_openblas - - libllvm15=15.0.7=hb3ce162_4 - - libllvm16=16.0.6=hb3ce162_3 - - libmamba=1.5.7=had39da4_0 - - libmambapy=1.5.7=py310h39ff949_0 - - libnghttp2=1.58.0=h47da74e_1 - - libnl=3.9.0=hd590300_0 - - libnsl=2.0.1=hd590300_0 - - libnuma=2.0.16=h0b41bf4_1 - - libogg=1.3.4=h7f98852_1 - - libopenblas=0.3.26=pthreads_h413a1c8_0 - - libopus=1.3.1=h7f98852_1 - - libparquet=15.0.0=h352af49_7_cpu - - libpng=1.6.43=h2797004_0 - - libpq=15.6=h088ca5b_0 - - libprotobuf=4.25.2=h08a7969_1 - - libre2-11=2023.09.01=h5a48ba9_2 - - libsanitizer=13.2.0=h7e041cc_5 - - libsndfile=1.2.2=hc60ed4a_1 - - libsodium=1.0.18=h36c2ea0_1 - - libsolv=0.7.28=hfc55251_0 - - libsqlite=3.45.1=h2797004_0 - - libssh2=1.11.0=h0841786_0 - - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_105 - - libstdcxx-ng=13.2.0=h7e041cc_5 - - libsystemd0=255=h3516f8a_1 - - libthrift=0.19.0=hb90f79a_1 - - libtiff=4.6.0=h8b53f26_0 - - libutf8proc=2.8.0=h166bdaf_0 - - libuuid=2.38.1=h0b41bf4_0 - - libvorbis=1.3.7=h9c3ff4c_0 - - libwebp-base=1.3.2=hd590300_0 - - libxcb=1.15=h0b41bf4_0 - - libxcrypt=4.4.36=hd590300_1 - - libxkbcommon=1.6.0=hd429924_1 - - libxml2=2.12.5=h232c23b_0 - - libzlib=1.2.13=hd590300_5 - - logmuse=0.2.6=pyh8c360ce_0 - - lz4-c=1.9.4=hcb278e6_0 - - lzo=2.10=h516909a_1000 - - macs2=2.2.9.1=py310h4b81fae_0 - - make=4.3=hd18ef5c_1 - - markdown-it-py=3.0.0=pyhd8ed1ab_0 - - markupsafe=2.1.5=py310h2372a71_0 - - matplotlib=3.8.3=py310hff52083_0 - - matplotlib-base=3.8.3=py310h62c0568_0 - - mdurl=0.1.2=pyhd8ed1ab_0 - - menuinst=2.0.2=py310hff52083_0 - - mpg123=1.32.4=h59595ed_0 - - multidict=6.0.5=py310h2372a71_0 - - munkres=1.0.7=py_1 - - mypy_extensions=1.0.0=pyha770c72_0 - - mysql-common=8.0.33=hf1915f5_6 - - mysql-libs=8.0.33=hca2cd23_6 - - natsort=8.4.0=pyhd8ed1ab_0 - - nbformat=5.9.2=pyhd8ed1ab_0 - - ncls=0.0.68=py310h4b81fae_1 - - ncurses=6.4=h59595ed_2 - - nspr=4.35=h27087fc_0 - - nss=3.98=h1d7d5a4_0 - - numpy=1.26.4=py310hb13e2d6_0 - - oauth2client=4.1.3=py_0 - - openjpeg=2.5.2=h488ebb8_0 - - openssl=3.2.1=hd590300_0 - - orc=1.9.2=h00e871a_2 - - packaging=23.2=pyhd8ed1ab_0 - - pandas=2.2.1=py310hcc13569_0 - - pango=1.50.14=ha41ecd1_2 - - paramiko=3.4.0=pyhd8ed1ab_0 - - pathspec=0.12.1=pyhd8ed1ab_0 - - patsy=0.5.6=pyhd8ed1ab_0 - - pcre2=10.40=hc3806b6_0 - - peppy=0.40.1=pyhd8ed1ab_0 - - pillow=10.0.1=py310h29da1c1_1 - - pip=24.0=pyhd8ed1ab_0 - - pixman=0.43.2=h59595ed_0 - - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1 - - plac=1.4.3=pyhd8ed1ab_0 - - platformdirs=4.2.0=pyhd8ed1ab_0 - - pluggy=1.4.0=pyhd8ed1ab_0 - - ply=3.11=py_1 - - prettytable=3.10.0=pyhd8ed1ab_0 - - protobuf=4.25.2=py310ha8c1f0e_0 - - psutil=5.9.8=py310h2372a71_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - pulp=2.7.0=py310hff52083_1 - - pulseaudio-client=16.1=hb77b528_5 - - pyarrow=15.0.0=py310hf9e7431_7_cpu - - pyasn1=0.5.1=pyhd8ed1ab_0 - - pyasn1-modules=0.3.0=pyhd8ed1ab_0 - - pybigwig=0.3.22=py310h79000e5_1 - - pybind11-abi=4=hd8ed1ab_3 - - pycosat=0.6.6=py310h2372a71_0 - - pycparser=2.21=pyhd8ed1ab_0 - - pygments=2.17.2=pyhd8ed1ab_0 - - pynacl=1.5.0=py310h2372a71_3 - - pyopenssl=24.0.0=pyhd8ed1ab_0 - - pyparsing=3.1.1=pyhd8ed1ab_0 - - pyqt=5.15.9=py310h04931ad_5 - - pyqt5-sip=12.12.2=py310hc6cd4ac_5 - - pyranges=0.0.129=pyh7cba7a3_0 - - pyrle=0.0.39=py310h4b81fae_0 - - pysam=0.22.0=py310h41dec4a_0 - - pysftp=0.2.9=py_1 - - pysocks=1.7.1=pyha2e5f31_6 - - pytest=8.0.2=pyhd8ed1ab_0 - - pytest-xdist=3.5.0=pyhd8ed1ab_0 - - python=3.10.13=hd12c33a_1_cpython - - python-dateutil=2.9.0=pyhd8ed1ab_0 - - python-fastjsonschema=2.19.1=pyhd8ed1ab_0 - - python-irodsclient=2.0.0=pyhd8ed1ab_0 - - python-tzdata=2024.1=pyhd8ed1ab_0 - - python_abi=3.10=4_cp310 - - pytz=2024.1=pyhd8ed1ab_0 - - pyu2f=0.1.5=pyhd8ed1ab_0 - - pyyaml=6.0.1=py310h2372a71_1 - - qt-main=5.15.8=hc47bfe8_16 - - r-base=4.3.1=h639d9d3_5 - - rdma-core=50.0=hd3aeb46_0 - - re2=2023.09.01=h7f4b329_2 - - readline=8.2=h8228510_1 - - referencing=0.33.0=pyhd8ed1ab_0 - - reproc=14.2.4.post0=hd590300_1 - - reproc-cpp=14.2.4.post0=h59595ed_1 - - requests=2.31.0=pyhd8ed1ab_0 - - reretry=0.11.8=pyhd8ed1ab_0 - - rich=13.7.1=pyhd8ed1ab_0 - - rpds-py=0.18.0=py310hcb5633a_0 - - rsa=4.9=pyhd8ed1ab_0 - - ruamel.yaml=0.18.6=py310h2372a71_0 - - ruamel.yaml.clib=0.2.8=py310h2372a71_0 - - s2n=1.4.5=h06160fa_0 - - s3transfer=0.10.0=pyhd8ed1ab_0 - - samtools=1.19.2=h50ea8bc_1 - - scipy=1.12.0=py310hb13e2d6_2 - - seaborn=0.13.2=hd8ed1ab_0 - - seaborn-base=0.13.2=pyhd8ed1ab_0 - - sed=4.8=he412f7d_0 - - setuptools=69.1.1=pyhd8ed1ab_0 - - setuptools-scm=8.0.4=pyhd8ed1ab_0 - - sip=6.7.12=py310hc6cd4ac_0 - - six=1.16.0=pyh6c4a22f_0 - - slacker=0.14.0=py_0 - - smart_open=6.4.0=pyhd8ed1ab_0 - - smmap=5.0.0=pyhd8ed1ab_0 - - snakemake=7.32.4=hdfd78af_1 - - snakemake-minimal=7.32.4=pyhdfd78af_1 - - snappy=1.1.10=h9fff704_0 - - snowballstemmer=2.2.0=pyhd8ed1ab_0 - - sorted_nearest=0.0.39=py310h4b81fae_0 - - sphinx=7.2.6=pyhd8ed1ab_0 - - sphinxcontrib-applehelp=1.0.8=pyhd8ed1ab_0 - - sphinxcontrib-devhelp=1.0.6=pyhd8ed1ab_0 - - sphinxcontrib-htmlhelp=2.0.5=pyhd8ed1ab_0 - - sphinxcontrib-jsmath=1.0.1=pyhd8ed1ab_0 - - sphinxcontrib-qthelp=1.0.7=pyhd8ed1ab_0 - - sphinxcontrib-serializinghtml=1.1.10=pyhd8ed1ab_0 - - statsmodels=0.14.1=py310h1f7b6fc_0 - - stone=3.3.1=pyhd8ed1ab_0 - - stopit=1.1.2=py_0 - - sysroot_linux-64=2.12=he073ed8_17 - - tabix=1.11=hdfd78af_0 - - tabulate=0.9.0=pyhd8ed1ab_1 - - throttler=1.2.2=pyhd8ed1ab_0 - - tk=8.6.13=noxft_h4845f30_101 - - tktable=2.10=h0c5db8f_5 - - toml=0.10.2=pyhd8ed1ab_0 - - tomli=2.0.1=pyhd8ed1ab_0 - - toposort=1.10=pyhd8ed1ab_0 - - tornado=6.4=py310h2372a71_0 - - tqdm=4.66.2=pyhd8ed1ab_0 - - traitlets=5.14.1=pyhd8ed1ab_0 - - truststore=0.8.0=pyhd8ed1ab_0 - - typing-extensions=4.10.0=hd8ed1ab_0 - - typing_extensions=4.10.0=pyha770c72_0 - - tzdata=2024a=h0c530f3_0 - - ubiquerg=0.7.0=pyhd8ed1ab_0 - - ucx=1.15.0=h75e419f_3 - - unicodedata2=15.1.0=py310h2372a71_0 - - uritemplate=4.1.1=pyhd8ed1ab_0 - - urllib3=2.0.7=pyhd8ed1ab_0 - - veracitools=0.1.3=py_0 - - wcwidth=0.2.13=pyhd8ed1ab_0 - - wheel=0.42.0=pyhd8ed1ab_0 - - wrapt=1.16.0=py310h2372a71_0 - - xcb-util=0.4.0=hd590300_1 - - xcb-util-image=0.4.0=h8ee46fc_1 - - xcb-util-keysyms=0.4.0=h8ee46fc_1 - - xcb-util-renderutil=0.3.9=hd590300_1 - - xcb-util-wm=0.4.1=h8ee46fc_1 - - xkeyboard-config=2.41=hd590300_0 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.1.1=hd590300_0 - - xorg-libsm=1.2.4=h7391055_0 - - xorg-libx11=1.8.7=h8ee46fc_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxrender=0.9.11=hd590300_0 - - xorg-libxt=1.3.0=hd590300_1 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xf86vidmodeproto=2.3.1=h7f98852_1002 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.2.6=h166bdaf_0 - - yaml=0.2.5=h7f98852_2 - - yaml-cpp=0.8.0=h59595ed_0 - - yarl=1.9.4=py310h2372a71_0 - - yte=1.5.4=pyha770c72_0 - - zipp=3.17.0=pyhd8ed1ab_0 - - zlib=1.2.13=hd590300_5 - - zstandard=0.22.0=py310h1275a96_0 - - zstd=1.5.5=hfc55251_0 - - pip: - - hic-straw==1.3.1 - - pybind11==2.11.1 - - sphinx-rtd-theme==2.0.0 - - sphinxcontrib-jquery==4.1 -prefix: /oak/stanford/groups/engreitz/Users/atan5133/.conda/envs/abc-env diff --git a/workflow/rules/predictions.smk b/workflow/rules/predictions.smk index 3829c3e3..5a68b499 100644 --- a/workflow/rules/predictions.smk +++ b/workflow/rules/predictions.smk @@ -62,7 +62,7 @@ rule filter_predictions: "../envs/abcenv.yml" output: enhPredictionsFull = os.path.join(RESULTS_DIR, "{biosample}", "Predictions", f"EnhancerPredictionsFull_{FILTERED_PREDICTION_FILE_FORMAT_TEMPLATE}.tsv"), - enhPredictionsFullBedpe = os.path.join(RESULTS_DIR, "{biosample}", "Predictions", f"EnhancerPredictionsFull_{FILTERED_PREDICTION_FILE_FORMAT_TEMPLATE}.bedpe"), + enhPredictionsFullBedpe = os.path.join(RESULTS_DIR, "{biosample}", "Predictions", f"EnhancerPredictionsFull_{FILTERED_PREDICTION_FILE_FORMAT_TEMPLATE}.bedpe.gz"), enhPredictionsSlim = os.path.join(RESULTS_DIR, "{biosample}", "Predictions", f"EnhancerPredictions_{FILTERED_PREDICTION_FILE_FORMAT_TEMPLATE}.tsv"), genePredictionsStats = os.path.join(RESULTS_DIR, "{biosample}", "Predictions", f"GenePredictionStats_{FILTERED_PREDICTION_FILE_FORMAT_TEMPLATE}.tsv") resources: diff --git a/workflow/rules/utils.smk b/workflow/rules/utils.smk index f24022de..c3c2f34b 100644 --- a/workflow/rules/utils.smk +++ b/workflow/rules/utils.smk @@ -80,7 +80,7 @@ def determine_filtered_prediction_file_format(threshold, config): def load_biosamples_config(config): biosamples_config = pd.read_csv( config["biosamplesTable"], sep="\t", na_values="" - ).replace([np.NaN], [None]).set_index("biosample", drop=False) + ).replace([np.nan], [None]).set_index("biosample", drop=False) biosamples_config["HiC_resolution"] = biosamples_config["HiC_resolution"].replace([None], [0]).astype(int) _validate_biosamples_config(biosamples_config) _configure_tss_and_gene_files(biosamples_config) @@ -106,6 +106,8 @@ def _validate_hic_info(row: pd.Series): if row["HiC_file"]: if not (row["HiC_type"] and row["HiC_resolution"]): raise InvalidConfig("Must provide HiC type and resolution with file") + if row["HiC_resolution"] != 5000: + raise InvalidConfig("Only 5kb resolution supported at the moment") def _validate_biosamples_config(biosamples_config): """ diff --git a/workflow/scripts/predict.py b/workflow/scripts/predict.py index 05d5e894..6d5f9aef 100644 --- a/workflow/scripts/predict.py +++ b/workflow/scripts/predict.py @@ -55,7 +55,7 @@ class formatter( parser.add_argument( "--hic_pseudocount_distance", type=int, - default=1e6, + required=True, help="A pseudocount is added equal to the powerlaw fit at this distance", ) parser.add_argument( diff --git a/workflow/scripts/predictor.py b/workflow/scripts/predictor.py index 35bdda03..f5d21f55 100644 --- a/workflow/scripts/predictor.py +++ b/workflow/scripts/predictor.py @@ -103,6 +103,33 @@ def make_pred_table(chromosome, enh, genes, window, chrom_sizes_map: Dict[str, i return pred +def fill_diagonals(df, hic_resolution): + """ + Fill diagonals based on the neighbors + We want to search neighboring bins + If hic_resolution is < 5kb, make sure we use a search space of 5kb for our + neighbors + """ + diagonal_bins = df[ + df.index.get_level_values("binX") == df.index.get_level_values("binY") + ] + search_space_bins = 1 + if hic_resolution < 5000: + search_space_bins = math.ceil(5000 / hic_resolution) + for (binX, binX), _ in diagonal_bins.iterrows(): + max_contact = 0 + for i in range(1, search_space_bins + 1): + left_bin = (binX - i, binX) + right_bin = ( + binX, + binX + i, + ) # we have to look above b/c we haven't processed right bin yet + for bin in [left_bin, right_bin]: + if bin in df.index: + max_contact = max(max_contact, df.loc[bin, "counts"]) + df.loc[(binX, binX), "counts"] = max_contact + + def create_df_from_records(records, hic_resolution): """ The bins returned from hic straw are in hic_resolution increments @@ -113,33 +140,13 @@ def create_df_from_records(records, hic_resolution): 100k -> bin 20 Where the left number is the genomic position of the bin - We also record the neighboring bins of the diagonals, as we - want to replace the value with the neighbors later + We also handle filling the diagonals differently """ df = pd.DataFrame(records, columns=["binX", "binY", "counts"]) df["binX"] = np.floor(df["binX"] / hic_resolution).astype(int) df["binY"] = np.floor(df["binY"] / hic_resolution).astype(int) df = df.set_index(["binX", "binY"]) # Set indexes for performance - diagonal_bins = df[ - df.index.get_level_values("binX") == df.index.get_level_values("binY") - ] - # Provide max neighbor counts - # We'll divide by the normalizing constant later when replacing the value - for (binX, binY), _ in diagonal_bins.iterrows(): - left_bin_count, right_bin_count = 0, 0 - left_binX = binX - 1 - left_binY = binX - if (left_binX, left_binY) in df.index: - left_bin_count = df.loc[(left_binX, left_binY), "counts"] - - right_binX = binX - right_binY = binX + 1 - if (right_binX, right_binY) in df.index: - right_bin_count = df.loc[(right_binX, right_binY), "counts"] - - df.loc[(binX, binY), "max_neighbor_count"] = max( - left_bin_count, right_bin_count - ) + fill_diagonals(df, hic_resolution) return df @@ -234,20 +241,15 @@ def add_hic_from_hic_file(pred, hic_file, chromosome, hic_resolution): suffixes=(None, "_"), ) if "counts_" in pred: + # After the first join, we have to merge the new records into + # the existing count column. It's really just replacing the + # nan values in pred_df with new values from latest records pred["counts"] = np.max(pred[["counts", "counts_"]], axis=1) pred.drop("counts_", inplace=True, axis=1) - pred["max_neighbor_count"] = np.max( - pred[["max_neighbor_count", "max_neighbor_count_"]], axis=1 - ) - pred.drop("max_neighbor_count_", inplace=True, axis=1) - row_mean = np.mean(list(bin_sums.values())) # normalize hic_contact by the row_mean to reflect a doubly stochastic hic matrix pred["counts"] /= row_mean - pred["max_neighbor_count"] /= row_mean - # Fill in diagonals with the max of neighbors - pred["counts"] = pred["max_neighbor_count"].combine_first(pred["counts"]) pred.drop( [ @@ -463,7 +465,7 @@ def add_hic_pseudocount(pred, args): args.hic_pseudocount_distance, args.hic_gamma, args.hic_scale, - args.hic_resolution, + args.hic_pseudocount_distance, ) pred["hic_pseudocount"] = pd.DataFrame( {"a": pred["powerlaw_contact"], "b": pseudocount_distance} diff --git a/workflow/scripts/tools.py b/workflow/scripts/tools.py index 5c1f7b7a..7b562147 100644 --- a/workflow/scripts/tools.py +++ b/workflow/scripts/tools.py @@ -46,7 +46,15 @@ def write_connections_bedpe_format(pred, outfile, score_column): towrite["chr2"] = pred["chr"] towrite["y1"] = pred["TargetGeneTSS"] towrite["y2"] = pred["TargetGeneTSS"] - towrite["name"] = pred["TargetGene"] + "_" + pred["name"] + towrite["name"] = ( + pred["TargetGene"] + + "|" + + pred["chr"] + + ":" + + pred["start"].astype(str) + + "-" + + pred["end"].astype(str) + ) towrite["score"] = pred[score_column] towrite["strand1"] = "." towrite["strand2"] = "."