diff --git a/Dockerfile b/Dockerfile index 1cd340e..34c3e9c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,50 +4,55 @@ # using ubuntu LTS version FROM ubuntu:20.04 +# Configure Poetry: pinned version, dedicated venv, cache dir, and the dir where pipx links the `poetry` executable +ENV POETRY_VERSION=1.8.3 +ENV POETRY_HOME=/opt/poetry +ENV POETRY_VENV=/opt/poetry-venv +ENV POETRY_CACHE_DIR=/opt/.cache +ENV PIPX_BIN_DIR=${POETRY_VENV}/bin + RUN apt-get update && apt-get install --no-install-recommends -y \ python3.8-dev python3.8-venv python3-pip python3-wheel \ build-essential libhdf5-dev pkg-config && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# create and activate virtual environment -RUN python3.8 -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" +# Add the Poetry venv's bin dir (pip, pipx and the pipx-linked `poetry`) to PATH +ENV PATH="${PATH}:${POETRY_VENV}/bin" + -# Install Poetry -RUN python3 -m pip install pipx && \ - python3 -m pipx ensurepath && \ - pipx install poetry && \ - pipx ensurepath +# Install a pinned Poetry via pipx from a dedicated venv, separated from the system interpreter +RUN python3 -m venv $POETRY_VENV \ + && . ${POETRY_VENV}/bin/activate \ + && $POETRY_VENV/bin/pip install --no-cache-dir -U pipx \ + && $POETRY_VENV/bin/pipx ensurepath \ + && $POETRY_VENV/bin/pipx install poetry==${POETRY_VERSION} # Set up environment variables for Poetry -ENV PATH="$PATH:/root/.local/bin" -ENV POETRY_HOME="/root/.poetry" +#ENV PATH="$PATH:/root/.local/bin" # export path ENV PATH="$PATH:/root/.local/share/pipx/venvs/poetry/bin/" -# Install Pandas and Keras in a virtual environment using Poetry WORKDIR /app # Copy only the pyproject.toml and poetry.lock files initially to leverage Docker cache COPY pyproject.toml ./ -# upgrade pip -RUN pip install --upgrade pip setuptools wheel - # Copy the rest of the application code COPY pyproject.toml . COPY poetry.lock . COPY framepool_annotate.py . 
-# not needed because we are mounting +COPY modules /app/modules # append Python module dir to Path -RUN PATH="${PATH}:/app/modules" +ENV PATH="${PATH}:/app/modules" # Install dependencies +# RUN /opt/.cache/virtualenvs/orfa-9TtSrW0h-py3.8/bin/poetry install --no-root --no-cache -vvv RUN poetry install --no-root --no-cache -vvv + # Set the default command to run the application #CMD ["python", "framepool_annotate.py"] diff --git a/README.md b/README.md index e3084cf..b71d83d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,20 @@ +# Docker + +I have made a custom Dockerfile to run the Framepool codebase. Some design decisions explained: +- Ubuntu's `apt-get` does not install pipx or poetry correctly, so I used a pip workaround to install both. +- The Docker image has a Jupyter notebook entrypoint by default. You can run the image's Jupyter notebook like so: +``` +docker run -it --rm -v .:/app/mnt:ro -p 8080:8080 framepool +``` +- If you want a different entrypoint (e.g. bash) you can run: +``` +docker run -it --entrypoint /bin/bash --rm -v .:/app/mnt:ro framepool +``` + +I have made a custom script for annotating VCF files with FramePool. It takes a tab-delimited file as input, with columns ???. + +You can run this script using: `poetry run python modules/framepool_annotate.py` + # 5UTR In the paper "Human 5′ UTR design and variant effect prediction from a massively parallel translation assay" (Sample et al), MPRA data is used to train a powerful deep model to predict ribsome load (a measure of translation efficiency) from the seuqnece of the 5 untranslated region. This model can be used to predict the effect of variants (mutations) on the ribosome load (and thus translation efficiency), which could be used to investigate the causes of rare genetic dieseases. 
diff --git a/framepool_annotate.py b/framepool_annotate.py index 3d8c30d..0def92b 100644 --- a/framepool_annotate.py +++ b/framepool_annotate.py @@ -9,6 +9,7 @@ """ import sys +import argparse sys.path.append('/app/modules/') from kipoi_functions import framepool_caller diff --git a/poetry.lock b/poetry.lock index 1f3eeb9..0d25407 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "anyio" @@ -1746,8 +1746,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2076,7 +2076,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = 
"PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},