bystrogenomics · akotlar · Oct 19, 2024 · Oct 19, 2024 · Oct 19, 2024 · Oct 20, 2024
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
@@ -3,24 +3,21 @@ name: CI Tests
 on: [push, pull_request]
 
 jobs:
-
   build:
-
     runs-on: ubuntu-latest
     strategy:
       matrix:
         shard: [1, 2, 3]
 
-
     steps:
       - uses: actions/checkout@v2
-      
+
       - name: Set up JDK 11
         uses: actions/setup-java@v2
         with:
-          java-version: '11'
-          distribution: 'adopt'
-          
+          java-version: "11" # quoted so the version stays a YAML string
+          distribution: "temurin" # Temurin replaces the discontinued AdoptOpenJDK ("adopt") builds
+
       - name: Setup Nextflow
         uses: nf-core/setup-nextflow@v1
         with:
@@ -30,9 +27,9 @@ jobs:
         run: |
           wget -qO- get.nf-test.com | bash
           sudo mv nf-test /usr/local/bin/
-          
-      - name: Build Docker 
-        run:  docker build -t genepi/imputationserver2 .
-        
+
+      - name: Build Docker
+        run: docker build -t bystrogenomics/imputationserver2 . # NOTE(review): confirm the pipeline's container setting uses this image name
+
       - name: Run Tests (Shard ${{ matrix.shard }}/${{ strategy.job-total }})
         run: nf-test test --ci --shard ${{ matrix.shard }}/${{ strategy.job-total }}
diff --git a/Dockerfile b/Dockerfile
@@ -8,7 +8,7 @@ RUN apt-get update && \
 
 #  Install miniconda
 RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_24.5.0-0-Linux-x86_64.sh -O ~/miniconda.sh && \
-  /bin/bash ~/miniconda.sh -b -p /opt/conda
+    /bin/bash ~/miniconda.sh -b -p /opt/conda && rm -f ~/miniconda.sh
 ENV PATH=/opt/conda/bin:${PATH}
 
 COPY environment.yml .
@@ -26,7 +26,7 @@ RUN wget https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/
     mv Eagle_v${EAGLE_VERSION}/eagle /usr/bin/.
 
 # Install beagle
-ENV BEAGLE_VERSION=18May20.d20
+ENV BEAGLE_VERSION=27May24.118
 WORKDIR "/opt"
 RUN wget https://faculty.washington.edu/browning/beagle/beagle.${BEAGLE_VERSION}.jar && \
     mv beagle.${BEAGLE_VERSION}.jar /usr/bin/.
@@ -68,7 +68,7 @@ RUN wget https://github.com/jingweno/ccat/releases/download/v${CCAT_VERSION}/lin
 RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
     unzip awscliv2.zip && \
     ./aws/install --bin-dir /usr/local/bin --install-dir /usr/local/aws-cli --update
-    
+
 # Needed, because imputationserver-utils starts process (e.g. tabix)
 ENV JAVA_TOOL_OPTIONS="-Djdk.lang.Process.launchMechanism=vfork"
 

diff --git a/README.md b/README.md
@@ -1,35 +1,39 @@
 # Imputation Server 2
+
 [![Publication](https://img.shields.io/badge/Published-Nature%20Genetics-26af64.svg?colorB=26af64&style=popout)](https://www.nature.com/articles/ng.3656)
-[![imputationserver2](https://github.com/genepi/imputationserver2/actions/workflows/ci-tests.yml/badge.svg)](https://github.com/genepi/imputationserver2/actions/workflows/ci-tests.yml)
+[![imputationserver2](https://github.com/bystrogenomics/imputationserver2/actions/workflows/ci-tests.yml/badge.svg)](https://github.com/bystrogenomics/imputationserver2/actions/workflows/ci-tests.yml)
 [![nf-test](https://img.shields.io/badge/tested_with-nf--test-337ab7.svg)](https://github.com/askimed/nf-test)
- <a href="https://twitter.com/intent/follow?screen_name=umimpute"> <img src="https://img.shields.io/twitter/follow/umimpute.svg?style=social" alt="follow on Twitter"></a>
+<a href="https://twitter.com/intent/follow?screen_name=umimpute"> <img src="https://img.shields.io/twitter/follow/umimpute.svg?style=social" alt="follow on Twitter"></a>
 
 This repository contains the Imputation Server 2 workflow to facilitate genotype imputation at scale. It serves as the underlying workflow of the [Michigan Imputation Server](https://imputationserver.sph.umich.edu).
 
 ## Citation
-> Das S*, Forer L*, Schönherr S*, Sidore C, Locke AE, Kwong A, Vrieze S, Chew EY, Levy S, McGue M, Schlessinger D,       Stambolian D, Loh PR, Iacono WG, Swaroop A, Scott LJ, Cucca F, Kronenberg F, Boehnke M, Abecasis GR, Fuchsberger C. Next-generation genotype imputation service and methods. Nature Genetics 48, 1284–1287 (2016).
-<sub>*Shared first authors</sub>
-
+
+> Das S*, Forer L*, Schönherr S*, Sidore C, Locke AE, Kwong A, Vrieze S, Chew EY, Levy S, McGue M, Schlessinger D, Stambolian D, Loh PR, Iacono WG, Swaroop A, Scott LJ, Cucca F, Kronenberg F, Boehnke M, Abecasis GR, Fuchsberger C. Next-generation genotype imputation service and methods. Nature Genetics 48, 1284–1287 (2016).
+> <sub>*Shared first authors</sub>
+
 ## License
 
 imputationserver2 is MIT Licensed and was developed at the [Institute of Genetic Epidemiology](https://genepi.i-med.ac.at/), Medical University of Innsbruck, Austria.
 
 ## Contact
+
 If you have any questions about imputationserver2 please contact
+
 - [Sebastian Schönherr](https://genepi.i-med.ac.at/team/schoenherr-sebastian/)
 - [Lukas Forer](https://genepi.i-med.ac.at/team/forer-lukas/)
 
-If you encounter any problems, feel free to open an issue [here](https://github.com/genepi/imputationserver2/issues).
+If you encounter any problems, feel free to open an issue [here](https://github.com/bystrogenomics/imputationserver2/issues).
 
 ## Version History
 
-[Version 2.0.3 - Version 2.0.6](https://github.com/genepi/imputationserver2/releases/tag/v2.0.6)  - Fix QC issues and remove HTSJDK index creation for input validation and QC.
+[Version 2.0.3 - Version 2.0.6](https://github.com/bystrogenomics/imputationserver2/releases/tag/v2.0.6) - Fix QC issues and remove HTSJDK index creation for input validation and QC.
 
-[Version 2.0.2](https://github.com/genepi/imputationserver2/releases/tag/v2.0.2) - Set minimac4 tmp directory (required for larger sample sizes).
+[Version 2.0.2](https://github.com/bystrogenomics/imputationserver2/releases/tag/v2.0.2) - Set minimac4 tmp directory (required for larger sample sizes).
 
-[Version 2.0.1](https://github.com/genepi/imputationserver2/releases/tag/v2.0.1) - Provide statistics to users in case QC failed; check normalized multiallelic variants in reference panel. 
+[Version 2.0.1](https://github.com/bystrogenomics/imputationserver2/releases/tag/v2.0.1) - Provide statistics to users in case QC failed; check normalized multiallelic variants in reference panel.
 
-[Version 2.0.0](https://github.com/genepi/imputationserver2/releases/tag/v2.0.0) - First stable release; migration of the imputation workflow to Nextflow.
+[Version 2.0.0](https://github.com/bystrogenomics/imputationserver2/releases/tag/v2.0.0) - First stable release; migration of the imputation workflow to Nextflow.
 
 ## Run with test data
 
@@ -63,48 +67,45 @@ nextflow run main.nf -c job.config
 
 ## Parameters
 
-| Parameter             | Default Value         | Description                                        |
-| --------------------- | --------------------- | -------------------------------------------------- |
-| `project`             | `null`                | Project name                                       |
-| `project_date`        | `date`                | Project date                                       |
-| `files`               | `null`                | List of input files                                |
-| `allele_frequency_population`          | `null`                | Allele Frequency Population information                             |
-| `refpanel_yaml`       | `null`                | Reference panel YAML file                          |
-| `mode`                | `imputation`          | Processing mode (e.g., 'imputation' or `qc-only``) |
-| `chunksize`           | `20000000`            | Chunk size for processing                          |
-| `min_samples`         | `20`                  | Minimum number of samples needed                   |
-| `max_samples`         | `50000`               | Maximum number of samples allowed                  |
-| `merge_samples`       | `true`                | Execute compression and encryption workflow        |
-| `password`            | `null`                | Password for encryption                            |
-| `send_mail`           | `false`               | Enable or disable email notifications              |
-| `service.name`        | `Imputation Server 2` | Service name                                       |
-| `service.email`       | `null`                | Service email                                      |
-| `service.url`         | `null`                | Service URL                                        |
-| `user.name`           | `null`                | User's name                                        |
-| `user.email`          | `null`                | User's email                                       |
-| `phasing.engine`      | `eagle`               | Phasing method (e.g., 'eagle' or `beagle`)         |
-| `phasing.window`      | `5000000`             | Phasing window size                                |
-| `imputation.enabled`  | `true`                | Enable or disable imputation                       |
-| `imputation.window`   | `500000`              | Imputation window size                             |
-| `imputation.minimac_min_ratio`   | `0.00001`  | Minimac minimum ratio                              |
-| `imputation.min_r2`   | `0`                   | R2 filter value                                    |
-| `imputation.meta`     | `false`               | Enable or disable empirical output creation        |
-| `imputation.md5`      | `false`               | Enable or disable md5 sum creation for results     |
-| `imputation.create_index`    | `false`        | Enable or disable index creation for imputed files |
-| `imputation.decay`    | `0`                   | Set minimac decay                                  |
-| `encryption.enabled`  | `true`                | Enable or disable encryption                       |
-| `encryption.aes`      | `false`               | Enable or disable AES method for encryption        |
-| `ancestry.enabled`    | `false`               | Enable or disable ancestry analysis                |
-| `ancestry.dim`        | `10`                  | Ancestry analysis dimension                        |
-| `ancestry.dim_high`   | `20`                  | High dimension for ancestry analysis               |
-| `ancestry.batch_size` | `50`                  | Batch size for ancestry analysis                   |
-| `ancestry.reference`  | `null`                | Ancestry reference data                            |
-| `ancestry.max_pcs`    | `8`                   | Maximum principal components for ancestry          |
-| `ancestry.k`          | `10`                  | K value for ancestry analysis                      |
-| `ancestry.threshold`  | `0.75`                | Ancestry threshold                                 |
-
-
-
+| Parameter                      | Default Value         | Description                                        |
+| ------------------------------ | --------------------- | -------------------------------------------------- |
+| `project`                      | `null`                | Project name                                       |
+| `project_date`                 | `date`                | Project date                                       |
+| `files`                        | `null`                | List of input files                                |
+| `allele_frequency_population`  | `null`                | Allele Frequency Population information            |
+| `refpanel_yaml`                | `null`                | Reference panel YAML file                          |
+| `mode`                         | `imputation`          | Processing mode (e.g., `imputation` or `qc-only`)  |
+| `chunksize`                    | `20000000`            | Chunk size for processing                          |
+| `min_samples`                  | `20`                  | Minimum number of samples needed                   |
+| `max_samples`                  | `50000`               | Maximum number of samples allowed                  |
+| `merge_samples`                | `true`                | Execute compression and encryption workflow        |
+| `password`                     | `null`                | Password for encryption                            |
+| `send_mail`                    | `false`               | Enable or disable email notifications              |
+| `service.name`                 | `Imputation Server 2` | Service name                                       |
+| `service.email`                | `null`                | Service email                                      |
+| `service.url`                  | `null`                | Service URL                                        |
+| `user.name`                    | `null`                | User's name                                        |
+| `user.email`                   | `null`                | User's email                                       |
+| `phasing.engine`               | `eagle`               | Phasing method (e.g., `eagle` or `beagle`)         |
+| `phasing.window`               | `5000000`             | Phasing window size                                |
+| `imputation.enabled`           | `true`                | Enable or disable imputation                       |
+| `imputation.window`            | `500000`              | Imputation window size                             |
+| `imputation.minimac_min_ratio` | `0.00001`             | Minimac minimum ratio                              |
+| `imputation.min_r2`            | `0`                   | R2 filter value                                    |
+| `imputation.meta`              | `false`               | Enable or disable empirical output creation        |
+| `imputation.md5`               | `false`               | Enable or disable md5 sum creation for results     |
+| `imputation.create_index`      | `false`               | Enable or disable index creation for imputed files |
+| `imputation.decay`             | `0`                   | Set minimac decay                                  |
+| `encryption.enabled`           | `true`                | Enable or disable encryption                       |
+| `encryption.aes`               | `false`               | Enable or disable AES method for encryption        |
+| `ancestry.enabled`             | `false`               | Enable or disable ancestry analysis                |
+| `ancestry.dim`                 | `10`                  | Ancestry analysis dimension                        |
+| `ancestry.dim_high`            | `20`                  | High dimension for ancestry analysis               |
+| `ancestry.batch_size`          | `50`                  | Batch size for ancestry analysis                   |
+| `ancestry.reference`           | `null`                | Ancestry reference data                            |
+| `ancestry.max_pcs`             | `8`                   | Maximum principal components for ancestry          |
+| `ancestry.k`                   | `10`                  | K value for ancestry analysis                      |
+| `ancestry.threshold`           | `0.75`                | Ancestry threshold                                 |
 
 ## Reference Panel Configuration
 
@@ -129,7 +130,7 @@ The `properties` section contains the following key-value pairs:
 | ------------- | --------------------------------------------------------------------------- | -------- |
 | `id`          | An identifier for the reference panel.                                      | yes      |
 | `genotypes`   | The location of the genotype files for the reference panel data.            | yes      |
-| `sites`      | The location of the site files for the reference panel data.                 | yes      |
+| `sites`       | The location of the site files for the reference panel data.                | yes      |
 | `mapEagle`    | The location of the genetic map file used for phasing with eagle.           | yes      |
 | `refEagle`    | The location of the BCF file for the reference panel data for eagle.        | yes      |
 | `mapBeagle`   | The location of the genetic map file used for phasing with Beagle.          | no       |
@@ -216,7 +217,7 @@ A legend file is a tab-delimited file consisting of 5 columns (`id`, `position`,
 - Start cloudgene server: `./cloudgene server`
 - Open [http://localhost:8082](http://localhost:8082)
 - Login with default admin account: username `admin` and password `admin1978`
-- Imputation can be tested with the following [test file](https://github.com/genepi/imputationserver2/raw/main/tests/input/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz)
+- Imputation can be tested with the following [test file](https://github.com/bystrogenomics/imputationserver2/raw/main/tests/input/chr20-phased/chr20.R50.merged.1.330k.recode.small.vcf.gz)
 
 ### Default Configuration
 
@@ -311,7 +312,7 @@ params.imputation.window = 100000
 ### Build docker image locally
 
 ```
-docker build -t genepi/imputationserver2:latest .
+docker build -t bystrogenomics/imputationserver2:latest .
 ```
 
 ### Run testcases

diff --git a/conf/main.config b/conf/main.config
@@ -0,0 +1,32 @@
+/*
+========================================================================================
+    Nextflow config file: imputation parameters with Eagle phasing (1000G panel)
+========================================================================================
+    Sets pipeline params only; `files` and `output` are left empty in this file.
+    Use as follows:
+        nextflow run main.nf -c conf/main.config
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    project         = "test-job2"
+    build           = "hg19"
+    files           = ""  // NOTE(review): empty here; presumably supplied by the caller, confirm
+    output          = ""  // NOTE(review): empty here; presumably supplied by the caller, confirm
+    allele_frequency_population      = "off"
+    password        = null
+    mode            = "imputation"
+    refpanel_yaml   = "$projectDir/refpanels/1000G/cloudgene.yaml"
+    imputation = [
+        meta: false,
+        create_index: false,
+        min_r2: 0.8  // explicit leading zero; a bare `.8` is not a portable Groovy decimal literal
+    ]
+    encryption = [
+        enabled: false,
+        aes: false
+    ]
+    phasing = [
+        engine: "eagle"
+    ]
+}
diff --git a/conf/main_with_beagle.config b/conf/main_with_beagle.config
@@ -0,0 +1,33 @@
+/*
+========================================================================================
+    Nextflow config file: imputation parameters with Beagle phasing (1000G panel)
+========================================================================================
+    Sets pipeline params only; `files` and `output` are left empty in this file.
+    Use as follows:
+        nextflow run main.nf -c conf/main_with_beagle.config
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    project         = "test-job2"
+    build           = "hg19"
+    files           = ""  // NOTE(review): empty here; presumably supplied by the caller, confirm
+    output          = ""  // NOTE(review): empty here; presumably supplied by the caller, confirm
+    allele_frequency_population      = "off"
+    password        = null
+    mode            = "imputation"
+    refpanel_yaml   = "$projectDir/refpanels/1000G/cloudgene_with_beagle.yaml"
+    imputation = [
+        meta: false,
+        create_index: false,
+        min_r2: 0.8  // explicit leading zero; a bare `.8` is not a portable Groovy decimal literal
+    ]
+    encryption = [
+        enabled: false,
+        aes: false
+    ]
+    phasing = [
+        engine: "beagle",
+        impute: true  // NOTE(review): `phasing.impute` is not listed in the README parameter table
+    ]
+}
diff --git a/conf/test_single_vcf.config b/conf/test_single_vcf.config
@@ -9,16 +9,27 @@
 */
 
 params {
-project         = "test-job2"
+    workDir = '/mnt/annotator/work'  // NOTE(review): inside params this only sets params.workDir; Nextflow's work dir is the top-level `workDir` option. Path is machine-specific; confirm intent
+    project         = "test-job2"
     build           = "hg19"
-    files           = "$projectDir/tests/data/input/chr20-unphased/*.vcf.gz"
-    allele_frequency_population      = "eur"
-    password        = "lukas"
+    files           = "foo"  // NOTE(review): looks like a placeholder; confirm the intended input glob
+    output          = "output/single_chrx"
+    allele_frequency_population      = "off"
+    password        = null
     mode            = "imputation"
-    refpanel_yaml   = "$projectDir/tests/data/refpanels/hapmap2/cloudgene.yaml"
-    output = "output/single"
+    refpanel_yaml   = "$projectDir/refpanels/1000G/cloudgene.yaml"
     imputation = [
-        meta: false          
+        meta: false,
+        create_index: false,
+        min_r2: 0.8  // explicit leading zero; a bare `.8` is not a portable Groovy decimal literal
     ]
-}
-
+    encryption = [
+        enabled: false,
+        aes: false
+    ]
+    phasing = [
+        engine: "beagle",
+        impute: true  // NOTE(review): `phasing.impute` is not listed in the README parameter table
+    ]
+    cleanup = true  // NOTE(review): inside params this only sets params.cleanup; Nextflow's `cleanup` option is top-level. Confirm intent
+}