From afef2b14277e80a70c0abbc3d68baa4d8842253d Mon Sep 17 00:00:00 2001
From: Elizabeth Kiernan <55763654+ekiernan@users.noreply.github.com>
Date: Fri, 10 Jan 2025 11:27:38 -0500
Subject: [PATCH] Ultima Whole Genome Cram Only testing (#1472)

Added Ultima Cram Only
---
 .dockstore.yml                                |   4 +
 ...ultima_genomics_whole_genome_cram_only.yml | 429 ++++++++++++++++++
 .../TestUltimaGenomicsWholeGenomeCramOnly.wdl |  12 +-
 3 files changed, 436 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/test_ultima_genomics_whole_genome_cram_only.yml

diff --git a/.dockstore.yml b/.dockstore.yml
index 085b53e185..29ad0e18cf 100644
--- a/.dockstore.yml
+++ b/.dockstore.yml
@@ -167,6 +167,10 @@ workflows:
     subclass: WDL
     primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl
 
+  - name: TestUltimaGenomicsWholeGenomeCramOnly
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
+
   - name: TestVariantCalling
     subclass: WDL
     primaryDescriptorPath: /verification/test-wdls/TestVariantCalling.wdl
diff --git a/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml b/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml
new file mode 100644
index 0000000000..d66eed66ab
--- /dev/null
+++ b/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml
@@ -0,0 +1,429 @@
+name: Test UltimaGenomicsWholeGenomeCramOnly
+
+# Controls when the workflow will run
+on:
+  pull_request:
+    branches: [ "develop", "staging", "master" ]
+    # Only run if files in these paths changed:
+    ####################################
+    # SET PIPELINE SPECIFIC PATHS HERE #
+    ####################################
+    paths:
+      - 'pipelines/broad/dna_seq/somatic/single_sample/ugwgs/**'
+      - 'pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl'
+      - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl'
+      - 'tasks/broad/GermlineVariantDiscovery.wdl'
+      - 'structs/dna_seq/DNASeqStructs.wdl'
+      - 'tasks/broad/Alignment.wdl'
+      - 'tasks/broad/Qc.wdl'
+      - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineQC.wdl'
+      - 'structs/dna_seq/UltimaGenomicsWholeGenomeGermlineStructs.wdl'
+      - 'tasks/broad/InternalTasks.wdl'
+      - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineAlignmentMarkDuplicates.wdl'
+      - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl'
+      - 'verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl'
+      - 'verification/VerifyMetrics.wdl'
+      - 'verification/VerifyTasks.wdl'
+      - 'verification/VerifyNA12878.wdl'
+      - 'verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl'
+      - 'tasks/broad/Utilities.wdl'
+      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
+      - '.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml'
+
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      useCallCache:
+        description: 'Use call cache (default: true)'
+        required: false
+        default: "true"
+      updateTruth:
+        description: 'Update truth files (default: false)'
+        required: false
+        default: "false"
+      testType:
+        description: 'Specify the type of test (Plumbing or Scientific)'
+        required: true
+      truthBranch:
+        description: 'Specify the branch for truth files (default: master)'
+        required: false
+        default: "master"
+
+
+env:
+  # pipeline configuration
+  PROJECT_NAME: WARP
+  PIPELINE_NAME: TestUltimaGenomicsWholeGenomeCramOnly
+  DOCKSTORE_PIPELINE_NAME: UltimaGenomicsWholeGenomeCramOnly
+  PIPELINE_DIR: "pipelines/broad/dna_seq/somatic/single_sample/ugwgs/"
+
+  # workspace configuration
+  TESTING_WORKSPACE: WARP Tests
+  WORKSPACE_NAMESPACE: warp-pipelines
+
+  # github repo configuration
+  REPOSITORY_NAME: ${{ github.event.repository.name }}
+
+  # service account configuration
+  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
+  USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com
+
+
+jobs:
+  run_pipeline:
+    runs-on: ubuntu-latest
+    # Add "id-token" with the intended permissions.
+    permissions:
+      contents: 'read'
+      id-token: 'write'
+
+    steps:
+      # actions/checkout MUST come before auth action
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ github.ref }}
+
+      - name: Set up python
+        id: setup-python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          cd scripts/firecloud_api/
+          pip install -r requirements.txt
+
+      - name: Set Branch Name
+        id: set_branch
+        run: |
+          # GITHUB_REF_NAME keeps slashes in branch names (e.g. feature/x).
+          if [ -z "$HEAD_REF" ]; then
+            echo "Branch name is missing, using $GITHUB_REF_NAME"
+            echo "BRANCH_NAME=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
+          else
+            echo "Branch name from PR: $HEAD_REF"
+            echo "BRANCH_NAME=$HEAD_REF" >> "$GITHUB_ENV"
+          fi
+        env:
+          # Passed through the environment rather than interpolated into the
+          # script, because the PR branch name is attacker-controlled text.
+          HEAD_REF: ${{ github.head_ref }}
+
+      - name: Determine Github Commit Hash
+        id: determine_github_commit_hash
+        run: |
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            echo "Using github.sha for manually triggered workflow."
+            echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> "$GITHUB_ENV"
+          elif [ "${{ github.event_name }}" == "pull_request" ]; then
+            echo "Using github.event.pull_request.head.sha for PR-triggered workflow."
+            echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_ENV"
+          else
+            echo "Unsupported event type: ${{ github.event_name }}"
+            exit 1
+          fi
+
+      - name: Fetch Dockstore Workflow Commit Hash
+        run: |
+          # Wait 5.5 minutes for Dockstore to update
+          sleep 330
+
+          DOCKSTORE_COMMIT_HASH_FROM_FETCH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \
+            "$DOCKSTORE_TOKEN" \
+            "$DOCKSTORE_PIPELINE_NAME" \
+            "$BRANCH_NAME")
+
+          # Export the commit hash as an environment variable
+          echo "DOCKSTORE_COMMIT_HASH=$DOCKSTORE_COMMIT_HASH_FROM_FETCH" >> "$GITHUB_ENV"
+          echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH_FROM_FETCH"
+        env:
+          ## TODO NEED TO ADD DOCKSTORE_TOKEN FOR SERVICE ACCOUNT ##
+          DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
+          DOCKSTORE_PIPELINE_NAME: ${{ env.DOCKSTORE_PIPELINE_NAME }}
+          BRANCH_NAME: ${{ env.BRANCH_NAME }}
+
+      - name: Compare Dockstore and Commit Hashes
+        id: compare_hashes
+        run: |
+          echo "Comparing hashes..."
+          echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH"
+          echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH"
+
+          if [ "$DOCKSTORE_COMMIT_HASH" != "$GITHUB_COMMIT_HASH" ]; then
+            echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash!"
+            echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH"
+            exit 1
+          else
+            echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash."
+          fi
+        env:
+          DOCKSTORE_COMMIT_HASH: ${{ env.DOCKSTORE_COMMIT_HASH }}
+          GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }}
+
+      - name: Set Test Type
+        id: set_test_type
+        run: |
+          if [ "${{ github.event_name }}" == "pull_request" ]; then
+            # For PRs, set based on target branch
+            if [ "${{ github.base_ref }}" == "master" ]; then
+              echo "testType=Scientific" >> "$GITHUB_ENV"
+              echo "testType=Scientific"
+            else
+              echo "testType=Plumbing" >> "$GITHUB_ENV"
+              echo "testType=Plumbing"
+            fi
+          else
+            # For workflow_dispatch, use provided test type
+            echo "testType=${{ github.event.inputs.testType }}" >> "$GITHUB_ENV"
+            echo "testType=${{ github.event.inputs.testType }}"
+          fi
+
+
+      - name: Create new method configuration
+        run: |
+          echo "Creating new method configuration for branch: $BRANCH_NAME"
+
+          METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \
+            create_new_method_config \
+            --workspace-namespace "$WORKSPACE_NAMESPACE" \
+            --workspace-name "$TESTING_WORKSPACE" \
+            --pipeline_name "$PIPELINE_NAME" \
+            --branch_name "$BRANCH_NAME" \
+            --sa-json-b64 "$SA_JSON_B64" \
+            --user "$USER")
+
+          echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> "$GITHUB_ENV"
+        env:
+          PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
+          TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
+          WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
+          USER: ${{ env.USER }}
+
+      - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs
+        run: |
+          UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}"
+          USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}"
+          TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}"
+          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
+          MAX_RETRIES=2
+          RETRY_DELAY=300 # 300 seconds = 5 minutes
+          # Initialize variables to aggregate statuses and outputs
+          ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
+          ALL_OUTPUTS=""
+          # Initialize arrays to track submission and workflow statuses
+          declare -a SUBMISSION_IDS
+          declare -A WORKFLOW_STATUSES
+          OVERALL_SUCCESS=true
+
+
+          # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
+          if [ "$UPDATE_TRUTH" = "true" ]; then
+            UPDATE_TRUTH_BOOL=true
+          else
+            UPDATE_TRUTH_BOOL=false
+          fi
+
+          if [ "$USE_CALL_CACHE" == "true" ]; then
+            USE_CALL_CACHE_BOOL=true
+          else
+            USE_CALL_CACHE_BOOL=false
+          fi
+
+          TEST_TYPE="${{ env.testType }}"
+          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
+          echo "Running tests with test type: $TEST_TYPE"
+
+          TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
+          echo "Truth path: $TRUTH_PATH"
+          RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME"
+
+          # Create the submission_data.json file which will be the same for all inputs
+          SUBMISSION_DATA_FILE="submission_data.json"
+
+          # Use a heredoc to generate the JSON file content dynamically
+          cat <<EOF > "$SUBMISSION_DATA_FILE"
+          {
+            "methodConfigurationNamespace": "$WORKSPACE_NAMESPACE",
+            "methodConfigurationName": "$METHOD_CONFIG_NAME",
+            "useCallCache": $USE_CALL_CACHE_BOOL,
+            "deleteIntermediateOutputFiles": false,
+            "useReferenceDisks": true,
+            "memoryRetryMultiplier": 1.2,
+            "workflowFailureMode": "NoNewCalls",
+            "userComment": "Automated submission",
+            "ignoreEmptyOutputs": false
+          }
+          EOF
+
+          echo "Created submission data file: $SUBMISSION_DATA_FILE"
+
+          # 1. Submit all jobs first and store their submission IDs
+          for input_file in "$INPUTS_DIR"/*.json; do
+            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
+              --results_path "$RESULTS_PATH" \
+              --inputs_json "$input_file" \
+              --update_truth "$UPDATE_TRUTH_BOOL" \
+              --branch_name "$BRANCH_NAME" )
+            echo "Uploading the test input file: $test_input_file"
+            python3 scripts/firecloud_api/firecloud_api.py \
+              upload_test_inputs \
+              --workspace-namespace "$WORKSPACE_NAMESPACE" \
+              --workspace-name "$TESTING_WORKSPACE" \
+              --pipeline_name "$PIPELINE_NAME" \
+              --test_input_file "$test_input_file" \
+              --branch_name "$BRANCH_NAME" \
+              --sa-json-b64 "$SA_JSON_B64" \
+              --user "$USER"
+
+            attempt=1
+            while [ $attempt -le $MAX_RETRIES ]; do
+              SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \
+                --workspace-namespace "$WORKSPACE_NAMESPACE" \
+                --workspace-name "$TESTING_WORKSPACE" \
+                --sa-json-b64 "$SA_JSON_B64" \
+                --user "$USER" \
+                --submission_data_file "$SUBMISSION_DATA_FILE")
+
+              echo "Submission ID: $SUBMISSION_ID"
+
+              if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
+                echo "Error in submission, retrying in $RETRY_DELAY seconds..."
+                ((attempt++))
+                if [ $attempt -gt $MAX_RETRIES ]; then
+                  echo "Max retries reached. Exiting..."
+                  exit 1
+                fi
+                sleep $RETRY_DELAY
+                continue
+              fi
+
+              echo "Submission successful. Submission ID: $SUBMISSION_ID"
+              SUBMISSION_IDS+=("$SUBMISSION_ID")
+              break
+            done
+          done
+
+          echo "All jobs have been submitted. Starting to poll for statuses..."
+
+          # 2. After all submissions are done, start polling for statuses of all jobs
+          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
+            attempt=1
+            while [ $attempt -le $MAX_RETRIES ]; do
+              echo "Polling for Submission ID: $SUBMISSION_ID"
+              RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \
+                --submission_id "$SUBMISSION_ID" \
+                --sa-json-b64 "$SA_JSON_B64" \
+                --user "$USER" \
+                --workspace-namespace "$WORKSPACE_NAMESPACE" \
+                --workspace-name "$TESTING_WORKSPACE")
+
+              if [ -z "$RESPONSE" ]; then
+                echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
+                ((attempt++))
+                if [ $attempt -gt $MAX_RETRIES ]; then
+                  echo "Max retries reached. Exiting..."
+                  exit 1
+                fi
+                sleep $RETRY_DELAY
+                continue
+              fi
+
+              WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
+              WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"
+
+              # Check if any workflow failed or errored
+              FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key')
+              if [ -n "$FAILED_WORKFLOWS" ]; then
+                echo "Failed workflows detected:"
+                echo "$FAILED_WORKFLOWS"
+                OVERALL_SUCCESS=false
+              fi
+
+              # retrieve workflow outputs
+              echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
+              for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
+                WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \
+                  --user "$USER" \
+                  --sa-json-b64 "$SA_JSON_B64" \
+                  --submission_id "$SUBMISSION_ID" \
+                  --workspace-namespace "$WORKSPACE_NAMESPACE" \
+                  --workspace-name "$TESTING_WORKSPACE" \
+                  --workflow_id "$WORKFLOW_ID" \
+                  --pipeline_name "$PIPELINE_NAME")
+                ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
+              done
+              break
+            done
+          done
+          # Generate the final summary once, after every submission has been polled
+          echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
+          for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
+            # Generate the Terra URL for the submission (spaces in the workspace name become %20)
+            SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/${TESTING_WORKSPACE// /%20}/job_history/$SUBMISSION_ID"
+
+            # Add the Submission ID as a hyperlink
+            echo "[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)" >> "$GITHUB_STEP_SUMMARY"
+
+            # Add the workflows and statuses for this submission
+            echo "${WORKFLOW_STATUSES[$SUBMISSION_ID]}" >> "$GITHUB_STEP_SUMMARY"
+
+            # Add a blank line for separation
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+          done
+
+          # Exit with error if any workflows failed
+          if [ "$OVERALL_SUCCESS" = false ]; then
+            echo "One or more workflows failed in Terra. Check the workflow status summary for details."
+            exit 1
+          fi
+        env:
+          PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
+          TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
+          METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
+          WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
+          USER: ${{ env.USER }}
+          DOCKSTORE_PIPELINE_NAME: ${{ env.DOCKSTORE_PIPELINE_NAME }}
+          PIPELINE_DIR: ${{ env.PIPELINE_DIR }}
+
+      - name: Delete Method Configuration
+        # Runs regardless of earlier success or failure, but only when a
+        # method configuration was actually created by an earlier step.
+        if: ${{ always() && env.METHOD_CONFIG_NAME != '' }}
+        run: |
+          echo "Deleting method configuration for branch: $BRANCH_NAME"
+          DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \
+            --workspace-namespace "$WORKSPACE_NAMESPACE" \
+            --workspace-name "$TESTING_WORKSPACE" \
+            --pipeline_name "$PIPELINE_NAME" \
+            --branch_name "$BRANCH_NAME" \
+            --sa-json-b64 "$SA_JSON_B64" \
+            --user "$USER" \
+            --method_config_name "$METHOD_CONFIG_NAME")
+          echo "Delete response: $DELETE_RESPONSE"
+          if [ "$DELETE_RESPONSE" == "True" ]; then
+            echo "Method configuration deleted successfully."
+          else
+            echo "Error: Method configuration deletion failed."
+            exit 1
+          fi
+
+        env:
+          PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
+          BRANCH_NAME: ${{ env.BRANCH_NAME }}
+          SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
+          METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
+          WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
+          TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
+          USER: ${{ env.USER }}
+
+      - name: Print Summary on Success
+        if: success()
+        run: |
+          echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Print Summary on Failure
+        if: failure()
+        run: |
+          echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> "$GITHUB_STEP_SUMMARY"
diff --git a/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl b/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
index 5203abb500..5275b62cee 100644
--- a/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
+++ b/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl" as UltimaGenomicsWholeGenomeCramOnly
 import "../../verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl" as VerifyUltimaGenomicsWholeGenomeCramOnly
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 workflow TestUltimaGenomicsWholeGenomeCramOnly {
 
@@ -23,8 +23,6 @@ workflow TestUltimaGenomicsWholeGenomeCramOnly {
       String truth_path
       String results_path
       Boolean update_truth
-      String vault_token_path
-      String google_account_vault_path
     }
 
     meta {
@@ -80,21 +78,17 @@ workflow TestUltimaGenomicsWholeGenomeCramOnly {
     ])
 
     # Copy results of pipeline to test results bucket
-    call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = results_path
     }
 
     # If updating truth then copy output to truth bucket
     if (update_truth){
-      call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+      call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
         input:
           files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-          vault_token_path = vault_token_path,
-          google_account_vault_path = google_account_vault_path,
           destination_cloud_path = truth_path
       }
     }