Skip to content

Commit

Permalink
Enable selections in docker image build workflow (#2643)
Browse files Browse the repository at this point in the history
  • Loading branch information
HappyAmazonian authored Dec 30, 2024
1 parent b836683 commit 37606cf
Showing 1 changed file with 48 additions and 101 deletions.
149 changes: 48 additions & 101 deletions .github/workflows/docker-nightly-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,23 @@ on:
description: 'release/nightly/temp, default is nightly'
required: true
default: 'nightly'
arch:
description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]'
type: string
required: false
default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi", "aarch64"]'
workflow_call:
inputs:
mode:
description: 'release/nightly/temp, default is nightly'
type: string
required: true
default: 'nightly'
arch:
description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]'
type: string
required: false
default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi", "aarch64"]'
outputs:
djl_version:
description: "djl version"
Expand All @@ -30,102 +40,45 @@ env:
jobs:
create-runners:
runs-on: [ self-hosted, scheduler ]
strategy:
  matrix:
    # inputs.arch may be a JSON array (["cpu", "lmi"]), a single quoted name
    # ("cpu"), or a single bare name (cpu). The JSON text is assembled first
    # and parsed once: an array is used as-is, a quoted name is wrapped in
    # brackets, and a bare name is wrapped in quotes and brackets, because
    # [cpu] is not valid JSON and fromJson would fail on it.
    arch: ${{ fromJson(startsWith(inputs.arch, '[') && inputs.arch || startsWith(inputs.arch, '"') && format('[{0}]', inputs.arch) || format('["{0}"]', inputs.arch)) }}
steps:
- name: Create new CPU instance
id: create_cpu_1
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_cpu $token djl-serving
- name: Create new CPU instance
id: create_cpu_2
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_cpu $token djl-serving
- name: Create new CPU instance
id: create_cpu_3
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_cpu $token djl-serving
- name: Create new CPU instance
id: create_cpu_4
- name: Create new instance
id: create_cpu
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_cpu $token djl-serving
- name: Create new CPU instance
id: create_cpu_5
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_cpu $token djl-serving
- name: Create new CPU instance
id: create_cpu_6
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_cpu $token djl-serving
- name: Create Graviton instance
id: create_graviton_1
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_graviton $token djl-serving
# The aarch64 image uses the action_graviton instance type; every other
# image uses action_cpu.
instance_type=action_cpu
if [ "${{ matrix.arch }}" == "aarch64" ]; then
  instance_type=action_graviton
fi
./start_instance.sh "$instance_type" "$token" djl-serving
# NOTE(review): assumes start_instance.sh appends an "instance_id=<id>" line
# to $GITHUB_OUTPUT - confirm against the script. The id is re-exported under
# a per-arch key so each matrix leg publishes a distinct step output.
instance_id=$(grep "^instance_id=" "$GITHUB_OUTPUT" | cut -d'=' -f2)
if [ -z "$instance_id" ]; then
  # Surface the problem in the run summary instead of exporting a blank id
  # silently; the output is still written so downstream keys stay defined.
  echo "::warning::No instance_id found in GITHUB_OUTPUT for ${{ matrix.arch }}"
fi
echo "instance_id_${{ matrix.arch }}=$instance_id" >>"$GITHUB_OUTPUT"
outputs:
cpu_instance_id_1: ${{ steps.create_cpu_1.outputs.action_cpu_instance_id }}
cpu_instance_id_2: ${{ steps.create_cpu_2.outputs.action_cpu_instance_id }}
cpu_instance_id_3: ${{ steps.create_cpu_3.outputs.action_cpu_instance_id }}
cpu_instance_id_4: ${{ steps.create_cpu_4.outputs.action_cpu_instance_id }}
cpu_instance_id_5: ${{ steps.create_cpu_5.outputs.action_cpu_instance_id }}
cpu_instance_id_6: ${{ steps.create_cpu_6.outputs.action_cpu_instance_id }}
graviton_instance_id_1: ${{ steps.create_graviton_1.outputs.action_graviton_instance_id }}
instance_id_cpu: ${{ steps.create_cpu.outputs.instance_id_cpu }}
instance_id_cpu-full: ${{ steps.create_cpu.outputs.instance_id_cpu-full }}
instance_id_pytorch-inf2: ${{ steps.create_cpu.outputs.instance_id_pytorch-inf2 }}
instance_id_pytorch-gpu: ${{ steps.create_cpu.outputs.instance_id_pytorch-gpu }}
instance_id_tensorrt-llm: ${{ steps.create_cpu.outputs.instance_id_tensorrt-llm }}
instance_id_lmi: ${{ steps.create_cpu.outputs.instance_id_lmi }}
instance_id_aarch64: ${{ steps.create_cpu.outputs.instance_id_aarch64 }}

nightly-build:
needs: create-runners
timeout-minutes: 120
strategy:
fail-fast: false
matrix:
containers:
- name: cpu
instance: cpu
- name: cpu-full
instance: cpu
- name: pytorch-inf2
instance: cpu
- name: pytorch-gpu
instance: cpu
- name: tensorrt-llm
instance: cpu
- name: lmi
instance: cpu
- name: aarch64
instance: aarch64
# inputs.arch may be a JSON array (["cpu", "lmi"]), a single quoted name
# ("cpu"), or a single bare name (cpu). A bare name is wrapped in quotes
# before parsing, because [cpu] is not valid JSON and fromJson would fail.
arch: ${{ fromJson(startsWith(inputs.arch, '[') && inputs.arch || startsWith(inputs.arch, '"') && format('[{0}]', inputs.arch) || format('["{0}"]', inputs.arch)) }}
runs-on:
- self-hosted
- ${{ matrix.containers.instance }}
- ${{ matrix.arch != 'aarch64' && 'cpu' || 'aarch64' }}
- RUN_ID-${{ github.run_id }}
- RUN_NUMBER-${{ github.run_number }}
- SHA-${{ github.sha }}
Expand Down Expand Up @@ -174,7 +127,7 @@ jobs:
docker compose build --no-cache \
--build-arg djl_version=${{ env.DJL_VERSION }} \
--build-arg djl_serving_version=${{ env.SERVING_VERSION }} \
${{ matrix.containers.name }}
${{ matrix.arch }}
- name: Build temp docker image
if: ${{ inputs.mode == '' || inputs.mode == 'temp' || inputs.mode == 'nightly' }}
run: |
Expand All @@ -185,7 +138,7 @@ jobs:
docker compose build --no-cache \
--build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
--build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
${{ matrix.containers.name }}
${{ matrix.arch }}
- name: Tag and push temp image to ECR repo
working-directory: serving/docker
run: |
Expand All @@ -195,35 +148,29 @@ jobs:
if [ "${{ inputs.mode }}" == "release" ]; then
mode=${{ env.DJL_VERSION }}
fi
tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-$mode-${GITHUB_RUN_ID}"
tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-$mode-${GITHUB_SHA}"
tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-$mode-${GITHUB_RUN_ID}"
tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-$mode-${GITHUB_SHA}"
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} $tempRunIdTag
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} $tempCommitTag
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempRunIdTag
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempCommitTag
if ${{ inputs.mode == 'nightly' }}; then
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} ${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-nightly
docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} ${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-nightly
fi
time docker push --all-tags ${{ env.AWS_ECR_REPO }}
stop-runners:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [nightly-build, create-runners]
env:
runner_output: ${{ toJson(needs.create-runners.outputs) }}
steps:
- name: Stop all instances
continue-on-error: true
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-runners.outputs.cpu_instance_id_1 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.cpu_instance_id_2 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.cpu_instance_id_3 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.cpu_instance_id_4 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.cpu_instance_id_5 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.cpu_instance_id_6 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.graviton_instance_id_1 }}
./stop_instance.sh $instance_id
# Stop every runner instance recorded in the create-runners job outputs
# (runner_output holds those outputs serialized as JSON).
# Archs that were not selected carry no instance id, so blank values are
# skipped rather than calling stop_instance.sh without an argument.
# A failed stop is recorded but does not abort the loop, so the remaining
# instances are still stopped; the step reports failure at the end.
status=0
for key in $(echo "$runner_output" | jq -r 'keys[]'); do
  instance_id=$(echo "$runner_output" | jq -r --arg key "$key" '.[$key] // empty')
  if [ -z "$instance_id" ]; then
    echo "Key: $key has no instance_id, skipping"
    continue
  fi
  echo "Key: $key, instance_id: $instance_id"
  if ! ./stop_instance.sh "$instance_id"; then
    echo "::warning::Failed to stop instance $instance_id ($key)"
    status=1
  fi
done
exit "$status"

0 comments on commit 37606cf

Please sign in to comment.