diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile
index e96c885e7..64cf26686 100644
--- a/serving/docker/Dockerfile
+++ b/serving/docker/Dockerfile
@@ -72,8 +72,69 @@ RUN scripts/install_python.sh && \
     rm -rf /opt/djl/logs && \
     chown -R djl:djl /opt/djl && \
     rm -rf scripts && pip3 cache purge && \
-    apt-get clean -y && rm -rf /var/lib/apt/lists/* \
+    apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
 LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu-full="true"
-LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-27-0.cpu-full="true"
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.cpu-full="true"
 LABEL torch-version=$torch_version
+
+
+FROM ubuntu:22.04 as vllm-build
+# Borrowed from https://github.com/vllm-project/vllm/blob/v0.4.3/Dockerfile.cpu
+ARG vllm_version=v0.4.3
+
+WORKDIR /usr/src
+
+RUN apt-get update -y \
+    && apt-get install -y git wget vim numactl gcc-12 g++-12 python3 python3-pip \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+
+RUN pip install --upgrade pip \
+    && pip install wheel packaging ninja setuptools>=49.4.0 numpy
+
+RUN git clone https://github.com/vllm-project/vllm -b ${vllm_version} && cd vllm && \
+    pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
+
+
+FROM base AS lmi-cpu
+
+ARG torch_version=2.3.0
+ARG protobuf_version=3.20.3
+ARG transformers_version=4.41.1
+ARG accelerate_version=0.30.1
+ARG datasets_version=2.19.1
+ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.0-py3-none-any.whl"
+ARG peft_version=0.11.1
+
+COPY scripts scripts/
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev gcc-12 g++-12 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
+    scripts/install_python.sh && \
+    scripts/install_djl_serving.sh $djl_version $torch_version && \
+    scripts/install_s5cmd.sh x64 && \
+    echo "${djl_version} cpufull" > /opt/djl/bin/telemetry && \
+    djl-serving -i ai.djl.pytorch:pytorch-native-cpu:$torch_version:linux-x86_64 && \
+    djl-serving -i ai.djl.tensorflow:tensorflow-native-cpu:2.10.1:linux-x86_64 && \
+    pip3 cache purge && \
+    apt-get clean -y && rm -rf /var/lib/apt/lists/*
+
+COPY --from=vllm-build /usr/src/vllm/dist/*.whl scripts/
+
+# FIXME remove Triton import in the next release
+RUN pip3 install torch==${torch_version}+cpu triton>=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu \
+    ${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \
+    transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \
+    mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} scripts/vllm*.whl \
+    jinja2 safetensors ninja scipy sentence_transformers && \
+    pip3 cache purge
+
+# final cleanup
+RUN scripts/patch_oss_dlc.sh python && \
+    rm -rf /opt/djl/logs && \
+    chown -R djl:djl /opt/djl && \
+    rm -rf scripts && pip3 cache purge && \
+    apt-get clean -y && rm -rf /var/lib/apt/lists/*
+
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.lmi-cpu="true"
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.lmi-cpu="true"
diff --git a/serving/docker/docker-compose.yml b/serving/docker/docker-compose.yml
index 201c2e550..caeecc7b0 100644
--- a/serving/docker/docker-compose.yml
+++ b/serving/docker/docker-compose.yml
@@ -12,6 +12,12 @@ services:
       target: cpu-full
       dockerfile: Dockerfile
     image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}cpu-full${NIGHTLY}"
+  lmi-cpu:
+    build:
+      context: .
+      target: lmi-cpu
+      dockerfile: Dockerfile
+    image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}lmi-cpu${NIGHTLY}"
   aarch64:
     build:
       context: .
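Usage sketch (not part of the patch): with the new lmi-cpu Dockerfile target and compose service above, the image can be built locally roughly as follows, assuming the commands run from serving/docker/, that RELEASE_VERSION and NIGHTLY are exported the same way as for the existing cpu-full service, and that build args such as djl_version may need to be supplied; the local tag is chosen only for illustration.

    # Build via the compose service added above
    docker compose build lmi-cpu

    # Or build the multi-stage target straight from the Dockerfile
    docker build --target lmi-cpu --build-arg djl_version=0.28.0 -t djl-serving:lmi-cpu-local -f Dockerfile .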