bluefish
  • Joined on 2026-03-23

xinference (main)

Published 2026-06-21 18:41:24 +08:00 by bluefish

Installation

docker pull gitea.gzitvs.cn/bluefish/xinference:main
sha256:0ce11ee2bf7f47e831ab1cb74c65bfd5f1b3b46817b39597508d10c469137917

Images

Digest OS / Arch Size
c6079c429f linux/amd64 18 GiB

Image Layers (linux/amd64)

ARG RELEASE
ARG LAUNCHPAD_BUILD_ARCH
LABEL org.opencontainers.image.ref.name=ubuntu
LABEL org.opencontainers.image.version=22.04
ADD file:32d41b6329e8f89fa4ac92ef97c04b7cfd5e90fb74e1509c3e27d7c91195b7c7 in /
CMD ["/bin/bash"]
ENV NVARCH=x86_64
ENV NVIDIA_REQUIRE_CUDA=cuda>=13.0 brand=unknown,driver>=535,driver<536 brand=grid,driver>=535,driver<536 brand=tesla,driver>=535,driver<536 brand=nvidia,driver>=535,driver<536 brand=quadro,driver>=535,driver<536 brand=quadrortx,driver>=535,driver<536 brand=nvidiartx,driver>=535,driver<536 brand=vapps,driver>=535,driver<536 brand=vpc,driver>=535,driver<536 brand=vcs,driver>=535,driver<536 brand=vws,driver>=535,driver<536 brand=cloudgaming,driver>=535,driver<536 brand=unknown,driver>=550,driver<551 brand=grid,driver>=550,driver<551 brand=tesla,driver>=550,driver<551 brand=nvidia,driver>=550,driver<551 brand=quadro,driver>=550,driver<551 brand=quadrortx,driver>=550,driver<551 brand=nvidiartx,driver>=550,driver<551 brand=vapps,driver>=550,driver<551 brand=vpc,driver>=550,driver<551 brand=vcs,driver>=550,driver<551 brand=vws,driver>=550,driver<551 brand=cloudgaming,driver>=550,driver<551 brand=unknown,driver>=565,driver<566 brand=grid,driver>=565,driver<566 brand=tesla,driver>=565,driver<566 brand=nvidia,driver>=565,driver<566 brand=quadro,driver>=565,driver<566 brand=quadrortx,driver>=565,driver<566 brand=nvidiartx,driver>=565,driver<566 brand=vapps,driver>=565,driver<566 brand=vpc,driver>=565,driver<566 brand=vcs,driver>=565,driver<566 brand=vws,driver>=565,driver<566 brand=cloudgaming,driver>=565,driver<566 brand=unknown,driver>=570,driver<571 brand=grid,driver>=570,driver<571 brand=tesla,driver>=570,driver<571 brand=nvidia,driver>=570,driver<571 brand=quadro,driver>=570,driver<571 brand=quadrortx,driver>=570,driver<571 brand=nvidiartx,driver>=570,driver<571 brand=vapps,driver>=570,driver<571 brand=vpc,driver>=570,driver<571 brand=vcs,driver>=570,driver<571 brand=vws,driver>=570,driver<571 brand=cloudgaming,driver>=570,driver<571 brand=unknown,driver>=575,driver<576 brand=grid,driver>=575,driver<576 brand=tesla,driver>=575,driver<576 brand=nvidia,driver>=575,driver<576 brand=quadro,driver>=575,driver<576 brand=quadrortx,driver>=575,driver<576 brand=nvidiartx,driver>=575,driver<576 brand=vapps,driver>=575,driver<576 brand=vpc,driver>=575,driver<576 brand=vcs,driver>=575,driver<576 brand=vws,driver>=575,driver<576 brand=cloudgaming,driver>=575,driver<576
ENV NV_CUDA_CUDART_VERSION=13.0.96-1
ARG TARGETARCH
LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com>
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && apt-get purge --autoremove -y curl && rm -rf /var/lib/apt/lists/* # buildkit
ENV CUDA_VERSION=13.0.2
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-13-0=${NV_CUDA_CUDART_VERSION} cuda-compat-13-0 && rm -rf /var/lib/apt/lists/* # buildkit
RUN |1 TARGETARCH=amd64 /bin/sh -c echo "/usr/local/cuda/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64
COPY NGC-DL-CONTAINER-LICENSE / # buildkit
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ARG CUDA_VERSION
ARG PYTHON_VERSION
ARG DEADSNAKES_MIRROR_URL
ARG DEADSNAKES_GPGKEY_URL
ARG GET_PIP_URL
ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /vllm-workspace
RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment # buildkit
RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c apt-get update -y && apt-get install -y --no-install-recommends software-properties-common curl sudo ffmpeg libsm6 libxext6 libgl1 && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then mkdir -p -m 0755 /etc/apt/keyrings ; curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; fi ; else for i in 1 2 3; do add-apt-repository -y ppa:deadsnakes/ppa && break || { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; done ; fi && apt-get update -y && apt-get install -y --no-install-recommends python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev && rm -rf /var/lib/apt/lists/* && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && rm -f /usr/lib/python${PYTHON_VERSION}/EXTERNALLY-MANAGED && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} && python3 --version && python3 -m pip --version # buildkit
RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && CUDA_VERSION_SHORT=$(echo $CUDA_VERSION | cut -d. -f1,2) && apt-get update -y && apt-get install -y --no-install-recommends --allow-change-held-packages cuda-nvcc-${CUDA_VERSION_DASH} cuda-cudart-${CUDA_VERSION_DASH} cuda-nvrtc-${CUDA_VERSION_DASH} cuda-cuobjdump-${CUDA_VERSION_DASH} libcurand-dev-${CUDA_VERSION_DASH} libcublas-dev-${CUDA_VERSION_DASH} libnuma-dev numactl && NCCL_VER=$(apt-cache madison libnccl-dev | grep "+cuda${CUDA_VERSION_SHORT}" | head -1 | awk -F'|' '{gsub(/^ +| +$/, "", $2); print $2}') && apt-get install -y --no-install-recommends --allow-change-held-packages libnccl-dev=${NCCL_VER} libnccl2=${NCCL_VER} && rm -rf /var/lib/apt/lists/* # buildkit
RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c python3 -m pip install uv # buildkit
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY=unsafe-best-match
ENV UV_LINK_MODE=copy
ENV VLLM_ENABLE_CUDA_COMPATIBILITY=0
ARG PYTORCH_CUDA_INDEX_BASE_URL
COPY requirements/common.txt /tmp/common.txt # buildkit
COPY requirements/cuda.txt /tmp/requirements-cuda.txt # buildkit
RUN |6 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl /bin/sh -c if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' /tmp/requirements-cuda.txt; fi && uv pip install --system -r /tmp/requirements-cuda.txt --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && rm /tmp/requirements-cuda.txt /tmp/common.txt # buildkit
ARG FLASHINFER_VERSION=0.6.8.post1
RUN |7 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 /bin/sh -c uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # buildkit
ARG GDRCOPY_CUDA_VERSION=12.8
ARG GDRCOPY_OS_VERSION=Ubuntu22_04
ARG TARGETPLATFORM
COPY tools/install_gdrcopy.sh /tmp/install_gdrcopy.sh # buildkit
RUN |10 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 /bin/sh -c set -eux; case "${TARGETPLATFORM}" in linux/arm64) UUARCH="aarch64" ;; linux/amd64) UUARCH="x64" ;; *) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; esac; /tmp/install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}" && rm /tmp/install_gdrcopy.sh # buildkit
ARG BITSANDBYTES_VERSION_X86=0.46.1
ARG BITSANDBYTES_VERSION_ARM64=0.42.0
ARG TIMM_VERSION=>=1.0.17
ARG RUNAI_MODEL_STREAMER_VERSION=>=0.15.7
RUN |14 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 BITSANDBYTES_VERSION_X86=0.46.1 BITSANDBYTES_VERSION_ARM64=0.42.0 TIMM_VERSION=>=1.0.17 RUNAI_MODEL_STREAMER_VERSION=>=0.15.7 /bin/sh -c if [ "$TARGETPLATFORM" = "linux/arm64" ]; then BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; else BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; fi; uv pip install --system accelerate modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}" # buildkit
ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
ARG PYTORCH_NIGHTLY
COPY /workspace/torch_lib_versions.txt torch_lib_versions.txt # buildkit
RUN |22 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 BITSANDBYTES_VERSION_X86=0.46.1 BITSANDBYTES_VERSION_ARM64=0.42.0 TIMM_VERSION=>=1.0.17 RUNAI_MODEL_STREAMER_VERSION=>=0.15.7 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled PYTORCH_NIGHTLY= /bin/sh -c if [ "${PYTORCH_NIGHTLY}" = "1" ]; then echo "Installing torch nightly..." && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && echo "Installing vLLM..." && uv pip install --system dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); else echo "Installing vLLM..." && uv pip install --system dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); fi # buildkit
RUN |22 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 BITSANDBYTES_VERSION_X86=0.46.1 BITSANDBYTES_VERSION_ARM64=0.42.0 TIMM_VERSION=>=1.0.17 RUNAI_MODEL_STREAMER_VERSION=>=0.15.7 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled PYTORCH_NIGHTLY= /bin/sh -c . /etc/environment && uv pip list # buildkit
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64
RUN |22 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 BITSANDBYTES_VERSION_X86=0.46.1 BITSANDBYTES_VERSION_ARM64=0.42.0 TIMM_VERSION=>=1.0.17 RUNAI_MODEL_STREAMER_VERSION=>=0.15.7 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled PYTORCH_NIGHTLY= /bin/sh -c uv pip install --system ep_kernels/dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # buildkit
RUN |22 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 BITSANDBYTES_VERSION_X86=0.46.1 BITSANDBYTES_VERSION_ARM64=0.42.0 TIMM_VERSION=>=1.0.17 RUNAI_MODEL_STREAMER_VERSION=>=0.15.7 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled PYTORCH_NIGHTLY= /bin/sh -c flashinfer show-config && flashinfer download-cubin # buildkit
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64
COPY examples examples # buildkit
COPY benchmarks benchmarks # buildkit
COPY ./vllm/collect_env.py . # buildkit
ARG TARGETPLATFORM
ARG INSTALL_KV_CONNECTORS=false
ARG CUDA_VERSION
ARG VLLM_BUILD_COMMIT
ARG VLLM_BUILD_PIPELINE
ARG VLLM_BUILD_URL
ARG VLLM_IMAGE_TAG
ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ENV UV_HTTP_TIMEOUT=500
ARG torch_cuda_arch_list=7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX
ENV TORCH_CUDA_ARCH_LIST=7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX
RUN |12 TARGETPLATFORM=linux/amd64 INSTALL_KV_CONNECTORS=true CUDA_VERSION=13.0.2 VLLM_BUILD_COMMIT=ad7125a431e176d4161099480a66f0169609a690 VLLM_BUILD_PIPELINE=019d130e-464e-4ff7-b84b-492992c0c06b VLLM_BUILD_URL=https://buildkite.com/vllm/release-v2/builds/1649 VLLM_IMAGE_TAG=vllm/vllm-openai:v0.21.0 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= torch_cuda_arch_list=7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX /bin/sh -c CUDA_MAJOR="${CUDA_VERSION%%.*}"; CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); CUDA_HOME=/usr/local/cuda; BUILD_PKGS="libcusparse-dev-${CUDA_VERSION_DASH} libcublas-dev-${CUDA_VERSION_DASH} libcusolver-dev-${CUDA_VERSION_DASH}"; if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then uv pip install --system -r /tmp/kv_connectors.txt --no-build || ( apt-get update -y && apt-get install -y --no-install-recommends --allow-change-held-packages ${BUILD_PKGS} && uv pip install --system -r /tmp/kv_connectors.txt --no-build-isolation && apt-get purge -y ${BUILD_PKGS} && rm -rf /var/lib/apt/lists/* ); uv pip install --system --force-reinstall --no-deps nixl-cu${CUDA_MAJOR}; fi # buildkit
ENV VLLM_USAGE_SOURCE=production-docker-image
ENV VLLM_BUILD_COMMIT=ad7125a431e176d4161099480a66f0169609a690 VLLM_BUILD_PIPELINE=019d130e-464e-4ff7-b84b-492992c0c06b VLLM_BUILD_URL=https://buildkite.com/vllm/release-v2/builds/1649 VLLM_IMAGE_TAG=vllm/vllm-openai:v0.21.0
LABEL org.opencontainers.image.source=https://github.com/vllm-project/vllm org.opencontainers.image.revision=ad7125a431e176d4161099480a66f0169609a690 org.opencontainers.image.version=vllm/vllm-openai:v0.21.0 org.opencontainers.image.url=https://buildkite.com/vllm/release-v2/builds/1649 ai.vllm.build.commit=ad7125a431e176d4161099480a66f0169609a690 ai.vllm.build.pipeline=019d130e-464e-4ff7-b84b-492992c0c06b ai.vllm.build.url=https://buildkite.com/vllm/release-v2/builds/1649 ai.vllm.image.tag=vllm/vllm-openai:v0.21.0
ENTRYPOINT ["vllm" "serve"]
WORKDIR /opt/inference
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/lib/python3.12/dist-packages/nvidia/cublas/lib
COPY xinference/deploy/docker/requirements /tmp/docker-requirements # buildkit
COPY xinference/deploy/docker/prepare-virtualenv-wheelhouse.sh /usr/local/bin/prepare-virtualenv-wheelhouse.sh # buildkit
COPY xinference/deploy/docker/generate-virtualenv-requirements.py /usr/local/bin/generate-virtualenv-requirements.py # buildkit
ARG LLAMA_CPP_USE_CUDA=true
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
ARG XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128
ARG PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130
ARG GITHUB_PROXY_PREFIX=https://gh-proxy.org/
ARG XINFERENCE_PREPARE_VENV_WHEELHOUSE=true
ARG INSTALL_KOKORO_EN_SPACY_MODELS=false
ARG SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download
ENV XINFERENCE_WHEELHOUSE_DIR=/opt/xinference-wheelhouse XINFERENCE_ENABLE_VIRTUAL_ENV=1 XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED=1 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ PIP_FIND_LINKS=/opt/xinference-wheelhouse UV_FIND_LINKS=/opt/xinference-wheelhouse
RUN |8 LLAMA_CPP_USE_CUDA=true PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128 PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true INSTALL_KOKORO_EN_SPACY_MODELS=false SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download /bin/sh -c apt-get -y update && apt install -y wget curl procps git libgl1 libfst-dev cmake libssl-dev rsync sqlite libpcre3 libpcre3-dev dmidecode perl make build-essential zlib1g-dev && printf "\ndeb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse" >> /etc/apt/sources.list && apt-get -y update && apt-get install -y --only-upgrade libstdc++6 && apt install -y libc6 && apt-get -yq clean && rm -rf /var/lib/apt/lists/* # buildkit
RUN |8 LLAMA_CPP_USE_CUDA=true PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128 PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true INSTALL_KOKORO_EN_SPACY_MODELS=false SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download /bin/sh -c mkdir -p "$XINFERENCE_WHEELHOUSE_DIR" && pip install --upgrade -i "$PIP_INDEX" pip "setuptools<81" wheel && chmod +x /usr/local/bin/prepare-virtualenv-wheelhouse.sh /usr/local/bin/generate-virtualenv-requirements.py && apt-get -y update && ( wget -O openfst-1.7.2.tar.gz http://www.openslr.org/resources/2/openfst-1.7.2.tar.gz || wget -O openfst-1.7.2.tar.gz https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.7.2.tar.gz ) && tar zxvf openfst-1.7.2.tar.gz && cd openfst-1.7.2 && ./configure --enable-shared --enable-static && make -j"$(nproc)" && make install && ldconfig && CPLUS_INCLUDE_PATH=/usr/local/include LIBRARY_PATH=/usr/local/lib LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH pip install -i "$PIP_INDEX" pynini==2.1.6.post1 && pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && pip install -i "$PIP_INDEX" "cython>=0.29" && pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /tmp/docker-requirements/requirements-base.txt && pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /tmp/docker-requirements/requirements-ml.txt && pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /tmp/docker-requirements/requirements-models.txt && if [ "$INSTALL_KOKORO_EN_SPACY_MODELS" = "true" ]; then pip install "${SPACY_MODEL_BASE_URL}/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl" "${SPACY_MODEL_BASE_URL}/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl"; fi && pip install -i "$PIP_INDEX" transformers==5.5.0 && pip install -i "$PIP_INDEX" --no-deps sglang==0.5.6 && pip install -i "$PIP_INDEX" sgl-kernel==0.3.18.post2 && pip install -i "$PIP_INDEX" wetext && pip uninstall flashinfer -y && pip install -i "$PIP_INDEX" SQLAlchemy==1.4.54 && pip download --dest "$XINFERENCE_WHEELHOUSE_DIR" --prefer-binary --index-url "$XLLAMACPP_INDEX_URL" xllamacpp && pip install --no-index --find-links "$XINFERENCE_WHEELHOUSE_DIR" xllamacpp && apt-get -yq clean && rm -rf /var/lib/apt/lists/* openfst-1.7.2 openfst-1.7.2.tar.gz && pip cache purge # buildkit
COPY xinference/model/llm/llm_family.json /tmp/xinference-model/llm_family.json # buildkit
COPY xinference/model/embedding/model_spec.json /tmp/xinference-model/embedding_model_spec.json # buildkit
COPY xinference/model/rerank/model_spec.json /tmp/xinference-model/rerank_model_spec.json # buildkit
COPY xinference/model/image/model_spec.json /tmp/xinference-model/image_model_spec.json # buildkit
COPY xinference/model/audio/model_spec.json /tmp/xinference-model/audio_model_spec.json # buildkit
COPY xinference/model/video/model_spec.json /tmp/xinference-model/video_model_spec.json # buildkit
RUN |8 LLAMA_CPP_USE_CUDA=true PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128 PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true INSTALL_KOKORO_EN_SPACY_MODELS=false SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download /bin/sh -c if [ "$XINFERENCE_PREPARE_VENV_WHEELHOUSE" = "true" ]; then PIP_INDEX="$PIP_INDEX" PIP_EXTRA_INDEX_URLS="$PIP_EXTRA_INDEX_URLS" GITHUB_PROXY_PREFIX="$GITHUB_PROXY_PREFIX" /usr/local/bin/prepare-virtualenv-wheelhouse.sh; pip install --no-index --find-links "$XINFERENCE_WHEELHOUSE_DIR" flash-attn==2.8.3+cu130torch2.11; else github_proxy_prefix="${GITHUB_PROXY_PREFIX%/}"; pip install "${github_proxy_prefix:+${github_proxy_prefix}/}https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.8.3+cu130torch2.11-cp312-cp312-linux_x86_64.whl"; fi && rm -rf /tmp/docker-requirements /tmp/xinference-model && pip cache purge # buildkit
RUN |8 LLAMA_CPP_USE_CUDA=true PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128 PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true INSTALL_KOKORO_EN_SPACY_MODELS=false SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download /bin/sh -c github_proxy_prefix="${GITHUB_PROXY_PREFIX%/}" && wget -O Miniforge3.sh "${github_proxy_prefix:+${github_proxy_prefix}/}https://github.com/conda-forge/miniforge/releases/download/4.12.0-0/Miniforge3-4.12.0-0-Linux-x86_64.sh" && bash Miniforge3.sh -b -p /opt/conda && rm Miniforge3.sh # buildkit
RUN |8 LLAMA_CPP_USE_CUDA=true PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128 PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true INSTALL_KOKORO_EN_SPACY_MODELS=false SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download /bin/sh -c /opt/conda/bin/conda create -n ffmpeg-env -c conda-forge 'ffmpeg<7' -y && ln -s /opt/conda/envs/ffmpeg-env/bin/ffmpeg /usr/local/bin/ffmpeg && ln -s /opt/conda/envs/ffmpeg-env/bin/ffprobe /usr/local/bin/ffprobe && /opt/conda/bin/conda clean --all -y # buildkit
RUN |8 LLAMA_CPP_USE_CUDA=true PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128 PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true INSTALL_KOKORO_EN_SPACY_MODELS=false SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download /bin/sh -c pip install torchcodec==0.10.0 && pip cache purge # buildkit
ENTRYPOINT []
CMD ["/bin/bash"]
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
ARG PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130
ARG GITHUB_PROXY_PREFIX=https://gh-proxy.org/
ARG XINFERENCE_PREPARE_VENV_WHEELHOUSE=true
ENV NEXT_TELEMETRY_DISABLED=1 XINFERENCE_BACKEND_HOST=127.0.0.1 XINFERENCE_BACKEND_PORT=9998 XINFERENCE_FRONTEND_HOST=0.0.0.0 XINFERENCE_FRONTEND_PORT=9997 XINFERENCE_INTERNAL_API_URL=http://127.0.0.1:9998 XINFERENCE_WHEELHOUSE_DIR=/opt/xinference-wheelhouse XINFERENCE_ENABLE_VIRTUAL_ENV=1 XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED=1 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ PIP_FIND_LINKS=/opt/xinference-wheelhouse UV_FIND_LINKS=/opt/xinference-wheelhouse
WORKDIR /opt/inference
COPY . /opt/inference # buildkit
RUN |4 PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true /bin/sh -c python3 -m pip install --no-build-isolation . # buildkit
COPY xinference/deploy/docker/requirements /tmp/docker-requirements # buildkit
COPY xinference/deploy/docker/prepare-virtualenv-wheelhouse.sh /usr/local/bin/prepare-virtualenv-wheelhouse.sh # buildkit
COPY xinference/deploy/docker/generate-virtualenv-requirements.py /usr/local/bin/generate-virtualenv-requirements.py # buildkit
RUN |4 PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true /bin/sh -c chmod +x /usr/local/bin/prepare-virtualenv-wheelhouse.sh /usr/local/bin/generate-virtualenv-requirements.py && if [ "$XINFERENCE_PREPARE_VENV_WHEELHOUSE" = "true" ] && [ ! -s /opt/xinference-wheelhouse/.wheelhouse-ready ]; then XINFERENCE_MODEL_ROOT=/opt/inference/xinference/model PIP_INDEX="$PIP_INDEX" PIP_EXTRA_INDEX_URLS="$PIP_EXTRA_INDEX_URLS" GITHUB_PROXY_PREFIX="$GITHUB_PROXY_PREFIX" /usr/local/bin/prepare-virtualenv-wheelhouse.sh; fi && rm -rf /tmp/docker-requirements && pip cache purge # buildkit
COPY /usr/local/bin/node /usr/local/bin/node20 # buildkit
COPY /workspace/frontend/.next/standalone /opt/inference/frontend-runtime # buildkit
COPY /workspace/frontend/.next/static /opt/inference/frontend-runtime/.next/static # buildkit
COPY /workspace/frontend/public /opt/inference/frontend-runtime/public # buildkit
COPY xinference/deploy/docker/start-next-stack.sh /usr/local/bin/start-next-stack.sh # buildkit
COPY xinference/deploy/docker/xinference-supervisor-wrapper.sh /usr/local/bin/xinference-supervisor-wrapper.sh # buildkit
RUN |4 PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true /bin/sh -c mv /usr/local/bin/xinference-supervisor /usr/local/bin/xinference-supervisor-backend && cp /usr/local/bin/xinference-supervisor-wrapper.sh /usr/local/bin/xinference-supervisor && chmod +x /usr/local/bin/start-next-stack.sh /usr/local/bin/xinference-supervisor-wrapper.sh /usr/local/bin/xinference-supervisor # buildkit
EXPOSE map[9997/tcp:{}]
ENTRYPOINT ["/usr/local/bin/start-next-stack.sh"]

Labels

Key Value
ai.vllm.build.commit ad7125a431e176d4161099480a66f0169609a690
ai.vllm.build.pipeline 019d130e-464e-4ff7-b84b-492992c0c06b
ai.vllm.build.url https://buildkite.com/vllm/release-v2/builds/1649
ai.vllm.image.tag vllm/vllm-openai:v0.21.0
maintainer NVIDIA CORPORATION <cudatools@nvidia.com>
org.opencontainers.image.ref.name ubuntu
org.opencontainers.image.revision 560d56aeb1ae944cbfe9a3d1cd7f3cba31959414
org.opencontainers.image.source https://gitea.gzitvs.cn/bluefish/xinference
org.opencontainers.image.url https://buildkite.com/vllm/release-v2/builds/1649
org.opencontainers.image.version vllm/vllm-openai:v0.21.0
Details
Container
2026-06-21 18:41:24 +08:00
128
OCI / Docker
Versions (2) View all
main 2026-06-21
base-gpu 2026-06-21