bluefish

Joined on 2026-03-23

xinference (main)

Published 2026-06-21 18:41:24 +08:00 by bluefish

Pull the image from the command line:

docker pull gitea.gzitvs.cn/bluefish/xinference:main

Digest

sha256:0ce11ee2bf7f47e831ab1cb74c65bfd5f1b3b46817b39597508d10c469137917

For more information on the Container registry, see the documentation.

Digest	OS / Arch	Size
c6079c429f	linux/amd64	18 GiB

Image Layers (linux/amd64)

ARG RELEASE

ARG LAUNCHPAD_BUILD_ARCH

LABEL org.opencontainers.image.ref.name=ubuntu

LABEL org.opencontainers.image.version=22.04

ADD file:32d41b6329e8f89fa4ac92ef97c04b7cfd5e90fb74e1509c3e27d7c91195b7c7 in /

CMD ["/bin/bash"]

ENV NVARCH=x86_64

ENV NVIDIA_REQUIRE_CUDA=cuda>=13.0 brand=unknown,driver>=535,driver<536 brand=grid,driver>=535,driver<536 brand=tesla,driver>=535,driver<536 brand=nvidia,driver>=535,driver<536 brand=quadro,driver>=535,driver<536 brand=quadrortx,driver>=535,driver<536 brand=nvidiartx,driver>=535,driver<536 brand=vapps,driver>=535,driver<536 brand=vpc,driver>=535,driver<536 brand=vcs,driver>=535,driver<536 brand=vws,driver>=535,driver<536 brand=cloudgaming,driver>=535,driver<536 brand=unknown,driver>=550,driver<551 brand=grid,driver>=550,driver<551 brand=tesla,driver>=550,driver<551 brand=nvidia,driver>=550,driver<551 brand=quadro,driver>=550,driver<551 brand=quadrortx,driver>=550,driver<551 brand=nvidiartx,driver>=550,driver<551 brand=vapps,driver>=550,driver<551 brand=vpc,driver>=550,driver<551 brand=vcs,driver>=550,driver<551 brand=vws,driver>=550,driver<551 brand=cloudgaming,driver>=550,driver<551 brand=unknown,driver>=565,driver<566 brand=grid,driver>=565,driver<566 brand=tesla,driver>=565,driver<566 brand=nvidia,driver>=565,driver<566 brand=quadro,driver>=565,driver<566 brand=quadrortx,driver>=565,driver<566 brand=nvidiartx,driver>=565,driver<566 brand=vapps,driver>=565,driver<566 brand=vpc,driver>=565,driver<566 brand=vcs,driver>=565,driver<566 brand=vws,driver>=565,driver<566 brand=cloudgaming,driver>=565,driver<566 brand=unknown,driver>=570,driver<571 brand=grid,driver>=570,driver<571 brand=tesla,driver>=570,driver<571 brand=nvidia,driver>=570,driver<571 brand=quadro,driver>=570,driver<571 brand=quadrortx,driver>=570,driver<571 brand=nvidiartx,driver>=570,driver<571 brand=vapps,driver>=570,driver<571 brand=vpc,driver>=570,driver<571 brand=vcs,driver>=570,driver<571 brand=vws,driver>=570,driver<571 brand=cloudgaming,driver>=570,driver<571 brand=unknown,driver>=575,driver<576 brand=grid,driver>=575,driver<576 brand=tesla,driver>=575,driver<576 brand=nvidia,driver>=575,driver<576 brand=quadro,driver>=575,driver<576 brand=quadrortx,driver>=575,driver<576 
brand=nvidiartx,driver>=575,driver<576 brand=vapps,driver>=575,driver<576 brand=vpc,driver>=575,driver<576 brand=vcs,driver>=575,driver<576 brand=vws,driver>=575,driver<576 brand=cloudgaming,driver>=575,driver<576

ENV NV_CUDA_CUDART_VERSION=13.0.96-1

ARG TARGETARCH

LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com>

RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && apt-get purge --autoremove -y curl && rm -rf /var/lib/apt/lists/* # buildkit

ENV CUDA_VERSION=13.0.2

RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-13-0=${NV_CUDA_CUDART_VERSION} cuda-compat-13-0 && rm -rf /var/lib/apt/lists/* # buildkit

RUN |1 TARGETARCH=amd64 /bin/sh -c echo "/usr/local/cuda/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit

ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64

COPY NGC-DL-CONTAINER-LICENSE / # buildkit

ENV NVIDIA_VISIBLE_DEVICES=all

ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

ARG CUDA_VERSION

ARG PYTHON_VERSION

ARG DEADSNAKES_MIRROR_URL

ARG DEADSNAKES_GPGKEY_URL

ARG GET_PIP_URL

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /vllm-workspace

RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment # buildkit

RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c apt-get update -y && apt-get install -y --no-install-recommends software-properties-common curl sudo ffmpeg libsm6 libxext6 libgl1 && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then mkdir -p -m 0755 /etc/apt/keyrings ; curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; fi ; else for i in 1 2 3; do add-apt-repository -y ppa:deadsnakes/ppa && break || { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; done ; fi && apt-get update -y && apt-get install -y --no-install-recommends python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev && rm -rf /var/lib/apt/lists/* && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && rm -f /usr/lib/python${PYTHON_VERSION}/EXTERNALLY-MANAGED && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} && python3 --version && python3 -m pip --version # buildkit

RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && CUDA_VERSION_SHORT=$(echo $CUDA_VERSION | cut -d. -f1,2) && apt-get update -y && apt-get install -y --no-install-recommends --allow-change-held-packages cuda-nvcc-${CUDA_VERSION_DASH} cuda-cudart-${CUDA_VERSION_DASH} cuda-nvrtc-${CUDA_VERSION_DASH} cuda-cuobjdump-${CUDA_VERSION_DASH} libcurand-dev-${CUDA_VERSION_DASH} libcublas-dev-${CUDA_VERSION_DASH} libnuma-dev numactl && NCCL_VER=$(apt-cache madison libnccl-dev | grep "+cuda${CUDA_VERSION_SHORT}" | head -1 | awk -F'|' '{gsub(/^ +| +$/, "", $2); print $2}') && apt-get install -y --no-install-recommends --allow-change-held-packages libnccl-dev=${NCCL_VER} libnccl2=${NCCL_VER} && rm -rf /var/lib/apt/lists/* # buildkit

RUN |5 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c python3 -m pip install uv # buildkit

ENV UV_HTTP_TIMEOUT=500

ENV UV_INDEX_STRATEGY=unsafe-best-match

ENV UV_LINK_MODE=copy

ENV VLLM_ENABLE_CUDA_COMPATIBILITY=0

ARG PYTORCH_CUDA_INDEX_BASE_URL

COPY requirements/common.txt /tmp/common.txt # buildkit

COPY requirements/cuda.txt /tmp/requirements-cuda.txt # buildkit

RUN |6 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl /bin/sh -c if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' /tmp/requirements-cuda.txt; fi && uv pip install --system -r /tmp/requirements-cuda.txt --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && rm /tmp/requirements-cuda.txt /tmp/common.txt # buildkit

ARG FLASHINFER_VERSION=0.6.8.post1

RUN |7 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 /bin/sh -c uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # buildkit

ARG GDRCOPY_CUDA_VERSION=12.8

ARG GDRCOPY_OS_VERSION=Ubuntu22_04

ARG TARGETPLATFORM

COPY tools/install_gdrcopy.sh /tmp/install_gdrcopy.sh # buildkit

RUN |10 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 /bin/sh -c set -eux; case "${TARGETPLATFORM}" in linux/arm64) UUARCH="aarch64" ;; linux/amd64) UUARCH="x64" ;; *) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; esac; /tmp/install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}" && rm /tmp/install_gdrcopy.sh # buildkit

ARG BITSANDBYTES_VERSION_X86=0.46.1

ARG BITSANDBYTES_VERSION_ARM64=0.42.0

ARG TIMM_VERSION=>=1.0.17

ARG RUNAI_MODEL_STREAMER_VERSION=>=0.15.7

RUN |14 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 BITSANDBYTES_VERSION_X86=0.46.1 BITSANDBYTES_VERSION_ARM64=0.42.0 TIMM_VERSION=>=1.0.17 RUNAI_MODEL_STREAMER_VERSION=>=0.15.7 /bin/sh -c if [ "$TARGETPLATFORM" = "linux/arm64" ]; then BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; else BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; fi; uv pip install --system accelerate modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}" # buildkit

ARG PIP_INDEX_URL UV_INDEX_URL

ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL

ARG PYTORCH_CUDA_INDEX_BASE_URL

ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER

ARG PYTORCH_NIGHTLY

COPY /workspace/torch_lib_versions.txt torch_lib_versions.txt # buildkit

RUN |22 CUDA_VERSION=13.0.2 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.6.8.post1 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 BITSANDBYTES_VERSION_X86=0.46.1 BITSANDBYTES_VERSION_ARM64=0.42.0 TIMM_VERSION=>=1.0.17 RUNAI_MODEL_STREAMER_VERSION=>=0.15.7 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled PYTORCH_NIGHTLY= /bin/sh -c if [ "${PYTORCH_NIGHTLY}" = "1" ]; then echo "Installing torch nightly..." && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && echo "Installing vLLM..." && uv pip install --system dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); else echo "Installing vLLM..." && uv pip install --system dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); fi # buildkit

ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64

ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64

COPY examples examples # buildkit

COPY benchmarks benchmarks # buildkit

COPY ./vllm/collect_env.py . # buildkit

ARG TARGETPLATFORM

ARG INSTALL_KV_CONNECTORS=false

ARG CUDA_VERSION

ARG VLLM_BUILD_COMMIT

ARG VLLM_BUILD_PIPELINE

ARG VLLM_BUILD_URL

ARG VLLM_IMAGE_TAG

ARG PIP_INDEX_URL UV_INDEX_URL

ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL

ENV UV_HTTP_TIMEOUT=500

ARG torch_cuda_arch_list=7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX

ENV TORCH_CUDA_ARCH_LIST=7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX

RUN |12 TARGETPLATFORM=linux/amd64 INSTALL_KV_CONNECTORS=true CUDA_VERSION=13.0.2 VLLM_BUILD_COMMIT=ad7125a431e176d4161099480a66f0169609a690 VLLM_BUILD_PIPELINE=019d130e-464e-4ff7-b84b-492992c0c06b VLLM_BUILD_URL=https://buildkite.com/vllm/release-v2/builds/1649 VLLM_IMAGE_TAG=vllm/vllm-openai:v0.21.0 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= torch_cuda_arch_list=7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX /bin/sh -c CUDA_MAJOR="${CUDA_VERSION%%.*}"; CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); CUDA_HOME=/usr/local/cuda; BUILD_PKGS="libcusparse-dev-${CUDA_VERSION_DASH} libcublas-dev-${CUDA_VERSION_DASH} libcusolver-dev-${CUDA_VERSION_DASH}"; if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then uv pip install --system -r /tmp/kv_connectors.txt --no-build || ( apt-get update -y && apt-get install -y --no-install-recommends --allow-change-held-packages ${BUILD_PKGS} && uv pip install --system -r /tmp/kv_connectors.txt --no-build-isolation && apt-get purge -y ${BUILD_PKGS} && rm -rf /var/lib/apt/lists/* ); uv pip install --system --force-reinstall --no-deps nixl-cu${CUDA_MAJOR}; fi # buildkit

ENV VLLM_USAGE_SOURCE=production-docker-image

ENV VLLM_BUILD_COMMIT=ad7125a431e176d4161099480a66f0169609a690 VLLM_BUILD_PIPELINE=019d130e-464e-4ff7-b84b-492992c0c06b VLLM_BUILD_URL=https://buildkite.com/vllm/release-v2/builds/1649 VLLM_IMAGE_TAG=vllm/vllm-openai:v0.21.0

LABEL org.opencontainers.image.source=https://github.com/vllm-project/vllm org.opencontainers.image.revision=ad7125a431e176d4161099480a66f0169609a690 org.opencontainers.image.version=vllm/vllm-openai:v0.21.0 org.opencontainers.image.url=https://buildkite.com/vllm/release-v2/builds/1649 ai.vllm.build.commit=ad7125a431e176d4161099480a66f0169609a690 ai.vllm.build.pipeline=019d130e-464e-4ff7-b84b-492992c0c06b ai.vllm.build.url=https://buildkite.com/vllm/release-v2/builds/1649 ai.vllm.image.tag=vllm/vllm-openai:v0.21.0

ENTRYPOINT ["vllm", "serve"]

WORKDIR /opt/inference

ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/lib/python3.12/dist-packages/nvidia/cublas/lib

COPY xinference/deploy/docker/requirements /tmp/docker-requirements # buildkit

COPY xinference/deploy/docker/prepare-virtualenv-wheelhouse.sh /usr/local/bin/prepare-virtualenv-wheelhouse.sh # buildkit

COPY xinference/deploy/docker/generate-virtualenv-requirements.py /usr/local/bin/generate-virtualenv-requirements.py # buildkit

ARG LLAMA_CPP_USE_CUDA=true

ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

ARG XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128

ARG PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130

ARG GITHUB_PROXY_PREFIX=https://gh-proxy.org/

ARG XINFERENCE_PREPARE_VENV_WHEELHOUSE=true

ARG INSTALL_KOKORO_EN_SPACY_MODELS=false

ARG SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download

ENV XINFERENCE_WHEELHOUSE_DIR=/opt/xinference-wheelhouse XINFERENCE_ENABLE_VIRTUAL_ENV=1 XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED=1 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ PIP_FIND_LINKS=/opt/xinference-wheelhouse UV_FIND_LINKS=/opt/xinference-wheelhouse

RUN |8 LLAMA_CPP_USE_CUDA=true PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple XLLAMACPP_INDEX_URL=https://xorbitsai.github.io/xllamacpp/whl/cu128 PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true INSTALL_KOKORO_EN_SPACY_MODELS=false SPACY_MODEL_BASE_URL=https://github.com/explosion/spacy-models/releases/download /bin/sh -c mkdir -p "$XINFERENCE_WHEELHOUSE_DIR" && pip install --upgrade -i "$PIP_INDEX" pip "setuptools<81" wheel && chmod +x /usr/local/bin/prepare-virtualenv-wheelhouse.sh /usr/local/bin/generate-virtualenv-requirements.py && apt-get -y update && ( wget -O openfst-1.7.2.tar.gz http://www.openslr.org/resources/2/openfst-1.7.2.tar.gz || wget -O openfst-1.7.2.tar.gz https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.7.2.tar.gz ) && tar zxvf openfst-1.7.2.tar.gz && cd openfst-1.7.2 && ./configure --enable-shared --enable-static && make -j"$(nproc)" && make install && ldconfig && CPLUS_INCLUDE_PATH=/usr/local/include LIBRARY_PATH=/usr/local/lib LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH pip install -i "$PIP_INDEX" pynini==2.1.6.post1 && pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && pip install -i "$PIP_INDEX" "cython>=0.29" && pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /tmp/docker-requirements/requirements-base.txt && pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /tmp/docker-requirements/requirements-ml.txt && pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /tmp/docker-requirements/requirements-models.txt && if [ "$INSTALL_KOKORO_EN_SPACY_MODELS" = "true" ]; then pip install "${SPACY_MODEL_BASE_URL}/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl" "${SPACY_MODEL_BASE_URL}/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl"; fi && pip install -i "$PIP_INDEX" 
transformers==5.5.0 && pip install -i "$PIP_INDEX" --no-deps sglang==0.5.6 && pip install -i "$PIP_INDEX" sgl-kernel==0.3.18.post2 && pip install -i "$PIP_INDEX" wetext && pip uninstall flashinfer -y && pip install -i "$PIP_INDEX" SQLAlchemy==1.4.54 && pip download --dest "$XINFERENCE_WHEELHOUSE_DIR" --prefer-binary --index-url "$XLLAMACPP_INDEX_URL" xllamacpp && pip install --no-index --find-links "$XINFERENCE_WHEELHOUSE_DIR" xllamacpp && apt-get -yq clean && rm -rf /var/lib/apt/lists/* openfst-1.7.2 openfst-1.7.2.tar.gz && pip cache purge # buildkit

COPY xinference/model/llm/llm_family.json /tmp/xinference-model/llm_family.json # buildkit

COPY xinference/model/embedding/model_spec.json /tmp/xinference-model/embedding_model_spec.json # buildkit

COPY xinference/model/rerank/model_spec.json /tmp/xinference-model/rerank_model_spec.json # buildkit

COPY xinference/model/image/model_spec.json /tmp/xinference-model/image_model_spec.json # buildkit

COPY xinference/model/audio/model_spec.json /tmp/xinference-model/audio_model_spec.json # buildkit

COPY xinference/model/video/model_spec.json /tmp/xinference-model/video_model_spec.json # buildkit

ENTRYPOINT []

CMD ["/bin/bash"]

ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

ARG PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130

ARG GITHUB_PROXY_PREFIX=https://gh-proxy.org/

ARG XINFERENCE_PREPARE_VENV_WHEELHOUSE=true

ENV NEXT_TELEMETRY_DISABLED=1 XINFERENCE_BACKEND_HOST=127.0.0.1 XINFERENCE_BACKEND_PORT=9998 XINFERENCE_FRONTEND_HOST=0.0.0.0 XINFERENCE_FRONTEND_PORT=9997 XINFERENCE_INTERNAL_API_URL=http://127.0.0.1:9998 XINFERENCE_WHEELHOUSE_DIR=/opt/xinference-wheelhouse XINFERENCE_ENABLE_VIRTUAL_ENV=1 XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED=1 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ PIP_FIND_LINKS=/opt/xinference-wheelhouse UV_FIND_LINKS=/opt/xinference-wheelhouse

WORKDIR /opt/inference

COPY . /opt/inference # buildkit

COPY xinference/deploy/docker/requirements /tmp/docker-requirements # buildkit

COPY xinference/deploy/docker/prepare-virtualenv-wheelhouse.sh /usr/local/bin/prepare-virtualenv-wheelhouse.sh # buildkit

COPY xinference/deploy/docker/generate-virtualenv-requirements.py /usr/local/bin/generate-virtualenv-requirements.py # buildkit

RUN |4 PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true /bin/sh -c chmod +x /usr/local/bin/prepare-virtualenv-wheelhouse.sh /usr/local/bin/generate-virtualenv-requirements.py && if [ "$XINFERENCE_PREPARE_VENV_WHEELHOUSE" = "true" ] && [ ! -s /opt/xinference-wheelhouse/.wheelhouse-ready ]; then XINFERENCE_MODEL_ROOT=/opt/inference/xinference/model PIP_INDEX="$PIP_INDEX" PIP_EXTRA_INDEX_URLS="$PIP_EXTRA_INDEX_URLS" GITHUB_PROXY_PREFIX="$GITHUB_PROXY_PREFIX" /usr/local/bin/prepare-virtualenv-wheelhouse.sh; fi && rm -rf /tmp/docker-requirements && pip cache purge # buildkit

COPY /usr/local/bin/node /usr/local/bin/node20 # buildkit

COPY /workspace/frontend/.next/standalone /opt/inference/frontend-runtime # buildkit

COPY /workspace/frontend/.next/static /opt/inference/frontend-runtime/.next/static # buildkit

COPY /workspace/frontend/public /opt/inference/frontend-runtime/public # buildkit

COPY xinference/deploy/docker/start-next-stack.sh /usr/local/bin/start-next-stack.sh # buildkit

COPY xinference/deploy/docker/xinference-supervisor-wrapper.sh /usr/local/bin/xinference-supervisor-wrapper.sh # buildkit

RUN |4 PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple PIP_EXTRA_INDEX_URLS=https://xorbitsai.github.io/xllamacpp/whl/cu128 https://wheels.vllm.ai/0.14.0/cu130 https://download.pytorch.org/whl/cu130 GITHUB_PROXY_PREFIX=https://gh-proxy.org/ XINFERENCE_PREPARE_VENV_WHEELHOUSE=true /bin/sh -c mv /usr/local/bin/xinference-supervisor /usr/local/bin/xinference-supervisor-backend && cp /usr/local/bin/xinference-supervisor-wrapper.sh /usr/local/bin/xinference-supervisor && chmod +x /usr/local/bin/start-next-stack.sh /usr/local/bin/xinference-supervisor-wrapper.sh /usr/local/bin/xinference-supervisor # buildkit

EXPOSE 9997/tcp

ENTRYPOINT ["/usr/local/bin/start-next-stack.sh"]

Key	Value
ai.vllm.build.commit	ad7125a431e176d4161099480a66f0169609a690
ai.vllm.build.pipeline	019d130e-464e-4ff7-b84b-492992c0c06b
ai.vllm.build.url	https://buildkite.com/vllm/release-v2/builds/1649
ai.vllm.image.tag	vllm/vllm-openai:v0.21.0
maintainer	NVIDIA CORPORATION <cudatools@nvidia.com>
org.opencontainers.image.ref.name	ubuntu
org.opencontainers.image.revision	560d56aeb1ae944cbfe9a3d1cd7f3cba31959414
org.opencontainers.image.source	https://gitea.gzitvs.cn/bluefish/xinference
org.opencontainers.image.url	https://buildkite.com/vllm/release-v2/builds/1649
org.opencontainers.image.version	vllm/vllm-openai:v0.21.0

Details

Container

2026-06-21 18:41:24 +08:00

128

OCI / Docker

Versions (2) View all

main

2026-06-21

base-gpu

2026-06-21

xinference (main)

Installation

Images

Image Layers (linux/amd64)

Labels