Skip to content

Commit b4c36ea

Browse files
committed
fix: build vLLM from source for ARM64 CUDA 13 (NVIDIA DGX)
The prebuilt vLLM ARM64 wheels have ABI incompatibility with PyTorch CUDA 13 nightly builds. For ARM64 with CUDA 13 (e.g., NVIDIA DGX GB300 Blackwell, DGX GB200):

- Install CUDA toolkit 13.0 for compilation
- Use PyTorch nightly with cu130 support
- Build vLLM from source to ensure ABI compatibility

Add VLLM_ARM64_BUILD_FROM_SOURCE build arg (default: true) to allow opting out of source builds for faster build times on non-CUDA 13 systems.

Also:

- Update AMD64 wheel path to manylinux_2_35 (required for cu130)
- Bump vLLM to 0.15.1

Signed-off-by: Dorin Geman <dorin.geman@docker.com>
1 parent e9d05d6 commit b4c36ea

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

Dockerfile

Lines changed: 31 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -85,26 +85,54 @@ ENTRYPOINT ["/app/model-runner"]
8585
# --- vLLM variant ---
8686
FROM llamacpp AS vllm
8787

88-
ARG VLLM_VERSION=0.12.0
88+
ARG VLLM_VERSION=0.15.1
8989
ARG VLLM_CUDA_VERSION=cu130
9090
ARG VLLM_PYTHON_TAG=cp38-abi3
9191
ARG TARGETARCH
92+
# Build vLLM from source on ARM64 for CUDA 13 compatibility (e.g., NVIDIA DGX).
93+
# Set to "false" to use prebuilt wheels instead (faster build, but may not work on CUDA 13).
94+
ARG VLLM_ARM64_BUILD_FROM_SOURCE=true
9295

9396
USER root
9497

95-
RUN apt update && apt install -y python3 python3-venv python3-dev curl ca-certificates build-essential && rm -rf /var/lib/apt/lists/*
98+
# Install build dependencies including CUDA toolkit for compiling vLLM from source on ARM64
99+
# Note: Base image already has CUDA repo configured, just install cuda-toolkit directly
100+
RUN apt update && apt install -y \
101+
python3 python3-venv python3-dev \
102+
curl ca-certificates build-essential \
103+
git cmake ninja-build \
104+
&& if [ "$(uname -m)" = "aarch64" ] && [ "$VLLM_ARM64_BUILD_FROM_SOURCE" = "true" ]; then \
105+
apt install -y cuda-toolkit-13-0; \
106+
fi \
107+
&& rm -rf /var/lib/apt/lists/*
108+
109+
# Set CUDA paths for ARM64 builds
110+
ENV PATH=/usr/local/cuda-13.0/bin:$PATH
111+
ENV LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH
96112

97113
RUN mkdir -p /opt/vllm-env && chown -R modelrunner:modelrunner /opt/vllm-env
98114

99115
USER modelrunner
100116

101117
# Install uv and vLLM as modelrunner user
118+
# For AMD64: Use prebuilt CUDA 13 wheels (PyTorch pulled as dependency)
119+
# For ARM64 with VLLM_ARM64_BUILD_FROM_SOURCE=true: Build from source against PyTorch nightly
120+
# For ARM64 with VLLM_ARM64_BUILD_FROM_SOURCE=false: Use prebuilt wheel (old behavior)
102121
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
103122
&& ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
104123
&& if [ "$TARGETARCH" = "amd64" ]; then \
105-
WHEEL_ARCH="manylinux_2_31_x86_64"; \
124+
WHEEL_ARCH="manylinux_2_35_x86_64"; \
106125
WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl"; \
107126
~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL"; \
127+
elif [ "$VLLM_ARM64_BUILD_FROM_SOURCE" = "true" ]; then \
128+
~/.local/bin/uv pip install --python /opt/vllm-env/bin/python \
129+
torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu130 \
130+
&& git clone --depth 1 --branch v${VLLM_VERSION} https://github.com/vllm-project/vllm.git /tmp/vllm \
131+
&& cd /tmp/vllm \
132+
&& /opt/vllm-env/bin/python use_existing_torch.py \
133+
&& ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python -r requirements/build.txt \
134+
&& VLLM_TARGET_DEVICE=cuda ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python . --no-build-isolation \
135+
&& rm -rf /tmp/vllm; \
108136
else \
109137
~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}"; \
110138
fi

Makefile

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ DOCKER_TARGET ?= final-llamacpp
1313
PORT := 8080
1414
MODELS_PATH := $(shell pwd)/models-store
1515
LLAMA_ARGS ?=
16+
EXTRA_DOCKER_BUILD_ARGS ?=
1617
DOCKER_BUILD_ARGS := \
1718
--load \
1819
--platform linux/$(shell docker version --format '{{.Server.Arch}}') \
@@ -84,11 +85,11 @@ lint:
8485

8586
# Build Docker image
8687
docker-build:
87-
docker buildx build $(DOCKER_BUILD_ARGS) .
88+
docker buildx build $(DOCKER_BUILD_ARGS) $(EXTRA_DOCKER_BUILD_ARGS) .
8889

8990
# Build multi-platform Docker image
9091
docker-build-multiplatform:
91-
docker buildx build --platform linux/amd64,linux/arm64 $(DOCKER_BUILD_ARGS) .
92+
docker buildx build --platform linux/amd64,linux/arm64 $(DOCKER_BUILD_ARGS) $(EXTRA_DOCKER_BUILD_ARGS) .
9293

9394
# Run in Docker container with TCP port access and mounted model storage
9495
docker-run: docker-build

0 commit comments

Comments (0)