Skip to content

Commit ad2faee

Browse files
committed
fix: pre-compile vllm-metal paged_ops extension in tarball build
vllm-metal v0.2.0 JIT-compiles a paged_ops C++ extension using clang++ at runtime. This fails inside the macOS sandbox, which blocks compiler invocations. Instead, compile the extension during the tarball build (where Xcode CLT is available) and ship the .so in a prebuilt/ directory. At install time, model-runner copies the pre-built .so into the user's ~/.cache/vllm-metal/ cache directory. vllm-metal's build.py sees that the cached .so is newer than the sources and skips JIT compilation. This also reverts the include/ directory preservation, since the Python headers are only needed for compilation, which now happens at build time.
1 parent 7e37085 commit ad2faee

2 files changed

Lines changed: 33 additions & 2 deletions

File tree

pkg/inference/backends/vllm/vllm_metal.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,29 @@ func (v *vllmMetal) downloadAndExtract(ctx context.Context, _ *http.Client) erro
176176
return fmt.Errorf("failed to make python3 executable: %w", err)
177177
}
178178

179+
// Copy pre-built Metal kernel extension to the user's cache directory
180+
// so vllm-metal skips JIT compilation at runtime (the macOS sandbox
181+
// blocks clang++ invocations needed by the JIT compiler).
182+
homeDir, err := os.UserHomeDir()
183+
if err == nil {
184+
cacheDir := filepath.Join(homeDir, ".cache", "vllm-metal")
185+
prebuiltDir := filepath.Join(v.installDir, "prebuilt")
186+
if entries, readErr := os.ReadDir(prebuiltDir); readErr == nil {
187+
if mkErr := os.MkdirAll(cacheDir, 0755); mkErr == nil {
188+
for _, entry := range entries {
189+
src := filepath.Join(prebuiltDir, entry.Name())
190+
dst := filepath.Join(cacheDir, entry.Name())
191+
if data, cpErr := os.ReadFile(src); cpErr == nil {
192+
if wErr := os.WriteFile(dst, data, 0755); wErr != nil {
193+
v.log.Warn("failed to copy prebuilt extension", "file", entry.Name(), "error", wErr)
194+
}
195+
}
196+
}
197+
v.log.Info("Copied pre-built Metal kernel extension to cache", "cacheDir", cacheDir)
198+
}
199+
}
200+
}
201+
179202
v.log.Info("vllm-metal installed successfully", "version", vllmMetalVersion)
180203
return nil
181204
}

scripts/build-vllm-metal-tarball.sh

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,18 @@ curl -fsSL -O "$VLLM_METAL_WHEEL_URL"
7070
uv pip install --python "$PYTHON_DIR/bin/python3" --system vllm_metal-*.whl
7171
rm -f vllm_metal-*.whl
7272

73+
# Pre-compile the paged_ops Metal kernel extension so users don't need Xcode CLT
74+
# at runtime (the macOS sandbox blocks clang++ invocations). build.py caches the
75+
# compiled .so under ~/.cache/vllm-metal/; we redirect $HOME so the artefact
76+
# lands in a known temp location we can bundle into the tarball.
77+
echo "Pre-compiling vllm-metal paged_ops extension..."
78+
HOME="$WORK_DIR" "$PYTHON_DIR/bin/python3" -c "from vllm_metal.metal.build import build; build()"
79+
mkdir -p "$PYTHON_DIR/prebuilt"
80+
cp "$WORK_DIR/.cache/vllm-metal/"*_paged_ops* "$PYTHON_DIR/prebuilt/"
81+
7382
# Strip files not needed at runtime to reduce tarball size
7483
echo "Stripping unnecessary files..."
75-
# Keep include/python3.12 (needed by vllm-metal to compile Metal kernels at runtime)
76-
find "$PYTHON_DIR/include" -mindepth 1 -maxdepth 1 ! -name 'python3.12' -exec rm -rf {} + 2>/dev/null || true
84+
rm -rf "$PYTHON_DIR/include"
7785
rm -rf "$PYTHON_DIR/share"
7886
PYLIB="$PYTHON_DIR/lib/python3.12"
7987
rm -rf "$PYLIB/test" "$PYLIB/tests"

0 commit comments

Comments
 (0)