docker · ilopezluna · Jun 10, 2026 · Jun 11, 2026 · gemini-code-assist · Jun 11, 2026
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -311,13 +311,26 @@ jobs:
           password: ${{ secrets.ORG_ACCESS_TOKEN }}
 
       - name: Set up Buildx
+        id: buildx
         uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5  # v4.1.0
         with:
           version: "lab:latest"
           driver: cloud
           endpoint: "docker/make-product-smarter"
           install: true
 
+      # Purge the shared cloud builder's cache before building. The release
+      # builds 7 image variants (cpu/cuda on amd64+arm64) on one cloud builder,
+      # and accumulated cache from previous runs eventually fills its disk —
+      # surfacing as "no space left on device" while unpacking the (growing)
+      # upstream llama.cpp image snapshots. Starting clean avoids that.
+      # The builder name is handed to the shell through an environment variable
+      # and quoted, instead of being spliced into the script text via ${{ }}.
+      - name: Free build cache on cloud builder
+        env:
+          BUILDER_NAME: ${{ steps.buildx.outputs.name }}
+        run: docker buildx prune --all --force --builder "$BUILDER_NAME"
+
       - name: Build CPU image
         uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf
         with:

diff --git a/.versions b/.versions
@@ -5,4 +5,4 @@ VLLM_UPSTREAM_VERSION=0.19.0
 VLLM_METAL_RELEASE=v0.2.0-20260420-142150
 DIFFUSERS_RELEASE=v0.1.0-20260216-000000
 SGLANG_VERSION=0.5.6
-LLAMA_SERVER_VERSION=b9501
+LLAMA_SERVER_VERSION=b9592
diff --git a/Dockerfile b/Dockerfile
@@ -1,9 +1,10 @@
 # syntax=docker/dockerfile:1
 
 ARG GO_VERSION=1.25
-ARG LLAMA_SERVER_VERSION=b9501
+ARG LLAMA_SERVER_VERSION=b9592
 ARG LLAMA_SERVER_VARIANT=cpu
-ARG LLAMA_UPSTREAM_IMAGE=ghcr.io/ggml-org/llama.cpp:server-vulkan-b9501
+# Default image tag follows LLAMA_SERVER_VERSION so a version bump is one edit.
+ARG LLAMA_UPSTREAM_IMAGE=ghcr.io/ggml-org/llama.cpp:server-vulkan-${LLAMA_SERVER_VERSION}
 
 ARG VERSION=dev
 

diff --git a/llamacpp/native/vendor/llama.cpp b/llamacpp/native/vendor/llama.cpp