From 90532bd03de575a98a28ab48aa53f2c9be1491ad Mon Sep 17 00:00:00 2001 From: Benjamin Knofe-Vider Date: Mon, 1 Jun 2026 11:18:45 +0200 Subject: [PATCH] build: bundle iceberg extension in worker images The iceberg extension was downloaded on demand at first use, unlike httpfs, ducklake, json, and postgres_scanner, which the Dockerfiles pre-seed into the bundled extension cache. That on-demand INSTALL silently blocks iceberg-only tenant activation past the ~60s activate-tenant deadline (observed on mw-dev with the per-step logging from #642: count-catalogs completes in ~1ms, load-iceberg-extension never returns, and the worker is retired at 60.7s with no DuckDB-level error). Iceberg+DuckLake tenants don't hit it because LoadExtensions(delta) runs first and primes DuckDB's extension subsystem. Bundle iceberg the same way as the others: curl the .duckdb_extension.gz from ${DUCKDB_EXTENSION_REPOSITORY} at build time, gunzip it into /build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/, and add it to the size-check loop. This applies to both the standalone Dockerfile and Dockerfile.worker, so worker pods get a local cache hit on LoadExtensions("iceberg") instead of a CDN fetch. This eliminates the iceberg-only activation timeout and brings the activation cost of LoadExtensions("iceberg") in line with the other bundled extensions for every iceberg-using tenant (lakekeeper and s3tables backends alike). 
--- Dockerfile | 4 +++- Dockerfile.worker | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index adf4471f..dd775835 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,9 @@ RUN : "${DUCKDB_EXTENSION_VERSION:?must be set}" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \ && curl -fsSL "${POSTGRES_SCANNER_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension" \ - && for f in httpfs ducklake json postgres_scanner; do \ + && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/iceberg.duckdb_extension.gz" \ + | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/iceberg.duckdb_extension" \ + && for f in httpfs ducklake json postgres_scanner iceberg; do \ [ -s "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/$f.duckdb_extension" ] \ || { echo "ERROR: $f.duckdb_extension is empty after fetch" >&2; exit 1; }; \ done diff --git a/Dockerfile.worker b/Dockerfile.worker index d62b638c..736e5f0b 100644 --- a/Dockerfile.worker +++ b/Dockerfile.worker @@ -120,7 +120,9 @@ RUN : "${DUCKDB_EXTENSION_VERSION:?must be set}" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \ && curl -fsSL "${POSTGRES_SCANNER_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension" \ - && for f in httpfs ducklake json postgres_scanner; do \ + && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/iceberg.duckdb_extension.gz" \ + | gunzip > 
"/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/iceberg.duckdb_extension" \ + && for f in httpfs ducklake json postgres_scanner iceberg; do \ [ -s "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/$f.duckdb_extension" ] \ || { echo "ERROR: $f.duckdb_extension is empty after fetch" >&2; exit 1; }; \ done