From b0e682efefe9f36b38012496096a699bffa2a984 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 15:26:32 +0000 Subject: [PATCH 01/16] ci(docker): publish multi-arch (amd64 + arm64) images to GHCR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror FLASHApp's split-build / manifest-merge approach so both linux/amd64 and linux/arm64 are published for the full and simple variants. Existing `-full` / `-simple` / `latest` tags become multi-arch manifests — k8s overlays, docker-compose users, and direct `docker pull` callers transparently get the right arch. Dockerfile.arm (delta from Dockerfile): - aarch64 miniforge installer - conditional THIRDPARTY/Linux/aarch64 copy (some OpenMS releases ship an empty/missing aarch64 dir) - pruned thirdparty PATH to tools that actually have ARM builds: LuciPHOr2, MSGFPlus, ThermoRawFileParser, Comet, Percolator, Sage Dockerfile_simple.arm (delta from Dockerfile_simple): - aarch64 miniforge installer only — pyOpenMS ships aarch64 wheels on PyPI, so `pip install -r requirements.txt` works as-is The shared docker/entrypoint.sh is reused as-is on ARM: its apptainer/read-only-root handling is arch-neutral and worth keeping. Base stays ubuntu:22.04 (Redis 6.0 predates the ARM64-COW-BUG warning, so no `--ignore-warnings` flag needed). Workflow changes (build-and-test.yml): - `build` renamed to `build-amd64`; per-arch tags carry `-amd64`. - New `build-arm64` job runs on `ubuntu-24.04-arm`, builds the `.arm` Dockerfiles for both variants, ends with a pull-back + /_stcore/health probe on push events. - New `create-manifest` job stitches `{ref}-{variant}-amd64` + `{ref}-{variant}-arm64` into multi-arch `{ref}-{variant}` and `latest` manifests. - test-apptainer / test-nginx / test-traefik / publish-apptainer keep consuming the amd64 artifact only. SIF publishing stays amd64-only this iteration. - PRs build both arches (registry cache keeps warm runs cheap) but don't push; manifest creation also skipped on PRs. 
Branch-protection note: the `build` required check is renamed to `build-amd64`. Admins should update protected-branch rules and add `build-arm64` / `create-manifest` if those should also be required. --- .github/workflows/build-and-test.yml | 185 +++++++++++++++++++++-- Dockerfile.arm | 211 +++++++++++++++++++++++++++ Dockerfile_simple.arm | 127 ++++++++++++++++ 3 files changed, 513 insertions(+), 10 deletions(-) create mode 100644 Dockerfile.arm create mode 100644 Dockerfile_simple.arm diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 6ed2406..5ca863a 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -38,7 +38,10 @@ jobs: kubectl kustomize k8s/overlays/prod/ | \ kubeconform -summary -strict -kubernetes-version 1.28.0 -skip IngressRoute - build: + build-amd64: + # amd64 path. Produces per-arch tags `--amd64`; the + # multi-arch manifest under `-` (and `latest`) is stitched + # together in `create-manifest` once the sibling `build-arm64` succeeds. needs: lint-manifests runs-on: ubuntu-latest permissions: @@ -75,22 +78,23 @@ jobs: with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | - type=ref,event=branch,suffix=-${{ matrix.variant }} - type=ref,event=tag,suffix=-${{ matrix.variant }} - type=sha,prefix=,suffix=-${{ matrix.variant }} - type=raw,value=latest,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} + type=ref,event=branch,suffix=-${{ matrix.variant }}-amd64 + type=ref,event=tag,suffix=-${{ matrix.variant }}-amd64 + type=sha,prefix=,suffix=-${{ matrix.variant }}-amd64 + type=raw,value=latest-amd64,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} - name: Build and conditionally push uses: docker/build-push-action@v5 with: context: . 
file: ${{ matrix.dockerfile }} + platforms: linux/amd64 load: true push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }} - cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2},mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} + cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }}-amd64 + cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2}-amd64,mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} @@ -111,13 +115,174 @@ jobs: path: /tmp/image.tar retention-days: 1 + build-arm64: + # arm64 path. Runs on a native ARM64 runner (no QEMU). Produces per-arch + # tags `--arm64`; gets merged into the multi-arch manifest + # under `-` by the `create-manifest` job below. The build + # uses a separate `Dockerfile.arm` / `Dockerfile_simple.arm` that swaps + # the miniforge installer to aarch64 and (for the full variant) guards + # the THIRDPARTY/Linux/aarch64 copy. Apptainer/nginx/traefik integration + # tests still run only on the amd64 artifact — those gates do not need + # arch duplication right now (HPC consumers of the SIF are amd64). + needs: lint-manifests + runs-on: ubuntu-24.04-arm + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + include: + - variant: full + dockerfile: Dockerfile.arm + - variant: simple + dockerfile: Dockerfile_simple.arm + steps: + - name: Free disk space + # OpenMS source build needs ~25 GB of scratch space; the ARM runner + # image is tighter than the AMD one out of the box. Mirrors what + # FLASHApp's publish-docker-images.yml does at the top of its ARM job. 
+ run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + + - uses: actions/checkout@v4 + + - name: Compute lowercase image name (OCI refs must be lowercase) + run: echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >> "$GITHUB_ENV" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch,suffix=-${{ matrix.variant }}-arm64 + type=ref,event=tag,suffix=-${{ matrix.variant }}-arm64 + type=sha,prefix=,suffix=-${{ matrix.variant }}-arm64 + type=raw,value=latest-arm64,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} + + - name: Build and conditionally push + uses: docker/build-push-action@v5 + with: + context: . + file: ${{ matrix.dockerfile }} + platforms: linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }}-arm64 + cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2}-arm64,mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} + provenance: false + build-args: | + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} + + - name: Smoke test the just-pushed arm64 image + # PRs build (validates Dockerfile.arm parses + compiles) but don't + # push, so there's nothing to pull back on PR events. 
On push/tag, + # pull the just-published image and verify /_stcore/health to catch + # entrypoint regressions that wouldn't surface in the build itself. + if: github.event_name != 'pull_request' + run: | + set -euo pipefail + IMAGE_REF="${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:${{ github.sha }}-${{ matrix.variant }}-arm64" + echo "Smoke-testing $IMAGE_REF" + docker pull "$IMAGE_REF" + docker run -d --rm --name smoketest -p 8501:8501 "$IMAGE_REF" + for i in $(seq 1 90); do + if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8501/_stcore/health; then + echo "Streamlit healthy after ${i} attempts" + docker stop smoketest + exit 0 + fi + sleep 2 + done + echo "ERROR: /_stcore/health never returned 200" + docker logs smoketest || true + docker stop smoketest || true + exit 1 + + create-manifest: + # Stitch the per-arch tags into multi-arch manifest lists. The manifest + # tags reuse the OLD scheme (`-`, `latest`) so existing + # consumers (k8s overlays, docker-compose users, `docker pull` callers) + # keep working transparently — docker now auto-selects the right arch + # on pull. PRs don't push per-arch tags, so there's nothing to merge. + needs: [build-amd64, build-arm64] + if: github.event_name != 'pull_request' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + variant: [full, simple] + steps: + - name: Compute lowercase image name + run: echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >> "$GITHUB_ENV" + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Compute manifest tags + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + # NB: no -amd64/-arm64 suffix here. These are the multi-arch + # manifest names; they must match the pre-arm64 tag scheme so + # `:main-full`, `:v1.0.0-full`, `:latest` continue to resolve. 
+ tags: | + type=ref,event=branch,suffix=-${{ matrix.variant }} + type=ref,event=tag,suffix=-${{ matrix.variant }} + type=sha,prefix=,suffix=-${{ matrix.variant }} + type=raw,value=latest,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} + + - name: Create and push multi-arch manifests + # Iterate over manifest tags (newline-separated from metadata-action) + # and merge the matching `-amd64` / `-arm64` per-arch tags into each. + # `--amend` makes the step idempotent across workflow_dispatch reruns. + # `docker manifest push` accepts only one ref per invocation, hence + # the loop. + run: | + set -euo pipefail + while IFS= read -r manifest_tag; do + [ -z "$manifest_tag" ] && continue + amd_tag="${manifest_tag}-amd64" + arm_tag="${manifest_tag}-arm64" + echo "Creating manifest ${manifest_tag} from:" + echo " amd: ${amd_tag}" + echo " arm: ${arm_tag}" + docker manifest create "$manifest_tag" \ + --amend "$amd_tag" \ + --amend "$arm_tag" + docker manifest push "$manifest_tag" + done <<< "${{ steps.meta.outputs.tags }}" + test-apptainer: # Apptainer/Singularity is the dominant container runtime on HPC clusters. # It mounts the root filesystem read-only and runs as the host user's UID # (not root inside the image). The entrypoint must tolerate both: this job # exercises that contract by running the built image under apptainer and # waiting for the streamlit /_stcore/health endpoint to come up. 
- needs: build + needs: build-amd64 runs-on: ubuntu-latest strategy: fail-fast: false @@ -335,7 +500,7 @@ jobs: done <<< "${{ steps.meta.outputs.tags }}" test-nginx: - needs: build + needs: build-amd64 runs-on: ubuntu-latest strategy: fail-fast: false @@ -422,7 +587,7 @@ jobs: done test-traefik: - needs: build + needs: build-amd64 runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/Dockerfile.arm b/Dockerfile.arm new file mode 100644 index 0000000..8571bc5 --- /dev/null +++ b/Dockerfile.arm @@ -0,0 +1,211 @@ +# This Dockerfile builds OpenMS, the TOPP tools, pyOpenMS and thidparty tools. +# It also adds a basic streamlit server that serves a pyOpenMS-based app. +# hints: +# build image and give it a name (here: streamlitapp) with: docker build -f Dockerfile.arm --no-cache -t streamlitapp:latest-arm64 --build-arg GITHUB_TOKEN= . 2>&1 | tee build.log +# check if image was build: docker image ls +# run container: docker run -p 8501:8501 streamlitappsimple:latest +# debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell +# prune unused images/etc. to free disc space (e.g. might be needed on gitpod). Use with care.: docker system prune --all --force + +FROM ubuntu:22.04 AS setup-build-system +ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git +ARG OPENMS_BRANCH=release/3.5.0 +ARG PORT=8501 +# Streamlit app GitHub user name (to download artifact from). +ARG GITHUB_USER=OpenMS +# Streamlit app GitHub repository name (to download artifact from). +ARG GITHUB_REPO=streamlit-template + +USER root + +# Install required Ubuntu packages. 
+RUN apt-get -y update +RUN apt-get install -y --no-install-recommends --no-install-suggests g++ autoconf automake patch libtool make git gpg wget ca-certificates curl jq libgtk2.0-dev openjdk-8-jdk cron +RUN update-ca-certificates +RUN apt-get install -y --no-install-recommends --no-install-suggests libsvm-dev libeigen3-dev coinor-libcbc-dev libglpk-dev libzip-dev zlib1g-dev libxerces-c-dev libbz2-dev libomp-dev libhdf5-dev +RUN apt-get install -y --no-install-recommends --no-install-suggests libboost-date-time1.74-dev \ + libboost-iostreams1.74-dev \ + libboost-regex1.74-dev \ + libboost-math1.74-dev \ + libboost-random1.74-dev +RUN apt-get install -y --no-install-recommends --no-install-suggests qt6-base-dev libqt6svg6-dev libqt6opengl6-dev libqt6openglwidgets6 libgl-dev + +# Install Github CLI +RUN (type -p wget >/dev/null || (apt-get update && apt-get install wget -y)) \ + && mkdir -p -m 755 /etc/apt/keyrings \ + && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \ + && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ + && apt-get update \ + && apt-get install gh -y + +# Download and install miniforge. +ENV PATH="/root/miniforge3/bin:${PATH}" +RUN wget -q \ + https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh \ + && bash Miniforge3-Linux-aarch64.sh -b \ + && rm -f Miniforge3-Linux-aarch64.sh +RUN mamba --version + +# Make /root traversable so the entrypoint can `source +# /root/miniforge3/bin/activate ...` when the container runs as a non-root +# user (apptainer/singularity maps the host UID into the container; the +# default ubuntu /root is 0700 which would block path traversal). 
+x only, +# not +r, so the directory listing remains private. +RUN chmod o+x /root + +# Setup mamba environment. +RUN mamba create -n streamlit-env python=3.10 +RUN echo "mamba activate streamlit-env" >> ~/.bashrc +SHELL ["/bin/bash", "--rcfile", "~/.bashrc"] +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] + +# Install up-to-date cmake via mamba and packages for pyOpenMS build. +RUN mamba install cmake +RUN pip install --upgrade pip && python -m pip install -U setuptools nose cython "autowrap<=0.24" pandas numpy pytest + +# Clone OpenMS branch and the associcated contrib+thirdparties+pyOpenMS-doc submodules. +RUN git clone --recursive --depth=1 -b ${OPENMS_BRANCH} --single-branch ${OPENMS_REPO} && cd /OpenMS + +# Pull Linux compatible third-party dependencies and store them in directory thirdparty. +WORKDIR /OpenMS +RUN mkdir /thirdparty && \ + git submodule update --init THIRDPARTY && \ + cp -r THIRDPARTY/All/* /thirdparty && \ + if [ -d "THIRDPARTY/Linux/aarch64" ]; then \ + cp -r THIRDPARTY/Linux/aarch64/* /thirdparty; \ + fi && \ + chmod -R +x /thirdparty +ENV PATH="/thirdparty/LuciPHOr2:/thirdparty/MSGFPlus:/thirdparty/ThermoRawFileParser:/thirdparty/Comet:/thirdparty/Percolator:/thirdparty/Sage:${PATH}" + +# Build OpenMS and pyOpenMS. +FROM setup-build-system AS compile-openms +WORKDIR / + +# Set up build directory. +RUN mkdir /openms-build +WORKDIR /openms-build + +# Configure. +RUN /bin/bash -c "cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF -DPYOPENMS=ON ../OpenMS -DPY_MEMLEAK_DISABLE=On" + +# Build TOPP tools and clean up. +RUN make -j4 TOPP +RUN rm -rf src doc CMakeFiles + +# Build pyOpenMS wheels and install via pip. 
+RUN make -j4 pyopenms +WORKDIR /openms-build/pyOpenMS +RUN pip install dist/*.whl + +# Install other dependencies (excluding pyopenms) +COPY requirements.txt ./requirements.txt +RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt > requirements_cleaned.txt && mv requirements_cleaned.txt requirements.txt +RUN pip install -r requirements.txt + +WORKDIR / +RUN mkdir openms + +# Copy TOPP tools bin directory, add to PATH. +RUN cp -r openms-build/bin /openms/bin +ENV PATH="/openms/bin/:${PATH}" + +# Copy TOPP tools bin directory, add to PATH. +RUN cp -r openms-build/lib /openms/lib +ENV LD_LIBRARY_PATH="/openms/lib/:${LD_LIBRARY_PATH}" + +# Copy share folder, add to PATH, remove source directory. +RUN cp -r OpenMS/share/OpenMS /openms/share +RUN rm -rf OpenMS +ENV OPENMS_DATA_PATH="/openms/share/" + +# Remove build directory. +RUN rm -rf openms-build + +# Prepare and run streamlit app. +FROM compile-openms AS run-app + +# Install Redis server for job queue and nginx for load balancing. +# Redis data lives under $RUNTIME_DIR at runtime (see entrypoint.sh) so no +# /var/lib/redis setup is needed - that path is not writable under Apptainer. +RUN apt-get update && apt-get install -y --no-install-recommends redis-server nginx \ + && rm -rf /var/lib/apt/lists/* + +# Create Redis data directory. Default 0755 root-owned is enough: the docker +# entrypoint runs as root (can write regardless of mode), and the apptainer +# entrypoint relocates Redis state to /tmp/openms-runtime-* so this dir is +# never written under apptainer. +RUN mkdir -p /var/lib/redis + +# Pre-create bind-mount targets so apptainer/singularity has a real attach +# point. Docker auto-creates missing `-v` targets, but singularity uses a +# read-only underlay and silently ignores `:rw` when the target isn't a +# real directory in the SIF — writes then fail with EROFS even though the +# host bind path is writable. 
Pre-creating these directories costs one +# inode each and changes nothing in docker mode (the user's volume mount +# shadows them). +RUN mkdir -p /workspaces-streamlit-template /mounted-data + +# Create workdir and copy over all streamlit related files/folders. + +# note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure +WORKDIR /app +COPY assets/ /app/assets +COPY content/ /app/content +COPY docs/ /app/docs +COPY example-data/ /app/example-data +COPY gdpr_consent/ /app/gdpr_consent +COPY hooks/ /app/hooks +COPY src/ /app/src +COPY utils/ /app/utils +COPY app.py /app/app.py +COPY settings.json /app/settings.json +COPY default-parameters.json /app/default-parameters.json +COPY presets.json /app/presets.json + +# For streamlit configuration +COPY .streamlit/ /app/.streamlit/ +COPY clean-up-workspaces.py /app/clean-up-workspaces.py + +# add cron job to the crontab +RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab - + +# Set default worker count (can be overridden via environment variable) +ENV RQ_WORKER_COUNT=1 +ENV REDIS_URL=redis://localhost:6379/0 + +# Number of Streamlit server instances for load balancing (default: 1 = no load balancer) +# Set to >1 to enable nginx load balancer with multiple Streamlit instances +ENV STREAMLIT_SERVER_COUNT=1 + +# Install the apptainer-compatible entrypoint that starts cron (when the root +# FS is writable), Redis, RQ workers, optional nginx load balancer, and the +# Streamlit server. The script falls back to /tmp paths under apptainer. 
+COPY docker/entrypoint.sh /app/entrypoint.sh +RUN chmod +x /app/entrypoint.sh + +# Patch Analytics +RUN mamba run -n streamlit-env python hooks/hook-analytics.py + +# Set Online Deployment +RUN jq '.online_deployment = true' settings.json > tmp.json && mv tmp.json settings.json + +# Point the in-app mounted-drive browser at the conventional bind-mount path. +# The browser only renders when this directory exists at runtime, i.e. when +# the user starts the container with `-v /host/path:/mounted-data`. +RUN jq '.local_data_dir = "/mounted-data"' settings.json > tmp.json && mv tmp.json settings.json + +# Download latest OpenMS App executable as a ZIP file. +# ARG declared here (not at the top) — otherwise the per-run token busts the cache. +ARG GITHUB_TOKEN +RUN if [ -n "$GITHUB_TOKEN" ]; then \ + echo "GITHUB_TOKEN is set, proceeding to download the release asset..."; \ + gh release download -R ${GITHUB_USER}/${GITHUB_REPO} -p "OpenMS-App.zip" -D /app; \ + else \ + echo "GITHUB_TOKEN is not set, skipping the release asset download."; \ + fi + + +# Run app as container entrypoint. +EXPOSE $PORT +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/Dockerfile_simple.arm b/Dockerfile_simple.arm new file mode 100644 index 0000000..be57317 --- /dev/null +++ b/Dockerfile_simple.arm @@ -0,0 +1,127 @@ +# This Dockerfile creates a container with pyOpenMS +# It also adds a basic streamlit server that serves a pyOpenMS-based app. +# hints: +# build image with: docker build -f Dockerfile_simple.arm --no-cache -t streamlitapp:latest-arm64 --build-arg GITHUB_TOKEN= . 2>&1 | tee build.log +# check if image was build: docker image ls +# run container: docker run -p 8501:8501 streamlitapp:latest +# debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell +# prune unused images/etc. to free disc space (e.g. might be needed on gitpod). 
Use with care.: docker system prune --all --force + +FROM ubuntu:22.04 AS stage1 +ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git +ARG OPENMS_BRANCH=develop +ARG PORT=8501 +# Streamlit app GitHub user name (to download artifact from). +ARG GITHUB_USER=OpenMS +# Streamlit app GitHub repository name (to download artifact from). +ARG GITHUB_REPO=streamlit-template + + +# Step 1: set up a sane build system +USER root + +RUN apt-get -y update +# note: streamlit in docker needs libgtk2.0-dev (see https://yugdamor.medium.com/importerror-libgthread-2-0-so-0-cannot-open-shared-object-file-no-such-file-or-directory-895b94a7827b) +RUN apt-get install -y --no-install-recommends --no-install-suggests wget ca-certificates libgtk2.0-dev curl jq cron nginx +RUN update-ca-certificates + +# Install Github CLI +RUN (type -p wget >/dev/null || (apt-get update && apt-get install wget -y)) \ + && mkdir -p -m 755 /etc/apt/keyrings \ + && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \ + && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ + && apt-get update \ + && apt-get install gh -y + +# Download and install miniforge. +ENV PATH="/root/miniforge3/bin:${PATH}" +RUN wget -q \ + https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh \ + && bash Miniforge3-Linux-aarch64.sh -b \ + && rm -f Miniforge3-Linux-aarch64.sh +RUN mamba --version + +# Make /root traversable so the entrypoint can `source +# /root/miniforge3/bin/activate ...` when the container runs as a non-root +# user (apptainer/singularity maps the host UID into the container; the +# default ubuntu /root is 0700 which would block path traversal). 
+x only, +# not +r, so the directory listing remains private. +RUN chmod o+x /root + +# Setup mamba environment. +RUN mamba create -n streamlit-env python=3.10 +RUN echo "mamba activate streamlit-env" >> ~/.bashrc +SHELL ["/bin/bash", "--rcfile", "~/.bashrc"] +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] + +#################################### install streamlit +# install packages +COPY requirements.txt requirements.txt +RUN mamba install pip +RUN python -m pip install --upgrade pip +RUN python -m pip install -r requirements.txt + +# Pre-create bind-mount targets so apptainer/singularity has a real attach +# point. Docker auto-creates missing `-v` targets, but singularity uses a +# read-only underlay and silently ignores `:rw` when the target isn't a +# real directory in the SIF — writes then fail with EROFS even though the +# host bind path is writable. +RUN mkdir -p /workspaces-streamlit-template /mounted-data + +# create workdir and copy over all streamlit related files/folders +WORKDIR /app +# note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure +WORKDIR /app +COPY assets/ /app/assets +COPY content/ /app/content +COPY docs/ /app/docs +COPY example-data/ /app/example-data +COPY gdpr_consent/ /app/gdpr_consent +COPY hooks/ /app/hooks +COPY src/ /app/src +COPY utils/ /app/utils +COPY app.py /app/app.py +COPY settings.json /app/settings.json +COPY default-parameters.json /app/default-parameters.json +COPY presets.json /app/presets.json + +# For streamlit configuration +COPY .streamlit/ /app/.streamlit/ + +COPY clean-up-workspaces.py /app/clean-up-workspaces.py + +# add cron job to the crontab +RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab - + +# Number of Streamlit server instances for load balancing (default: 1 = no load balancer) +# Set to >1 to enable nginx load balancer with 
multiple Streamlit instances +ENV STREAMLIT_SERVER_COUNT=1 + +# Install the apptainer-compatible entrypoint (shared with the full image). +# The script auto-skips the Redis/RQ section when redis-server is not +# installed, so it works equally well in the simple variant. +COPY docker/entrypoint.sh /app/entrypoint.sh +RUN chmod +x /app/entrypoint.sh + +# Patch Analytics +RUN mamba run -n streamlit-env python hooks/hook-analytics.py + +# Set Online Deployment +RUN jq '.online_deployment = true' settings.json > tmp.json && mv tmp.json settings.json + +# Download latest OpenMS App executable as a ZIP file. +# ARG declared here (not at the top) — otherwise the per-run token busts the cache. +ARG GITHUB_TOKEN +RUN if [ -n "$GITHUB_TOKEN" ]; then \ + echo "GITHUB_TOKEN is set, proceeding to download the release asset..."; \ + gh release download -R ${GITHUB_USER}/${GITHUB_REPO} -p "OpenMS-App.zip" -D /app; \ + else \ + echo "GITHUB_TOKEN is not set, skipping the release asset download."; \ + fi + +# make sure that mamba environment is used +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] + +EXPOSE $PORT +ENTRYPOINT ["/app/entrypoint.sh"] From d32c1b9cbc97c4dbf5e2111273416bb6b0e6b6d1 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 19:43:35 +0000 Subject: [PATCH 02/16] ci(docker): extend apptainer/nginx/traefik tests to cover arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the apptainer/nginx/traefik integration tests only ran against the amd64 artifact, so the arm64 image was validated solely by its build succeeding plus a post-push /_stcore/health probe. Now all three integration matrices fan out over arch=[amd64, arm64] with a matrix-driven runs-on, exercising the read-only-root apptainer contract and both kind-based ingress paths on a native ARM runner too. Changes: - `build-amd64` artifact renamed from `openms-streamlit-{variant}-image` to `openms-streamlit-{variant}-amd64-image` for symmetry. 
- `build-arm64` now also `load: true`'s the built image, retags to the kind-friendly `openms-streamlit:test`, saves it as a tar, and uploads it as `openms-streamlit-{variant}-arm64-image`. The post-push pull-back smoke test is removed — the new apptainer/nginx/traefik runs subsume it and avoid the slow GHCR pull. - `test-apptainer`, `test-nginx`, `test-traefik` matrices switched from `variant: [full, simple]` to an `include:` list with {variant, arch, runner} tuples; `runs-on: ${{ matrix.runner }}` selects `ubuntu-latest` for amd64 and `ubuntu-24.04-arm` for arm64. Artifact download names get `${{ matrix.arch }}` interpolated. - SIF upload at the tail of `test-apptainer` gated on `matrix.arch == 'amd64'`: arm64 still runs the full apptainer contract end-to-end, but only amd64 produces the SIF that `publish-apptainer` ships to GHCR (HPC SIF consumers are amd64). Note on `publish-apptainer`: it stays on `needs: test-apptainer`, which now waits for the arm64 matrix entries too — meaning an arm64 apptainer regression will block amd64 SIF publishing. Conservative on purpose; happy to decouple via separate jobs if that turns out to be too strict in practice. --- .github/workflows/build-and-test.yml | 115 +++++++++++++++++---------- 1 file changed, 74 insertions(+), 41 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 5ca863a..012291f 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -111,7 +111,7 @@ jobs: - name: Upload image artifact uses: actions/upload-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-amd64-image path: /tmp/image.tar retention-days: 1 @@ -121,9 +121,9 @@ jobs: # under `-` by the `create-manifest` job below. 
The build # uses a separate `Dockerfile.arm` / `Dockerfile_simple.arm` that swaps # the miniforge installer to aarch64 and (for the full variant) guards - # the THIRDPARTY/Linux/aarch64 copy. Apptainer/nginx/traefik integration - # tests still run only on the amd64 artifact — those gates do not need - # arch duplication right now (HPC consumers of the SIF are amd64). + # the THIRDPARTY/Linux/aarch64 copy. The built image is also uploaded as + # an artifact so the apptainer / nginx / traefik integration jobs can + # exercise the ARM image on a native ARM runner (matrix arch=arm64). needs: lint-manifests runs-on: ubuntu-24.04-arm permissions: @@ -180,6 +180,7 @@ jobs: context: . file: ${{ matrix.dockerfile }} platforms: linux/arm64 + load: true push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} @@ -189,30 +190,22 @@ jobs: build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} - - name: Smoke test the just-pushed arm64 image - # PRs build (validates Dockerfile.arm parses + compiles) but don't - # push, so there's nothing to pull back on PR events. On push/tag, - # pull the just-published image and verify /_stcore/health to catch - # entrypoint regressions that wouldn't surface in the build itself. 
- if: github.event_name != 'pull_request' + - name: Retag for kind (stable local tag) run: | - set -euo pipefail - IMAGE_REF="${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:${{ github.sha }}-${{ matrix.variant }}-arm64" - echo "Smoke-testing $IMAGE_REF" - docker pull "$IMAGE_REF" - docker run -d --rm --name smoketest -p 8501:8501 "$IMAGE_REF" - for i in $(seq 1 90); do - if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8501/_stcore/health; then - echo "Streamlit healthy after ${i} attempts" - docker stop smoketest - exit 0 - fi - sleep 2 - done - echo "ERROR: /_stcore/health never returned 200" - docker logs smoketest || true - docker stop smoketest || true - exit 1 + # load:true above loaded all meta-action tags into local docker. + # Retag the first one to the stable name the kustomize overlay expects. + FIRST_TAG=$(printf '%s\n' "${{ steps.meta.outputs.tags }}" | head -n 1) + docker tag "$FIRST_TAG" openms-streamlit:test + + - name: Save image as tar + run: docker save openms-streamlit:test -o /tmp/image.tar + + - name: Upload image artifact + uses: actions/upload-artifact@v4 + with: + name: openms-streamlit-${{ matrix.variant }}-arm64-image + path: /tmp/image.tar + retention-days: 1 create-manifest: # Stitch the per-arch tags into multi-arch manifest lists. The manifest @@ -282,19 +275,31 @@ jobs: # (not root inside the image). The entrypoint must tolerate both: this job # exercises that contract by running the built image under apptainer and # waiting for the streamlit /_stcore/health endpoint to come up. 
- needs: build-amd64 - runs-on: ubuntu-latest + needs: [build-amd64, build-arm64] + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: - variant: [full, simple] + include: + - variant: full + arch: amd64 + runner: ubuntu-latest + - variant: full + arch: arm64 + runner: ubuntu-24.04-arm + - variant: simple + arch: amd64 + runner: ubuntu-latest + - variant: simple + arch: arm64 + runner: ubuntu-24.04-arm steps: - uses: actions/checkout@v4 - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - name: Install apptainer @@ -424,8 +429,12 @@ jobs: if: always() run: apptainer instance stop openms-test || true - - name: Upload validated SIF artifact (push events only) - if: success() && github.event_name != 'pull_request' + - name: Upload validated SIF artifact (amd64 push events only) + # SIF publishing stays amd64-only this iteration (HPC consumers of + # the SIF are amd64). The arm64 matrix entry still exercises the + # full apptainer contract end-to-end; it just doesn't upload the + # resulting SIF for downstream publishing. 
+ if: success() && github.event_name != 'pull_request' && matrix.arch == 'amd64' uses: actions/upload-artifact@v4 with: name: openms-streamlit-${{ matrix.variant }}-sif @@ -500,19 +509,31 @@ jobs: done <<< "${{ steps.meta.outputs.tags }}" test-nginx: - needs: build-amd64 - runs-on: ubuntu-latest + needs: [build-amd64, build-arm64] + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: - variant: [full, simple] + include: + - variant: full + arch: amd64 + runner: ubuntu-latest + - variant: full + arch: arm64 + runner: ubuntu-24.04-arm + - variant: simple + arch: amd64 + runner: ubuntu-latest + - variant: simple + arch: arm64 + runner: ubuntu-24.04-arm steps: - uses: actions/checkout@v4 - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - name: Load image into local docker @@ -587,19 +608,31 @@ jobs: done test-traefik: - needs: build-amd64 - runs-on: ubuntu-latest + needs: [build-amd64, build-arm64] + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: - variant: [full, simple] + include: + - variant: full + arch: amd64 + runner: ubuntu-latest + - variant: full + arch: arm64 + runner: ubuntu-24.04-arm + - variant: simple + arch: amd64 + runner: ubuntu-latest + - variant: simple + arch: arm64 + runner: ubuntu-24.04-arm steps: - uses: actions/checkout@v4 - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - name: Load image into local docker From 1d73b6726e8286c27cc3771bfaf63fb691352458 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:07:21 +0000 Subject: [PATCH 03/16] fix(arm): use two-pass cmake so TOPP links against system libstdc++ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit The ARM build of `make -j4 TOPP` failed at the link step with /usr/bin/ld: /root/miniforge3/lib/libyaml-cpp.so.0.8: undefined reference to `std::ios_base_library_init()@GLIBCXX_3.4.32' The conda-forge libyaml-cpp package for aarch64 is built against GLIBCXX_3.4.32 (gcc 13+), but Ubuntu 22.04's system g++ ships with an older libstdc++. Running cmake inside the mamba shell lets it discover /root/miniforge3/lib first, so the conda-forge yaml-cpp gets linked into every TOPP binary and breaks. amd64 happens to work because the conda-forge amd64 yaml-cpp build is older. Fix mirrors FLASHApp's Dockerfile.arm: configure OpenMS in two cmake passes — pass 1 under plain `/bin/bash` with `-DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3` so cmake resolves C++ deps from the system tree (libyaml-cpp from contrib, boost from apt, etc.); pass 2 under `mamba run` with `-DPYOPENMS=ON` so the Python bindings still find conda-forge Python / Cython / NumPy. The IGNORE_PREFIX_PATH flag is repeated on pass 2 to keep the cached C++ link command unchanged. Only Dockerfile.arm changes; Dockerfile (amd64) keeps its single-pass cmake to avoid disturbing the working x86 path. --- Dockerfile.arm | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index 8571bc5..2e9a6ab 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -86,8 +86,21 @@ WORKDIR / RUN mkdir /openms-build WORKDIR /openms-build -# Configure. -RUN /bin/bash -c "cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF -DPYOPENMS=ON ../OpenMS -DPY_MEMLEAK_DISABLE=On" +# Configure (two-pass — mirrors FLASHApp.arm). +# Pass 1 runs under plain bash so cmake does NOT search /root/miniforge3 +# when resolving C++ system dependencies. On ARM the conda-forge build of +# libyaml-cpp.so.0.8 is linked against a newer libstdc++ (GLIBCXX_3.4.32, +# i.e. 
gcc 13+) than ubuntu:22.04's system g++ ships, so letting cmake +# pick the miniforge yaml-cpp makes TOPP linking fail with +# undefined reference to `std::ios_base_library_init()@GLIBCXX_3.4.32` +# amd64 happens to work because its conda-forge yaml-cpp build is older. +# Pass 2 re-runs cmake inside the mamba env with PYOPENMS=ON so the Python +# bindings can find the conda-forge Python/Cython/NumPy; CMAKE_IGNORE_PREFIX_PATH +# keeps the C++ link command unchanged from pass 1. +SHELL ["/bin/bash", "-c"] +RUN cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF ../OpenMS +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] +RUN cmake -DPYOPENMS=ON -DPY_MEMLEAK_DISABLE=On -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 . # Build TOPP tools and clean up. RUN make -j4 TOPP From f11bc99fd559bc79ed54e3849a2a7f0be4487f0c Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:13:37 +0000 Subject: [PATCH 04/16] fix(arm): install cmake via apt so pass-1 cmake is on plain bash PATH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two-pass cmake split from 1d73b67 runs pass 1 under `SHELL ["/bin/bash", "-c"]`, but the only cmake on the image is the one from `mamba install cmake` at /root/miniforge3/envs/streamlit-env/bin/cmake — not on plain bash's PATH. Result: exit 127 (command not found) the moment pass 1 invokes cmake. FLASHApp.arm sidesteps this by installing cmake via apt; do the same here (just append `cmake` to the existing apt-get install line). The mamba cmake install stays, so pass 2 under the mamba shell continues to use the conda-forge cmake exactly as it did before. Ubuntu 22.04 ships cmake 3.22, comfortably above OpenMS 3.5's 3.15 floor. 
--- Dockerfile.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index 2e9a6ab..e0bbd0f 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -20,7 +20,7 @@ USER root # Install required Ubuntu packages. RUN apt-get -y update -RUN apt-get install -y --no-install-recommends --no-install-suggests g++ autoconf automake patch libtool make git gpg wget ca-certificates curl jq libgtk2.0-dev openjdk-8-jdk cron +RUN apt-get install -y --no-install-recommends --no-install-suggests g++ autoconf automake patch libtool make git gpg wget ca-certificates curl jq libgtk2.0-dev openjdk-8-jdk cron cmake RUN update-ca-certificates RUN apt-get install -y --no-install-recommends --no-install-suggests libsvm-dev libeigen3-dev coinor-libcbc-dev libglpk-dev libzip-dev zlib1g-dev libxerces-c-dev libbz2-dev libomp-dev libhdf5-dev RUN apt-get install -y --no-install-recommends --no-install-suggests libboost-date-time1.74-dev \ From 5185c3e0ddd8ade74fd6264ea26d0a07aa077508 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:24:52 +0000 Subject: [PATCH 05/16] fix(arm): call mamba cmake by full path in pass 1 (apt 3.22 is too old) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix (install cmake via apt) didn't actually help: OpenMS 3.5's CMakeLists.txt requires cmake >= 3.24, and Ubuntu 22.04's apt cmake is 3.22.1, which fails configure with CMake Error at src/openms/extern/CMakeLists.txt:11 (cmake_minimum_required): CMake 3.24 or higher is required. You are running version 3.22.1 That's exactly why the existing x86 Dockerfile installs cmake via mamba (the conda-forge build is 3.30+). FLASHApp.arm escapes this by using ubuntu:24.04 (apt cmake 3.28); we stay on 22.04 to minimize churn vs. the working x86 Dockerfile. Fix: in pass 1, call the mamba-env cmake by its full path `/root/miniforge3/envs/streamlit-env/bin/cmake`. 
The plain-bash SHELL is still in effect, so cmake doesn't pick up any conda-forge environment side effects, and CMAKE_IGNORE_PREFIX_PATH keeps it from auto-discovering miniforge libraries during find_package. The cmake binary itself runs against miniforge's libstdc++, but that's a runtime detail of cmake — it doesn't leak into the configured project's link command. The apt cmake addition from f11bc99 is now redundant but harmless; leaving it in place to keep this diff focused. --- Dockerfile.arm | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index e0bbd0f..53d99d9 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -94,11 +94,15 @@ WORKDIR /openms-build # pick the miniforge yaml-cpp makes TOPP linking fail with # undefined reference to `std::ios_base_library_init()@GLIBCXX_3.4.32` # amd64 happens to work because its conda-forge yaml-cpp build is older. +# We call the mamba-env cmake by full path so we get a version >= 3.24 +# (OpenMS 3.5's floor); ubuntu:22.04's apt cmake is 3.22 which is too old. +# CMAKE_IGNORE_PREFIX_PATH keeps cmake from auto-discovering miniforge libs +# even though the binary itself lives there. # Pass 2 re-runs cmake inside the mamba env with PYOPENMS=ON so the Python # bindings can find the conda-forge Python/Cython/NumPy; CMAKE_IGNORE_PREFIX_PATH # keeps the C++ link command unchanged from pass 1. 
SHELL ["/bin/bash", "-c"] -RUN cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF ../OpenMS +RUN /root/miniforge3/envs/streamlit-env/bin/cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF ../OpenMS SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] RUN cmake -DPYOPENMS=ON -DPY_MEMLEAK_DISABLE=On -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 . From 0bab3ae16d64dbc5bfa5b7248c77f711b2348263 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:31:53 +0000 Subject: [PATCH 06/16] ci: free disk space at the start of each integration test job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two failures in the previous run (test-traefik full, test-nginx simple) ended with the runner reporting "No space left on device" while flushing its diagnostic log. ubuntu-latest starts with ~14 GB free; downloading the full image artifact (5-8 GB), loading it into docker (decompressed, larger), pulling kind's node image, then loading the OCI tar into the kind cluster easily exceeds that budget. Mirror the cleanup already used by `build-arm64`: drop the runner's preinstalled dotnet / android SDK / ghc / hostedtoolcache to recover ~30 GB. Same step now runs at the top of test-apptainer, test-nginx, and test-traefik on both amd64 (ubuntu-latest) and arm64 (ubuntu-24.04-arm) matrix entries — the arm runner is at least as tight as amd64. 
--- .github/workflows/build-and-test.yml | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 012291f..ae256dc 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -296,6 +296,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Free disk space + # ubuntu-latest has ~14 GB free; the full image (5-8 GB) plus kind + # node image plus loading the OCI tar into both docker and kind can + # exhaust it. The arm runner is even tighter. Same incantation as + # `build-arm64`'s "Free disk space" step. + run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + - name: Download image artifact uses: actions/download-artifact@v4 with: @@ -530,6 +540,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Free disk space + # ubuntu-latest has ~14 GB free; the full image (5-8 GB) plus kind + # node image plus loading the OCI tar into both docker and kind can + # exhaust it. The arm runner is even tighter. Same incantation as + # `build-arm64`'s "Free disk space" step. + run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + - name: Download image artifact uses: actions/download-artifact@v4 with: @@ -629,6 +649,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Free disk space + # ubuntu-latest has ~14 GB free; the full image (5-8 GB) plus kind + # node image plus loading the OCI tar into both docker and kind can + # exhaust it. The arm runner is even tighter. Same incantation as + # `build-arm64`'s "Free disk space" step. 
+ run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + - name: Download image artifact uses: actions/download-artifact@v4 with: From 4790d46f8a835788ea5104fa1a49e61187020455 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:59:34 +0000 Subject: [PATCH 07/16] fix(arm): keep CMakeFiles/ between make TOPP and make pyopenms After the two-pass cmake configure landed in 5185c3e, the next attempt got past `make -j4 TOPP` (the link error is fixed) but failed fast in `make -j4 pyopenms` with: CMake Error: Not a file: /openms-build/CMakeFiles/VerifyGlobs.cmake CMake Error: Error processing file: /openms-build/CMakeFiles/VerifyGlobs.cmake make: *** [Makefile:11553: cmake_check_build_system] Error 1 `VerifyGlobs.cmake` is generated by cmake for `file(GLOB CONFIGURE_DEPENDS ...)` targets and is consulted by `cmake_check_build_system` at the top of every subsequent `make` invocation. The intermediate cleanup line RUN rm -rf src doc CMakeFiles deleted it, which is fine on the x86 single-pass build (different cmake codepath when PYOPENMS=ON is set in the initial configure, no VerifyGlobs.cmake generated) but breaks the ARM two-pass build. Stop deleting CMakeFiles/ between `make TOPP` and `make pyopenms`. We still drop `src/` and `doc/` for disk savings; keeping CMakeFiles costs only a few hundred MB on the intermediate layer. --- Dockerfile.arm | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index 53d99d9..1765980 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -108,7 +108,16 @@ RUN cmake -DPYOPENMS=ON -DPY_MEMLEAK_DISABLE=On -DCMAKE_IGNORE_PREFIX_PATH=/root # Build TOPP tools and clean up. RUN make -j4 TOPP -RUN rm -rf src doc CMakeFiles +# NOTE: do NOT delete CMakeFiles/ here. 
The two-pass cmake configure used +# above generates CMakeFiles/VerifyGlobs.cmake for the pyOpenMS targets' +# CONFIGURE_DEPENDS globs; the next `make -j4 pyopenms` runs +# `cmake_check_build_system` which fails fast if VerifyGlobs.cmake is gone: +# CMake Error: Not a file: /openms-build/CMakeFiles/VerifyGlobs.cmake +# The x86 single-pass build seems to avoid generating that file (different +# cmake codepath when PYOPENMS is set during the initial configure), which +# is why it can still `rm -rf CMakeFiles` here. CMakeFiles/ adds ~a few +# hundred MB to the intermediate layer — acceptable. +RUN rm -rf src doc # Build pyOpenMS wheels and install via pip. RUN make -j4 pyopenms From c7fdf00070e0d3613c3f12c69cf130f84b299a69 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 22:12:35 +0000 Subject: [PATCH 08/16] =?UTF-8?q?ci(apptainer):=20drop=20arm64=20=E2=80=94?= =?UTF-8?q?=20no=20upstream=20aarch64=20.deb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eWaterCycle/setup-apptainer@v2 installs apptainer from the upstream .deb asset on the GitHub release. Upstream apptainer only publishes amd64 .debs (verified: every v1.3.x release lists only `apptainer_<version>_amd64.deb`, no _arm64 / _aarch64 variant). On the ubuntu-24.04-arm runner the action's `apt-get install ./apptainer_*.deb` fails with sudo exit code 100 because the package can't be resolved. Building apptainer from source on the ARM runner would add ~15 minutes and a maintenance surface (Go toolchain, suid configuration) for limited value — HPC SIF consumers remain amd64. Revert test-apptainer to amd64-only and document why. test-nginx and test-traefik still exercise the ARM image via kind, which gives us functional ARM coverage at the docker-runtime level even without apptainer. 
Side cleanups now that arm64 is gone from this matrix: - artifact name back to a literal `*-amd64-image` (no matrix.arch) - SIF upload gate drops the `matrix.arch == 'amd64'` check --- .github/workflows/build-and-test.yml | 37 +++++++++++----------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index ae256dc..d915583 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -275,24 +275,21 @@ jobs: # (not root inside the image). The entrypoint must tolerate both: this job # exercises that contract by running the built image under apptainer and # waiting for the streamlit /_stcore/health endpoint to come up. - needs: [build-amd64, build-arm64] - runs-on: ${{ matrix.runner }} + # + # amd64 only: upstream apptainer does NOT publish arm64 .deb assets + # (https://github.com/apptainer/apptainer/releases — every release lists + # only `apptainer__amd64.deb`), so eWaterCycle/setup-apptainer fails + # on ubuntu-24.04-arm with "sudo exit code 100" when its + # `apt-get install ./apptainer_*.deb` resolves a non-existent package. + # Building apptainer from source on the arm runner would add ~15 min and + # significant maintenance surface for limited value (HPC SIF consumers + # remain amd64). Re-evaluate if upstream starts publishing arm64 builds. 
+ needs: build-amd64 + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - include: - - variant: full - arch: amd64 - runner: ubuntu-latest - - variant: full - arch: arm64 - runner: ubuntu-24.04-arm - - variant: simple - arch: amd64 - runner: ubuntu-latest - - variant: simple - arch: arm64 - runner: ubuntu-24.04-arm + variant: [full, simple] steps: - uses: actions/checkout@v4 @@ -309,7 +306,7 @@ jobs: - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image + name: openms-streamlit-${{ matrix.variant }}-amd64-image path: /tmp - name: Install apptainer @@ -439,12 +436,8 @@ jobs: if: always() run: apptainer instance stop openms-test || true - - name: Upload validated SIF artifact (amd64 push events only) - # SIF publishing stays amd64-only this iteration (HPC consumers of - # the SIF are amd64). The arm64 matrix entry still exercises the - # full apptainer contract end-to-end; it just doesn't upload the - # resulting SIF for downstream publishing. - if: success() && github.event_name != 'pull_request' && matrix.arch == 'amd64' + - name: Upload validated SIF artifact (push events only) + if: success() && github.event_name != 'pull_request' uses: actions/upload-artifact@v4 with: name: openms-streamlit-${{ matrix.variant }}-sif From f466229e60e41209f50a5d7841e8ab721f08893c Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 07:03:16 +0000 Subject: [PATCH 09/16] fix(ci): keep /opt/hostedtoolcache when freeing disk space kind/kubectl/helm setup actions fail with "Cache directory '/opt/hostedtoolcache' does not exist". Drop just dotnet/android/ghc (~34 GB) and leave the tool cache in place. 
--- .github/workflows/build-and-test.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index d915583..b2c3b31 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -143,7 +143,9 @@ jobs: # image is tighter than the AMD one out of the box. Mirrors what # FLASHApp's publish-docker-images.yml does at the top of its ARM job. run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h @@ -299,7 +301,9 @@ jobs: # exhaust it. The arm runner is even tighter. Same incantation as # `build-arm64`'s "Free disk space" step. run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h @@ -539,7 +543,9 @@ jobs: # exhaust it. The arm runner is even tighter. Same incantation as # `build-arm64`'s "Free disk space" step. run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h @@ -648,7 +654,9 @@ jobs: # exhaust it. The arm runner is even tighter. Same incantation as # `build-arm64`'s "Free disk space" step. 
run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h From f0d1db16c36d71c987f1e2f11160402f63e43803 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 08:20:45 +0000 Subject: [PATCH 10/16] ci: dump cluster state on test-nginx/test-traefik failure curl exit-22 doesn't tell us whether the pod, service, or ingress is the broken link. Dump pods/logs/ingress/controller logs on failure so the next run surfaces the actual cause. --- .github/workflows/build-and-test.yml | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b2c3b31..8fb5450 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -626,6 +626,27 @@ jobs: echo "$host -> 200 OK" done + - name: Dump cluster state on failure + if: failure() + run: | + echo "=== nodes ===" + kubectl get nodes -o wide || true + echo "=== pods (all namespaces) ===" + kubectl get pods -A -o wide || true + echo "=== app pods describe ===" + kubectl describe pod -n openms -l app=${SLUG} || true + echo "=== app pod logs ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix || true + echo "=== app pod previous logs (if crashed) ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix --previous || true + echo "=== ingress ===" + kubectl get ingress -A -o wide || true + kubectl describe ingress -n openms || true + echo "=== services + endpoints ===" + kubectl get svc,endpoints -n openms || true + echo "=== ingress-nginx controller logs ===" + kubectl logs -n ingress-nginx -l app.kubernetes.io/component=controller --tail=200 || true + test-traefik: needs: 
[build-amd64, build-arm64] runs-on: ${{ matrix.runner }} @@ -741,3 +762,23 @@ jobs: echo "" echo "$host -> 200 OK" done + + - name: Dump cluster state on failure + if: failure() + run: | + echo "=== nodes ===" + kubectl get nodes -o wide || true + echo "=== pods (all namespaces) ===" + kubectl get pods -A -o wide || true + echo "=== app pods describe ===" + kubectl describe pod -n openms -l app=${SLUG} || true + echo "=== app pod logs ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix || true + echo "=== app pod previous logs (if crashed) ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix --previous || true + echo "=== traefik ingressroute ===" + kubectl get ingressroute -A -o yaml || true + echo "=== services + endpoints ===" + kubectl get svc,endpoints -n openms || true + echo "=== traefik controller logs ===" + kubectl logs -n traefik -l app.kubernetes.io/name=traefik --tail=200 || true From 7aadb0656d89e0611bd80fa287f5c3dae681829b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 11:48:30 +0000 Subject: [PATCH 11/16] fix(ci): load images into kind via image-archive, drop docker load `docker load` + `kind load docker-image` keeps the image in both host docker AND each kind node's containerd. With a 5-8 GB image and two kind nodes that's ~25 GB of duplicated storage, which trips the "no space left on device" error in kind's ctr import. Switch to `kind load image-archive` so the tar streams directly into each node, and rm the tar after to reclaim /tmp. 
--- .github/workflows/build-and-test.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 8fb5450..7890072 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -555,9 +555,6 @@ jobs: name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - - name: Load image into local docker - run: docker load -i /tmp/image.tar - - name: Create kind cluster uses: helm/kind-action@v1 with: @@ -565,7 +562,13 @@ jobs: config: .github/kind-config.yaml - name: Load image into kind cluster - run: kind load docker-image openms-streamlit:test --name test-cluster + # Use `kind load image-archive` (not docker-image) so we never store + # the image in host docker. Saves ~5-8 GB on /var/lib/docker. Delete + # the tar afterwards to free the same again on /tmp — the image is + # now in both kind nodes' containerd, which is enough. + run: | + kind load image-archive /tmp/image.tar --name test-cluster + rm -f /tmp/image.tar - name: Install nginx ingress controller run: | @@ -687,9 +690,6 @@ jobs: name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - - name: Load image into local docker - run: docker load -i /tmp/image.tar - - name: Create kind cluster uses: helm/kind-action@v1 with: @@ -697,7 +697,13 @@ jobs: config: .github/kind-config.yaml - name: Load image into kind cluster - run: kind load docker-image openms-streamlit:test --name traefik-test + # Use `kind load image-archive` (not docker-image) so we never store + # the image in host docker. Saves ~5-8 GB on /var/lib/docker. Delete + # the tar afterwards to free the same again on /tmp — the image is + # now in both kind nodes' containerd, which is enough. 
+ run: | + kind load image-archive /tmp/image.tar --name traefik-test + rm -f /tmp/image.tar - name: Set up Helm uses: azure/setup-helm@v4 From 611902101e1eba9e41080b055ec668711e009f24 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 12:04:23 +0000 Subject: [PATCH 12/16] ci: re-trigger workflow run From 88dcdb03a40dccf0e1c6b23ddc4cc994118b6fc2 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 13:18:35 +0000 Subject: [PATCH 13/16] ci: re-trigger workflow run after outage From 39d6d25935fe8e193ba96b2c0d15aa0bbee286b6 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 17:57:30 +0000 Subject: [PATCH 14/16] fix(ci): match kind image to what the kustomize overlay references 503s in test-nginx/test-traefik traced to two issues: 1. The prod overlay maps openms-streamlit -> ghcr.io/openms/streamlit-template:main-full, but the build job was re-tagging the local image as openms-streamlit:test. Rendered manifests pointed at the registry name; kind only had :test loaded; pods stayed ErrImagePull. Retag as :main-full so kind has exactly the ref the manifests use. 2. Three of the four pod specs declare imagePullPolicy: Always; the existing sed only rewrote IfNotPresent. With Always and no registry creds in kind, pods loop on ImagePullBackOff. Extend the sed to catch both. --- .github/workflows/build-and-test.yml | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 7890072..91b2473 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -98,15 +98,17 @@ jobs: build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} - - name: Retag for kind (stable local tag) + - name: Retag for kind (image name the kustomize overlay points at) run: | - # load:true above loaded all meta-action tags into local docker. - # Retag the first one to the stable name the kustomize overlay expects. 
+ # The prod overlay sets `newName: ghcr.io/openms/streamlit-template`, + # `newTag: main-full`. The rendered manifests reference that exact + # ref, so we need it loaded into kind under that name. Tag invariant + # across branches/variants so the test always works. FIRST_TAG=$(printf '%s\n' "${{ steps.meta.outputs.tags }}" | head -n 1) - docker tag "$FIRST_TAG" openms-streamlit:test + docker tag "$FIRST_TAG" ghcr.io/openms/streamlit-template:main-full - name: Save image as tar - run: docker save openms-streamlit:test -o /tmp/image.tar + run: docker save ghcr.io/openms/streamlit-template:main-full -o /tmp/image.tar - name: Upload image artifact uses: actions/upload-artifact@v4 @@ -192,15 +194,17 @@ jobs: build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} - - name: Retag for kind (stable local tag) + - name: Retag for kind (image name the kustomize overlay points at) run: | - # load:true above loaded all meta-action tags into local docker. - # Retag the first one to the stable name the kustomize overlay expects. + # The prod overlay sets `newName: ghcr.io/openms/streamlit-template`, + # `newTag: main-full`. The rendered manifests reference that exact + # ref, so we need it loaded into kind under that name. Tag invariant + # across branches/variants so the test always works. 
FIRST_TAG=$(printf '%s\n' "${{ steps.meta.outputs.tags }}" | head -n 1) - docker tag "$FIRST_TAG" openms-streamlit:test + docker tag "$FIRST_TAG" ghcr.io/openms/streamlit-template:main-full - name: Save image as tar - run: docker save openms-streamlit:test -o /tmp/image.tar + run: docker save ghcr.io/openms/streamlit-template:main-full -o /tmp/image.tar - name: Upload image artifact uses: actions/upload-artifact@v4 @@ -580,7 +584,7 @@ jobs: # Filter out Traefik IngressRoute (kind cluster uses nginx) and force imagePullPolicy=Never kubectl kustomize k8s/overlays/prod/ | \ yq 'select(.kind != "IngressRoute")' | \ - sed 's|imagePullPolicy: IfNotPresent|imagePullPolicy: Never|g' | \ + sed -E 's|imagePullPolicy: (IfNotPresent\|Always)|imagePullPolicy: Never|g' | \ sed 's|storageClassName: cinder-csi|storageClassName: standard|g' > /tmp/manifests.yaml for i in 1 2 3 4 5; do if kubectl apply -f /tmp/manifests.yaml; then @@ -720,7 +724,7 @@ jobs: - name: Deploy with Kustomize (full manifests, no filter) run: | kubectl kustomize k8s/overlays/prod/ | \ - sed 's|imagePullPolicy: IfNotPresent|imagePullPolicy: Never|g' | \ + sed -E 's|imagePullPolicy: (IfNotPresent\|Always)|imagePullPolicy: Never|g' | \ sed 's|storageClassName: cinder-csi|storageClassName: standard|g' > /tmp/manifests.yaml for i in 1 2 3 4 5; do if kubectl apply -f /tmp/manifests.yaml; then From 510fc89ae0d65e895735325196a70688cf481f3f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 27 May 2026 16:06:51 +0000 Subject: [PATCH 15/16] fix(ci): disable provenance on build-amd64 to keep pushes single-manifest create-manifest fails with "ghcr.io/openms/streamlit-template:main-full-amd64 is a manifest list" because docker/build-push-action v5 adds a provenance attestation by default, which buildx packs as a manifest list (image + attestation entries). docker manifest create rejects manifest lists as components. 
build-arm64 already sets provenance: false for the same reason; mirror that on the amd64 path so both per-arch tags are flat image manifests that can be merged into the multi-arch manifest. --- .github/workflows/build-and-test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 85d59be..39dbc80 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -91,6 +91,11 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + # provenance/attestations turn the pushed tag into a manifest list, + # which the create-manifest job's `docker manifest create` then + # refuses ("is a manifest list"). Keep the push as a single-platform + # image manifest — same as the build-arm64 job. + provenance: false cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }}-amd64 cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2}-amd64,mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} build-args: | From 0c29d06218529a5c1d32aa4b59a71cbaed156b22 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 27 May 2026 18:59:27 +0000 Subject: [PATCH 16/16] fix(ci): free more disk before kind image load test-traefik failed unpacking the OpenMS image into containerd overlayfs on the kind node: no space left on device while extracting layer /usr/include/boost/json/object.hpp The image is loaded into BOTH kind nodes (control-plane + worker) so the OpenMS source / boost headers / pyopenms layers consume ~2x their unpacked size. The previous cleanup only removed dotnet, android and ghc, leaving the runner well short of what kind's double-extraction needs. 
Drop the next biggest unused chunks (~16 GB more): - /usr/local/.ghcup (~2.7 GB) - /usr/share/swift (~2 GB) - /usr/local/share/boost (~1.5 GB) - /opt/hostedtoolcache/CodeQL (~5 GB; keep the rest of hostedtoolcache since helm/kind-action and setup-kubectl cache binaries there) - docker image prune (~5-7 GB of pre-installed runner images) Applied uniformly to build-arm64 / test-apptainer / test-nginx / test-traefik since they all eat the same image. --- .github/workflows/build-and-test.yml | 36 ++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 39dbc80..d533ec9 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -147,8 +147,15 @@ jobs: run: | # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl # cache binaries there and fail if the directory is missing. - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true + # /opt/hostedtoolcache/CodeQL is ~5 GB and not used in these jobs. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \ + /usr/local/.ghcup /usr/share/swift \ + /usr/local/share/boost \ + /opt/hostedtoolcache/CodeQL || true sudo apt-get clean + # Pre-installed docker images (node, php, mysql, ...) aren't used + # in kind-based tests; reclaim that space too. + sudo docker image prune --all --force || true df -h - uses: actions/checkout@v4 @@ -307,8 +314,15 @@ jobs: run: | # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl # cache binaries there and fail if the directory is missing. - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true + # /opt/hostedtoolcache/CodeQL is ~5 GB and not used in these jobs. 
+ sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \ + /usr/local/.ghcup /usr/share/swift \ + /usr/local/share/boost \ + /opt/hostedtoolcache/CodeQL || true sudo apt-get clean + # Pre-installed docker images (node, php, mysql, ...) aren't used + # in kind-based tests; reclaim that space too. + sudo docker image prune --all --force || true df -h - name: Download image artifact @@ -543,8 +557,15 @@ jobs: run: | # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl # cache binaries there and fail if the directory is missing. - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true + # /opt/hostedtoolcache/CodeQL is ~5 GB and not used in these jobs. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \ + /usr/local/.ghcup /usr/share/swift \ + /usr/local/share/boost \ + /opt/hostedtoolcache/CodeQL || true sudo apt-get clean + # Pre-installed docker images (node, php, mysql, ...) aren't used + # in kind-based tests; reclaim that space too. + sudo docker image prune --all --force || true df -h - name: Download image artifact @@ -672,8 +693,15 @@ jobs: run: | # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl # cache binaries there and fail if the directory is missing. - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true + # /opt/hostedtoolcache/CodeQL is ~5 GB and not used in these jobs. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \ + /usr/local/.ghcup /usr/share/swift \ + /usr/local/share/boost \ + /opt/hostedtoolcache/CodeQL || true sudo apt-get clean + # Pre-installed docker images (node, php, mysql, ...) aren't used + # in kind-based tests; reclaim that space too. + sudo docker image prune --all --force || true df -h - name: Download image artifact