From b50b4d0ae45f6761858def9c6b77c3072521a969 Mon Sep 17 00:00:00 2001 From: Gabriele Battimelli Date: Thu, 4 Jun 2026 12:50:12 +0200 Subject: [PATCH] Cap jixia concurrency to avoid OOM in the load step The 'Load jixia data into PostgreSQL' step was OOM-killed deterministically ~85 modules in (runner lost, post-steps skipped, no error logged), while processing the most Mathlib-heavy modules. jixia_py defaults to CPUs+4 parallel workers; each loads ~2-3 GB of Mathlib, exceeding the 16 GB runner. Make the worker count configurable via JIXIA_MAX_WORKERS and set it to 2 in the weekly index workflow. --- .github/workflows/weekly-index.yml | 3 +++ database/jixia_db.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/weekly-index.yml b/.github/workflows/weekly-index.yml index 17071c7..11c78bb 100644 --- a/.github/workflows/weekly-index.yml +++ b/.github/workflows/weekly-index.yml @@ -14,6 +14,9 @@ jobs: JIXIA_REPO: https://github.com/frenzymath/jixia MODULE_NAMES: Physlib DRY_RUN: 'false' + # Each jixia worker loads ~2-3 GB of Mathlib; cap concurrency so the + # runner (16 GB) doesn't get OOM-killed during the load step. + JIXIA_MAX_WORKERS: '2' CHROMA_PATH: chroma CONNECTION_STRING: ${{ secrets.DATABASE_URL }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} diff --git a/database/jixia_db.py b/database/jixia_db.py index f57e4f6..1601fd5 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -176,12 +176,18 @@ def topological_sort(): with conn.cursor() as cursor: lean_sysroot = Path(os.environ["LEAN_SYSROOT"]) lean_src = lean_sysroot / "src" / "lean" + # Each jixia worker loads the full Mathlib environment (~2-3 GB), so the + # default thread count (CPUs + 4) can exhaust memory and get the process + # OOM-killed. Cap it via JIXIA_MAX_WORKERS in memory-constrained CI. + max_workers_env = os.environ.get("JIXIA_MAX_WORKERS") + max_workers = int(max_workers_env) if max_workers_env else None all_modules = [] for d in project.root, lean_src: results = project.batch_run_jixia( base_dir=d, prefixes=prefixes, plugins=["module", "declaration", "symbol"], + max_workers=max_workers, ) modules = [r[0] for r in results] load_module(modules, d)