From 92757f33fc1eadc407ba8d14e82529703c698053 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 19 Mar 2026 15:20:53 -0400 Subject: [PATCH 1/7] ci: add aarch64 build for nemotron-ocr wheel The nemotron_ocr_cpp C++ extension was only built on x86_64 runners, causing ModuleNotFoundError on ARM hosts like the DGX Spark. Matrix the build_ocr_cuda job across x86_64 (ubuntu-latest) and aarch64 (ubuntu-24.04-arm) so pip can install the correct platform wheel on either architecture. Made-with: Cursor --- .github/workflows/huggingface-nightly.yml | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/huggingface-nightly.yml b/.github/workflows/huggingface-nightly.yml index e285a227c..9f405d0ae 100644 --- a/.github/workflows/huggingface-nightly.yml +++ b/.github/workflows/huggingface-nightly.yml @@ -126,10 +126,21 @@ jobs: # nemotron-ocr-v1 needs nvcc/CUDA headers to build its extension. # Build with Python 3.12 to match upstream package constraints and # avoid producing an extension for the wrong Python ABI. - runs-on: ubuntu-latest + # Matrix across x86_64 and aarch64 so the wheel works on ARM hosts + # (e.g. DGX Spark) as well as conventional x86 machines. + runs-on: ${{ matrix.platform.runner }} + strategy: + fail-fast: false + matrix: + platform: + - runner: ubuntu-latest + cuda_image: nvidia/cuda:13.0.0-devel-ubuntu24.04 + arch: x86_64 + - runner: ubuntu-24.04-arm + cuda_image: nvidia/cuda:13.0.0-devel-ubuntu24.04 + arch: aarch64 container: - # Build extension with CUDA 13 toolchain (nvcc/headers from devel image). - image: nvidia/cuda:13.0.0-devel-ubuntu24.04 + image: ${{ matrix.platform.cuda_image }} steps: - name: Install system deps (git, lfs, build tools) shell: bash @@ -253,5 +264,5 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: dist-nemotron-ocr-v1 + name: dist-nemotron-ocr-v1-${{ matrix.platform.arch }} path: dist-out/nemotron-ocr-v1/* From 02a445b82d66d711881de49e3bc609b3c2723486 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 19 Mar 2026 15:33:09 -0400 Subject: [PATCH 2/7] ci: strip -mavx2 flag on aarch64 OCR builds The upstream nemotron-ocr build script hard-codes -mavx2 (an x86-only SIMD flag) in its C++ extension compile args. On aarch64 runners this causes an immediate compile failure. Add a generic --strip-cflag option to nightly_build_publish.py that removes quoted occurrences of a given compiler flag from upstream Python build scripts after cloning. The workflow now passes --strip-cflag=-mavx2 when building on aarch64. Made-with: Cursor --- .github/workflows/huggingface-nightly.yml | 7 +++++ ci/scripts/nightly_build_publish.py | 31 +++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/.github/workflows/huggingface-nightly.yml b/.github/workflows/huggingface-nightly.yml index 9f405d0ae..b97083a34 100644 --- a/.github/workflows/huggingface-nightly.yml +++ b/.github/workflows/huggingface-nightly.yml @@ -212,6 +212,12 @@ jobs: skip_existing_flag="--skip-existing" fi + # -mavx2 is an x86-only SIMD flag; strip it when building on aarch64. + strip_cflag_arg="" + if [[ "$(uname -m)" == "aarch64" ]]; then + strip_cflag_arg="--strip-cflag=-mavx2" + fi + python --version python ci/scripts/nightly_build_publish.py \ --repo-id "nemotron-ocr-v1" \ @@ -226,6 +232,7 @@ jobs: --venv-pip-install "torchvision==0.24.1" \ --build-env "BUILD_CPP_EXTENSION=1" \ --build-env "BUILD_CPP_FORCE=1" \ + ${strip_cflag_arg} \ ${upload_flag} \ --repository-url "${repo_url}" \ --token-env "${token_env}" \ diff --git a/ci/scripts/nightly_build_publish.py b/ci/scripts/nightly_build_publish.py index a53a0bf94..fefbca335 100644 --- a/ci/scripts/nightly_build_publish.py +++ b/ci/scripts/nightly_build_publish.py @@ -168,6 +168,26 @@ def _auto_project_subdir(repo_dir: Path, repo_id: str) -> str: return "" +def _strip_cflags(project_dir: Path, flags: list[str]) -> None: + """Remove arch-specific compiler flags from upstream Python build scripts. + + Scans all ``*.py`` files under *project_dir* and deletes quoted + occurrences of each flag (e.g. ``'-mavx2',``). This lets the same + source tree compile on architectures that don't support the flag. + """ + if not flags: + return + for py_file in project_dir.rglob("*.py"): + text = py_file.read_text(encoding="utf-8") + original = text + for flag in flags: + escaped = re.escape(flag) + text = re.sub(rf"""['"]{escaped}['"],?\s*""", "", text) + if text != original: + py_file.write_text(text, encoding="utf-8") + print(f"Stripped {flags} from {py_file.relative_to(project_dir)}") + + def _apply_build_env_overrides(env: dict[str, str], build_env: list[str]) -> dict[str, str]: """ Apply KEY=VALUE overrides to the environment dict. @@ -307,6 +327,13 @@ def main() -> int: default=[], help="Extra packages to pip install into the build venv before building (repeatable)", ) + ap.add_argument( + "--strip-cflag", + action="append", + default=[], + help="Remove a C/C++ compiler flag from upstream build scripts after cloning (repeatable). " + "Useful for stripping arch-specific flags like -mavx2 on non-x86 builds.", + ) ap.add_argument("--upload", action="store_true", help="Upload built dists via twine") ap.add_argument("--repository-url", default="https://test.pypi.org/legacy/", help="Twine repository URL") ap.add_argument("--token-env", default="TEST_PYPI_API_TOKEN", help="Env var containing API token") @@ -337,6 +364,10 @@ def main() -> int: if not patched: print("No static version field found to patch (continuing).") + if args.strip_cflag: + print(f"=== Stripping compiler flags: {args.strip_cflag} ===") + _strip_cflags(project_dir, args.strip_cflag) + print("=== Building sdist + wheel ===") out_dir = dist_root / args.repo_id _ensure_clean_dir(out_dir) From d0b7dc1a40e2879ed33fdf9aa7f175699bfc54b8 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 19 Mar 2026 15:40:41 -0400 Subject: [PATCH 3/7] ci: use upstream ARCH env var instead of --strip-cflag The upstream nemotron-ocr build script already checks ARCH to skip x86-only SIMD flags (-mavx2) on ARM. Pass ARCH=arm64 via --build-env on the aarch64 leg instead of our regex-based --strip-cflag approach. This removes the _strip_cflags machinery in favour of the upstream's own escape hatch. Made-with: Cursor --- .github/workflows/huggingface-nightly.yml | 9 ++++--- ci/scripts/nightly_build_publish.py | 31 ----------------------- 2 files changed, 5 insertions(+), 35 deletions(-) diff --git a/.github/workflows/huggingface-nightly.yml b/.github/workflows/huggingface-nightly.yml index b97083a34..55e06261e 100644 --- a/.github/workflows/huggingface-nightly.yml +++ b/.github/workflows/huggingface-nightly.yml @@ -212,10 +212,11 @@ jobs: skip_existing_flag="--skip-existing" fi - # -mavx2 is an x86-only SIMD flag; strip it when building on aarch64. - strip_cflag_arg="" + # The upstream build script checks ARCH to skip x86-only SIMD + # flags (e.g. -mavx2) that are invalid on aarch64. + arch_env_arg="" if [[ "$(uname -m)" == "aarch64" ]]; then - strip_cflag_arg="--strip-cflag=-mavx2" + arch_env_arg="--build-env ARCH=arm64" fi python --version @@ -232,7 +233,7 @@ jobs: --venv-pip-install "torchvision==0.24.1" \ --build-env "BUILD_CPP_EXTENSION=1" \ --build-env "BUILD_CPP_FORCE=1" \ - ${strip_cflag_arg} \ + ${arch_env_arg} \ ${upload_flag} \ --repository-url "${repo_url}" \ --token-env "${token_env}" \ diff --git a/ci/scripts/nightly_build_publish.py b/ci/scripts/nightly_build_publish.py index fefbca335..a53a0bf94 100644 --- a/ci/scripts/nightly_build_publish.py +++ b/ci/scripts/nightly_build_publish.py @@ -168,26 +168,6 @@ def _auto_project_subdir(repo_dir: Path, repo_id: str) -> str: return "" -def _strip_cflags(project_dir: Path, flags: list[str]) -> None: - """Remove arch-specific compiler flags from upstream Python build scripts. - - Scans all ``*.py`` files under *project_dir* and deletes quoted - occurrences of each flag (e.g. ``'-mavx2',``). This lets the same - source tree compile on architectures that don't support the flag. - """ - if not flags: - return - for py_file in project_dir.rglob("*.py"): - text = py_file.read_text(encoding="utf-8") - original = text - for flag in flags: - escaped = re.escape(flag) - text = re.sub(rf"""['"]{escaped}['"],?\s*""", "", text) - if text != original: - py_file.write_text(text, encoding="utf-8") - print(f"Stripped {flags} from {py_file.relative_to(project_dir)}") - - def _apply_build_env_overrides(env: dict[str, str], build_env: list[str]) -> dict[str, str]: """ Apply KEY=VALUE overrides to the environment dict. @@ -327,13 +307,6 @@ def main() -> int: default=[], help="Extra packages to pip install into the build venv before building (repeatable)", ) - ap.add_argument( - "--strip-cflag", - action="append", - default=[], - help="Remove a C/C++ compiler flag from upstream build scripts after cloning (repeatable). " - "Useful for stripping arch-specific flags like -mavx2 on non-x86 builds.", - ) ap.add_argument("--upload", action="store_true", help="Upload built dists via twine") ap.add_argument("--repository-url", default="https://test.pypi.org/legacy/", help="Twine repository URL") ap.add_argument("--token-env", default="TEST_PYPI_API_TOKEN", help="Env var containing API token") @@ -364,10 +337,6 @@ def main() -> int: if not patched: print("No static version field found to patch (continuing).") - if args.strip_cflag: - print(f"=== Stripping compiler flags: {args.strip_cflag} ===") - _strip_cflags(project_dir, args.strip_cflag) - print("=== Building sdist + wheel ===") out_dir = dist_root / args.repo_id _ensure_clean_dir(out_dir) From 1ee103dba625326bb104fe3d4ae5ea5f6ba77eae Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 19 Mar 2026 15:59:05 -0400 Subject: [PATCH 4/7] ci: use second-resolution timestamps for nightly dev versions Change the dev suffix from YYYYMMDD to YYYYMMDDHHmmss so multiple builds on the same calendar day each receive a unique, monotonically increasing version that PyPI will accept. The legacy NIGHTLY_DATE_YYYYMMDD env var override is still honoured; a new NIGHTLY_DATE_SUFFIX var is also supported. Made-with: Cursor --- ci/scripts/nightly_build_publish.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/ci/scripts/nightly_build_publish.py b/ci/scripts/nightly_build_publish.py index a53a0bf94..10e4e2bb2 100644 --- a/ci/scripts/nightly_build_publish.py +++ b/ci/scripts/nightly_build_publish.py @@ -4,7 +4,7 @@ Behavior: - Clones a HF git repo with Git LFS smudge disabled (so large weights are not downloaded). -- Attempts to append a PEP 440 dev version suffix (YYYYMMDD) to pyproject.toml or setup.cfg. +- Attempts to append a PEP 440 dev version suffix (YYYYMMDDHHmmss) to pyproject.toml or setup.cfg. - Builds sdist + wheel via `python -m build`. - Optionally uploads to (Test)PyPI via twine. @@ -36,12 +36,18 @@ def _write_text(path: Path, text: str) -> None: path.write_text(text, encoding="utf-8") -def _nightly_suffix_yyyymmdd() -> str: - # Allow overriding for reproducibility. - forced = os.environ.get("NIGHTLY_DATE_YYYYMMDD") +def _nightly_suffix() -> str: + """Return a PEP 440 dev suffix that is unique per build. + + Uses ``YYYYMMDDHHmmss`` so multiple builds on the same calendar day + each receive a distinct, monotonically increasing version. + The ``NIGHTLY_DATE_SUFFIX`` (or legacy ``NIGHTLY_DATE_YYYYMMDD``) env + var can override the value for reproducible builds. + """ + forced = os.environ.get("NIGHTLY_DATE_SUFFIX") or os.environ.get("NIGHTLY_DATE_YYYYMMDD") if forced: return forced - return _dt.datetime.now(_dt.UTC).strftime("%Y%m%d") + return _dt.datetime.now(_dt.UTC).strftime("%Y%m%d%H%M%S") def _venv_python(venv_dir: Path) -> Path: @@ -80,17 +86,16 @@ def _ensure_venv(venv_dir: Path, *, system_site_packages: bool) -> Path: return py -def _pep440_nightly(base_version: str, yyyymmdd: str) -> str: +def _pep440_nightly(base_version: str, suffix: str) -> str: """ Convert a base version to a nightly dev version. Examples: - 1.2.3 -> 1.2.3.dev20260127 - 1.2.3+local -> 1.2.3.dev20260127 + 1.2.3 -> 1.2.3.dev20260127031517 + 1.2.3+local -> 1.2.3.dev20260127031517 """ base = base_version.split("+", 1)[0].strip() - # If already has a .dev segment, replace it to keep it monotonic daily. base = re.sub(r"\.dev\d+$", "", base) - return f"{base}.dev{yyyymmdd}" + return f"{base}.dev{suffix}" def _patch_pyproject_version(repo_dir: Path) -> bool: @@ -105,7 +110,7 @@ def _patch_pyproject_version(repo_dir: Path) -> bool: return False old_version = m.group(1) - new_version = _pep440_nightly(old_version, _nightly_suffix_yyyymmdd()) + new_version = _pep440_nightly(old_version, _nightly_suffix()) if new_version == old_version: return False @@ -127,7 +132,7 @@ def _patch_setup_cfg_version(repo_dir: Path) -> bool: return False old_version = m.group(1).strip().strip('"').strip("'") - new_version = _pep440_nightly(old_version, _nightly_suffix_yyyymmdd()) + new_version = _pep440_nightly(old_version, _nightly_suffix()) if new_version == old_version: return False From 57b65a35569570b88bdeb570eb9f729a1bb113eb Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:26:35 +0000 Subject: [PATCH 5/7] Get x86/ARM builds grouped and properly tagged --- .github/workflows/huggingface-nightly.yml | 22 ++++++ ci/scripts/nightly_build_publish.py | 96 ++++++++++++++++++++--- 2 files changed, 109 insertions(+), 9 deletions(-) diff --git a/.github/workflows/huggingface-nightly.yml b/.github/workflows/huggingface-nightly.yml index 55e06261e..2eba2eb45 100644 --- a/.github/workflows/huggingface-nightly.yml +++ b/.github/workflows/huggingface-nightly.yml @@ -25,7 +25,19 @@ permissions: contents: read jobs: + # One dev suffix for the whole workflow so matrix legs (e.g. OCR x86 + aarch64) share + # the same release. UTC YYYYMMDDHHMMSS + run id allows multiple uploads per day and + # avoids collisions if two runs start in the same second. + nightly_coordinate: + runs-on: ubuntu-latest + outputs: + nightly_date_suffix: ${{ steps.suffix.outputs.nightly_date_suffix }} + steps: + - id: suffix + run: echo "nightly_date_suffix=$(date -u +%Y%m%d%H%M%S)${{ github.run_id }}" >> "$GITHUB_OUTPUT" + build: + needs: nightly_coordinate runs-on: ubuntu-latest strategy: fail-fast: false @@ -80,6 +92,7 @@ jobs: - name: Build (and maybe upload) env: + NIGHTLY_DATE_SUFFIX: ${{ needs.nightly_coordinate.outputs.nightly_date_suffix }} TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} shell: bash @@ -123,6 +136,7 @@ jobs: path: dist-out/${{ matrix.repo.id }}/* build_ocr_cuda: + needs: nightly_coordinate # nemotron-ocr-v1 needs nvcc/CUDA headers to build its extension. # Build with Python 3.12 to match upstream package constraints and # avoid producing an extension for the wrong Python ABI. @@ -183,6 +197,7 @@ jobs: - name: Build nemotron-ocr-v1 (and maybe upload) env: + NIGHTLY_DATE_SUFFIX: ${{ needs.nightly_coordinate.outputs.nightly_date_suffix }} TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} CUDA_HOME: /usr/local/cuda @@ -226,6 +241,7 @@ jobs: --work-dir ".work" \ --dist-dir "dist-out" \ --project-subdir "nemotron-ocr" \ + --hatch-force-platform-wheel \ --build-no-isolation \ --venv-pip-install "hatchling" \ --venv-pip-install "setuptools>=68" \ @@ -265,6 +281,12 @@ jobs: raise SystemExit( f"Built extension ABI does not match runner Python ({py_tag}). Found:\n{formatted}" ) + for wheel in wheels: + if "py3-none-any" in wheel.name: + raise SystemExit( + "Wheel is tagged py3-none-any; pip cannot select x86_64 vs aarch64. " + f"Got: {wheel.name}" + ) print("Verified nemotron_ocr_cpp extension ABI:", py_tag) PY diff --git a/ci/scripts/nightly_build_publish.py b/ci/scripts/nightly_build_publish.py index 10e4e2bb2..117188840 100644 --- a/ci/scripts/nightly_build_publish.py +++ b/ci/scripts/nightly_build_publish.py @@ -4,7 +4,8 @@ Behavior: - Clones a HF git repo with Git LFS smudge disabled (so large weights are not downloaded). -- Attempts to append a PEP 440 dev version suffix (YYYYMMDDHHmmss) to pyproject.toml or setup.cfg. +- Patches a PEP 440 dev version into pyproject.toml or setup.cfg (default suffix: UTC + YYYYMMDD; override with NIGHTLY_DATE_SUFFIX or NIGHTLY_DATE_YYYYMMDD, e.g. from CI). - Builds sdist + wheel via `python -m build`. - Optionally uploads to (Test)PyPI via twine. @@ -37,17 +38,16 @@ def _write_text(path: Path, text: str) -> None: def _nightly_suffix() -> str: - """Return a PEP 440 dev suffix that is unique per build. + """Return the numeric part after ``.dev`` in the nightly PEP 440 version. - Uses ``YYYYMMDDHHmmss`` so multiple builds on the same calendar day - each receive a distinct, monotonically increasing version. - The ``NIGHTLY_DATE_SUFFIX`` (or legacy ``NIGHTLY_DATE_YYYYMMDD``) env - var can override the value for reproducible builds. + Default is UTC ``YYYYMMDD`` so parallel matrix legs (e.g. x86_64 + aarch64) publish + under the same version. Override with ``NIGHTLY_DATE_SUFFIX`` or + ``NIGHTLY_DATE_YYYYMMDD`` (e.g. set once per workflow run in CI). """ forced = os.environ.get("NIGHTLY_DATE_SUFFIX") or os.environ.get("NIGHTLY_DATE_YYYYMMDD") if forced: return forced - return _dt.datetime.now(_dt.UTC).strftime("%Y%m%d%H%M%S") + return _dt.datetime.now(_dt.UTC).strftime("%Y%m%d") def _venv_python(venv_dir: Path) -> Path: @@ -90,8 +90,8 @@ def _pep440_nightly(base_version: str, suffix: str) -> str: """ Convert a base version to a nightly dev version. Examples: - 1.2.3 -> 1.2.3.dev20260127031517 - 1.2.3+local -> 1.2.3.dev20260127031517 + 1.2.3 -> 1.2.3.dev20260127 + 1.2.3+local -> 1.2.3.dev20260127 """ base = base_version.split("+", 1)[0].strip() base = re.sub(r"\.dev\d+$", "", base) @@ -120,6 +120,75 @@ def _patch_pyproject_version(repo_dir: Path) -> bool: return True +def _patch_hatch_build_force_platform_wheel(project_dir: Path) -> bool: + """ + Hatchling may emit py3-none-any for extension builds unless the hook sets + build_data[\"pure_python\"] = False and build_data[\"infer_tag\"] = True so the + wheel tag matches the current interpreter/platform. Patch upstream hatch_build.py + when we recognize the Nemotron OCR layout. + """ + path = project_dir / "hatch_build.py" + if not path.is_file(): + return False + text = _read_text(path) + has_pp = 'build_data["pure_python"]' in text or "build_data['pure_python']" in text + has_it = 'build_data["infer_tag"]' in text or "build_data['infer_tag']" in text + if has_pp and has_it: + return False + if "CustomBuildHook" not in text or "def initialize" not in text: + return False + needle = "def initialize(self, version: str, build_data: dict) -> None:" + idx = text.find(needle) + if idx < 0: + return False + body_start = idx + len(needle) + insert_at = body_start + while insert_at < len(text) and text[insert_at] in " \t": + insert_at += 1 + if insert_at < len(text) and text[insert_at] == "\n": + insert_at += 1 + + block = ' build_data["pure_python"] = False\n' ' build_data["infer_tag"] = True\n' + + if not has_pp and not has_it: + patched = text[:insert_at] + block + text[insert_at:] + _write_text(path, patched) + print("Patched hatch_build.py: pure_python=False, infer_tag=True") + return True + + if has_pp and not has_it: + lines = text.splitlines(keepends=True) + out: list[str] = [] + inserted = False + for line in lines: + out.append(line) + if inserted: + continue + if ('build_data["pure_python"]' in line or "build_data['pure_python']" in line) and "False" in line: + out.append(' build_data["infer_tag"] = True\n') + inserted = True + if not inserted: + return False + _write_text(path, "".join(out)) + print('Patched hatch_build.py: build_data["infer_tag"] = True') + return True + + # has_it and not has_pp: insert pure_python line before first infer_tag line + lines = text.splitlines(keepends=True) + out = [] + inserted = False + for line in lines: + if not inserted and ('build_data["infer_tag"]' in line or "build_data['infer_tag']" in line): + out.append(' build_data["pure_python"] = False\n') + inserted = True + out.append(line) + if not inserted: + return False + _write_text(path, "".join(out)) + print('Patched hatch_build.py: build_data["pure_python"] = False') + return True + + def _patch_setup_cfg_version(repo_dir: Path) -> bool: setup_cfg = repo_dir / "setup.cfg" if not setup_cfg.exists(): @@ -316,6 +385,11 @@ def main() -> int: ap.add_argument("--repository-url", default="https://test.pypi.org/legacy/", help="Twine repository URL") ap.add_argument("--token-env", default="TEST_PYPI_API_TOKEN", help="Env var containing API token") ap.add_argument("--skip-existing", action="store_true", help="Pass --skip-existing to twine") + ap.add_argument( + "--hatch-force-platform-wheel", + action="store_true", + help="Patch hatch_build.py so hatchling emits a platform-specific wheel (not py3-none-any)", + ) args = ap.parse_args() root = Path.cwd() @@ -342,6 +416,10 @@ def main() -> int: if not patched: print("No static version field found to patch (continuing).") + if args.hatch_force_platform_wheel: + if not _patch_hatch_build_force_platform_wheel(project_dir): + print("hatch-force-platform-wheel: no applicable hatch_build.py patch applied") + print("=== Building sdist + wheel ===") out_dir = dist_root / args.repo_id _ensure_clean_dir(out_dir) From 031b382d9b00d66c841650905b20ecf30a36c42f Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:41:06 +0000 Subject: [PATCH 6/7] More verbose twine upload --- ci/scripts/nightly_build_publish.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/ci/scripts/nightly_build_publish.py b/ci/scripts/nightly_build_publish.py index 117188840..1350b5547 100644 --- a/ci/scripts/nightly_build_publish.py +++ b/ci/scripts/nightly_build_publish.py @@ -318,7 +318,14 @@ def _build( shutil.copy2(artifact, out_dir / artifact.name) -def _twine_upload(dist_dir: Path, repository_url: str, token: str, *, skip_existing: bool) -> None: +def _twine_upload( + dist_dir: Path, + repository_url: str, + token: str, + *, + skip_existing: bool, + verbose: bool, +) -> None: venv_dir = Path(os.environ.get("ORCH_VENV_DIR", ".venv-build")) # Twine doesn't need system site packages; keep it off by default. py = _ensure_venv(venv_dir, system_site_packages=False) @@ -339,6 +346,8 @@ def _twine_upload(dist_dir: Path, repository_url: str, token: str, *, skip_exist ] if skip_existing: cmd.append("--skip-existing") + if verbose: + cmd.append("--verbose") cmd.append(str(dist_dir / "*")) _run(cmd, env=env) @@ -385,6 +394,12 @@ def main() -> int: ap.add_argument("--repository-url", default="https://test.pypi.org/legacy/", help="Twine repository URL") ap.add_argument("--token-env", default="TEST_PYPI_API_TOKEN", help="Env var containing API token") ap.add_argument("--skip-existing", action="store_true", help="Pass --skip-existing to twine") + ap.add_argument( + "--twine-verbose", + action=argparse.BooleanOptionalAction, + default=True, + help="Pass --verbose to twine upload (default: true; use --no-twine-verbose to silence)", + ) ap.add_argument( "--hatch-force-platform-wheel", action="store_true", @@ -438,7 +453,13 @@ def main() -> int: if not token: raise RuntimeError(f"Missing required env var: {args.token_env}") print(f"=== Uploading to {args.repository_url} ===") - _twine_upload(out_dir, args.repository_url, token, skip_existing=args.skip_existing) + _twine_upload( + out_dir, + args.repository_url, + token, + skip_existing=args.skip_existing, + verbose=args.twine_verbose, + ) return 0 From 11bcb5176f527264c8141277abd923edb8dd2d7b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:00:32 +0000 Subject: [PATCH 7/7] Try auditing wheels to fix platform tags before upload --- .github/workflows/huggingface-nightly.yml | 10 +++++ ci/scripts/nightly_build_publish.py | 46 +++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/.github/workflows/huggingface-nightly.yml b/.github/workflows/huggingface-nightly.yml index 2eba2eb45..53c117f50 100644 --- a/.github/workflows/huggingface-nightly.yml +++ b/.github/workflows/huggingface-nightly.yml @@ -167,6 +167,7 @@ jobs: git-lfs \ build-essential \ ninja-build \ + patchelf \ python3 \ python3-venv \ python3-pip @@ -242,6 +243,7 @@ jobs: --dist-dir "dist-out" \ --project-subdir "nemotron-ocr" \ --hatch-force-platform-wheel \ + --auditwheel-repair \ --build-no-isolation \ --venv-pip-install "hatchling" \ --venv-pip-install "setuptools>=68" \ @@ -287,6 +289,14 @@ jobs: "Wheel is tagged py3-none-any; pip cannot select x86_64 vs aarch64. " f"Got: {wheel.name}" ) + if wheel.name.endswith("-linux_x86_64.whl") or wheel.name.endswith( + "-linux_aarch64.whl" + ): + raise SystemExit( + "Wheel still has a bare linux_* tag; TestPyPI rejects these. " + "auditwheel repair should emit manylinux_*. Got: " + f"{wheel.name}" + ) print("Verified nemotron_ocr_cpp extension ABI:", py_tag) PY diff --git a/ci/scripts/nightly_build_publish.py b/ci/scripts/nightly_build_publish.py index 1350b5547..6c2433ee6 100644 --- a/ci/scripts/nightly_build_publish.py +++ b/ci/scripts/nightly_build_publish.py @@ -7,6 +7,7 @@ - Patches a PEP 440 dev version into pyproject.toml or setup.cfg (default suffix: UTC YYYYMMDD; override with NIGHTLY_DATE_SUFFIX or NIGHTLY_DATE_YYYYMMDD, e.g. from CI). - Builds sdist + wheel via `python -m build`. +- Optional ``--auditwheel-repair`` rewrites ``linux_*`` wheels to ``manylinux_*`` for PyPI. - Optionally uploads to (Test)PyPI via twine. This is intentionally best-effort across heterogeneous upstream projects. @@ -318,6 +319,42 @@ def _build( shutil.copy2(artifact, out_dir / artifact.name) +def _auditwheel_repair_dist_dir(dist_dir: Path) -> None: + """ + Rewrite linux_* wheels to manylinux_* so TestPyPI/PyPI accept the upload. + Requires ``patchelf`` on PATH (e.g. apt install patchelf). + """ + wheels = sorted(dist_dir.glob("*.whl")) + if not wheels: + return + + venv_dir = Path(os.environ.get("ORCH_VENV_DIR", ".venv-build")) + py = _ensure_venv(venv_dir, system_site_packages=False) + env = os.environ.copy() + env["PIP_DISABLE_PIP_VERSION_CHECK"] = "1" + env = _ensure_tmpdir(env) + _pip_install(py, ["auditwheel"], cwd=dist_dir.parent, env=env) + + repair_out = dist_dir / ".auditwheel-out" + _ensure_clean_dir(repair_out) + cmd = [str(py), "-m", "auditwheel", "repair", *[str(w) for w in wheels], "-w", str(repair_out)] + _run(cmd, env=env) + + repaired = sorted(repair_out.glob("*.whl")) + if not repaired: + raise RuntimeError("auditwheel repair produced no wheels") + + for w in wheels: + w.unlink() + + for rw in repaired: + dest = dist_dir / rw.name + shutil.move(str(rw), dest) + print(f"auditwheel: {dest.name}") + + shutil.rmtree(repair_out) + + def _twine_upload( dist_dir: Path, repository_url: str, @@ -405,6 +442,11 @@ def main() -> int: action="store_true", help="Patch hatch_build.py so hatchling emits a platform-specific wheel (not py3-none-any)", ) + ap.add_argument( + "--auditwheel-repair", + action="store_true", + help="Run auditwheel repair on built wheels (manylinux tag; needed for PyPI/TestPyPI)", + ) args = ap.parse_args() root = Path.cwd() @@ -448,6 +490,10 @@ def main() -> int: ) print(f"Artifacts in: {out_dir}") + if args.auditwheel_repair: + print("=== auditwheel repair ===") + _auditwheel_repair_dist_dir(out_dir) + if args.upload: token = os.environ.get(args.token_env) if not token: