From 9754d1a9a8ef4de03e641a770644cb4f838a60e1 Mon Sep 17 00:00:00 2001 From: Tania Mathern Date: Tue, 2 Jun 2026 08:27:14 -0700 Subject: [PATCH 01/12] fix: Test harness --- .../python-3.10-slim-perf-Dockerfile | 22 + .../python-3.12-slim-perf-Dockerfile | 22 + .../Dockerfiles/ubuntu-22.04-perf-Dockerfile | 28 ++ .../Dockerfiles/ubuntu-24.04-perf-Dockerfile | 28 ++ tests/perf/README.md | 144 ++++++ tests/perf/__init__.py | 1 + tests/perf/baseline.json | 105 +++++ tests/perf/entrypoint.sh | 38 ++ tests/perf/reports/.gitkeep | 0 tests/perf/run_profile.py | 234 ++++++++++ tests/perf/scenario_names.py | 30 ++ tests/perf/scenarios.py | 410 ++++++++++++++++++ 12 files changed, 1062 insertions(+) create mode 100644 tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile create mode 100644 tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile create mode 100644 tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile create mode 100644 tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile create mode 100644 tests/perf/README.md create mode 100644 tests/perf/__init__.py create mode 100644 tests/perf/baseline.json create mode 100644 tests/perf/entrypoint.sh create mode 100644 tests/perf/reports/.gitkeep create mode 100644 tests/perf/run_profile.py create mode 100644 tests/perf/scenario_names.py create mode 100644 tests/perf/scenarios.py diff --git a/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile b/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile new file mode 100644 index 00000000..0db28b11 --- /dev/null +++ b/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.10.20-slim-bookworm + +WORKDIR /workspace + +# libunwind for memray native stack unwinding +RUN apt-get update && apt-get install -y --no-install-recommends \ + libunwind-dev \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Pre-install Python deps using only the requirements files (layer-cached). +# The full project arrives via the -v mount at runtime. 
+COPY requirements.txt requirements-dev.txt ./ +RUN pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt + +RUN pip install --no-cache-dir memray==1.19.3 + +COPY tests/perf/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["python", "-m", "tests.perf.run_profile"] diff --git a/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile b/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile new file mode 100644 index 00000000..1e387d1c --- /dev/null +++ b/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.12.13-slim-bookworm + +WORKDIR /workspace + +# libunwind for memray native stack unwinding +RUN apt-get update && apt-get install -y --no-install-recommends \ + libunwind-dev \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Pre-install Python deps using only the requirements files (layer-cached). +# The full project arrives via the -v mount at runtime. +COPY requirements.txt requirements-dev.txt ./ +RUN pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt + +RUN pip install --no-cache-dir memray==1.19.3 + +COPY tests/perf/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["python", "-m", "tests.perf.run_profile"] diff --git a/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile b/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile new file mode 100644 index 00000000..c0aad277 --- /dev/null +++ b/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile @@ -0,0 +1,28 @@ +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /workspace + +# Ubuntu 22.04 ships Python 3.10 as python3 by default. +# libunwind for memray native stack unwinding. 
+RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + python3-venv \ + libunwind-dev \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* \ + && ln -s /usr/bin/python3 /usr/bin/python + +# Pre-install runtime deps only. Project arrives via -v mount. +COPY requirements.txt ./ +RUN pip3 install --no-cache-dir -r requirements.txt + +RUN pip3 install --no-cache-dir memray==1.19.3 requests==2.34.2 + +COPY tests/perf/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["python", "-m", "tests.perf.run_profile"] diff --git a/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile b/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile new file mode 100644 index 00000000..425fcffb --- /dev/null +++ b/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile @@ -0,0 +1,28 @@ +FROM ubuntu:24.04 + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /workspace + +# Ubuntu 24.04 ships Python 3.12 as python3 by default. +# libunwind used for memray native stack unwinding. +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + python3-venv \ + libunwind-dev \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* \ + && ln -s /usr/bin/python3 /usr/bin/python + +# Pre-install runtime deps only. Project arrives via -v mount. 
+COPY requirements.txt ./ +RUN pip3 install --no-cache-dir --break-system-packages -r requirements.txt + +RUN pip3 install --no-cache-dir --break-system-packages memray==1.19.3 requests==2.34.2 + +COPY tests/perf/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["python", "-m", "tests.perf.run_profile"] diff --git a/tests/perf/README.md b/tests/perf/README.md new file mode 100644 index 00000000..609017a3 --- /dev/null +++ b/tests/perf/README.md @@ -0,0 +1,144 @@ +# Memory Profiling Harness + +Uses [memray](https://github.com/bloomberg/memray) to track peak memory, allocation patterns, +and memory leaks across c2pa-python read and sign operations. + +## Files + +| File | Purpose | +| --- | --- | +| `scenarios.py` | Functions that exercise each profiling scenario. Imported by `run_profile.py`. | +| `run_profile.py` | Memory performance/usage analysis. Runs each scenario under `memray`, generates HTML reports, reads metrics, and compares against `baseline.json`. | +| `Dockerfiles/` | One Dockerfile per target environment. Selected via `PERF_ENV` at `make` time when running the memory analysis. | +| `entrypoint.sh` | Container entrypoint. Downloads the Linux native `libc2pa_c.so` at startup into the volume-mounted workspace so it sticks around even through the `-v` mount. | +| `reports/` | Generated HTML flamegraphs (gitignored). Two files per scenario: `.html` (peak/high-water view) and `-leaks.html` (leak view). | + +## Scenarios + +Each scenario loops multiple times so leaks accumulate and become visible in the leaks flamegraph and the memory use graph (defaults to 100). 
Change the iteration count by setting the `MEMRAY_ITERATIONS` variable (the Makefile forwards it into the container): + +```bash +MEMRAY_ITERATIONS=1000 make memory-use-bench +``` + +## Environments + +Select the target environment with `PERF_ENV` (default: `python-3.12-slim`): + +| `PERF_ENV` value | Base image | Python | +| --- | --- | --- | +| `python-3.12-slim` | `python:3.12.13-slim-bookworm` | 3.12 | +| `python-3.10-slim` | `python:3.10.20-slim-bookworm` | 3.10 | +| `ubuntu-22.04` | `ubuntu:22.04` | 3.10 (apt default) | +| `ubuntu-24.04` | `ubuntu:24.04` | 3.12 (apt default) | + +## Running (via Docker) + +```bash +# First run (if there is no baseline.json): establishes baseline.json +make memory-use-bench + +# Subsequent runs: compares against baseline, fails if >10% regression +make memory-use-bench + +# Refresh baseline after an intentional memory change +make memory-use-bench PERF_ARGS=--update-baseline + +# Run against a different runner environment +make memory-use-bench PERF_ENV=ubuntu-24.04 + +# Remove all generated HTML reports +make clean-memory-perf-reports +``` + +Reports are written to `tests/perf/reports/` on the local machine. Two HTML files per scenario: `.html` for the peak/high-water view and `-leaks.html` for the leak view. Open either in a browser. After a run, the harness also reports whether every scenario stayed within the baseline threshold (baseline + 10% memory-use tolerance). 
+ +## Running without Docker (if memray is supported and installed locally) + +```bash +pip install memray +python -m tests.perf.run_profile +``` + +## Environment variables + +| Variable | Default | Description | +| --- | --- | --- | +| `MEMRAY_ITERATIONS` | `100` | Loop count per scenario | +| `MEMRAY_THRESHOLD` | `1.1` | Regression multiplier (1.1 = 10% tolerance) | + +Override iteration count: + +```bash +MEMRAY_ITERATIONS=1000 make memory-use-bench +``` + +## Reading baseline.json + +`baseline.json` is committed to the repo and records the following data for each scenario: + +```json +{ + "_meta": { + "memray_version": "1.19.3", + "python_version": "3.12.13", + "c2pa_native_version": "c2pa-v0.85.0", + "iterations": 100, + "perf_env": "python-3.12-slim", + "arch": "x86_64" + }, + "scenario_name": { + "peak_bytes": 62914560, + "leaked_bytes": 3271766, + "total_allocations": 12840 + }, + ... +} +``` + +The `_meta` block records which toolchain produced the baseline so the numbers are reproducible. It is provenance only and is never compared against. The regression check only looks at the per-scenario entries. + +| `_meta` field | Meaning | +| --- | --- | +| `memray_version` | memray version that generated the metrics | +| `python_version` | Python version that ran the test harness | +| `c2pa_native_version` | native `libc2pa_c` version (from `c2pa-native-version.txt`) | +| `iterations` | `MEMRAY_ITERATIONS` used for the run | +| `perf_env` | `PERF_ENV` (target environment) | +| `arch` | machine architecture (`platform.machine()`) | + +`peak_bytes` and `total_allocations` depend on the machine architecture and on the Python and memray versions, so a baseline is most meaningful when compared against a run with the same `_meta`. + +**`peak_bytes`**: the highest amount of memory in use at any single point during the scenario. + +**`leaked_bytes`**: memory that was allocated during the run but had not been freed when memray stopped tracking. 
Static one-time allocations (e.g. loading the native library) always show up here. + +**`total_allocations`**: total number of individual memory allocation calls made. + +### Why leaked_bytes is not zero + +You might expect the baseline to show `leaked_bytes: 0`. In practice it never does: when the c2pa native library (`libc2pa_c.so`) is first loaded, Rust sets up global data structures that are designed to live for the entire lifetime of the process. They get cleaned up when the process exits, which is after memray stops watching. So memray sees them as "never freed" even though they are not actually leaking. + +A memory leak grows proportionally with work done. If you sign 50 images and get 3.2 MB leaked, then sign 1000 images and still get 3.2 MB leaked, that 3.2 MB is static one-time overhead, not an actual leak (since it does not grow with the work that ran). If signing 1000 images gave you 64 MB leaked, that would be a leak, because the leaked amount grows with the amount of work executed. + +The baseline captures this expected static overhead. Future runs compare against it: if `leaked_bytes` grows beyond the baseline by more than 10%, the run fails. + +### How to confirm no leak exists + +Run with a higher iteration count than default (100) and compare: + +```bash +MEMRAY_ITERATIONS=1000 make memory-use-bench PERF_ARGS=--update-baseline +``` + +If `leaked_bytes` stays flat compared to a 100-iteration run, there is no leak. If it scales with iterations, open that scenario's `-leaks.html` report under `tests/perf/reports/` in a browser to see which function is responsible. + +### When to update baseline + +Update `baseline.json` after any intentional change that affects memory use: + +```bash +make memory-use-bench PERF_ARGS=--update-baseline +``` + +Commit the updated `baseline.json` alongside the code change, so it becomes the new reference to compare against. 
diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py new file mode 100644 index 00000000..a56982a7 --- /dev/null +++ b/tests/perf/__init__.py @@ -0,0 +1 @@ +# Empty placeholder file to facilitate imports \ No newline at end of file diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json new file mode 100644 index 00000000..94c1f321 --- /dev/null +++ b/tests/perf/baseline.json @@ -0,0 +1,105 @@ +{ + "_meta": { + "memray_version": "1.19.3", + "python_version": "3.12.13", + "c2pa_native_version": "c2pa-v0.85.0", + "iterations": 1000, + "perf_env": "python-3.12-slim", + "arch": "aarch64" + }, + "reader_jpeg": { + "peak_bytes": 3919042, + "leaked_bytes": 3340989, + "total_allocations": 6288661 + }, + "reader_mp4": { + "peak_bytes": 4915860, + "leaked_bytes": 3188177, + "total_allocations": 20456930 + }, + "reader_wav": { + "peak_bytes": 5559685, + "leaked_bytes": 3198119, + "total_allocations": 3328210 + }, + "builder_sign_jpeg": { + "peak_bytes": 7727309, + "leaked_bytes": 3316737, + "total_allocations": 4456450 + }, + "builder_sign_gif": { + "peak_bytes": 14584435, + "leaked_bytes": 3316872, + "total_allocations": 71120346 + }, + "builder_sign_heic": { + "peak_bytes": 4648240, + "leaked_bytes": 3316873, + "total_allocations": 6998710 + }, + "builder_sign_m4a": { + "peak_bytes": 18888468, + "leaked_bytes": 3316920, + "total_allocations": 22022690 + }, + "builder_sign_png": { + "peak_bytes": 7965163, + "leaked_bytes": 3316920, + "total_allocations": 16175986 + }, + "builder_sign_webp": { + "peak_bytes": 8940091, + "leaked_bytes": 3316921, + "total_allocations": 4178289 + }, + "builder_sign_avi": { + "peak_bytes": 7078633, + "leaked_bytes": 3316788, + "total_allocations": 402442332 + }, + "builder_sign_mp4": { + "peak_bytes": 6202243, + "leaked_bytes": 3316920, + "total_allocations": 17384550 + }, + "builder_sign_tiff": { + "peak_bytes": 13164118, + "leaked_bytes": 3316921, + "total_allocations": 50687516 + }, + "builder_sign_jpeg_parent_of": { + 
"peak_bytes": 14214142, + "leaked_bytes": 3319472, + "total_allocations": 11387486 + }, + "builder_sign_jpeg_component_of": { + "peak_bytes": 14215420, + "leaked_bytes": 3319923, + "total_allocations": 11610536 + }, + "builder_sign_jpeg_parent_and_component": { + "peak_bytes": 14557670, + "leaked_bytes": 3462721, + "total_allocations": 20897062 + }, + "builder_sign_jpeg_parent_and_component_mixed_mime": { + "peak_bytes": 14516000, + "leaked_bytes": 3321316, + "total_allocations": 23867700 + }, + "builder_sign_jpeg_two_components_same_mime": { + "peak_bytes": 14547792, + "leaked_bytes": 3478198, + "total_allocations": 20795229 + }, + "builder_sign_jpeg_two_components_mixed_mime": { + "peak_bytes": 14512799, + "leaked_bytes": 3320210, + "total_allocations": 23765197 + }, + "builder_sign_jpeg_archive_roundtrip": { + "peak_bytes": 14239038, + "leaked_bytes": 3431958, + "total_allocations": 15966694 + } +} \ No newline at end of file diff --git a/tests/perf/entrypoint.sh b/tests/perf/entrypoint.sh new file mode 100644 index 00000000..f0f1f917 --- /dev/null +++ b/tests/perf/entrypoint.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -e + +cd /workspace +export PYTHONPATH=/workspace/src + +# Download the Linux native library into the volume-mounted workspace. +# Runs at container start so libs land in the host-mounted tree, +# not in a build layer that gets shadowed by the -v mount. 
def _build_scenario_script(name: str, repo_root: Path, iterations: int) -> str:
    """Return the Python source that memray executes for one scenario.

    Every interpolated value is rendered with ``repr`` rather than pasted
    between hand-written quotes, so a checkout path containing quotes or
    backslashes still produces a valid, unaltered string literal.

    Args:
        name: Key of the scenario in ``tests.perf.scenarios.SCENARIOS``.
        repo_root: Repository root; it and its ``src`` directory are put on
            ``sys.path`` of the child process.
        iterations: Loop count handed to the scenario function.
    """
    return (
        "import sys\n"
        f"sys.path.insert(0, {str(repo_root)!r})\n"
        f"sys.path.insert(0, {str(repo_root / 'src')!r})\n"
        "from tests.perf.scenarios import SCENARIOS\n"
        f"SCENARIOS[{name!r}]({int(iterations)})\n"
    )


def _run_scenario_under_memray(name: str, bin_path: Path) -> None:
    """Spawn a subprocess that runs one scenario under memray --native.

    The capture is written to ``bin_path`` (overwritten via ``--force``).
    The child's output is streamed, not captured. If memray exits non-zero
    the whole driver terminates with exit status 1.
    """
    script = _build_scenario_script(name, HERE.parent.parent, ITERATIONS)
    cmd = [
        sys.executable, "-m", "memray", "run",
        "--native",
        "--trace-python-allocators",
        "--force",
        "-o", str(bin_path),
        "-c", script,
    ]
    # Pass the scenario name so the loop can label its progress
    env = {**os.environ, "PERF_SCENARIO": name}
    result = subprocess.run(cmd, text=True, env=env)
    if result.returncode != 0:
        print(f" memray run failed for {name} (exit {result.returncode})", file=sys.stderr)
        sys.exit(1)
def _fmt(n: int) -> str:
    """Render a byte count as ``B``, ``KiB`` or ``MiB`` (one decimal place)."""
    if n >= 1024 ** 2:
        return f"{n / 1024**2:.1f} MiB"
    if n >= 1024:
        return f"{n / 1024:.1f} KiB"
    return f"{n} B"


def _find_regressions(name: str, metrics: dict, base_entry: dict, threshold: float) -> list[str]:
    """Return one message per gated metric that exceeds ``baseline * threshold``.

    Only ``peak_bytes`` and ``leaked_bytes`` are gated. A metric missing from
    ``base_entry`` is treated as a baseline of 0, so any non-zero measurement
    is reported (with an infinite percentage).
    """
    found: list[str] = []
    for metric in ("peak_bytes", "leaked_bytes"):
        current = metrics[metric]
        base = base_entry.get(metric, 0)
        if current > base * threshold:
            diff_pct = (current - base) / base * 100 if base else float("inf")
            found.append(
                f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}"
                f" (+{diff_pct:.1f}%, threshold {(threshold-1)*100:.0f}%)"
            )
    return found


def main() -> None:
    """Profile every scenario, render its reports and gate on the baseline.

    For each name in ``SCENARIO_NAMES`` this records a memray capture into a
    temporary ``.bin`` file, renders the peak and leak flamegraphs into
    ``REPORTS_DIR``, reads the metrics and compares them with the stored
    baseline. The baseline file is (re)written when ``--update-baseline`` is
    given or when no usable baseline was loaded. Exits with status 1 if any
    gated metric regressed.
    """
    parser = argparse.ArgumentParser(description="c2pa-python memory profiler")
    parser.add_argument(
        "--update-baseline",
        action="store_true",
        help="Overwrite baseline.json with current measurements and exit 0",
    )
    args = parser.parse_args()

    REPORTS_DIR.mkdir(parents=True, exist_ok=True)

    # Remember whether a baseline was on disk before this run: `baseline` is
    # intentionally left empty under --update-baseline, so it cannot be used
    # to tell "Updated" from "Created" in the final message.
    baseline_existed = BASELINE_FILE.exists()
    baseline: dict = {}
    if baseline_existed and not args.update_baseline:
        baseline = json.loads(BASELINE_FILE.read_text())

    results: dict = {}
    failures: list[str] = []

    total = len(SCENARIO_NAMES)
    for idx, name in enumerate(SCENARIO_NAMES, 1):
        print(f"\n=== [{idx}/{total}] {name} (iterations={ITERATIONS}) ===")

        # Only the unique path is needed; memray rewrites the file (--force).
        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
            bin_path = Path(tmp.name)

        try:
            print(" profiling...")
            _run_scenario_under_memray(name, bin_path)

            env_tag = f"-{PERF_ENV}" if PERF_ENV else ""
            report_html = REPORTS_DIR / f"{name}{env_tag}.html"
            leaks_html = REPORTS_DIR / f"{name}{env_tag}-leaks.html"
            print(" generating flamegraphs (peak + leaks)...")
            _generate_flamegraph(bin_path, report_html)
            _generate_flamegraph(bin_path, leaks_html, leaks=True)

            print(" reading metrics...", flush=True)
            metrics = _read_metrics(bin_path)
            results[name] = metrics

            print(f" peak: {_fmt(metrics['peak_bytes'])}")
            print(f" leaked: {_fmt(metrics['leaked_bytes'])}")
            print(f" allocs: {metrics['total_allocations']}")
            print(f" report: {report_html}")
            print(f" leaks: {leaks_html}")

            if baseline and name in baseline:
                failures.extend(_find_regressions(name, metrics, baseline[name], THRESHOLD))
            elif baseline:
                # A scenario added after the baseline was recorded would
                # otherwise go unchecked without anyone noticing.
                print(f" no baseline entry for {name}; skipping comparison")
        finally:
            bin_path.unlink(missing_ok=True)

    if args.update_baseline or not baseline:
        output = {"_meta": _build_meta()}
        output.update(results)
        BASELINE_FILE.write_text(json.dumps(output, indent=2))
        verb = "Updated" if baseline_existed else "Created"
        print(f"\n{verb} baseline: {BASELINE_FILE}")

    if failures:
        print("\nREGRESSIONS DETECTED:", file=sys.stderr)
        for f in failures:
            print(f" {f}", file=sys.stderr)
        sys.exit(1)

    print("\nAll scenarios within baseline thresholds.")


if __name__ == "__main__":
    main()
+ +Single source of truth shared by: +- run_profile.py (driver) +- scenarios.py + +This module intentionally has zero imports so the driver can read the names +without pulling in c2pa or any other dependency. +""" + +SCENARIO_NAMES = ( + "reader_jpeg", "reader_mp4", "reader_wav", + "builder_sign_jpeg", "builder_sign_gif", "builder_sign_heic", + "builder_sign_m4a", "builder_sign_png", "builder_sign_webp", + "builder_sign_avi", "builder_sign_mp4", "builder_sign_tiff", + "builder_sign_jpeg_parent_of", + "builder_sign_jpeg_component_of", + "builder_sign_jpeg_parent_and_component", + "builder_sign_jpeg_parent_and_component_mixed_mime", + "builder_sign_jpeg_two_components_same_mime", + "builder_sign_jpeg_two_components_mixed_mime", + "builder_sign_jpeg_archive_roundtrip", +) diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py new file mode 100644 index 00000000..9fe41312 --- /dev/null +++ b/tests/perf/scenarios.py @@ -0,0 +1,410 @@ +# Copyright 2026 Adobe. All rights reserved. +# This file is licensed to you under the Apache License, +# Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +# or the MIT license (http://opensource.org/licenses/MIT), +# at your option. + +""" +Plain functions (no pytest dependencies) that exercise the profiling scenarios. +Each function is called N times by run_profile.py. 
# Scenario name for progress output, set per-run by run_profile.py via the env.
_SCENARIO = os.environ.get("PERF_SCENARIO", "")


def _iterate(n: int):
    """Yield ``0 .. n-1`` while printing sparse progress lines to stderr.

    The memray run phase is otherwise silent for the whole scenario, which at
    high iteration counts looks hung. A line is printed every ``ceil(n / 10)``
    iterations, plus one closing line, so at most 11 lines are emitted for any
    ``n``. Flooring the step instead would print every iteration for
    ``n`` in 11..19. Output goes to stderr so it never mixes with the metrics
    written to stdout.
    """
    step = max(1, -(-n // 10))  # ceiling division; at least 1 so `%` is safe
    label = f"{_SCENARIO}: " if _SCENARIO else ""
    for i in range(n):
        if i % step == 0:
            # n > 0 whenever the loop body runs, so the division is safe.
            print(f" {label}iter {i}/{n} ({i * 100 // n}%)",
                  file=sys.stderr, flush=True)
        yield i
    print(f" {label}iter {n}/{n} (100%)", file=sys.stderr, flush=True)
_sign_file(SIGNING_FIXTURES_DIR / "sample1.gif", "image/gif", iterations) + + +def scenario_builder_sign_heic(iterations: int = 100) -> None: + _sign_file(SIGNING_FIXTURES_DIR / "sample1.heic", "image/heic", iterations) + + +def scenario_builder_sign_m4a(iterations: int = 100) -> None: + _sign_file(SIGNING_FIXTURES_DIR / "sample1.m4a", "audio/mp4", iterations) + + +def scenario_builder_sign_png(iterations: int = 100) -> None: + _sign_file(SIGNING_FIXTURES_DIR / "sample1.png", "image/png", iterations) + + +def scenario_builder_sign_webp(iterations: int = 100) -> None: + _sign_file(SIGNING_FIXTURES_DIR / "sample1.webp", "image/webp", iterations) + + +def scenario_builder_sign_avi(iterations: int = 100) -> None: + _sign_file(SIGNING_FIXTURES_DIR / "test.avi", "video/x-msvideo", iterations) + + +def scenario_builder_sign_mp4(iterations: int = 100) -> None: + _sign_file(SIGNING_FIXTURES_DIR / "video1.mp4", "video/mp4", iterations) + + +def scenario_builder_sign_tiff(iterations: int = 100) -> None: + _sign_file(SIGNING_FIXTURES_DIR / "TUSCANY.TIF", "image/tiff", iterations) + + +# Builder.sign scenarios with ingredient linking + +def scenario_builder_sign_jpeg_parent_of(iterations: int = 100) -> None: + """One parentOf ingredient linked to c2pa.opened action.""" + signer = _make_signer() + source_bytes = SOURCE_JPEG.read_bytes() + ingredient_bytes = SIGNED_JPEG.read_bytes() + manifest = { + **MANIFEST_BASE, + "assertions": [{ + "label": "c2pa.actions.v2", + "data": {"actions": [{ + "action": "c2pa.opened", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PARENT_ID]}, + "digitalSourceType": _DST_COMPOSITE, + }]}, + }], + } + for _ in _iterate(iterations): + builder = Builder(manifest) + with io.BytesIO(ingredient_bytes) as ing: + builder.add_ingredient( + {"relationship": "parentOf", "instance_id": _PARENT_ID}, + "image/jpeg", ing, + ) + builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +def 
scenario_builder_sign_jpeg_component_of(iterations: int = 100) -> None: + """One componentOf ingredient linked to c2pa.placed action.""" + signer = _make_signer() + source_bytes = SOURCE_JPEG.read_bytes() + ingredient_bytes = SIGNED_JPEG.read_bytes() + manifest = { + **MANIFEST_BASE, + "ingredients": [{"format": "image/jpeg", "relationship": "componentOf", "instance_id": _PLACED_ID}], + "assertions": [{ + "label": "c2pa.actions.v2", + "data": {"actions": [{ + "action": "c2pa.placed", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PLACED_ID]}, + "digitalSourceType": _DST_COMPOSITE, + }]}, + }], + } + for _ in _iterate(iterations): + builder = Builder(manifest) + with io.BytesIO(ingredient_bytes) as ing: + builder.add_ingredient( + {"relationship": "componentOf", "instance_id": _PLACED_ID}, + "image/jpeg", ing, + ) + builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +def scenario_builder_sign_jpeg_parent_and_component(iterations: int = 100) -> None: + """parentOf + componentOf ingredients (both JPEG) linked to opened + placed actions.""" + signer = _make_signer() + source_bytes = SOURCE_JPEG.read_bytes() + parent_bytes = SIGNED_JPEG.read_bytes() + placed_bytes = CLOUD_JPEG.read_bytes() + manifest = { + **MANIFEST_BASE, + "assertions": [{ + "label": "c2pa.actions.v2", + "data": {"actions": [ + { + "action": "c2pa.opened", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PARENT_ID2]}, + "digitalSourceType": _DST_COMPOSITE, + }, + { + "action": "c2pa.placed", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PLACED_ID2]}, + "digitalSourceType": _DST_COMPOSITE, + }, + ]}, + }], + } + for _ in _iterate(iterations): + builder = Builder(manifest) + with io.BytesIO(parent_bytes) as ing1, io.BytesIO(placed_bytes) as ing2: + builder.add_ingredient( + {"relationship": "parentOf", "instance_id": _PARENT_ID2}, "image/jpeg", ing1, + ) + builder.add_ingredient( + 
{"relationship": "componentOf", "instance_id": _PLACED_ID2}, "image/jpeg", ing2, + ) + builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +def scenario_builder_sign_jpeg_parent_and_component_mixed_mime(iterations: int = 100) -> None: + """parentOf JPEG + componentOf PNG linked to opened + placed actions.""" + signer = _make_signer() + source_bytes = SOURCE_JPEG.read_bytes() + parent_bytes = SIGNED_JPEG.read_bytes() + placed_bytes = SIGNING_PNG.read_bytes() + manifest = { + **MANIFEST_BASE, + "assertions": [{ + "label": "c2pa.actions.v2", + "data": {"actions": [ + { + "action": "c2pa.opened", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PARENT_ID3]}, + "digitalSourceType": _DST_COMPOSITE, + }, + { + "action": "c2pa.placed", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PLACED_ID3]}, + "digitalSourceType": _DST_COMPOSITE, + }, + ]}, + }], + } + for _ in _iterate(iterations): + builder = Builder(manifest) + with io.BytesIO(parent_bytes) as ing1, io.BytesIO(placed_bytes) as ing2: + builder.add_ingredient( + {"relationship": "parentOf", "instance_id": _PARENT_ID3}, "image/jpeg", ing1, + ) + builder.add_ingredient( + {"relationship": "componentOf", "instance_id": _PLACED_ID3}, "image/png", ing2, + ) + builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +def scenario_builder_sign_jpeg_two_components_same_mime(iterations: int = 100) -> None: + """Two componentOf JPEG ingredients in a single c2pa.placed action.""" + signer = _make_signer() + source_bytes = SOURCE_JPEG.read_bytes() + comp1_bytes = SIGNED_JPEG.read_bytes() + comp2_bytes = CLOUD_JPEG.read_bytes() + manifest = { + **MANIFEST_BASE, + "assertions": [{ + "label": "c2pa.actions.v2", + "data": {"actions": [{ + "action": "c2pa.placed", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PLACED_ID4, _PLACED_ID5]}, + "digitalSourceType": _DST_COMPOSITE, + }]}, + }], + } + for 
_ in _iterate(iterations): + builder = Builder(manifest) + with io.BytesIO(comp1_bytes) as ing1, io.BytesIO(comp2_bytes) as ing2: + builder.add_ingredient( + {"relationship": "componentOf", "instance_id": _PLACED_ID4}, "image/jpeg", ing1, + ) + builder.add_ingredient( + {"relationship": "componentOf", "instance_id": _PLACED_ID5}, "image/jpeg", ing2, + ) + builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +def scenario_builder_sign_jpeg_two_components_mixed_mime(iterations: int = 100) -> None: + """componentOf JPEG + componentOf PNG in a single c2pa.placed action.""" + signer = _make_signer() + source_bytes = SOURCE_JPEG.read_bytes() + comp1_bytes = SIGNED_JPEG.read_bytes() + comp2_bytes = SIGNING_PNG.read_bytes() + manifest = { + **MANIFEST_BASE, + "assertions": [{ + "label": "c2pa.actions.v2", + "data": {"actions": [{ + "action": "c2pa.placed", + "softwareAgent": {"name": "perf_test"}, + "parameters": {"ingredientIds": [_PLACED_ID4, _PLACED_ID5]}, + "digitalSourceType": _DST_COMPOSITE, + }]}, + }], + } + for _ in _iterate(iterations): + builder = Builder(manifest) + with io.BytesIO(comp1_bytes) as ing1, io.BytesIO(comp2_bytes) as ing2: + builder.add_ingredient( + {"relationship": "componentOf", "instance_id": _PLACED_ID4}, "image/jpeg", ing1, + ) + builder.add_ingredient( + {"relationship": "componentOf", "instance_id": _PLACED_ID5}, "image/png", ing2, + ) + builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +def scenario_builder_sign_jpeg_archive_roundtrip(iterations: int = 100) -> None: + """Serialize builder to archive, reload, add ingredient, sign.""" + signer = _make_signer() + source_bytes = SOURCE_JPEG.read_bytes() + ingredient_bytes = SIGNED_JPEG.read_bytes() + for _ in _iterate(iterations): + archive = io.BytesIO() + Builder(MANIFEST_BASE).to_archive(archive) + archive.seek(0) + builder = Builder.from_archive(archive) + with io.BytesIO(ingredient_bytes) as ing: + builder.add_ingredient( + 
{"relationship": "parentOf", "instance_id": _PARENT_ID}, + "image/jpeg", ing, + ) + builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +SCENARIOS = { + "reader_jpeg": scenario_reader_jpeg, + "reader_mp4": scenario_reader_mp4, + "reader_wav": scenario_reader_wav, + "builder_sign_jpeg": scenario_builder_sign_jpeg, + "builder_sign_gif": scenario_builder_sign_gif, + "builder_sign_heic": scenario_builder_sign_heic, + "builder_sign_m4a": scenario_builder_sign_m4a, + "builder_sign_png": scenario_builder_sign_png, + "builder_sign_webp": scenario_builder_sign_webp, + "builder_sign_avi": scenario_builder_sign_avi, + "builder_sign_mp4": scenario_builder_sign_mp4, + "builder_sign_tiff": scenario_builder_sign_tiff, + "builder_sign_jpeg_parent_of": scenario_builder_sign_jpeg_parent_of, + "builder_sign_jpeg_component_of": scenario_builder_sign_jpeg_component_of, + "builder_sign_jpeg_parent_and_component": scenario_builder_sign_jpeg_parent_and_component, + "builder_sign_jpeg_parent_and_component_mixed_mime": scenario_builder_sign_jpeg_parent_and_component_mixed_mime, + "builder_sign_jpeg_two_components_same_mime": scenario_builder_sign_jpeg_two_components_same_mime, + "builder_sign_jpeg_two_components_mixed_mime": scenario_builder_sign_jpeg_two_components_mixed_mime, + "builder_sign_jpeg_archive_roundtrip": scenario_builder_sign_jpeg_archive_roundtrip, +} + + +# Validate the SCENARIOS dict against the canonical name list so there is +# a single source of truth for scenario names. 
+from tests.perf.scenario_names import SCENARIO_NAMES + +_declared = set(SCENARIO_NAMES) +_defined = set(SCENARIOS) +if _declared != _defined: + raise RuntimeError( + "tests/perf scenario name mismatch:\n" + f" missing from SCENARIOS dict: {sorted(_declared - _defined)}\n" + f" extra in SCENARIOS dict: {sorted(_defined - _declared)}" + ) From 8076e12dd21c4da2aad8c5c6053edbb22cf4cdb3 Mon Sep 17 00:00:00 2001 From: Tania Mathern Date: Tue, 2 Jun 2026 08:30:06 -0700 Subject: [PATCH 02/12] fix: Initial test harness --- .gitignore | 4 ++++ Makefile | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/.gitignore b/.gitignore index c612bf1a..147e8357 100644 --- a/.gitignore +++ b/.gitignore @@ -122,3 +122,7 @@ target/ src/c2pa/libs/ !tests/fixtures/*.pem !tests/fixtures/*.key + +# Memory profiling reports +tests/perf/reports/*.html +tests/perf/reports/*.bin diff --git a/Makefile b/Makefile index ba70dfb3..d57a33bc 100644 --- a/Makefile +++ b/Makefile @@ -110,3 +110,22 @@ download-native-artifacts: # Build API documentation with Sphinx docs: python3 scripts/generate_api_docs.py + +# Memory profiling with memray (runs in Docker, reports go to tests/perf/reports/) +# Run: make memory-use-bench +# More details for usage are in tests/perf/README.md +PERF_ENV ?= python-3.12-slim +MEMRAY_ITERATIONS ?= 100 +MEMRAY_THRESHOLD ?= 1.1 +.PHONY: memory-use-bench +memory-use-bench: + docker build -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) . 
+ docker run --rm -v $(PWD):/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(PERF_ARGS) + @echo "" + @echo "Reports written to tests/perf/reports/" + @echo "Open tests/perf/reports/.html in a browser (use the leaks toggle for leak view)" + +.PHONY: clean-memory-perf-reports +clean-memory-perf-reports: + rm -f tests/perf/reports/*.html tests/perf/reports/*.bin + @echo "Cleared tests/perf/reports/" From a0f76fddcad250016c64dc453dae480f82880823 Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 18:59:13 -0700 Subject: [PATCH 03/12] fix: Readme --- Makefile | 5 +- .../Dockerfiles/ubuntu-22.04-perf-Dockerfile | 3 + .../Dockerfiles/ubuntu-24.04-perf-Dockerfile | 3 + tests/perf/README.md | 63 ++++++++--- tests/perf/baseline.json | 105 ------------------ tests/perf/run_profile.py | 24 +++- tests/perf/scenario_names.py | 30 ----- tests/perf/scenarios.py | 15 +-- 8 files changed, 78 insertions(+), 170 deletions(-) delete mode 100644 tests/perf/baseline.json delete mode 100644 tests/perf/scenario_names.py diff --git a/Makefile b/Makefile index d57a33bc..dcef53a3 100644 --- a/Makefile +++ b/Makefile @@ -112,15 +112,16 @@ docs: python3 scripts/generate_api_docs.py # Memory profiling with memray (runs in Docker, reports go to tests/perf/reports/) -# Run: make memory-use-bench # More details for usage are in tests/perf/README.md PERF_ENV ?= python-3.12-slim MEMRAY_ITERATIONS ?= 100 MEMRAY_THRESHOLD ?= 1.1 +SCENARIO ?= +SCENARIO_ARG := $(if $(SCENARIO),--scenario $(SCENARIO),) .PHONY: memory-use-bench memory-use-bench: docker build -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) . 
- docker run --rm -v $(PWD):/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(PERF_ARGS) + docker run --rm -v $(PWD):/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS) @echo "" @echo "Reports written to tests/perf/reports/" @echo "Open tests/perf/reports/.html in a browser (use the leaks toggle for leak view)" diff --git a/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile b/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile index c0aad277..649422ac 100644 --- a/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile +++ b/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile @@ -6,10 +6,13 @@ WORKDIR /workspace # Ubuntu 22.04 ships Python 3.10 as python3 by default. # libunwind for memray native stack unwinding. +# python3-dbg supplies the interpreter's debug symbols so memray can resolve +# file names + line numbers for native (C) frames in the flamegraphs. RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ python3-pip \ python3-venv \ + python3-dbg \ libunwind-dev \ ca-certificates \ && rm -rf /var/lib/apt/lists/* \ diff --git a/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile b/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile index 425fcffb..0fd3a523 100644 --- a/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile +++ b/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile @@ -6,10 +6,13 @@ WORKDIR /workspace # Ubuntu 24.04 ships Python 3.12 as python3 by default. # libunwind used for memray native stack unwinding. +# python3-dbg supplies the interpreter's debug symbols so memray can resolve +# file names + line numbers for native (C) frames in the flamegraphs. 
RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ python3-pip \ python3-venv \ + python3-dbg \ libunwind-dev \ ca-certificates \ && rm -rf /var/lib/apt/lists/* \ diff --git a/tests/perf/README.md b/tests/perf/README.md index 609017a3..d0954f50 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -18,19 +18,21 @@ and memory leaks across c2pa-python read and sign operations. Each scenario loops multiple times so leaks accumulate and become visible in the leaks flamegraph and the memory use graph (defaults to 100). Change the count of iterations when running by setting the `MEMRAY_ITERATIONS` variable (the Makefile forwards it into the container): ```bash -MEMRAY_ITERATIONS=1000 make memory-use-bench +make memory-use-bench MEMRAY_ITERATIONS=1000 ``` ## Environments Select the target environment with `PERF_ENV` (default: `python-3.12-slim`): -| `PERF_ENV` value | Base image | Python | -| --- | --- | --- | -| `python-3.12-slim` | `python:3.12-slim` | 3.12 | -| `python-3.10-slim` | `python:3.10-slim` | 3.10 | -| `ubuntu-22.04` | `ubuntu:22.04` | 3.10 (apt default) | -| `ubuntu-24.04` | `ubuntu:24.04` | 3.12 (apt default) | +| `PERF_ENV` value | Base image | Python | Native symbols | +| --- | --- | --- | --- | +| `python-3.12-slim` | `python:3.12-slim` | 3.12 | interpreter frames unresolved | +| `python-3.10-slim` | `python:3.10-slim` | 3.10 | interpreter frames unresolved | +| `ubuntu-22.04` | `ubuntu:22.04` | 3.10 (apt default) | resolved (`python3-dbg`) | +| `ubuntu-24.04` | `ubuntu:24.04` | 3.12 (apt default) | resolved (`python3-dbg`) | + +The slim images run a source-built `/usr/local/bin/python` that ships stripped, and Debian's `python3-dbg` targets a different binary (build-id mismatch), so memray cannot resolve the interpreter's native (C) frames there — you will see a "No debug information was found for the Python interpreter" warning and native traces may lack file names/line numbers. 
The ubuntu images install `python3-dbg` for the matching apt interpreter, so their native flamegraphs are fully symbolized. Use an `ubuntu-*` `PERF_ENV` when you need resolved native traces. ## Running (via Docker) @@ -47,10 +49,18 @@ make memory-use-bench PERF_ARGS=--update-baseline # Run against a different runner environment make memory-use-bench PERF_ENV=ubuntu-24.04 +# Run a single scenario instead of the whole suite +make memory-use-bench SCENARIO=builder_sign_jpeg + +# Refresh just one scenario's baseline entry (others are preserved) +make memory-use-bench SCENARIO=builder_sign_jpeg PERF_ARGS=--update-baseline + # Remove all generated HTML reports make clean-memory-perf-reports ``` +The trailing `VAR=value` arguments (e.g. `PERF_ENV=ubuntu-24.04`, `PERF_ARGS=--update-baseline`) are `make` variable overrides, not shell env vars. `make` parses `word=value` argument as a variable assignment. Each overrides a `?=` default in the Makefile, and the recipe interpolates them into the `docker build`/`docker run` commands. See [Configuration](#configuration) for the full list and what each forwards to. + Reports are written to `tests/perf/reports/` on the local machine. Two HTML files per scenario: `.html` for the peak/high-water view and `-leaks.html` for the leak view. Open either in a browser. After a run, the run also reports if the scenarios were or were not all within baseline threshold (baseline +10% memory use tolerance). ## Running without Docker (if memray is supported and installed locally) @@ -60,17 +70,36 @@ pip install memray python -m tests.perf.run_profile ``` -## Environment variables +Run a single scenario (useful for generating data for one operation without the full suite): + +```bash +python -m tests.perf.run_profile --scenario builder_sign_jpeg +``` + +With `--update-baseline`, a single-scenario run only rewrites that scenario's entry in `baseline.json`; the other scenarios' entries are preserved. 
+ +```bash +python -m tests.perf.run_profile --scenario builder_sign_jpeg --update-baseline +``` + +## Configuration + +With `make memory-use-bench VAR=value` you set the **`make` variable** and the Makefile forwards it as shown in the "Forwarded as" column. Running `run_profile.py` without Docker, you set the **env var** (or pass the CLI arg) directly. + +| `make` variable | Forwarded as | Default | Description | +| --- | --- | --- | --- | +| `PERF_ENV` | `PERF_ENV` env var | `python-3.12-slim` | Target environment; selects the Dockerfile, tags report filenames (`-.html`), recorded in `baseline.json` `_meta`. See [Environments](#environments). | +| `MEMRAY_ITERATIONS` | `MEMRAY_ITERATIONS` env var | `100` | Loop count per scenario. | +| `MEMRAY_THRESHOLD` | `MEMRAY_THRESHOLD` env var | `1.1` | Regression multiplier (1.1 = 10% tolerance). | +| `SCENARIO` | `--scenario` CLI arg | _(all)_ | Run a single scenario (e.g. `SCENARIO=builder_sign_jpeg`). | +| `PERF_ARGS` | passed straight through | _(none)_ | Extra `run_profile.py` args (e.g. `PERF_ARGS=--update-baseline`). | -| Variable | Default | Description | -| --- | --- | --- | -| `MEMRAY_ITERATIONS` | `100` | Loop count per scenario | -| `MEMRAY_THRESHOLD` | `1.1` | Regression multiplier (1.1 = 10% tolerance) | +`PERF_SCENARIO` is an additional env var, but internal: the runner sets it per scenario so the loop can label its progress. Not user-configurable. -Override iteration count: +Example to override iteration count: ```bash -MEMRAY_ITERATIONS=1000 make memory-use-bench +make memory-use-bench MEMRAY_ITERATIONS=1000 ``` ## Reading baseline.json @@ -115,7 +144,7 @@ The `_meta` block records which toolchain produced the baseline so the numbers a **`total_allocations`**: total number of individual memory allocation calls made. -### Why leaked_bytes is not zero +### Why is leaked_bytes not zero? You might expect a the baseline to show `leaked_bytes: 0`. 
In practice it never does: when the c2pa native library (`libc2pa_c.so`) is first loaded, Rust sets up global data structures that are designed to live for the entire lifetime of the process. They get cleaned up when the process exits, which is after memray stops watching. So memray sees them as "never freed" even though they are not actually leaking. @@ -123,12 +152,12 @@ A memory leak grows proportionally with work done. If you sign 50 images and get The baseline captures this expected static overhead. Future runs compare against it: if `leaked_bytes` grows beyond the baseline by more than 10%, the run fails. -### How to confirm no leak exists +### How to confirm no leak exists? Run with a higher iteration count than default (100) and compare: ```bash -MEMRAY_ITERATIONS=1000 make memory-use-bench PERF_ARGS=--update-baseline +make memory-use-bench MEMRAY_ITERATIONS=1000 PERF_ARGS=--update-baseline ``` If `leaked_bytes` stays flat compared to a 100-iteration run, there is no leak. If it scales with iterations, open `tests/perf/reports/-leaks.html` in a browser to see which function is responsible. 
diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json deleted file mode 100644 index 94c1f321..00000000 --- a/tests/perf/baseline.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "_meta": { - "memray_version": "1.19.3", - "python_version": "3.12.13", - "c2pa_native_version": "c2pa-v0.85.0", - "iterations": 1000, - "perf_env": "python-3.12-slim", - "arch": "aarch64" - }, - "reader_jpeg": { - "peak_bytes": 3919042, - "leaked_bytes": 3340989, - "total_allocations": 6288661 - }, - "reader_mp4": { - "peak_bytes": 4915860, - "leaked_bytes": 3188177, - "total_allocations": 20456930 - }, - "reader_wav": { - "peak_bytes": 5559685, - "leaked_bytes": 3198119, - "total_allocations": 3328210 - }, - "builder_sign_jpeg": { - "peak_bytes": 7727309, - "leaked_bytes": 3316737, - "total_allocations": 4456450 - }, - "builder_sign_gif": { - "peak_bytes": 14584435, - "leaked_bytes": 3316872, - "total_allocations": 71120346 - }, - "builder_sign_heic": { - "peak_bytes": 4648240, - "leaked_bytes": 3316873, - "total_allocations": 6998710 - }, - "builder_sign_m4a": { - "peak_bytes": 18888468, - "leaked_bytes": 3316920, - "total_allocations": 22022690 - }, - "builder_sign_png": { - "peak_bytes": 7965163, - "leaked_bytes": 3316920, - "total_allocations": 16175986 - }, - "builder_sign_webp": { - "peak_bytes": 8940091, - "leaked_bytes": 3316921, - "total_allocations": 4178289 - }, - "builder_sign_avi": { - "peak_bytes": 7078633, - "leaked_bytes": 3316788, - "total_allocations": 402442332 - }, - "builder_sign_mp4": { - "peak_bytes": 6202243, - "leaked_bytes": 3316920, - "total_allocations": 17384550 - }, - "builder_sign_tiff": { - "peak_bytes": 13164118, - "leaked_bytes": 3316921, - "total_allocations": 50687516 - }, - "builder_sign_jpeg_parent_of": { - "peak_bytes": 14214142, - "leaked_bytes": 3319472, - "total_allocations": 11387486 - }, - "builder_sign_jpeg_component_of": { - "peak_bytes": 14215420, - "leaked_bytes": 3319923, - "total_allocations": 11610536 - }, - 
"builder_sign_jpeg_parent_and_component": { - "peak_bytes": 14557670, - "leaked_bytes": 3462721, - "total_allocations": 20897062 - }, - "builder_sign_jpeg_parent_and_component_mixed_mime": { - "peak_bytes": 14516000, - "leaked_bytes": 3321316, - "total_allocations": 23867700 - }, - "builder_sign_jpeg_two_components_same_mime": { - "peak_bytes": 14547792, - "leaked_bytes": 3478198, - "total_allocations": 20795229 - }, - "builder_sign_jpeg_two_components_mixed_mime": { - "peak_bytes": 14512799, - "leaked_bytes": 3320210, - "total_allocations": 23765197 - }, - "builder_sign_jpeg_archive_roundtrip": { - "peak_bytes": 14239038, - "leaked_bytes": 3431958, - "total_allocations": 15966694 - } -} \ No newline at end of file diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py index 89726d9a..19640a07 100644 --- a/tests/perf/run_profile.py +++ b/tests/perf/run_profile.py @@ -36,7 +36,7 @@ import memray # Scenario name list -from tests.perf.scenario_names import SCENARIO_NAMES +from tests.perf.scenarios import SCENARIO_NAMES HERE = Path(__file__).parent REPORTS_DIR = HERE / "reports" @@ -160,8 +160,17 @@ def main() -> None: action="store_true", help="Overwrite baseline.json with current measurements and exit 0", ) + parser.add_argument( + "--scenario", + choices=SCENARIO_NAMES, + default=None, + help="Run a single scenario instead of all of them. 
With --update-baseline, " + "only that scenario's entry in baseline.json is updated; the rest are kept.", + ) args = parser.parse_args() + scenarios_to_run = (args.scenario,) if args.scenario else SCENARIO_NAMES + REPORTS_DIR.mkdir(parents=True, exist_ok=True) baseline: dict = {} @@ -171,8 +180,8 @@ def main() -> None: results: dict = {} failures: list[str] = [] - total = len(SCENARIO_NAMES) - for idx, name in enumerate(SCENARIO_NAMES, 1): + total = len(scenarios_to_run) + for idx, name in enumerate(scenarios_to_run, 1): print(f"\n=== [{idx}/{total}] {name} (iterations={ITERATIONS}) ===") with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp: @@ -215,7 +224,14 @@ def main() -> None: bin_path.unlink(missing_ok=True) if args.update_baseline or not baseline: - output = {"_meta": _build_meta()} + # When running a single scenario, merge its result into the existing + # baseline so the other scenarios' entries are preserved. A full run + # replaces the file wholesale. + if args.scenario and baseline: + output = dict(baseline) + else: + output = {} + output["_meta"] = _build_meta() output.update(results) BASELINE_FILE.write_text(json.dumps(output, indent=2)) verb = "Updated" if baseline else "Created" diff --git a/tests/perf/scenario_names.py b/tests/perf/scenario_names.py deleted file mode 100644 index 3b1c604e..00000000 --- a/tests/perf/scenario_names.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2026 Adobe. All rights reserved. -# This file is licensed to you under the Apache License, -# Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -# or the MIT license (http://opensource.org/licenses/MIT), -# at your option. - -""" -Canonical list of profiling scenario names. - -Single source of truth shared by: -- run_profile.py (driver) -- scenarios.py - -This module intentionally has zero imports so the driver can read the names -without pulling in c2pa or any other dependency. 
-""" - -SCENARIO_NAMES = ( - "reader_jpeg", "reader_mp4", "reader_wav", - "builder_sign_jpeg", "builder_sign_gif", "builder_sign_heic", - "builder_sign_m4a", "builder_sign_png", "builder_sign_webp", - "builder_sign_avi", "builder_sign_mp4", "builder_sign_tiff", - "builder_sign_jpeg_parent_of", - "builder_sign_jpeg_component_of", - "builder_sign_jpeg_parent_and_component", - "builder_sign_jpeg_parent_and_component_mixed_mime", - "builder_sign_jpeg_two_components_same_mime", - "builder_sign_jpeg_two_components_mixed_mime", - "builder_sign_jpeg_archive_roundtrip", -) diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py index 9fe41312..f5623a04 100644 --- a/tests/perf/scenarios.py +++ b/tests/perf/scenarios.py @@ -396,15 +396,6 @@ def scenario_builder_sign_jpeg_archive_roundtrip(iterations: int = 100) -> None: } -# Validate the SCENARIOS dict against the canonical name list so there is -# a single source of truth for scenario names. -from tests.perf.scenario_names import SCENARIO_NAMES - -_declared = set(SCENARIO_NAMES) -_defined = set(SCENARIOS) -if _declared != _defined: - raise RuntimeError( - "tests/perf scenario name mismatch:\n" - f" missing from SCENARIOS dict: {sorted(_declared - _defined)}\n" - f" extra in SCENARIOS dict: {sorted(_defined - _declared)}" - ) +# Canonical scenario name list, derived from SCENARIOS so the two cannot drift. +# (dict preserves insertion order, so this matches the dict's declaration order.) 
+SCENARIO_NAMES = tuple(SCENARIOS) From 92383db76b6241089653a027510a005af4a48bba Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 19:49:18 -0700 Subject: [PATCH 04/12] fix: Script updates --- Makefile | 2 +- tests/perf/README.md | 14 +++-- tests/perf/baseline.json | 105 ++++++++++++++++++++++++++++++++++++++ tests/perf/run_profile.py | 39 +++++++++----- 4 files changed, 142 insertions(+), 18 deletions(-) create mode 100644 tests/perf/baseline.json diff --git a/Makefile b/Makefile index dcef53a3..4df3a42e 100644 --- a/Makefile +++ b/Makefile @@ -124,7 +124,7 @@ memory-use-bench: docker run --rm -v $(PWD):/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS) @echo "" @echo "Reports written to tests/perf/reports/" - @echo "Open tests/perf/reports/.html in a browser (use the leaks toggle for leak view)" + @echo "Open tests/perf/reports/-{peak,leaks,temporary}.html in a browser" .PHONY: clean-memory-perf-reports clean-memory-perf-reports: diff --git a/tests/perf/README.md b/tests/perf/README.md index d0954f50..120d3165 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -11,7 +11,7 @@ and memory leaks across c2pa-python read and sign operations. | `run_profile.py` | Memory performance/usage analysis. Runs each scenario under `memray`, generates HTML reports, reads metrics, and compares against `baseline.json`. | | `Dockerfiles/` | One Dockerfile per target environment. Selected via `PERF_ENV` at `make` time when running the memory analysis. | | `entrypoint.sh` | Container entrypoint. Downloads the Linux native `libc2pa_c.so` at startup into the volume-mounted workspace so it sticks around even through the `-v` mount. | -| `reports/` | Generated HTML flamegraphs (gitignored). 
Two files per scenario: `.html` (peak/high-water view) and `-leaks.html` (leak view). | +| `reports/` | Generated HTML flamegraphs (gitignored). Three files per scenario: `-peak.html` (peak/high-water view), `-leaks.html` (leak view), and `-temporary.html` (temporary-allocations view). | ## Scenarios @@ -61,7 +61,7 @@ make clean-memory-perf-reports The trailing `VAR=value` arguments (e.g. `PERF_ENV=ubuntu-24.04`, `PERF_ARGS=--update-baseline`) are `make` variable overrides, not shell env vars. `make` parses `word=value` argument as a variable assignment. Each overrides a `?=` default in the Makefile, and the recipe interpolates them into the `docker build`/`docker run` commands. See [Configuration](#configuration) for the full list and what each forwards to. -Reports are written to `tests/perf/reports/` on the local machine. Two HTML files per scenario: `.html` for the peak/high-water view and `-leaks.html` for the leak view. Open either in a browser. After a run, the run also reports if the scenarios were or were not all within baseline threshold (baseline +10% memory use tolerance). +Reports are written to `tests/perf/reports/` on the local machine. Three HTML files per scenario: `-peak.html` for the peak/high-water view, `-leaks.html` for the leak view, and `-temporary.html` for the temporary-allocations view (short-lived churn). Open any in a browser. After a run, the run also reports if the scenarios were or were not all within baseline threshold (baseline +10% memory use tolerance). ## Running without Docker (if memray is supported and installed locally) @@ -88,7 +88,7 @@ With `make memory-use-bench VAR=value` you set the **`make` variable** and the M | `make` variable | Forwarded as | Default | Description | | --- | --- | --- | --- | -| `PERF_ENV` | `PERF_ENV` env var | `python-3.12-slim` | Target environment; selects the Dockerfile, tags report filenames (`-.html`), recorded in `baseline.json` `_meta`. See [Environments](#environments). 
| +| `PERF_ENV` | `PERF_ENV` env var | `python-3.12-slim` | Target environment; selects the Dockerfile, tags report filenames (`--.html`), recorded in `baseline.json` `_meta`. See [Environments](#environments). | | `MEMRAY_ITERATIONS` | `MEMRAY_ITERATIONS` env var | `100` | Loop count per scenario. | | `MEMRAY_THRESHOLD` | `MEMRAY_THRESHOLD` env var | `1.1` | Regression multiplier (1.1 = 10% tolerance). | | `SCENARIO` | `--scenario` CLI arg | _(all)_ | Run a single scenario (e.g. `SCENARIO=builder_sign_jpeg`). | @@ -162,7 +162,13 @@ make memory-use-bench MEMRAY_ITERATIONS=1000 PERF_ARGS=--update-baseline If `leaked_bytes` stays flat compared to a 100-iteration run, there is no leak. If it scales with iterations, open `tests/perf/reports/-leaks.html` in a browser to see which function is responsible. -### When to update baseline +### Temporary allocations + +`-temporary.html` shows **temporary allocations**: memory that is allocated and then freed almost immediately (memray's threshold is one allocation — a block is temporary if it is freed before more than one other allocation happens). These are not leaks — the memory is returned — but they are churn: high allocation/free turnover that costs CPU and can fragment the heap. A scenario doing lots of short-lived work can show heavy temporary allocations while `leaked_bytes` stays flat. + +Open the file in a browser to see which call sites are responsible. The view may be sparse or empty if a scenario does little churn, which is a valid result. Temporary allocations are not part of the baseline regression check; the graph is a debugging aid only. 
+ +### When to update the baseline Update `baseline.json` after any intentional change that affects memory use: diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json new file mode 100644 index 00000000..7ce36f79 --- /dev/null +++ b/tests/perf/baseline.json @@ -0,0 +1,105 @@ +{ + "_meta": { + "memray_version": "1.19.3", + "python_version": "3.12.13", + "c2pa_native_version": "c2pa-v0.85.1", + "iterations": 100, + "perf_env": "python-3.12-slim", + "arch": "aarch64" + }, + "reader_jpeg": { + "peak_bytes": 3735605, + "leaked_bytes": 3203124, + "total_allocations": 695795 + }, + "reader_mp4": { + "peak_bytes": 4807380, + "leaked_bytes": 3204257, + "total_allocations": 2115830 + }, + "reader_wav": { + "peak_bytes": 5451160, + "leaked_bytes": 3214154, + "total_allocations": 403212 + }, + "builder_sign_jpeg": { + "peak_bytes": 7620384, + "leaked_bytes": 3319957, + "total_allocations": 519386 + }, + "builder_sign_gif": { + "peak_bytes": 14477397, + "leaked_bytes": 3319595, + "total_allocations": 7185850 + }, + "builder_sign_heic": { + "peak_bytes": 4541366, + "leaked_bytes": 3319760, + "total_allocations": 773573 + }, + "builder_sign_m4a": { + "peak_bytes": 18782991, + "leaked_bytes": 3321202, + "total_allocations": 2276054 + }, + "builder_sign_png": { + "peak_bytes": 7859684, + "leaked_bytes": 3321202, + "total_allocations": 1691457 + }, + "builder_sign_webp": { + "peak_bytes": 8833911, + "leaked_bytes": 3320502, + "total_allocations": 490236 + }, + "builder_sign_avi": { + "peak_bytes": 6973792, + "leaked_bytes": 3320532, + "total_allocations": 40318043 + }, + "builder_sign_mp4": { + "peak_bytes": 6096061, + "leaked_bytes": 3320501, + "total_allocations": 1812271 + }, + "builder_sign_tiff": { + "peak_bytes": 13058475, + "leaked_bytes": 3321039, + "total_allocations": 5142559 + }, + "builder_sign_jpeg_parent_of": { + "peak_bytes": 14108639, + "leaked_bytes": 3321725, + "total_allocations": 1212582 + }, + "builder_sign_jpeg_component_of": { + "peak_bytes": 
14110220, + "leaked_bytes": 3321848, + "total_allocations": 1234917 + }, + "builder_sign_jpeg_parent_and_component": { + "peak_bytes": 14450911, + "leaked_bytes": 3417560, + "total_allocations": 2163776 + }, + "builder_sign_jpeg_parent_and_component_mixed_mime": { + "peak_bytes": 14410857, + "leaked_bytes": 3321863, + "total_allocations": 2454210 + }, + "builder_sign_jpeg_two_components_same_mime": { + "peak_bytes": 14439662, + "leaked_bytes": 3417435, + "total_allocations": 2153570 + }, + "builder_sign_jpeg_two_components_mixed_mime": { + "peak_bytes": 14408484, + "leaked_bytes": 3322022, + "total_allocations": 2443876 + }, + "builder_sign_jpeg_archive_roundtrip": { + "peak_bytes": 14168712, + "leaked_bytes": 3303677, + "total_allocations": 1670717 + } +} \ No newline at end of file diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py index 19640a07..c47b7afe 100644 --- a/tests/perf/run_profile.py +++ b/tests/perf/run_profile.py @@ -10,7 +10,8 @@ For each scenario in scenarios.SCENARIOS this script: - Runs the scenario under `memray run --native` -> .bin -- Generates .html (peak memory flamegraph) +- Generates three flamegraph views: -peak.html (high-water), + -leaks.html (--leaks), -temporary.html (--temporary-allocations) - Reads peak_bytes and leaked_bytes from the .bin via memray.FileReader - Compares against baseline.json (creates it on first run) - Exits non-zero if any metric exceeds baseline * threshold @@ -73,15 +74,24 @@ def _run_scenario_under_memray(name: str, bin_path: Path) -> None: sys.exit(1) -def _generate_flamegraph(bin_path: Path, out_path: Path, leaks: bool = False) -> None: +def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> None: + """Render one flamegraph view of a capture file. + + mode: + - 'peak': high-water-mark view (the default flamegraph render). + - 'leaks': memory still live when tracking stopped (--leaks). 
+ - 'temporary': allocations freed before more than one other allocation + occurs, i.e. short-lived churn (--temporary-allocations). + These are mutually exclusive views, so each is a separate render. + """ cmd = [sys.executable, "-m", "memray", "flamegraph", str(bin_path), "-o", str(out_path), "--force"] - if leaks: - # Default flamegraph renders the high-water-mark (peak) view. - # The leak view is a separate render gated behind --leaks. + if mode == "leaks": cmd.append("--leaks") + elif mode == "temporary": + # --temporary-allocations == --temporary-allocation-threshold=1 + cmd.append("--temporary-allocations") # Stream memray's output instead of capturing it, so run does not look stuck - label = "leaks" if leaks else "peak" - print(f" flamegraph ({label})...", flush=True) + print(f" flamegraph ({mode})...", flush=True) result = subprocess.run(cmd, text=True) if result.returncode != 0: print(f" flamegraph generation failed for {out_path.name} (exit {result.returncode})", file=sys.stderr) @@ -192,11 +202,13 @@ def main() -> None: _run_scenario_under_memray(name, bin_path) env_tag = f"-{PERF_ENV}" if PERF_ENV else "" - report_html = REPORTS_DIR / f"{name}{env_tag}.html" + peak_html = REPORTS_DIR / f"{name}{env_tag}-peak.html" leaks_html = REPORTS_DIR / f"{name}{env_tag}-leaks.html" - print(f" generating flamegraphs (peak + leaks)...") - _generate_flamegraph(bin_path, report_html) - _generate_flamegraph(bin_path, leaks_html, leaks=True) + temporary_html = REPORTS_DIR / f"{name}{env_tag}-temporary.html" + print(f" generating flamegraphs (peak + leaks + temporary)...") + _generate_flamegraph(bin_path, peak_html, mode="peak") + _generate_flamegraph(bin_path, leaks_html, mode="leaks") + _generate_flamegraph(bin_path, temporary_html, mode="temporary") print(f" reading metrics...", flush=True) metrics = _read_metrics(bin_path) @@ -205,8 +217,9 @@ def main() -> None: print(f" peak: {_fmt(metrics['peak_bytes'])}") print(f" leaked: {_fmt(metrics['leaked_bytes'])}") print(f" 
allocs: {metrics['total_allocations']}") - print(f" report: {report_html}") - print(f" leaks: {leaks_html}") + print(f" peak report: {peak_html}") + print(f" leaks report: {leaks_html}") + print(f" temporary report: {temporary_html}") if baseline and name in baseline: b = baseline[name] From d5ced3ad6ffdff07952988fb9c60ad998917e6ea Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 20:12:45 -0700 Subject: [PATCH 05/12] fix: Readme --- tests/perf/README.md | 44 +++++++++++++--- tests/perf/baseline.json | 105 --------------------------------------- 2 files changed, 36 insertions(+), 113 deletions(-) delete mode 100644 tests/perf/baseline.json diff --git a/tests/perf/README.md b/tests/perf/README.md index 120d3165..8e5d27d0 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -32,7 +32,7 @@ Select the target environment with `PERF_ENV` (default: `python-3.12-slim`): | `ubuntu-22.04` | `ubuntu:22.04` | 3.10 (apt default) | resolved (`python3-dbg`) | | `ubuntu-24.04` | `ubuntu:24.04` | 3.12 (apt default) | resolved (`python3-dbg`) | -The slim images run a source-built `/usr/local/bin/python` that ships stripped, and Debian's `python3-dbg` targets a different binary (build-id mismatch), so memray cannot resolve the interpreter's native (C) frames there — you will see a "No debug information was found for the Python interpreter" warning and native traces may lack file names/line numbers. The ubuntu images install `python3-dbg` for the matching apt interpreter, so their native flamegraphs are fully symbolized. Use an `ubuntu-*` `PERF_ENV` when you need resolved native traces. +The slim images run a source-built `/usr/local/bin/python` that ships stripped, and Debian's `python3-dbg` targets a different binary (build-id mismatch), so memray cannot resolve the interpreter's native (C) frames there. 
You will see a "No debug information was found for the Python interpreter" warning, and native traces may lack file names and line numbers. The ubuntu images install `python3-dbg` for the matching apt interpreter, so their native flamegraphs are fully symbolized. Use an `ubuntu-*` `PERF_ENV` when you need resolved native traces. ## Running (via Docker) @@ -61,7 +61,35 @@ make clean-memory-perf-reports The trailing `VAR=value` arguments (e.g. `PERF_ENV=ubuntu-24.04`, `PERF_ARGS=--update-baseline`) are `make` variable overrides, not shell env vars. `make` parses `word=value` argument as a variable assignment. Each overrides a `?=` default in the Makefile, and the recipe interpolates them into the `docker build`/`docker run` commands. See [Configuration](#configuration) for the full list and what each forwards to. -Reports are written to `tests/perf/reports/` on the local machine. Three HTML files per scenario: `-peak.html` for the peak/high-water view, `-leaks.html` for the leak view, and `-temporary.html` for the temporary-allocations view (short-lived churn). Open any in a browser. After a run, the run also reports if the scenarios were or were not all within baseline threshold (baseline +10% memory use tolerance). +Reports are written to `tests/perf/reports/` on the local machine. Each scenario produces three HTML files, one per suffix (described below). Open any of them in a browser. When the run finishes, it also reports whether every scenario stayed within the baseline threshold (baseline plus a 10% memory-use tolerance). + +## Report views + +Each scenario produces three [memray flamegraphs](https://bloomberg.github.io/memray/flamegraph.html). All three are flamegraphs of the same run. They differ only in which allocations they count. + +### `-peak.html`: peak/high-water view + +What it shows: allocations that were simultaneously alive at the moment the process used the most memory (the high-water mark). 
+ +Why it's useful: tells you what drives the largest memory footprint, the working set you must hold at once. Consult this view when you care about peak RSS or OOM headroom. + +How to read it: the widest frames are the biggest contributors to peak. Walk up a wide column to the top frame to find the call site holding that memory at the high-water instant. + +### `-leaks.html`: leak view + +What it shows: memory that was allocated but never freed before tracking stopped (`memray flamegraph --leaks`). + +Why it's useful: finds memory leaks, meaning memory that grows with work done. The reported leaked total is never zero, because one-time static setup (the native `libc2pa_c` library loading global structures that live for the whole process) shows as "never freed." A real leak is one that scales with iterations. Profile at `MEMRAY_ITERATIONS=100` and `MEMRAY_ITERATIONS=1000` and compare: flat means static overhead, growing means a leak. See [Why is leaked_bytes not zero?](#why-is-leaked_bytes-not-zero). + +How to read it: a wide frame here is unfreed memory. If its width grows when you raise the iteration count, that top frame is the leaking call site. + +### `-temporary.html`: temporary-allocations view + +What it shows: short-lived churn, meaning memory allocated and then freed almost immediately (memray's threshold: freed before more than one other allocation happens). + +Why it's useful: temporary allocations are not leaks, since the memory is returned, but high allocation and free turnover costs CPU and can fragment the heap. This view surfaces hot per-call churn that the peak and leak views hide, because those objects are freed between iterations and so barely register at the high-water mark. Use it when you suspect a loop is making too many short-lived allocations. + +How to read it: wide frames are the biggest sources of throwaway allocations. The view may be sparse or empty for a scenario that does little churn, which is itself a valid result. See [Temporary allocations](#temporary-allocations).
## Running without Docker (if memray is supported and installed locally) @@ -138,17 +166,17 @@ The `_meta` block records which toolchain produced the baseline so the numbers a `peak_bytes`, `total_allocations` and the `arch`/`python`/`memray` versions are all environment-sensitive: a baseline is most meaningful when compared against a run from the same `_meta`. -**`peak_bytes`**: the highest amount of memory in use at any single point during the scenario. +`peak_bytes` is the highest amount of memory in use at any single point during the scenario. -**`leaked_bytes`**: memory that was allocated during the run but never freed before the process exited. Static allocations will persist, as there are one-time loads (e.g. the native library). +`leaked_bytes` is memory that was allocated during the run and was still unfreed when memray stopped tracking. One-time static allocations, such as those made when the native library loads, always appear here. -**`total_allocations`**: total number of individual memory allocation calls made. +`total_allocations` is the total number of individual memory allocation calls made. ### Why is leaked_bytes not zero? -You might expect a the baseline to show `leaked_bytes: 0`. In practice it never does: when the c2pa native library (`libc2pa_c.so`) is first loaded, Rust sets up global data structures that are designed to live for the entire lifetime of the process. They get cleaned up when the process exits, which is after memray stops watching. So memray sees them as "never freed" even though they are not actually leaking. +You might expect the baseline to show `leaked_bytes: 0`. In practice it never does. When the c2pa native library (`libc2pa_c.so`) is first loaded, Rust sets up global data structures designed to live for the entire lifetime of the process. They get cleaned up when the process exits, which is after memray stops watching, so memray sees them as "never freed" even though they are not leaking. -A memory leak grows proportionally with work done. 
If you sign 50 images and get 3.2 MB leaked, then sign 1000 images and still get 3.2 MB leaked, that 3.2 MB is static one-time overhead, not an actual leak (since it does not grow depending on the work that ran). If signing 1000 images gave you 64 MB leaked, that would be a leak, as there is a memory leak growth growing depending on the work that was executed. +A memory leak grows proportionally with work done. If you sign 50 images and get 3.2 MB leaked, then sign 1000 images and still get 3.2 MB leaked, that 3.2 MB is static one-time overhead rather than a leak, since it does not grow with the work that ran. If signing 1000 images gave you 64 MB leaked, that would be a leak, because the leaked memory grows with the work executed. The baseline captures this expected static overhead. Future runs compare against it: if `leaked_bytes` grows beyond the baseline by more than 10%, the run fails. @@ -164,7 +192,7 @@ If `leaked_bytes` stays flat compared to a 100-iteration run, there is no leak. ### Temporary allocations -`-temporary.html` shows **temporary allocations**: memory that is allocated and then freed almost immediately (memray's threshold is one allocation — a block is temporary if it is freed before more than one other allocation happens). These are not leaks — the memory is returned — but they are churn: high allocation/free turnover that costs CPU and can fragment the heap. A scenario doing lots of short-lived work can show heavy temporary allocations while `leaked_bytes` stays flat. +`-temporary.html` shows temporary allocations, meaning memory that is allocated and then freed almost immediately (memray's threshold is one allocation: a block is temporary if it is freed before more than one other allocation happens). The memory is returned, so these are not leaks, but they are churn: high allocation and free turnover that costs CPU and can fragment the heap. 
A scenario doing lots of short-lived work can show heavy temporary allocations while `leaked_bytes` stays flat. Open the file in a browser to see which call sites are responsible. The view may be sparse or empty if a scenario does little churn, which is a valid result. Temporary allocations are not part of the baseline regression check; the graph is a debugging aid only. diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json deleted file mode 100644 index 7ce36f79..00000000 --- a/tests/perf/baseline.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "_meta": { - "memray_version": "1.19.3", - "python_version": "3.12.13", - "c2pa_native_version": "c2pa-v0.85.1", - "iterations": 100, - "perf_env": "python-3.12-slim", - "arch": "aarch64" - }, - "reader_jpeg": { - "peak_bytes": 3735605, - "leaked_bytes": 3203124, - "total_allocations": 695795 - }, - "reader_mp4": { - "peak_bytes": 4807380, - "leaked_bytes": 3204257, - "total_allocations": 2115830 - }, - "reader_wav": { - "peak_bytes": 5451160, - "leaked_bytes": 3214154, - "total_allocations": 403212 - }, - "builder_sign_jpeg": { - "peak_bytes": 7620384, - "leaked_bytes": 3319957, - "total_allocations": 519386 - }, - "builder_sign_gif": { - "peak_bytes": 14477397, - "leaked_bytes": 3319595, - "total_allocations": 7185850 - }, - "builder_sign_heic": { - "peak_bytes": 4541366, - "leaked_bytes": 3319760, - "total_allocations": 773573 - }, - "builder_sign_m4a": { - "peak_bytes": 18782991, - "leaked_bytes": 3321202, - "total_allocations": 2276054 - }, - "builder_sign_png": { - "peak_bytes": 7859684, - "leaked_bytes": 3321202, - "total_allocations": 1691457 - }, - "builder_sign_webp": { - "peak_bytes": 8833911, - "leaked_bytes": 3320502, - "total_allocations": 490236 - }, - "builder_sign_avi": { - "peak_bytes": 6973792, - "leaked_bytes": 3320532, - "total_allocations": 40318043 - }, - "builder_sign_mp4": { - "peak_bytes": 6096061, - "leaked_bytes": 3320501, - "total_allocations": 1812271 - }, - "builder_sign_tiff": { - 
"peak_bytes": 13058475, - "leaked_bytes": 3321039, - "total_allocations": 5142559 - }, - "builder_sign_jpeg_parent_of": { - "peak_bytes": 14108639, - "leaked_bytes": 3321725, - "total_allocations": 1212582 - }, - "builder_sign_jpeg_component_of": { - "peak_bytes": 14110220, - "leaked_bytes": 3321848, - "total_allocations": 1234917 - }, - "builder_sign_jpeg_parent_and_component": { - "peak_bytes": 14450911, - "leaked_bytes": 3417560, - "total_allocations": 2163776 - }, - "builder_sign_jpeg_parent_and_component_mixed_mime": { - "peak_bytes": 14410857, - "leaked_bytes": 3321863, - "total_allocations": 2454210 - }, - "builder_sign_jpeg_two_components_same_mime": { - "peak_bytes": 14439662, - "leaked_bytes": 3417435, - "total_allocations": 2153570 - }, - "builder_sign_jpeg_two_components_mixed_mime": { - "peak_bytes": 14408484, - "leaked_bytes": 3322022, - "total_allocations": 2443876 - }, - "builder_sign_jpeg_archive_roundtrip": { - "peak_bytes": 14168712, - "leaked_bytes": 3303677, - "total_allocations": 1670717 - } -} \ No newline at end of file From 0a135df747b3c2d78ac1556c2506fb46b00203cd Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 20:52:59 -0700 Subject: [PATCH 06/12] fix: Updated tests --- tests/perf/README.md | 10 ++-- tests/perf/baseline.json | 120 ++++++++++++++++++++++++++++++++++++++ tests/perf/scenarios.py | 122 +++++++++++++++++++++++++++------------ 3 files changed, 211 insertions(+), 41 deletions(-) create mode 100644 tests/perf/baseline.json diff --git a/tests/perf/README.md b/tests/perf/README.md index 8e5d27d0..f676a54d 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -21,6 +21,8 @@ Each scenario loops multiple times so leaks accumulate and become visible in the make memory-use-bench MEMRAY_ITERATIONS=1000 ``` +Most scenarios use the Context API: they build a `Context` once and reuse it across iterations, so its settings are parsed a single time. 
The jpeg and png cases also keep a `_legacy` variant that builds the `Reader`/`Builder` without a `Context`, which re-reads the thread-local settings on each construction. Running a pair (for example `builder_sign_jpeg_legacy` and `builder_sign_jpeg_with_context`) compares the two paths. + ## Environments Select the target environment with `PERF_ENV` (default: `python-3.12-slim`): @@ -50,10 +52,10 @@ make memory-use-bench PERF_ARGS=--update-baseline make memory-use-bench PERF_ENV=ubuntu-24.04 # Run a single scenario instead of the whole suite -make memory-use-bench SCENARIO=builder_sign_jpeg +make memory-use-bench SCENARIO=builder_sign_gif # Refresh just one scenario's baseline entry (others are preserved) -make memory-use-bench SCENARIO=builder_sign_jpeg PERF_ARGS=--update-baseline +make memory-use-bench SCENARIO=builder_sign_gif PERF_ARGS=--update-baseline # Remove all generated HTML reports make clean-memory-perf-reports @@ -101,13 +103,13 @@ python -m tests.perf.run_profile Run a single scenario (useful for generating data for one operation without the full suite): ```bash -python -m tests.perf.run_profile --scenario builder_sign_jpeg +python -m tests.perf.run_profile --scenario builder_sign_gif ``` With `--update-baseline`, a single-scenario run only rewrites that scenario's entry in `baseline.json`; the other scenarios' entries are preserved. 
```bash -python -m tests.perf.run_profile --scenario builder_sign_jpeg --update-baseline +python -m tests.perf.run_profile --scenario builder_sign_gif --update-baseline ``` ## Configuration diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json new file mode 100644 index 00000000..41dd4b11 --- /dev/null +++ b/tests/perf/baseline.json @@ -0,0 +1,120 @@ +{ + "_meta": { + "memray_version": "1.19.3", + "python_version": "3.12.13", + "c2pa_native_version": "c2pa-v0.85.1", + "iterations": 20, + "perf_env": "python-3.12-slim", + "arch": "aarch64" + }, + "reader_jpeg_legacy": { + "peak_bytes": 3708873, + "leaked_bytes": 3176394, + "total_allocations": 201698 + }, + "reader_jpeg_with_context": { + "peak_bytes": 3701615, + "leaked_bytes": 3168500, + "total_allocations": 200473 + }, + "reader_mp4": { + "peak_bytes": 4773723, + "leaked_bytes": 3170004, + "total_allocations": 484511 + }, + "reader_wav": { + "peak_bytes": 5417548, + "leaked_bytes": 3179946, + "total_allocations": 142211 + }, + "builder_sign_jpeg_legacy": { + "peak_bytes": 7575647, + "leaked_bytes": 3275161, + "total_allocations": 166775 + }, + "builder_sign_jpeg_with_context": { + "peak_bytes": 7569823, + "leaked_bytes": 3269085, + "total_allocations": 165627 + }, + "builder_sign_png_legacy": { + "peak_bytes": 7813477, + "leaked_bytes": 3274992, + "total_allocations": 401211 + }, + "builder_sign_png_with_context": { + "peak_bytes": 7807341, + "leaked_bytes": 3269072, + "total_allocations": 400018 + }, + "builder_sign_gif": { + "peak_bytes": 14425854, + "leaked_bytes": 3267823, + "total_allocations": 1498929 + }, + "builder_sign_heic": { + "peak_bytes": 4489702, + "leaked_bytes": 3267869, + "total_allocations": 216475 + }, + "builder_sign_m4a": { + "peak_bytes": 18729821, + "leaked_bytes": 3267803, + "total_allocations": 516969 + }, + "builder_sign_webp": { + "peak_bytes": 8781442, + "leaked_bytes": 3267804, + "total_allocations": 159830 + }, + "builder_sign_avi": { + "peak_bytes": 6921294, + 
"leaked_bytes": 3267803, + "total_allocations": 8125369 + }, + "builder_sign_mp4": { + "peak_bytes": 6043592, + "leaked_bytes": 3267803, + "total_allocations": 424223 + }, + "builder_sign_tiff": { + "peak_bytes": 13005040, + "leaked_bytes": 3267330, + "total_allocations": 1090303 + }, + "builder_sign_jpeg_parent_of": { + "peak_bytes": 14056720, + "leaked_bytes": 3269920, + "total_allocations": 304324 + }, + "builder_sign_jpeg_component_of": { + "peak_bytes": 14058389, + "leaked_bytes": 3270187, + "total_allocations": 308806 + }, + "builder_sign_jpeg_parent_and_component": { + "peak_bytes": 14408397, + "leaked_bytes": 3434911, + "total_allocations": 494789 + }, + "builder_sign_jpeg_parent_and_component_mixed_mime": { + "peak_bytes": 14359472, + "leaked_bytes": 3270648, + "total_allocations": 552704 + }, + "builder_sign_jpeg_two_components_same_mime": { + "peak_bytes": 14389022, + "leaked_bytes": 3417992, + "total_allocations": 492787 + }, + "builder_sign_jpeg_two_components_mixed_mime": { + "peak_bytes": 14356999, + "leaked_bytes": 3270651, + "total_allocations": 550625 + }, + "builder_sign_jpeg_archive_roundtrip": { + "peak_bytes": 14096563, + "leaked_bytes": 3296281, + "total_allocations": 398663 + } +} \ No newline at end of file diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py index f5623a04..dacbb3bb 100644 --- a/tests/perf/scenarios.py +++ b/tests/perf/scenarios.py @@ -13,7 +13,7 @@ import os import sys from pathlib import Path -from c2pa import Builder, C2paSignerInfo, Reader, Signer +from c2pa import Builder, C2paSignerInfo, Context, Reader, Signer FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" READING_FIXTURES_DIR = FIXTURES_DIR / "files-for-reading-tests" @@ -110,63 +110,91 @@ def _read_file(path: Path, mime: str, iterations: int) -> None: reader.close() +# Context-API helpers: the Context is built once before the loop and reused on +# every iteration, so its settings are parsed a single time. Most scenarios use +# these. 
The `_legacy` jpeg/png scenarios build the Reader/Builder without a +# Context, which re-reads thread-local settings on each construction; running a +# legacy scenario against its `_with_context` pair isolates the settings cost. + +def _sign_file_context(path: Path, mime: str, iterations: int) -> None: + signer = _make_signer() + context = Context(signer=signer) # signer is consumed into the context + source_bytes = path.read_bytes() + manifest = {**MANIFEST_BASE, "format": mime} + for _ in _iterate(iterations): + source = io.BytesIO(source_bytes) + output = io.BytesIO() + builder = Builder(manifest, context=context) + # str first arg selects the context signer (c2pa_builder_sign_context). + builder.sign(mime, source, output) + + +def _read_file_context(path: Path, mime: str, iterations: int) -> None: + context = Context() + for _ in _iterate(iterations): + with open(path, "rb") as f: + reader = Reader(mime, f, manifest_data=None, context=context) + reader.json() + reader.close() + + # Reader scenarios: read manifests from files with manifests -def scenario_reader_jpeg(iterations: int = 100) -> None: +def scenario_reader_jpeg_legacy(iterations: int = 100) -> None: _read_file(SIGNED_JPEG, "image/jpeg", iterations) def scenario_reader_mp4(iterations: int = 100) -> None: - _read_file(READING_FIXTURES_DIR / "video1.mp4", "video/mp4", iterations) + _read_file_context(READING_FIXTURES_DIR / "video1.mp4", "video/mp4", iterations) def scenario_reader_wav(iterations: int = 100) -> None: - _read_file(READING_FIXTURES_DIR / "sample1_signed.wav", "audio/wav", iterations) + _read_file_context(READING_FIXTURES_DIR / "sample1_signed.wav", "audio/wav", iterations) # Builder.sign (without ingredients)) -def scenario_builder_sign_jpeg(iterations: int = 100) -> None: +def scenario_builder_sign_jpeg_legacy(iterations: int = 100) -> None: _sign_file(SOURCE_JPEG, "image/jpeg", iterations) def scenario_builder_sign_gif(iterations: int = 100) -> None: - _sign_file(SIGNING_FIXTURES_DIR / 
"sample1.gif", "image/gif", iterations) + _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.gif", "image/gif", iterations) def scenario_builder_sign_heic(iterations: int = 100) -> None: - _sign_file(SIGNING_FIXTURES_DIR / "sample1.heic", "image/heic", iterations) + _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.heic", "image/heic", iterations) def scenario_builder_sign_m4a(iterations: int = 100) -> None: - _sign_file(SIGNING_FIXTURES_DIR / "sample1.m4a", "audio/mp4", iterations) + _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.m4a", "audio/mp4", iterations) -def scenario_builder_sign_png(iterations: int = 100) -> None: +def scenario_builder_sign_png_legacy(iterations: int = 100) -> None: _sign_file(SIGNING_FIXTURES_DIR / "sample1.png", "image/png", iterations) def scenario_builder_sign_webp(iterations: int = 100) -> None: - _sign_file(SIGNING_FIXTURES_DIR / "sample1.webp", "image/webp", iterations) + _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.webp", "image/webp", iterations) def scenario_builder_sign_avi(iterations: int = 100) -> None: - _sign_file(SIGNING_FIXTURES_DIR / "test.avi", "video/x-msvideo", iterations) + _sign_file_context(SIGNING_FIXTURES_DIR / "test.avi", "video/x-msvideo", iterations) def scenario_builder_sign_mp4(iterations: int = 100) -> None: - _sign_file(SIGNING_FIXTURES_DIR / "video1.mp4", "video/mp4", iterations) + _sign_file_context(SIGNING_FIXTURES_DIR / "video1.mp4", "video/mp4", iterations) def scenario_builder_sign_tiff(iterations: int = 100) -> None: - _sign_file(SIGNING_FIXTURES_DIR / "TUSCANY.TIF", "image/tiff", iterations) + _sign_file_context(SIGNING_FIXTURES_DIR / "TUSCANY.TIF", "image/tiff", iterations) # Builder.sign scenarios with ingredient linking def scenario_builder_sign_jpeg_parent_of(iterations: int = 100) -> None: """One parentOf ingredient linked to c2pa.opened action.""" - signer = _make_signer() + context = Context(signer=_make_signer()) source_bytes = SOURCE_JPEG.read_bytes() ingredient_bytes = 
SIGNED_JPEG.read_bytes() manifest = { @@ -182,18 +210,18 @@ def scenario_builder_sign_jpeg_parent_of(iterations: int = 100) -> None: }], } for _ in _iterate(iterations): - builder = Builder(manifest) + builder = Builder(manifest, context=context) with io.BytesIO(ingredient_bytes) as ing: builder.add_ingredient( {"relationship": "parentOf", "instance_id": _PARENT_ID}, "image/jpeg", ing, ) - builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) def scenario_builder_sign_jpeg_component_of(iterations: int = 100) -> None: """One componentOf ingredient linked to c2pa.placed action.""" - signer = _make_signer() + context = Context(signer=_make_signer()) source_bytes = SOURCE_JPEG.read_bytes() ingredient_bytes = SIGNED_JPEG.read_bytes() manifest = { @@ -210,18 +238,18 @@ def scenario_builder_sign_jpeg_component_of(iterations: int = 100) -> None: }], } for _ in _iterate(iterations): - builder = Builder(manifest) + builder = Builder(manifest, context=context) with io.BytesIO(ingredient_bytes) as ing: builder.add_ingredient( {"relationship": "componentOf", "instance_id": _PLACED_ID}, "image/jpeg", ing, ) - builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) def scenario_builder_sign_jpeg_parent_and_component(iterations: int = 100) -> None: """parentOf + componentOf ingredients (both JPEG) linked to opened + placed actions.""" - signer = _make_signer() + context = Context(signer=_make_signer()) source_bytes = SOURCE_JPEG.read_bytes() parent_bytes = SIGNED_JPEG.read_bytes() placed_bytes = CLOUD_JPEG.read_bytes() @@ -246,7 +274,7 @@ def scenario_builder_sign_jpeg_parent_and_component(iterations: int = 100) -> No }], } for _ in _iterate(iterations): - builder = Builder(manifest) + builder = Builder(manifest, context=context) with io.BytesIO(parent_bytes) as ing1, io.BytesIO(placed_bytes) as ing2: 
builder.add_ingredient( {"relationship": "parentOf", "instance_id": _PARENT_ID2}, "image/jpeg", ing1, @@ -254,12 +282,12 @@ def scenario_builder_sign_jpeg_parent_and_component(iterations: int = 100) -> No builder.add_ingredient( {"relationship": "componentOf", "instance_id": _PLACED_ID2}, "image/jpeg", ing2, ) - builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) def scenario_builder_sign_jpeg_parent_and_component_mixed_mime(iterations: int = 100) -> None: """parentOf JPEG + componentOf PNG linked to opened + placed actions.""" - signer = _make_signer() + context = Context(signer=_make_signer()) source_bytes = SOURCE_JPEG.read_bytes() parent_bytes = SIGNED_JPEG.read_bytes() placed_bytes = SIGNING_PNG.read_bytes() @@ -284,7 +312,7 @@ def scenario_builder_sign_jpeg_parent_and_component_mixed_mime(iterations: int = }], } for _ in _iterate(iterations): - builder = Builder(manifest) + builder = Builder(manifest, context=context) with io.BytesIO(parent_bytes) as ing1, io.BytesIO(placed_bytes) as ing2: builder.add_ingredient( {"relationship": "parentOf", "instance_id": _PARENT_ID3}, "image/jpeg", ing1, @@ -292,12 +320,12 @@ def scenario_builder_sign_jpeg_parent_and_component_mixed_mime(iterations: int = builder.add_ingredient( {"relationship": "componentOf", "instance_id": _PLACED_ID3}, "image/png", ing2, ) - builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) def scenario_builder_sign_jpeg_two_components_same_mime(iterations: int = 100) -> None: """Two componentOf JPEG ingredients in a single c2pa.placed action.""" - signer = _make_signer() + context = Context(signer=_make_signer()) source_bytes = SOURCE_JPEG.read_bytes() comp1_bytes = SIGNED_JPEG.read_bytes() comp2_bytes = CLOUD_JPEG.read_bytes() @@ -314,7 +342,7 @@ def scenario_builder_sign_jpeg_two_components_same_mime(iterations: int = 
100) - }], } for _ in _iterate(iterations): - builder = Builder(manifest) + builder = Builder(manifest, context=context) with io.BytesIO(comp1_bytes) as ing1, io.BytesIO(comp2_bytes) as ing2: builder.add_ingredient( {"relationship": "componentOf", "instance_id": _PLACED_ID4}, "image/jpeg", ing1, @@ -322,12 +350,12 @@ def scenario_builder_sign_jpeg_two_components_same_mime(iterations: int = 100) - builder.add_ingredient( {"relationship": "componentOf", "instance_id": _PLACED_ID5}, "image/jpeg", ing2, ) - builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) def scenario_builder_sign_jpeg_two_components_mixed_mime(iterations: int = 100) -> None: """componentOf JPEG + componentOf PNG in a single c2pa.placed action.""" - signer = _make_signer() + context = Context(signer=_make_signer()) source_bytes = SOURCE_JPEG.read_bytes() comp1_bytes = SIGNED_JPEG.read_bytes() comp2_bytes = SIGNING_PNG.read_bytes() @@ -344,7 +372,7 @@ def scenario_builder_sign_jpeg_two_components_mixed_mime(iterations: int = 100) }], } for _ in _iterate(iterations): - builder = Builder(manifest) + builder = Builder(manifest, context=context) with io.BytesIO(comp1_bytes) as ing1, io.BytesIO(comp2_bytes) as ing2: builder.add_ingredient( {"relationship": "componentOf", "instance_id": _PLACED_ID4}, "image/jpeg", ing1, @@ -352,36 +380,56 @@ def scenario_builder_sign_jpeg_two_components_mixed_mime(iterations: int = 100) builder.add_ingredient( {"relationship": "componentOf", "instance_id": _PLACED_ID5}, "image/png", ing2, ) - builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) def scenario_builder_sign_jpeg_archive_roundtrip(iterations: int = 100) -> None: """Serialize builder to archive, reload, add ingredient, sign.""" - signer = _make_signer() + context = Context(signer=_make_signer()) source_bytes = 
SOURCE_JPEG.read_bytes() ingredient_bytes = SIGNED_JPEG.read_bytes() for _ in _iterate(iterations): archive = io.BytesIO() Builder(MANIFEST_BASE).to_archive(archive) archive.seek(0) - builder = Builder.from_archive(archive) + # from_archive() yields a context-less Builder; to keep the Context + # (and its signer), build with the context first, then load the archive. + builder = Builder(MANIFEST_BASE, context=context).with_archive(archive) with io.BytesIO(ingredient_bytes) as ing: builder.add_ingredient( {"relationship": "parentOf", "instance_id": _PARENT_ID}, "image/jpeg", ing, ) - builder.sign(signer, "image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO()) + + +# jpeg + png context variants, paired with the `_legacy` scenarios above for +# side-by-side comparison. + +def scenario_builder_sign_jpeg_with_context(iterations: int = 100) -> None: + _sign_file_context(SOURCE_JPEG, "image/jpeg", iterations) + + +def scenario_builder_sign_png_with_context(iterations: int = 100) -> None: + _sign_file_context(SIGNING_PNG, "image/png", iterations) + + +def scenario_reader_jpeg_with_context(iterations: int = 100) -> None: + _read_file_context(SIGNED_JPEG, "image/jpeg", iterations) SCENARIOS = { - "reader_jpeg": scenario_reader_jpeg, + "reader_jpeg_legacy": scenario_reader_jpeg_legacy, + "reader_jpeg_with_context": scenario_reader_jpeg_with_context, "reader_mp4": scenario_reader_mp4, "reader_wav": scenario_reader_wav, - "builder_sign_jpeg": scenario_builder_sign_jpeg, + "builder_sign_jpeg_legacy": scenario_builder_sign_jpeg_legacy, + "builder_sign_jpeg_with_context": scenario_builder_sign_jpeg_with_context, + "builder_sign_png_legacy": scenario_builder_sign_png_legacy, + "builder_sign_png_with_context": scenario_builder_sign_png_with_context, "builder_sign_gif": scenario_builder_sign_gif, "builder_sign_heic": scenario_builder_sign_heic, "builder_sign_m4a": scenario_builder_sign_m4a, - "builder_sign_png": 
scenario_builder_sign_png, "builder_sign_webp": scenario_builder_sign_webp, "builder_sign_avi": scenario_builder_sign_avi, "builder_sign_mp4": scenario_builder_sign_mp4, From 8d1afe5153a2abaf3297667c3b86459b926aa018 Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 21:04:01 -0700 Subject: [PATCH 07/12] fix: Updated tests --- tests/perf/README.md | 2 + tests/perf/scenarios.py | 93 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/tests/perf/README.md b/tests/perf/README.md index f676a54d..8ce24b6f 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -23,6 +23,8 @@ make memory-use-bench MEMRAY_ITERATIONS=1000 Most scenarios use the Context API: they build a `Context` once and reuse it across iterations, so its settings are parsed a single time. The jpeg and png cases also keep a `_legacy` variant that builds the `Reader`/`Builder` without a `Context`, which re-reads the thread-local settings on each construction. Running a pair (for example `builder_sign_jpeg_legacy` and `builder_sign_jpeg_with_context`) compares the two paths. +The `builder_sign_{jpeg,png}_parallel_*` scenarios build one `Context` and share it across 10 threads that sign concurrently, each with its own streams and `Builder`. The name encodes two axes. `split` divides the iteration budget across the threads, so total work matches a single-threaded scenario; `full` runs the full loop on each of the 10 threads, so total work is 10x (use these with `SCENARIO=` rather than the whole suite). `pool` runs the threads through a `ThreadPoolExecutor`; `barrier` starts all 10 at once with a `threading.Barrier`. 
+ ## Environments Select the target environment with `PERF_ENV` (default: `python-3.12-slim`): diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py index dacbb3bb..cd464f8e 100644 --- a/tests/perf/scenarios.py +++ b/tests/perf/scenarios.py @@ -12,6 +12,8 @@ import io import os import sys +import threading +from concurrent.futures import ThreadPoolExecutor from pathlib import Path from c2pa import Builder, C2paSignerInfo, Context, Reader, Signer @@ -138,6 +140,64 @@ def _read_file_context(path: Path, mime: str, iterations: int) -> None: reader.close() +# Parallel signing: one Context built once and shared across threads. Each +# thread uses its own BytesIO source/dest and its own Builder per sign; the +# Context (and its signer) is only read. This exercises Context thread-safety +# under concurrent signing. + +_PARALLEL_THREADS = 10 + + +def _sign_parallel(path: Path, mime: str, iterations: int, *, + per_thread_full: bool, launch: str) -> None: + """Sign from `_PARALLEL_THREADS` threads sharing one Context. + + per_thread_full=False: the iteration budget is split across threads (each + does iterations // _PARALLEL_THREADS), so total work matches the + single-threaded scenarios. + per_thread_full=True: each thread runs the full `iterations` loop, so total + work is _PARALLEL_THREADS x iterations (aggregate concurrent load). + launch="pool": ThreadPoolExecutor(max_workers=_PARALLEL_THREADS). + launch="barrier": threads released together by a Barrier so all signs run + simultaneously (peak Context contention). 
+ """ + signer = _make_signer() + context = Context(signer=signer) # built once, shared, kept open + source_bytes = path.read_bytes() + manifest = {**MANIFEST_BASE, "format": mime} + + per_thread = ( + iterations if per_thread_full + else max(1, iterations // _PARALLEL_THREADS) + ) + + def work(barrier=None): + if barrier is not None: + barrier.wait() # release all threads at once + for _ in range(per_thread): + source = io.BytesIO(source_bytes) # per-thread, never shared + output = io.BytesIO() + builder = Builder(manifest, context=context) + # str first arg selects the context signer. + builder.sign(mime, source, output) + + if launch == "pool": + with ThreadPoolExecutor(max_workers=_PARALLEL_THREADS) as ex: + futures = [ex.submit(work) for _ in range(_PARALLEL_THREADS)] + for f in futures: + f.result() # surface exceptions from worker threads + else: # barrier + barrier = threading.Barrier(_PARALLEL_THREADS) + threads = [ + threading.Thread(target=work, args=(barrier,)) + for _ in range(_PARALLEL_THREADS) + ] + for t in threads: + t.start() + for t in threads: + t.join() + + # Reader scenarios: read manifests from files with manifests def scenario_reader_jpeg_legacy(iterations: int = 100) -> None: @@ -418,6 +478,33 @@ def scenario_reader_jpeg_with_context(iterations: int = 100) -> None: _read_file_context(SIGNED_JPEG, "image/jpeg", iterations) +# Parallel signing variants: one shared Context across 10 threads. +# {split, full} x {pool, barrier} x {jpeg, png}. 
+ +def scenario_builder_sign_jpeg_parallel_split_pool(iterations: int = 100) -> None: + _sign_parallel(SOURCE_JPEG, "image/jpeg", iterations, per_thread_full=False, launch="pool") + + +def scenario_builder_sign_jpeg_parallel_split_barrier(iterations: int = 100) -> None: + _sign_parallel(SOURCE_JPEG, "image/jpeg", iterations, per_thread_full=False, launch="barrier") + + +def scenario_builder_sign_png_parallel_split_pool(iterations: int = 100) -> None: + _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=False, launch="pool") + + +def scenario_builder_sign_png_parallel_split_barrier(iterations: int = 100) -> None: + _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=False, launch="barrier") + + +def scenario_builder_sign_png_parallel_full_pool(iterations: int = 100) -> None: + _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=True, launch="pool") + + +def scenario_builder_sign_png_parallel_full_barrier(iterations: int = 100) -> None: + _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=True, launch="barrier") + + SCENARIOS = { "reader_jpeg_legacy": scenario_reader_jpeg_legacy, "reader_jpeg_with_context": scenario_reader_jpeg_with_context, @@ -427,6 +514,12 @@ def scenario_reader_jpeg_with_context(iterations: int = 100) -> None: "builder_sign_jpeg_with_context": scenario_builder_sign_jpeg_with_context, "builder_sign_png_legacy": scenario_builder_sign_png_legacy, "builder_sign_png_with_context": scenario_builder_sign_png_with_context, + "builder_sign_jpeg_parallel_split_pool": scenario_builder_sign_jpeg_parallel_split_pool, + "builder_sign_jpeg_parallel_split_barrier": scenario_builder_sign_jpeg_parallel_split_barrier, + "builder_sign_png_parallel_split_pool": scenario_builder_sign_png_parallel_split_pool, + "builder_sign_png_parallel_split_barrier": scenario_builder_sign_png_parallel_split_barrier, + "builder_sign_png_parallel_full_pool": scenario_builder_sign_png_parallel_full_pool, + 
"builder_sign_png_parallel_full_barrier": scenario_builder_sign_png_parallel_full_barrier, "builder_sign_gif": scenario_builder_sign_gif, "builder_sign_heic": scenario_builder_sign_heic, "builder_sign_m4a": scenario_builder_sign_m4a, From 68f825d93166ec0c5fd2c082a5b5f0480943e73e Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 21:34:48 -0700 Subject: [PATCH 08/12] fix: Updated tests --- tests/perf/README.md | 16 +++- tests/perf/baseline.json | 164 +++++++++++++++++++++++---------------- 2 files changed, 109 insertions(+), 71 deletions(-) diff --git a/tests/perf/README.md b/tests/perf/README.md index 8ce24b6f..fa73a0fd 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -1,4 +1,4 @@ -# Memory Profiling Harness +# Memory profiling framework Uses [memray](https://github.com/bloomberg/memray) to track peak memory, allocation patterns, and memory leaks across c2pa-python read and sign operations. @@ -11,7 +11,7 @@ and memory leaks across c2pa-python read and sign operations. | `run_profile.py` | Memory performance/usage analysis. Runs each scenario under `memray`, generates HTML reports, reads metrics, and compares against `baseline.json`. | | `Dockerfiles/` | One Dockerfile per target environment. Selected via `PERF_ENV` at `make` time when running the memory analysis. | | `entrypoint.sh` | Container entrypoint. Downloads the Linux native `libc2pa_c.so` at startup into the volume-mounted workspace so it sticks around even through the `-v` mount. | -| `reports/` | Generated HTML flamegraphs (gitignored). Three files per scenario: `-peak.html` (peak/high-water view), `-leaks.html` (leak view), and `-temporary.html` (temporary-allocations view). | +| `reports/` | Generated HTML reports (gitignored). Three files per scenario: `-peak.html` (peak/high-water view), `-leaks.html` (leak view), and `-temporary.html` (temporary-allocations view). 
| ## Scenarios @@ -162,7 +162,7 @@ The `_meta` block records which toolchain produced the baseline so the numbers a | `_meta` field | Meaning | | --- | --- | | `memray_version` | memray version that generated the metrics | -| `python_version` | Python version that ran the test harness | +| `python_version` | Python version that ran the test framework | | `c2pa_native_version` | native `libc2pa_c` version (from `c2pa-native-version.txt`) | | `iterations` | `MEMRAY_ITERATIONS` used for the run | | `perf_env` | `PERF_ENV` (target environment) | @@ -194,11 +194,19 @@ make memory-use-bench MEMRAY_ITERATIONS=1000 PERF_ARGS=--update-baseline If `leaked_bytes` stays flat compared to a 100-iteration run, there is no leak. If it scales with iterations, open `tests/perf/reports/-leaks.html` in a browser to see which function is responsible. +### Reading the "Resident set size over time" graph (why memory looks like it climbs) + +The "Resident set size over time" plot (chart icon, top-right of the report) draws two lines. "Resident size" (RSS) is every page the OS counts as resident: interpreter, `libc2pa_c`, thread stacks, and pages the allocator holds but has not returned. "Heap size" is only the live tracked allocations. + +On the parallel scenarios the RSS line steps up and stays high. The threads each hold their own source, output, and `Builder` live at once, so RSS rises to cover that combined working set (the steps line up with the moments all threads overlap). The allocator then keeps those arena pages for reuse instead of returning them, so RSS plateaus at the high-water mark. + +Judge leaks by the heap line. The heap rises early and then settles or falls, the same shape as the single-threaded baseline. A within-run heap rise is not by itself proof of a leak (the allocator high-water can climb and settle within a bounded run). 
+ ### Temporary allocations `-temporary.html` shows temporary allocations, meaning memory that is allocated and then freed almost immediately (memray's threshold is one allocation: a block is temporary if it is freed before more than one other allocation happens). The memory is returned, so these are not leaks, but they are churn: high allocation and free turnover that costs CPU and can fragment the heap. A scenario doing lots of short-lived work can show heavy temporary allocations while `leaked_bytes` stays flat. -Open the file in a browser to see which call sites are responsible. The view may be sparse or empty if a scenario does little churn, which is a valid result. Temporary allocations are not part of the baseline regression check; the graph is a debugging aid only. +Open the file in a browser to see which call sites are responsible. The view may be sparse or empty if a scenario does little churn. Note that temporary allocations are not part of the baseline regression check: that graph is a debugging aid only. 
### When to update the baseline diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json index 41dd4b11..cd7db163 100644 --- a/tests/perf/baseline.json +++ b/tests/perf/baseline.json @@ -3,118 +3,148 @@ "memray_version": "1.19.3", "python_version": "3.12.13", "c2pa_native_version": "c2pa-v0.85.1", - "iterations": 20, + "iterations": 100, "perf_env": "python-3.12-slim", "arch": "aarch64" }, "reader_jpeg_legacy": { - "peak_bytes": 3708873, - "leaked_bytes": 3176394, - "total_allocations": 201698 + "peak_bytes": 3814601, + "leaked_bytes": 3266541, + "total_allocations": 698915 }, "reader_jpeg_with_context": { - "peak_bytes": 3701615, - "leaked_bytes": 3168500, - "total_allocations": 200473 + "peak_bytes": 3807522, + "leaked_bytes": 3259634, + "total_allocations": 692971 }, "reader_mp4": { - "peak_bytes": 4773723, - "leaked_bytes": 3170004, - "total_allocations": 484511 + "peak_bytes": 4877918, + "leaked_bytes": 3259562, + "total_allocations": 2113007 }, "reader_wav": { - "peak_bytes": 5417548, - "leaked_bytes": 3179946, - "total_allocations": 142211 + "peak_bytes": 5521743, + "leaked_bytes": 3269504, + "total_allocations": 400387 }, "builder_sign_jpeg_legacy": { - "peak_bytes": 7575647, - "leaked_bytes": 3275161, - "total_allocations": 166775 + "peak_bytes": 7695839, + "leaked_bytes": 3385100, + "total_allocations": 522543 }, "builder_sign_jpeg_with_context": { - "peak_bytes": 7569823, - "leaked_bytes": 3269085, - "total_allocations": 165627 + "peak_bytes": 7688843, + "leaked_bytes": 3377852, + "total_allocations": 516747 }, "builder_sign_png_legacy": { - "peak_bytes": 7813477, - "leaked_bytes": 3274992, - "total_allocations": 401211 + "peak_bytes": 7933706, + "leaked_bytes": 3385542, + "total_allocations": 1694638 }, "builder_sign_png_with_context": { - "peak_bytes": 7807341, - "leaked_bytes": 3269072, - "total_allocations": 400018 + "peak_bytes": 7926495, + "leaked_bytes": 3377973, + "total_allocations": 1688857 + }, + 
"builder_sign_jpeg_parallel_split_pool": { + "peak_bytes": 45766622, + "leaked_bytes": 3819391, + "total_allocations": 528677 + }, + "builder_sign_jpeg_parallel_split_barrier": { + "peak_bytes": 45734302, + "leaked_bytes": 3809638, + "total_allocations": 527440 + }, + "builder_sign_png_parallel_split_pool": { + "peak_bytes": 46004425, + "leaked_bytes": 3820410, + "total_allocations": 1700717 + }, + "builder_sign_png_parallel_split_barrier": { + "peak_bytes": 45972521, + "leaked_bytes": 3814097, + "total_allocations": 1699504 + }, + "builder_sign_png_parallel_full_pool": { + "peak_bytes": 46495487, + "leaked_bytes": 3893924, + "total_allocations": 16164989 + }, + "builder_sign_png_parallel_full_barrier": { + "peak_bytes": 45972519, + "leaked_bytes": 3870183, + "total_allocations": 16163593 }, "builder_sign_gif": { - "peak_bytes": 14425854, - "leaked_bytes": 3267823, - "total_allocations": 1498929 + "peak_bytes": 14545919, + "leaked_bytes": 3378154, + "total_allocations": 7183245 }, "builder_sign_heic": { - "peak_bytes": 4489702, - "leaked_bytes": 3267869, - "total_allocations": 216475 + "peak_bytes": 4609191, + "leaked_bytes": 3378224, + "total_allocations": 771063 }, "builder_sign_m4a": { - "peak_bytes": 18729821, - "leaked_bytes": 3267803, - "total_allocations": 516969 + "peak_bytes": 18849432, + "leaked_bytes": 3378282, + "total_allocations": 2273533 }, "builder_sign_webp": { - "peak_bytes": 8781442, - "leaked_bytes": 3267804, - "total_allocations": 159830 + "peak_bytes": 8901434, + "leaked_bytes": 3378224, + "total_allocations": 487631 }, "builder_sign_avi": { - "peak_bytes": 6921294, - "leaked_bytes": 3267803, - "total_allocations": 8125369 + "peak_bytes": 7040821, + "leaked_bytes": 3378012, + "total_allocations": 40315491 }, "builder_sign_mp4": { - "peak_bytes": 6043592, - "leaked_bytes": 3267803, - "total_allocations": 424223 + "peak_bytes": 6163287, + "leaked_bytes": 3378176, + "total_allocations": 1809680 }, "builder_sign_tiff": { - "peak_bytes": 13005040, 
- "leaked_bytes": 3267330, - "total_allocations": 1090303 + "peak_bytes": 13125777, + "leaked_bytes": 3378214, + "total_allocations": 5139996 }, "builder_sign_jpeg_parent_of": { - "peak_bytes": 14056720, - "leaked_bytes": 3269920, - "total_allocations": 304324 + "peak_bytes": 14174948, + "leaked_bytes": 3379432, + "total_allocations": 1209913 }, "builder_sign_jpeg_component_of": { - "peak_bytes": 14058389, - "leaked_bytes": 3270187, - "total_allocations": 308806 + "peak_bytes": 14176483, + "leaked_bytes": 3379509, + "total_allocations": 1232398 }, "builder_sign_jpeg_parent_and_component": { - "peak_bytes": 14408397, - "leaked_bytes": 3434911, - "total_allocations": 494789 + "peak_bytes": 14524112, + "leaked_bytes": 3476721, + "total_allocations": 2160920 }, "builder_sign_jpeg_parent_and_component_mixed_mime": { - "peak_bytes": 14359472, - "leaked_bytes": 3270648, - "total_allocations": 552704 + "peak_bytes": 14476564, + "leaked_bytes": 3380647, + "total_allocations": 2451479 }, "builder_sign_jpeg_two_components_same_mime": { - "peak_bytes": 14389022, - "leaked_bytes": 3417992, - "total_allocations": 492787 + "peak_bytes": 14520586, + "leaked_bytes": 3475529, + "total_allocations": 2150809 }, "builder_sign_jpeg_two_components_mixed_mime": { - "peak_bytes": 14356999, - "leaked_bytes": 3270651, - "total_allocations": 550625 + "peak_bytes": 14474050, + "leaked_bytes": 3379259, + "total_allocations": 2441187 }, "builder_sign_jpeg_archive_roundtrip": { - "peak_bytes": 14096563, - "leaked_bytes": 3296281, - "total_allocations": 398663 + "peak_bytes": 14229441, + "leaked_bytes": 3429100, + "total_allocations": 1680242 } } \ No newline at end of file From d4a972011c4345181d45cdbfb7de7789e45332dc Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 21:44:29 -0700 Subject: [PATCH 09/12] fix: Updated tests --- tests/perf/baseline.json | 166 ++++++++++++++++++--------------------- tests/perf/scenarios.py | 10 --- 2 files 
changed, 78 insertions(+), 98 deletions(-) diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json index cd7db163..302d648a 100644 --- a/tests/perf/baseline.json +++ b/tests/perf/baseline.json @@ -8,143 +8,133 @@ "arch": "aarch64" }, "reader_jpeg_legacy": { - "peak_bytes": 3814601, - "leaked_bytes": 3266541, - "total_allocations": 698915 + "peak_bytes": 3814421, + "leaked_bytes": 3266116, + "total_allocations": 698899 }, "reader_jpeg_with_context": { - "peak_bytes": 3807522, - "leaked_bytes": 3259634, - "total_allocations": 692971 + "peak_bytes": 3822953, + "leaked_bytes": 3257471, + "total_allocations": 692953 }, "reader_mp4": { - "peak_bytes": 4877918, - "leaked_bytes": 3259562, - "total_allocations": 2113007 + "peak_bytes": 4876441, + "leaked_bytes": 3257485, + "total_allocations": 2112991 }, "reader_wav": { - "peak_bytes": 5521743, - "leaked_bytes": 3269504, - "total_allocations": 400387 + "peak_bytes": 5520266, + "leaked_bytes": 3267427, + "total_allocations": 400371 }, "builder_sign_jpeg_legacy": { - "peak_bytes": 7695839, - "leaked_bytes": 3385100, - "total_allocations": 522543 + "peak_bytes": 7695310, + "leaked_bytes": 3383623, + "total_allocations": 522425 }, "builder_sign_jpeg_with_context": { - "peak_bytes": 7688843, - "leaked_bytes": 3377852, - "total_allocations": 516747 + "peak_bytes": 7688236, + "leaked_bytes": 3376293, + "total_allocations": 516851 }, "builder_sign_png_legacy": { - "peak_bytes": 7933706, - "leaked_bytes": 3385542, - "total_allocations": 1694638 + "peak_bytes": 7932767, + "leaked_bytes": 3383648, + "total_allocations": 1694629 }, "builder_sign_png_with_context": { - "peak_bytes": 7926495, - "leaked_bytes": 3377973, - "total_allocations": 1688857 + "peak_bytes": 7925490, + "leaked_bytes": 3376452, + "total_allocations": 1688908 }, "builder_sign_jpeg_parallel_split_pool": { - "peak_bytes": 45766622, - "leaked_bytes": 3819391, - "total_allocations": 528677 + "peak_bytes": 45764159, + "leaked_bytes": 3818113, + 
"total_allocations": 528785 }, "builder_sign_jpeg_parallel_split_barrier": { - "peak_bytes": 45734302, - "leaked_bytes": 3809638, - "total_allocations": 527440 + "peak_bytes": 46225287, + "leaked_bytes": 3809216, + "total_allocations": 527412 }, "builder_sign_png_parallel_split_pool": { - "peak_bytes": 46004425, - "leaked_bytes": 3820410, - "total_allocations": 1700717 + "peak_bytes": 46002549, + "leaked_bytes": 3817801, + "total_allocations": 1700731 }, "builder_sign_png_parallel_split_barrier": { - "peak_bytes": 45972521, - "leaked_bytes": 3814097, - "total_allocations": 1699504 - }, - "builder_sign_png_parallel_full_pool": { - "peak_bytes": 46495487, - "leaked_bytes": 3893924, - "total_allocations": 16164989 - }, - "builder_sign_png_parallel_full_barrier": { - "peak_bytes": 45972519, - "leaked_bytes": 3870183, - "total_allocations": 16163593 + "peak_bytes": 45970433, + "leaked_bytes": 3812044, + "total_allocations": 1699396 }, "builder_sign_gif": { - "peak_bytes": 14545919, - "leaked_bytes": 3378154, - "total_allocations": 7183245 + "peak_bytes": 14544515, + "leaked_bytes": 3375865, + "total_allocations": 7183237 }, "builder_sign_heic": { - "peak_bytes": 4609191, - "leaked_bytes": 3378224, - "total_allocations": 771063 + "peak_bytes": 4608484, + "leaked_bytes": 3376030, + "total_allocations": 771079 }, "builder_sign_m4a": { - "peak_bytes": 18849432, - "leaked_bytes": 3378282, - "total_allocations": 2273533 + "peak_bytes": 18849082, + "leaked_bytes": 3376431, + "total_allocations": 2273497 }, "builder_sign_webp": { - "peak_bytes": 8901434, - "leaked_bytes": 3378224, - "total_allocations": 487631 + "peak_bytes": 8900701, + "leaked_bytes": 3376432, + "total_allocations": 487683 }, "builder_sign_avi": { - "peak_bytes": 7040821, - "leaked_bytes": 3378012, - "total_allocations": 40315491 + "peak_bytes": 7040387, + "leaked_bytes": 3376267, + "total_allocations": 40315553 }, "builder_sign_mp4": { - "peak_bytes": 6163287, - "leaked_bytes": 3378176, - "total_allocations": 
1809680 + "peak_bytes": 6162851, + "leaked_bytes": 3376431, + "total_allocations": 1809672 }, "builder_sign_tiff": { - "peak_bytes": 13125777, - "leaked_bytes": 3378214, - "total_allocations": 5139996 + "peak_bytes": 13124728, + "leaked_bytes": 3376268, + "total_allocations": 5139967 }, "builder_sign_jpeg_parent_of": { - "peak_bytes": 14174948, - "leaked_bytes": 3379432, - "total_allocations": 1209913 + "peak_bytes": 14173992, + "leaked_bytes": 3377656, + "total_allocations": 1209933 }, "builder_sign_jpeg_component_of": { - "peak_bytes": 14176483, - "leaked_bytes": 3379509, - "total_allocations": 1232398 + "peak_bytes": 14175518, + "leaked_bytes": 3377891, + "total_allocations": 1232336 }, "builder_sign_jpeg_parent_and_component": { - "peak_bytes": 14524112, - "leaked_bytes": 3476721, - "total_allocations": 2160920 + "peak_bytes": 14530406, + "leaked_bytes": 3474418, + "total_allocations": 2160934 }, "builder_sign_jpeg_parent_and_component_mixed_mime": { - "peak_bytes": 14476564, - "leaked_bytes": 3380647, - "total_allocations": 2451479 + "peak_bytes": 14476171, + "leaked_bytes": 3378735, + "total_allocations": 2451587 }, "builder_sign_jpeg_two_components_same_mime": { - "peak_bytes": 14520586, - "leaked_bytes": 3475529, - "total_allocations": 2150809 + "peak_bytes": 14519270, + "leaked_bytes": 3473673, + "total_allocations": 2150782 }, "builder_sign_jpeg_two_components_mixed_mime": { - "peak_bytes": 14474050, - "leaked_bytes": 3379259, - "total_allocations": 2441187 + "peak_bytes": 14473127, + "leaked_bytes": 3377445, + "total_allocations": 2441195 }, "builder_sign_jpeg_archive_roundtrip": { - "peak_bytes": 14229441, - "leaked_bytes": 3429100, - "total_allocations": 1680242 + "peak_bytes": 14226832, + "leaked_bytes": 3426491, + "total_allocations": 1680290 } } \ No newline at end of file diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py index cd464f8e..0432aa20 100644 --- a/tests/perf/scenarios.py +++ b/tests/perf/scenarios.py @@ -497,14 +497,6 @@ def 
scenario_builder_sign_png_parallel_split_barrier(iterations: int = 100) -> N _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=False, launch="barrier") -def scenario_builder_sign_png_parallel_full_pool(iterations: int = 100) -> None: - _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=True, launch="pool") - - -def scenario_builder_sign_png_parallel_full_barrier(iterations: int = 100) -> None: - _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=True, launch="barrier") - - SCENARIOS = { "reader_jpeg_legacy": scenario_reader_jpeg_legacy, "reader_jpeg_with_context": scenario_reader_jpeg_with_context, @@ -518,8 +510,6 @@ def scenario_builder_sign_png_parallel_full_barrier(iterations: int = 100) -> No "builder_sign_jpeg_parallel_split_barrier": scenario_builder_sign_jpeg_parallel_split_barrier, "builder_sign_png_parallel_split_pool": scenario_builder_sign_png_parallel_split_pool, "builder_sign_png_parallel_split_barrier": scenario_builder_sign_png_parallel_split_barrier, - "builder_sign_png_parallel_full_pool": scenario_builder_sign_png_parallel_full_pool, - "builder_sign_png_parallel_full_barrier": scenario_builder_sign_png_parallel_full_barrier, "builder_sign_gif": scenario_builder_sign_gif, "builder_sign_heic": scenario_builder_sign_heic, "builder_sign_m4a": scenario_builder_sign_m4a, From 8ddc6e3782499127e3b08463a3e615eeab15e173 Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 22:18:04 -0700 Subject: [PATCH 10/12] Improve clarity on memory leak and allocation details Clarify explanations regarding memory leaks and temporary allocations in the README. 
--- tests/perf/README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/perf/README.md b/tests/perf/README.md index fa73a0fd..f6a7c09e 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -180,7 +180,7 @@ The `_meta` block records which toolchain produced the baseline so the numbers a You might expect the baseline to show `leaked_bytes: 0`. In practice it never does. When the c2pa native library (`libc2pa_c.so`) is first loaded, Rust sets up global data structures designed to live for the entire lifetime of the process. They get cleaned up when the process exits, which is after memray stops watching, so memray sees them as "never freed" even though they are not leaking. -A memory leak grows proportionally with work done. If you sign 50 images and get 3.2 MB leaked, then sign 1000 images and still get 3.2 MB leaked, that 3.2 MB is static one-time overhead rather than a leak, since it does not grow with the work that ran. If signing 1000 images gave you 64 MB leaked, that would be a leak, because the leaked memory grows with the work executed. +A memory leak grows proportionally with work done. If you sign 50 images and get 3.2 MB leaked, then sign 1000 images and still get 3.2 MB leaked, that 3.2 MB is static one-time overhead rather than a leak, since it does not grow with the work that ran. If signing 1000 images gave you 64 MB leaked, that would be a leak, as the leaked memory grows with the work executed. The baseline captures this expected static overhead. Future runs compare against it: if `leaked_bytes` grows beyond the baseline by more than 10%, the run fails. @@ -192,11 +192,11 @@ Run with a higher iteration count than default (100) and compare: make memory-use-bench MEMRAY_ITERATIONS=1000 PERF_ARGS=--update-baseline ``` -If `leaked_bytes` stays flat compared to a 100-iteration run, there is no leak. 
If it scales with iterations, open `tests/perf/reports/-leaks.html` in a browser to see which function is responsible. +If `leaked_bytes` stays flat compared to a baseline run or in a larger run (more iterations), there is no leak. If it scales with iterations, open `tests/perf/reports/-leaks.html` in a browser to see which function is responsible. ### Reading the "Resident set size over time" graph (why memory looks like it climbs) -The "Resident set size over time" plot (chart icon, top-right of the report) draws two lines. "Resident size" (RSS) is every page the OS counts as resident: interpreter, `libc2pa_c`, thread stacks, and pages the allocator holds but has not returned. "Heap size" is only the live tracked allocations. +The "Resident set size over time" plot (chart icon, top-right of the report) draws two lines. "Resident size" (RSS) is every page the OS counts as resident: interpreter and pages the allocator holds but has not returned. "Heap size" is only the live tracked allocations. On the parallel scenarios the RSS line steps up and stays high. The threads each hold their own source, output, and `Builder` live at once, so RSS rises to cover that combined working set (the steps line up with the moments all threads overlap). The allocator then keeps those arena pages for reuse instead of returning them, so RSS plateaus at the high-water mark. @@ -206,8 +206,6 @@ Judge leaks by the heap line. The heap rises early and then settles or falls, th `-temporary.html` shows temporary allocations, meaning memory that is allocated and then freed almost immediately (memray's threshold is one allocation: a block is temporary if it is freed before more than one other allocation happens). The memory is returned, so these are not leaks, but they are churn: high allocation and free turnover that costs CPU and can fragment the heap. A scenario doing lots of short-lived work can show heavy temporary allocations while `leaked_bytes` stays flat. 
-Open the file in a browser to see which call sites are responsible. The view may be sparse or empty if a scenario does little churn. Note that temporary allocations are not part of the baseline regression check: that graph is a debugging aid only. - ### When to update the baseline Update `baseline.json` after any intentional change that affects memory use: From 7d0abf23eba135076e2abfeb01325029f12fd81e Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Tue, 2 Jun 2026 22:19:02 -0700 Subject: [PATCH 11/12] Clarify memory profiling framework documentation Updated documentation to reflect SDK operations instead of just read and sign operations. --- tests/perf/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/perf/README.md b/tests/perf/README.md index f6a7c09e..38ba803e 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -1,7 +1,7 @@ # Memory profiling framework Uses [memray](https://github.com/bloomberg/memray) to track peak memory, allocation patterns, -and memory leaks across c2pa-python read and sign operations. +and memory leaks across c2pa-python SDK operations. 
## Files From 66cd8f53fb54c8efff2c78e57ffc8e417a10017a Mon Sep 17 00:00:00 2001 From: tmathern <60901087+tmathern@users.noreply.github.com> Date: Wed, 3 Jun 2026 09:46:37 -0700 Subject: [PATCH 12/12] fix: Additional fixes and changes (#274) * fix: Updated code * fix: Updated code --- src/c2pa/c2pa.py | 31 ++++++++++++++++++++++--- tests/perf/README.md | 36 +++++++++++++++++++++++++++++ tests/perf/run_profile.py | 48 ++++++++++++++++++++++++++++++++------- 3 files changed, 104 insertions(+), 11 deletions(-) diff --git a/src/c2pa/c2pa.py b/src/c2pa/c2pa.py index 4ada020d..9dffdcb6 100644 --- a/src/c2pa/c2pa.py +++ b/src/c2pa/c2pa.py @@ -1812,7 +1812,26 @@ def read_callback(ctx, data, length): if not data or length <= 0: return -1 - buffer = self._file_like_stream.read(length) + stream = self._file_like_stream + readinto = getattr(stream, "readinto", None) + if readinto is not None: + # Zero-copy fast path. from_address wraps the native buffer + # Rust handed us (size == length) without a cast object and + # without allocating a length-sized bytes. readinto fills it + # directly and returns the byte count, so there is no + # intermediate bytes, no len()/min(), and no memmove copy. + # Every binary stream (BytesIO, BufferedReader, FileIO, + # BufferedRandom) implements readinto; only text-mode or + # custom duck-typed streams lack it and fall through below. + # data is a POINTER(c_uint8); addressof(.contents) gives the + # raw int address from_address needs (no cast object). + buf = (ctypes.c_char * length).from_address( + ctypes.addressof(data.contents)) + n = readinto(buf) + return n if n else 0 + + # Fallback for streams without readinto. 
+ buffer = stream.read(length) if not buffer: # EOF return 0 @@ -1846,8 +1865,14 @@ def seek_callback(ctx, offset, whence): if not self._initialized or self._closed: return -1 try: - file_stream.seek(offset, whence) - return file_stream.tell() + # io.IOBase.seek returns the new absolute position, which is + # exactly what the Rust seek callback expects (see + # c2pa_stream.rs). Use it directly and skip a separate tell() + # call, which would allocate another Python int on every seek. + # Fall back to tell() only for stream objects that do not honor + # the return-value contract and return None. + pos = file_stream.seek(offset, whence) + return pos if pos is not None else file_stream.tell() except Exception: return -1 diff --git a/tests/perf/README.md b/tests/perf/README.md index 38ba803e..1f2ec022 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -95,6 +95,8 @@ Why it's useful: temporary allocations are not leaks, since the memory is return How to read it: wide frames are the biggest sources of throwaway allocations. The view may be sparse or empty for a scenario that does little churn, which is itself a valid result. See [Temporary allocations](#temporary-allocations). +The temporary view is the heaviest to render: memray holds every allocation and free to decide which are short-lived. On a very large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario) the render can run out of memory and fail. The run does not abort in that case; it records what failed and keeps going. See [Troubleshooting](#troubleshooting). + ## Running without Docker (if memray is supported and installed locally) ```bash @@ -215,3 +217,37 @@ make memory-use-bench PERF_ARGS=--update-baseline ``` Commit the updated `baseline.json` alongside the code change, so it becomes the new reference to compare against. 
+ +## Troubleshooting + +### A flamegraph render fails with `exit -9` + +You may see a message like `flamegraph temporary render failed for reader_mp4-...-temporary.html (killed (likely OOM))`. The `-9` is SIGKILL: the operating system's out-of-memory killer terminated the `memray flamegraph` subprocess. The temporary view is the heaviest to render, and on a large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario such as `reader_mp4`) it can exhaust available memory. + +The run does not abort. The capture and the metrics (`peak_bytes`, `leaked_bytes`, `total_allocations`) are read separately and are still recorded, the baseline is still written, and the run lists every failed render at the end. Only the HTML render is missing, and you have two ways to regenerate it. + +#### Option A: rerun the one scenario + +A single-scenario run renders one capture at a time with nothing else resident, so it often fits where the full suite did not: + +```bash +make memory-use-bench SCENARIO=reader_mp4 +``` + +If it still runs out of memory, lower the iteration count to shrink the capture: + +```bash +make memory-use-bench SCENARIO=reader_mp4 MEMRAY_ITERATIONS=20 +``` + +A lower iteration count makes that scenario's absolute allocation numbers no longer directly comparable to a full 100-iteration run. + +#### Option B: re-render the kept capture (no re-profiling) + +When a render fails, the run keeps that scenario's capture as `reports/<scenario>-<env>.bin`. Re-render just the failed view from that file with a higher temporary-allocation threshold, which cuts how much memray holds in memory so the render fits. 
This uses the original run's data, so the result stays comparable to the rest of the run: + +```bash +python3 -m memray flamegraph reports/reader_mp4-python-3.12-slim.bin \ + -o reports/reader_mp4-python-3.12-slim-temporary.html \ + --temporary-allocations --temporary-allocation-threshold=10 --force +``` diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py index c47b7afe..31593967 100644 --- a/tests/perf/run_profile.py +++ b/tests/perf/run_profile.py @@ -27,6 +27,7 @@ import argparse import json import os +import shutil import subprocess import sys import tempfile @@ -74,7 +75,7 @@ def _run_scenario_under_memray(name: str, bin_path: Path) -> None: sys.exit(1) -def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> None: +def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> bool: """Render one flamegraph view of a capture file. mode: @@ -94,8 +95,13 @@ def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> print(f" flamegraph ({mode})...", flush=True) result = subprocess.run(cmd, text=True) if result.returncode != 0: - print(f" flamegraph generation failed for {out_path.name} (exit {result.returncode})", file=sys.stderr) - sys.exit(1) + # -9 is SIGKILL, almost always the OOM killer reaping the heavy + # temporary render on a large capture. Do not abort the whole run: + # the capture and metrics are recorded separately and still good. + reason = "killed (likely OOM)" if result.returncode == -9 else f"exit {result.returncode}" + print(f" flamegraph {mode} render failed for {out_path.name} ({reason})", file=sys.stderr) + return False + return True # get_allocation_records() yields deallocation records too... 
@@ -189,6 +195,7 @@ def main() -> None: results: dict = {} failures: list[str] = [] + render_failures: list[dict] = [] total = len(scenarios_to_run) for idx, name in enumerate(scenarios_to_run, 1): @@ -197,18 +204,25 @@ def main() -> None: with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp: bin_path = Path(tmp.name) + env_tag = f"-{PERF_ENV}" if PERF_ENV else "" + scenario_render_failed = False + failed_modes: list[dict] = [] try: print(f" profiling...") _run_scenario_under_memray(name, bin_path) - env_tag = f"-{PERF_ENV}" if PERF_ENV else "" peak_html = REPORTS_DIR / f"{name}{env_tag}-peak.html" leaks_html = REPORTS_DIR / f"{name}{env_tag}-leaks.html" temporary_html = REPORTS_DIR / f"{name}{env_tag}-temporary.html" print(f" generating flamegraphs (peak + leaks + temporary)...") - _generate_flamegraph(bin_path, peak_html, mode="peak") - _generate_flamegraph(bin_path, leaks_html, mode="leaks") - _generate_flamegraph(bin_path, temporary_html, mode="temporary") + scenario_render_failed = False + failed_modes: list[dict] = [] + for mode, html in (("peak", peak_html), + ("leaks", leaks_html), + ("temporary", temporary_html)): + if not _generate_flamegraph(bin_path, html, mode=mode): + scenario_render_failed = True + failed_modes.append({"name": name, "mode": mode, "html": html.name}) print(f" reading metrics...", flush=True) metrics = _read_metrics(bin_path) @@ -234,7 +248,17 @@ def main() -> None: f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)" ) finally: - bin_path.unlink(missing_ok=True) + if scenario_render_failed: + # Keep the capture so the failed view can be re-rendered + # offline (with a higher --temporary-allocation-threshold) + # instead of re-profiling the whole scenario. 
+ kept = REPORTS_DIR / f"{name}{env_tag}.bin" + shutil.move(str(bin_path), str(kept)) + for fm in failed_modes: + fm["bin"] = str(kept) + render_failures.extend(failed_modes) + else: + bin_path.unlink(missing_ok=True) if args.update_baseline or not baseline: # When running a single scenario, merge its result into the existing @@ -250,6 +274,14 @@ def main() -> None: verb = "Updated" if baseline else "Created" print(f"\n{verb} baseline: {BASELINE_FILE}") + if render_failures: + print("\nFLAMEGRAPH RENDERS FAILED (capture + metrics still recorded):", file=sys.stderr) + for r in render_failures: + print(f" {r['name']} [{r['mode']}] -> {r['html']} (capture kept: {r['bin']})", file=sys.stderr) + print(" Recover without re-profiling, e.g.:", file=sys.stderr) + print(" python3 -m memray flamegraph -o " + "--temporary-allocations --temporary-allocation-threshold=10 --force", file=sys.stderr) + if failures: print("\nREGRESSIONS DETECTED:", file=sys.stderr) for f in failures: