diff --git a/.gitignore b/.gitignore
index c612bf1a..147e8357 100644
--- a/.gitignore
+++ b/.gitignore
@@ -122,3 +122,7 @@ target/
 src/c2pa/libs/
 !tests/fixtures/*.pem
 !tests/fixtures/*.key
+
+# Memory profiling reports
+tests/perf/reports/*.html
+tests/perf/reports/*.bin
diff --git a/Makefile b/Makefile
index ba70dfb3..4df3a42e 100644
--- a/Makefile
+++ b/Makefile
@@ -110,3 +110,23 @@ download-native-artifacts:
 # Build API documentation with Sphinx
 docs:
 	python3 scripts/generate_api_docs.py
+
+# Memory profiling with memray (runs in Docker, reports go to tests/perf/reports/); usage details are in tests/perf/README.md
+# The repo is bind-mounted via $(CURDIR), which make sets itself, so the mount is correct under `make -C` or when PWD is unset.
+PERF_ENV ?= python-3.12-slim
+MEMRAY_ITERATIONS ?= 100
+MEMRAY_THRESHOLD ?= 1.1
+SCENARIO ?=
+PERF_ARGS ?=
+SCENARIO_ARG := $(if $(SCENARIO),--scenario $(SCENARIO),)
+.PHONY: memory-use-bench clean-memory-perf-reports
+memory-use-bench:
+	docker build -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) .
+	docker run --rm -v "$(CURDIR)":/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS)
+	@echo ""
+	@echo "Reports written to tests/perf/reports/"
+	@echo "Open tests/perf/reports/<scenario>-{peak,leaks,temporary}.html in a browser"
+
+clean-memory-perf-reports:
+	rm -f tests/perf/reports/*.html tests/perf/reports/*.bin
+	@echo "Cleared tests/perf/reports/"
diff --git a/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile b/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile
new file mode 100644
index 00000000..0db28b11
--- /dev/null
+++ b/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile
@@ -0,0 +1,22 @@
+FROM python:3.10.20-slim-bookworm
+
+WORKDIR /workspace
+
+# libunwind-dev: native stack unwinding for memray. ca-certificates: CA bundle (presumably for the HTTPS download done at container start; confirm).
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libunwind-dev \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pre-install Python deps from the requirements files alone, so this layer stays cached until they change.
+# The project source is not baked into the image; it is bind-mounted at /workspace by `docker run -v`.
+COPY requirements.txt requirements-dev.txt ./
+RUN pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt
+
+RUN pip install --no-cache-dir memray==1.19.3
+
+COPY tests/perf/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["python", "-m", "tests.perf.run_profile"]
diff --git a/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile b/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile
new file mode 100644
index 00000000..1e387d1c
--- /dev/null
+++ b/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile
@@ -0,0 +1,22 @@
+FROM python:3.12.13-slim-bookworm
+
+WORKDIR /workspace
+
+# libunwind-dev: native stack unwinding for memray. ca-certificates: CA bundle (presumably for the HTTPS download done at container start; confirm).
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libunwind-dev \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pre-install Python deps from the requirements files alone, so this layer stays cached until they change.
+# The project source is not baked into the image; it is bind-mounted at /workspace by `docker run -v`.
+COPY requirements.txt requirements-dev.txt ./
+RUN pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt
+
+RUN pip install --no-cache-dir memray==1.19.3
+
+COPY tests/perf/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["python", "-m", "tests.perf.run_profile"]
diff --git a/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile b/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile
new file mode 100644
index 00000000..649422ac
--- /dev/null
+++ b/tests/perf/Dockerfiles/ubuntu-22.04-perf-Dockerfile
@@ -0,0 +1,31 @@
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /workspace
+
+# Ubuntu 22.04 ships Python 3.10 as python3 by default; the symlink below also exposes it as `python`.
+# libunwind-dev provides native stack unwinding for memray.
+# python3-dbg supplies the interpreter's debug symbols so memray can resolve
+# file names + line numbers for native (C) frames in the flamegraphs.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 \
+    python3-pip \
+    python3-venv \
+    python3-dbg \
+    libunwind-dev \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -s /usr/bin/python3 /usr/bin/python
+
+# Pre-install runtime deps only (requirements-dev.txt is not installed in this image). Project arrives via -v mount.
+COPY requirements.txt ./
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+RUN pip3 install --no-cache-dir memray==1.19.3 requests==2.34.2
+
+COPY tests/perf/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["python", "-m", "tests.perf.run_profile"]
diff --git a/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile b/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile
new file mode 100644
index 00000000..0fd3a523
--- /dev/null
+++ b/tests/perf/Dockerfiles/ubuntu-24.04-perf-Dockerfile
@@ -0,0 +1,31 @@
+FROM ubuntu:24.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /workspace
+
+# Ubuntu 24.04 ships Python 3.12 as python3 by default; the symlink below also exposes it as `python`.
+# libunwind-dev provides native stack unwinding for memray.
+# python3-dbg supplies the interpreter's debug symbols so memray can resolve
+# file names + line numbers for native (C) frames in the flamegraphs.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 \
+    python3-pip \
+    python3-venv \
+    python3-dbg \
+    libunwind-dev \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -s /usr/bin/python3 /usr/bin/python
+
+# Pre-install runtime deps only (requirements-dev.txt is not installed in this image); pip installs into the system Python via --break-system-packages. Project arrives via -v mount.
+COPY requirements.txt ./
+RUN pip3 install --no-cache-dir --break-system-packages -r requirements.txt
+
+RUN pip3 install --no-cache-dir --break-system-packages memray==1.19.3 requests==2.34.2
+
+COPY tests/perf/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["python", "-m", "tests.perf.run_profile"]
diff --git a/tests/perf/README.md b/tests/perf/README.md
new file mode 100644
index 00000000..1f2ec022
--- /dev/null
+++ b/tests/perf/README.md
@@ -0,0 +1,253 @@
+# Memory profiling framework
+
+Uses [memray](https://github.com/bloomberg/memray) to track peak memory, allocation patterns,
+and memory leaks across c2pa-python SDK operations.
+
+## Files
+
+| File | Purpose |
+| --- | --- |
+| `scenarios.py` | Functions that exercise each profiling scenario. Imported by `run_profile.py`. |
+| `run_profile.py` | Memory performance/usage analysis. Runs each scenario under `memray`, generates HTML reports, reads metrics, and compares against `baseline.json`. |
+| `Dockerfiles/` | One Dockerfile per target environment. Selected via `PERF_ENV` at `make` time when running the memory analysis. |
+| `entrypoint.sh` | Container entrypoint. Downloads the Linux native `libc2pa_c.so` at container start into the volume-mounted workspace, because a copy baked into an image layer would be hidden by the `-v` mount. |
+| `reports/` | Generated HTML reports (gitignored). Three files per scenario: `<scenario>-peak.html` (peak/high-water view), `<scenario>-leaks.html` (leak view), and `<scenario>-temporary.html` (temporary-allocations view). |
+
+## Scenarios
+
+Each scenario loops multiple times (100 by default) so that leaks accumulate and become visible in the leaks flamegraph and the memory use graph. Change the iteration count by setting the `MEMRAY_ITERATIONS` variable (the Makefile forwards it into the container):
+
+```bash
+make memory-use-bench MEMRAY_ITERATIONS=1000
+```
+
+Most scenarios use the Context API: they build a `Context` once and reuse it across iterations, so its settings are parsed a single time. The jpeg and png cases also keep a `_legacy` variant that builds the `Reader`/`Builder` without a `Context`, which re-reads the thread-local settings on each construction. Running a pair (for example `builder_sign_jpeg_legacy` and `builder_sign_jpeg_with_context`) compares the two paths.
+
+The `builder_sign_{jpeg,png}_parallel_*` scenarios build one `Context` and share it across 10 threads that sign concurrently, each with its own streams and `Builder`. The name encodes two axes. `split` divides the iteration budget across the threads, so total work matches a single-threaded scenario; `full` runs the full loop on each of the 10 threads, so total work is 10x (use these with `SCENARIO=` rather than the whole suite). `pool` runs the threads through a `ThreadPoolExecutor`; `barrier` starts all 10 at once with a `threading.Barrier`.
+
+## Environments
+
+Select the target environment with `PERF_ENV` (default: `python-3.12-slim`):
+
+| `PERF_ENV` value | Base image | Python | Native symbols |
+| --- | --- | --- | --- |
+| `python-3.12-slim` | `python:3.12.13-slim-bookworm` | 3.12 | interpreter frames unresolved |
+| `python-3.10-slim` | `python:3.10.20-slim-bookworm` | 3.10 | interpreter frames unresolved |
+| `ubuntu-22.04` | `ubuntu:22.04` | 3.10 (apt default) | resolved (`python3-dbg`) |
+| `ubuntu-24.04` | `ubuntu:24.04` | 3.12 (apt default) | resolved (`python3-dbg`) |
+
+The slim images run a source-built `/usr/local/bin/python` that ships stripped, and Debian's `python3-dbg` targets a different binary (build-id mismatch), so memray cannot resolve the interpreter's native (C) frames there. You will see a "No debug information was found for the Python interpreter" warning, and native traces may lack file names and line numbers. The ubuntu images install `python3-dbg` for the matching apt interpreter, so their native flamegraphs are fully symbolized. Use an `ubuntu-*` `PERF_ENV` when you need resolved native traces.
+
+## Running (via Docker)
+
+```bash
+# First run (if there is no baseline.json): establishes baseline.json
+make memory-use-bench
+
+# Subsequent runs: compares against baseline, fails if >10% regression
+make memory-use-bench
+
+# Refresh baseline after an intentional memory change
+make memory-use-bench PERF_ARGS=--update-baseline
+
+# Run against a different runner environment
+make memory-use-bench PERF_ENV=ubuntu-24.04
+
+# Run a single scenario instead of the whole suite
+make memory-use-bench SCENARIO=builder_sign_gif
+
+# Refresh just one scenario's baseline entry (others are preserved)
+make memory-use-bench SCENARIO=builder_sign_gif PERF_ARGS=--update-baseline
+
+# Remove all generated HTML reports
+make clean-memory-perf-reports
+```
+
+The trailing `VAR=value` arguments (e.g. `PERF_ENV=ubuntu-24.04`, `PERF_ARGS=--update-baseline`) are `make` variable overrides, not shell env vars: `make` parses any `word=value` argument as a variable assignment. Each one overrides the corresponding default in the Makefile, and the recipe interpolates them into the `docker build`/`docker run` commands. See [Configuration](#configuration) for the full list and what each forwards to.
+
+Reports are written to `tests/perf/reports/` on the local machine: three HTML files per scenario, one per view (described below). Open any of them in a browser. At the end, the run also reports whether every scenario stayed within the baseline threshold (baseline plus the default 10% memory-use tolerance).
+
+## Report views
+
+Each scenario produces three [memray flamegraphs](https://bloomberg.github.io/memray/flamegraph.html). All three are flamegraphs of the same run. They differ only in which allocations they count.
+
+### `<scenario>-peak.html`: peak/high-water view
+
+What it shows: allocations that were simultaneously alive at the moment the process used the most memory (the high-water mark).
+
+Why it's useful: tells you what drives the largest memory footprint, the working set you must hold at once. Consult this view when you care about peak RSS or OOM headroom.
+
+How to read it: the widest frames are the biggest contributors to peak. Walk up a wide column to the top frame to find the call site holding that memory at the high-water instant.
+
+### `<scenario>-leaks.html`: leak view
+
+What it shows: memory that was allocated but never freed before tracking stopped (`memray --leaks`).
+
+Why it's useful: finds memory leaks, meaning memory that grows with work done. It is never zero, because one-time static setup (the native `libc2pa_c` library loading global structures that live for the whole process) shows as "never freed." A real leak is one that scales with iterations. Profile at `MEMRAY_ITERATIONS=100` and `=1000` and compare: flat means static overhead, growing means a leak. See [Why is leaked_bytes not zero?](#why-is-leaked_bytes-not-zero).
+
+How to read it: a wide frame here is unfreed memory. If its width grows when you raise the iteration count, that top frame is the leaking call site.
+
+### `<scenario>-temporary.html`: temporary-allocations view
+
+What it shows: short-lived churn, meaning memory allocated and then freed almost immediately (memray's threshold: freed before more than one other allocation happens).
+
+Why it's useful: temporary allocations are not leaks, since the memory is returned, but high allocation and free turnover costs CPU and can fragment the heap. This view surfaces hot per-call churn that the peak and leak views hide, because those objects are freed between iterations and so barely register at the high-water mark. Use it when a loop allocates too much.
+
+How to read it: wide frames are the biggest sources of throwaway allocations. The view may be sparse or empty for a scenario that does little churn, which is itself a valid result. See [Temporary allocations](#temporary-allocations).
+
+The temporary view is the heaviest to render: memray holds every allocation and free to decide which are short-lived. On a very large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario) the render can run out of memory and fail. The run does not abort in that case; it records what failed and keeps going. See [Troubleshooting](#troubleshooting).
+
+## Running without Docker (if memray is supported and installed locally)
+
+```bash
+pip install memray
+python -m tests.perf.run_profile
+```
+
+Run a single scenario (useful for generating data for one operation without the full suite):
+
+```bash
+python -m tests.perf.run_profile --scenario builder_sign_gif
+```
+
+With `--update-baseline`, a single-scenario run only rewrites that scenario's entry in `baseline.json`; the other scenarios' entries are preserved.
+
+```bash
+python -m tests.perf.run_profile --scenario builder_sign_gif --update-baseline
+```
+
+## Configuration
+
+With `make memory-use-bench VAR=value` you set the **`make` variable** and the Makefile forwards it as shown in the "Forwarded as" column. When running `run_profile.py` without Docker, set the **env var** (or pass the CLI arg) directly.
+
+| `make` variable | Forwarded as | Default | Description |
+| --- | --- | --- | --- |
+| `PERF_ENV` | `PERF_ENV` env var | `python-3.12-slim` | Target environment; selects the Dockerfile, tags report filenames (`<scenario>-<PERF_ENV>-<view>.html`), recorded in `baseline.json` `_meta`. See [Environments](#environments). |
+| `MEMRAY_ITERATIONS` | `MEMRAY_ITERATIONS` env var | `100` | Loop count per scenario. |
+| `MEMRAY_THRESHOLD` | `MEMRAY_THRESHOLD` env var | `1.1` | Regression multiplier (1.1 = 10% tolerance). |
+| `SCENARIO` | `--scenario` CLI arg | _(all)_ | Run a single scenario (e.g. `SCENARIO=builder_sign_jpeg`). |
+| `PERF_ARGS` | passed straight through | _(none)_ | Extra `run_profile.py` args (e.g. `PERF_ARGS=--update-baseline`). |
+
+`PERF_SCENARIO` is an additional env var, but internal: the runner sets it per scenario so the loop can label its progress. Not user-configurable.
+
+Example to override iteration count:
+
+```bash
+make memory-use-bench MEMRAY_ITERATIONS=1000
+```
+
+## Reading baseline.json
+
+`baseline.json` is committed to the repo and records the following data for each scenario:
+
+```json
+{
+  "_meta": {
+    "memray_version": "1.19.3",
+    "python_version": "3.12.13",
+    "c2pa_native_version": "c2pa-v0.85.0",
+    "iterations": 100,
+    "perf_env": "python-3.12-slim",
+    "arch": "x86_64"
+  },
+  "scenario_name": {
+    "peak_bytes": 62914560,
+    "leaked_bytes": 3271766,
+    "total_allocations": 12840
+  },
+  ...
+}
+```
+
+The `_meta` block records which toolchain produced the baseline so the numbers are reproducible. It is provenance only and is never compared against. The regression check only looks at the per-scenario entries.
+
+| `_meta` field | Meaning |
+| --- | --- |
+| `memray_version` | memray version that generated the metrics |
+| `python_version` | Python version that ran the test framework |
+| `c2pa_native_version` | native `libc2pa_c` version (from `c2pa-native-version.txt`) |
+| `iterations` | `MEMRAY_ITERATIONS` used for the run |
+| `perf_env` | `PERF_ENV` (target environment) |
+| `arch` | machine architecture (`platform.machine()`) |
+
+`peak_bytes`, `total_allocations` and the `arch`/`python`/`memray` versions are all environment-sensitive: a baseline is most meaningful when compared against a run from the same `_meta`.
+
+`peak_bytes` is the highest amount of memory in use at any single point during the scenario.
+
+`leaked_bytes` is memory that was allocated during the run but never freed before the process exited. Static allocations persist, since there are one-time loads such as the native library.
+
+`total_allocations` is the total number of individual memory allocation calls made.
+
+### Why is leaked_bytes not zero?
+
+You might expect the baseline to show `leaked_bytes: 0`. In practice it never does. When the c2pa native library (`libc2pa_c.so`) is first loaded, Rust sets up global data structures designed to live for the entire lifetime of the process. They get cleaned up when the process exits, which is after memray stops watching, so memray sees them as "never freed" even though they are not leaking.
+
+A memory leak grows proportionally with work done. If you sign 50 images and get 3.2 MB leaked, then sign 1000 images and still get 3.2 MB leaked, that 3.2 MB is static one-time overhead rather than a leak, since it does not grow with the work that ran. If signing 1000 images gave you 64 MB leaked, that would be a leak, as the leaked memory grows with the work executed.
+
+The baseline captures this expected static overhead. Future runs compare against it: if `leaked_bytes` grows beyond the baseline by more than 10%, the run fails.
+
+### How to confirm no leak exists?
+
+Run with a higher iteration count than default (100) and compare:
+
+```bash
+make memory-use-bench MEMRAY_ITERATIONS=1000 PERF_ARGS=--update-baseline
+```
+
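+If `leaked_bytes` stays flat compared with the committed baseline (or across runs with increasing iteration counts), there is no leak. If it scales with iterations, open `tests/perf/reports/<scenario>-leaks.html` in a browser to see which function is responsible. Note that `--update-baseline` rewrites `baseline.json` with the numbers from this larger run; restore the committed file afterwards (for example `git checkout -- tests/perf/baseline.json`) so that later default runs still compare like with like.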
+
+### Reading the "Resident set size over time" graph (why memory looks like it climbs)
+
+The "Resident set size over time" plot (chart icon, top-right of the report) draws two lines. "Resident size" (RSS) is every page the OS counts as resident: interpreter and pages the allocator holds but has not returned. "Heap size" is only the live tracked allocations.
+
+On the parallel scenarios the RSS line steps up and stays high. The threads each hold their own source, output, and `Builder` live at once, so RSS rises to cover that combined working set (the steps line up with the moments all threads overlap). The allocator then keeps those arena pages for reuse instead of returning them, so RSS plateaus at the high-water mark.
+
+Judge leaks by the heap line. The heap rises early and then settles or falls, the same shape as the single-threaded baseline. A within-run heap rise is not by itself proof of a leak (the allocator high-water can climb and settle within a bounded run).
+
+### Temporary allocations
+
+`<scenario>-temporary.html` shows temporary allocations, meaning memory that is allocated and then freed almost immediately (memray's threshold is one allocation: a block is temporary if it is freed before more than one other allocation happens). The memory is returned, so these are not leaks, but they are churn: high allocation and free turnover that costs CPU and can fragment the heap. A scenario doing lots of short-lived work can show heavy temporary allocations while `leaked_bytes` stays flat.
+
+### When to update the baseline
+
+Update `baseline.json` after any intentional change that affects memory use:
+
+```bash
+make memory-use-bench PERF_ARGS=--update-baseline
+```
+
+Commit the updated `baseline.json` alongside the code change, so it becomes the new reference to compare against.
+
+## Troubleshooting
+
+### A flamegraph render fails with `exit -9`
+
+You may see a message like `flamegraph render failed for reader_mp4-...-temporary.html (killed (likely OOM))`. The `-9` is SIGKILL: the operating system's out-of-memory killer terminated the `memray flamegraph` subprocess. The temporary view is the heaviest to render, and on a large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario such as `reader_mp4`) it can exhaust available memory.
+
+The run does not abort. The capture and the metrics (`peak_bytes`, `leaked_bytes`, `total_allocations`) are read separately and are still recorded, the baseline is still written, and the run lists every failed render at the end. Only the HTML render is missing, and you have two ways to regenerate it.
+
+#### Option A: rerun the one scenario
+
+A single-scenario run renders one capture at a time with nothing else resident, so it often fits where the full suite did not:
+
+```bash
+make memory-use-bench SCENARIO=reader_mp4
+```
+
+If it still runs out of memory, lower the iteration count to shrink the capture:
+
+```bash
+make memory-use-bench SCENARIO=reader_mp4 MEMRAY_ITERATIONS=20
+```
+
+A lower iteration count makes that scenario's absolute allocation numbers no longer directly comparable to a full 100-iteration run.
+
+#### Option B: re-render the kept capture (no re-profiling)
+
+When a render fails, the run keeps that scenario's capture as `reports/<scenario>-<env>.bin` (paths here and in the command below are relative to `tests/perf/`). Re-render just the failed view from that file with a higher temporary-allocation threshold, which cuts how much memray holds in memory so the render fits. This uses the original run's data, so the result stays comparable to the rest of the run:
+
+```bash
+python3 -m memray flamegraph reports/reader_mp4-python-3.12-slim.bin \
+  -o reports/reader_mp4-python-3.12-slim-temporary.html \
+  --temporary-allocations --temporary-allocation-threshold=10 --force
+```
diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py
new file mode 100644
index 00000000..a56982a7
--- /dev/null
+++ b/tests/perf/__init__.py
@@ -0,0 +1 @@
+# Empty placeholder file to facilitate imports
\ No newline at end of file
diff --git a/tests/perf/baseline.json b/tests/perf/baseline.json
new file mode 100644
index 00000000..302d648a
--- /dev/null
+++ b/tests/perf/baseline.json
@@ -0,0 +1,140 @@
+{
+  "_meta": {
+    "memray_version": "1.19.3",
+    "python_version": "3.12.13",
+    "c2pa_native_version": "c2pa-v0.85.1",
+    "iterations": 100,
+    "perf_env": "python-3.12-slim",
+    "arch": "aarch64"
+  },
+  "reader_jpeg_legacy": {
+    "peak_bytes": 3814421,
+    "leaked_bytes": 3266116,
+    "total_allocations": 698899
+  },
+  "reader_jpeg_with_context": {
+    "peak_bytes": 3822953,
+    "leaked_bytes": 3257471,
+    "total_allocations": 692953
+  },
+  "reader_mp4": {
+    "peak_bytes": 4876441,
+    "leaked_bytes": 3257485,
+    "total_allocations": 2112991
+  },
+  "reader_wav": {
+    "peak_bytes": 5520266,
+    "leaked_bytes": 3267427,
+    "total_allocations": 400371
+  },
+  "builder_sign_jpeg_legacy": {
+    "peak_bytes": 7695310,
+    "leaked_bytes": 3383623,
+    "total_allocations": 522425
+  },
+  "builder_sign_jpeg_with_context": {
+    "peak_bytes": 7688236,
+    "leaked_bytes": 3376293,
+    "total_allocations": 516851
+  },
+  "builder_sign_png_legacy": {
+    "peak_bytes": 7932767,
+    "leaked_bytes": 3383648,
+    "total_allocations": 1694629
+  },
+  "builder_sign_png_with_context": {
+    "peak_bytes": 7925490,
+    "leaked_bytes": 3376452,
+    "total_allocations": 1688908
+  },
+  "builder_sign_jpeg_parallel_split_pool": {
+    "peak_bytes": 45764159,
+    "leaked_bytes": 3818113,
+    "total_allocations": 528785
+  },
+  "builder_sign_jpeg_parallel_split_barrier": {
+    "peak_bytes": 46225287,
+    "leaked_bytes": 3809216,
+    "total_allocations": 527412
+  },
+  "builder_sign_png_parallel_split_pool": {
+    "peak_bytes": 46002549,
+    "leaked_bytes": 3817801,
+    "total_allocations": 1700731
+  },
+  "builder_sign_png_parallel_split_barrier": {
+    "peak_bytes": 45970433,
+    "leaked_bytes": 3812044,
+    "total_allocations": 1699396
+  },
+  "builder_sign_gif": {
+    "peak_bytes": 14544515,
+    "leaked_bytes": 3375865,
+    "total_allocations": 7183237
+  },
+  "builder_sign_heic": {
+    "peak_bytes": 4608484,
+    "leaked_bytes": 3376030,
+    "total_allocations": 771079
+  },
+  "builder_sign_m4a": {
+    "peak_bytes": 18849082,
+    "leaked_bytes": 3376431,
+    "total_allocations": 2273497
+  },
+  "builder_sign_webp": {
+    "peak_bytes": 8900701,
+    "leaked_bytes": 3376432,
+    "total_allocations": 487683
+  },
+  "builder_sign_avi": {
+    "peak_bytes": 7040387,
+    "leaked_bytes": 3376267,
+    "total_allocations": 40315553
+  },
+  "builder_sign_mp4": {
+    "peak_bytes": 6162851,
+    "leaked_bytes": 3376431,
+    "total_allocations": 1809672
+  },
+  "builder_sign_tiff": {
+    "peak_bytes": 13124728,
+    "leaked_bytes": 3376268,
+    "total_allocations": 5139967
+  },
+  "builder_sign_jpeg_parent_of": {
+    "peak_bytes": 14173992,
+    "leaked_bytes": 3377656,
+    "total_allocations": 1209933
+  },
+  "builder_sign_jpeg_component_of": {
+    "peak_bytes": 14175518,
+    "leaked_bytes": 3377891,
+    "total_allocations": 1232336
+  },
+  "builder_sign_jpeg_parent_and_component": {
+    "peak_bytes": 14530406,
+    "leaked_bytes": 3474418,
+    "total_allocations": 2160934
+  },
+  "builder_sign_jpeg_parent_and_component_mixed_mime": {
+    "peak_bytes": 14476171,
+    "leaked_bytes": 3378735,
+    "total_allocations": 2451587
+  },
+  "builder_sign_jpeg_two_components_same_mime": {
+    "peak_bytes": 14519270,
+    "leaked_bytes": 3473673,
+    "total_allocations": 2150782
+  },
+  "builder_sign_jpeg_two_components_mixed_mime": {
+    "peak_bytes": 14473127,
+    "leaked_bytes": 3377445,
+    "total_allocations": 2441195
+  },
+  "builder_sign_jpeg_archive_roundtrip": {
+    "peak_bytes": 14226832,
+    "leaked_bytes": 3426491,
+    "total_allocations": 1680290
+  }
+}
\ No newline at end of file
diff --git a/tests/perf/entrypoint.sh b/tests/perf/entrypoint.sh
new file mode 100644
index 00000000..f0f1f917
--- /dev/null
+++ b/tests/perf/entrypoint.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+set -e
+
+cd /workspace
+export PYTHONPATH=/workspace/src
+
+# Download the Linux native library into the volume-mounted workspace.
+# Runs at container start so libs land in the host-mounted tree,
+# not in a build layer that gets shadowed by the -v mount.
+C2PA_VERSION=$(cat c2pa-native-version.txt)
+ARCH=$(uname -m)
+
+if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
+    PLATFORM="aarch64-unknown-linux-gnu"
+else
+    PLATFORM="x86_64-unknown-linux-gnu"
+fi
+
+echo "Downloading c2pa native lib: $C2PA_VERSION / $PLATFORM"
+C2PA_LIBS_PLATFORM=$PLATFORM python scripts/download_artifacts.py "$C2PA_VERSION"
+
+# Replicate what setup.py copy_platform_libraries() does:
+# So the correct Linux library is here for the Dockerfile
+python - <<EOF
+import shutil
+from pathlib import Path
+src = Path("artifacts/$PLATFORM")
+dst = Path("src/c2pa/libs")
+dst.mkdir(parents=True, exist_ok=True)
+for f in src.glob("*"):
+    if f.is_file():
+        shutil.copy2(f, dst / f.name)
+        print(f"  copied {f.name}")
+EOF
+
+echo "src/c2pa/libs contents: $(ls src/c2pa/libs/)"
+
+exec "$@"
diff --git a/tests/perf/reports/.gitkeep b/tests/perf/reports/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py
new file mode 100644
index 00000000..31593967
--- /dev/null
+++ b/tests/perf/run_profile.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+# Copyright 2026 Adobe. All rights reserved.
+# This file is licensed to you under the Apache License,
+# Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+# or the MIT license (http://opensource.org/licenses/MIT),
+# at your option.
+
+"""
+Memory profiling harness using memray.
+
+For each scenario in scenarios.SCENARIOS this script:
+- Runs the scenario under `memray run --native` -> <name>.bin
+- Generates three flamegraph views: <name>-peak.html (high-water),
+  <name>-leaks.html (--leaks), <name>-temporary.html (--temporary-allocations)
+- Reads peak_bytes, leaked_bytes and total_allocations from the .bin via memray.FileReader
+- Compares against baseline.json (creates it on first run)
+- Exits non-zero if any metric exceeds baseline * threshold
+
+Usage:
+    python -m tests.perf.run_profile [--update-baseline] [--scenario NAME]
+
+Environment variables:
+- MEMRAY_ITERATIONS: number of times each scenario loops (default: 100)
+- MEMRAY_THRESHOLD: regression multiplier, e.g. 1.1 for 10% (default: 1.1)
+"""
+
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import platform
+
+import memray
+
+# Scenario name list
+from tests.perf.scenarios import SCENARIO_NAMES
+
+HERE = Path(__file__).parent  # directory containing this script
+REPORTS_DIR = HERE / "reports"  # flamegraph HTML files and kept .bin captures are written here
+BASELINE_FILE = HERE / "baseline.json"  # reference metrics that runs are compared against
+
+ITERATIONS = int(os.environ.get("MEMRAY_ITERATIONS", "100"))  # loop count handed to each scenario
+THRESHOLD = float(os.environ.get("MEMRAY_THRESHOLD", "1.1"))  # multiplier applied to baseline values
+PERF_ENV = os.environ.get("PERF_ENV", "")  # optional tag used in report file names and baseline metadata
+
+
+def _run_scenario_under_memray(name: str, bin_path: Path) -> None:
+    """Profile one scenario in a child process under `memray run --native`.
+    Exits the harness with status 1 if the child fails. Values are embedded
+    with repr() so backslashes or quotes in paths cannot break the script."""
+    repo_root = HERE.parent.parent
+    script = f"""
+import sys
+sys.path.insert(0, {str(repo_root)!r})
+sys.path.insert(0, {str(repo_root / 'src')!r})
+from tests.perf.scenarios import SCENARIOS
+SCENARIOS[{name!r}]({ITERATIONS})
+"""
+    cmd = [
+        sys.executable, "-m", "memray", "run",
+        "--native", "--trace-python-allocators", "--force",
+        "-o", str(bin_path),
+        "-c", script,
+    ]
+    # PERF_SCENARIO lets the scenario loop label its progress lines.
+    env = {**os.environ, "PERF_SCENARIO": name}
+    result = subprocess.run(cmd, text=True, env=env)
+    if result.returncode != 0:
+        print(f"  memray run failed for {name} (exit {result.returncode})", file=sys.stderr)
+        sys.exit(1)
+
+
+def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> bool:
+    """Render a single flamegraph view of a capture file; True on success.
+
+    The views are mutually exclusive, so each one is a separate render:
+    - 'peak':      high-water-mark view (the default flamegraph render).
+    - 'leaks':     memory still live when tracking stopped (--leaks).
+    - 'temporary': short-lived churn (--temporary-allocations, which equals
+                   --temporary-allocation-threshold=1).
+    Any other mode renders like 'peak'.
+    """
+    extra_flags = {"leaks": ["--leaks"], "temporary": ["--temporary-allocations"]}
+    cmd = [
+        sys.executable, "-m", "memray", "flamegraph",
+        str(bin_path), "-o", str(out_path), "--force",
+        *extra_flags.get(mode, []),
+    ]
+    # Output is streamed rather than captured, so a long render does not look stuck.
+    print(f"    flamegraph ({mode})...", flush=True)
+    completed = subprocess.run(cmd, text=True)
+    if completed.returncode == 0:
+        return True
+    # -9 is SIGKILL, almost always the OOM killer reaping the heavy temporary
+    # render on a large capture. The whole run is not aborted: the capture and
+    # the metrics are recorded separately and are still good.
+    reason = "killed (likely OOM)" if completed.returncode == -9 else f"exit {completed.returncode}"
+    print(f"  flamegraph {mode} render failed for {out_path.name} ({reason})", file=sys.stderr)
+    return False
+
+
+# get_allocation_records() also yields deallocation records. They carry
+# size 0, so byte sums are unaffected, but they would inflate the record
+# count; these allocator types are filtered out when counting alloc calls.
+_DEALLOCATORS = {
+    memray.AllocatorType.FREE,
+    memray.AllocatorType.MUNMAP,
+    memray.AllocatorType.PYMALLOC_FREE,
+}
+
+
+def _read_metrics(bin_path: Path) -> dict:
+    """Return peak_bytes, leaked_bytes and total_allocations for a memray capture.
+
+    - peak_bytes: high-water mark of live memory, taken from the capture
+      metadata (the most memory in use at any single instant).
+    - total_allocations: number of allocation records, skipping the
+      deallocator records listed in _DEALLOCATORS.
+    - leaked_bytes: summed size of the records memray reports as leaked,
+      i.e. still allocated when tracking ended.
+    """
+    with memray.FileReader(str(bin_path)) as reader:
+        peak = reader.metadata.peak_memory
+        alloc_calls = 0
+        for rec in reader.get_allocation_records():
+            if rec.allocator in _DEALLOCATORS:
+                continue
+            alloc_calls += 1
+        leaked = 0
+        for rec in reader.get_leaked_allocation_records(merge_threads=True):
+            leaked += rec.size
+
+    return {
+        "peak_bytes": peak,
+        "leaked_bytes": leaked,
+        "total_allocations": alloc_calls,
+    }
+
+
+def _build_meta() -> dict:
+    """Provenance for the baseline: the toolchain that produced the numbers,
+    recorded so a committed baseline is reproducible under the same conditions.
+    """
+    version_file = HERE.parent.parent / "c2pa-native-version.txt"
+    try:
+        native_version = version_file.read_text().strip()
+    except OSError:
+        native_version = ""  # version file unreadable: record an empty version
+    return dict(
+        memray_version=getattr(memray, "__version__", ""),
+        python_version=platform.python_version(),
+        c2pa_native_version=native_version,
+        iterations=ITERATIONS,
+        perf_env=PERF_ENV,
+        arch=platform.machine(),
+    )
+
+
+def _fmt(n: int) -> str:
+    """Format a byte count as MiB or KiB (one decimal), or as plain bytes."""
+    for unit, scale in (("MiB", 1024 ** 2), ("KiB", 1024)):
+        if n >= scale:
+            return f"{n / scale:.1f} {unit}"
+    return f"{n} B"
+
+
+def main() -> None:
+    """Profile the selected scenarios and compare them with baseline.json.
+    Exits 1 on a regression; writes the baseline instead of comparing when
+    --update-baseline is given or no baseline exists yet."""
+    parser = argparse.ArgumentParser(description="c2pa-python memory profiler")
+    parser.add_argument(
+        "--update-baseline",
+        action="store_true",
+        help="Overwrite baseline.json with current measurements and exit 0",
+    )
+    parser.add_argument(
+        "--scenario",
+        choices=SCENARIO_NAMES,
+        default=None,
+        help="Run a single scenario instead of all of them. With --update-baseline, "
+             "only that scenario's entry in baseline.json is updated; the rest are kept.",
+    )
+    args = parser.parse_args()
+
+    scenarios_to_run = (args.scenario,) if args.scenario else SCENARIO_NAMES
+
+    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
+
+    # The file on disk is needed for comparing and for merging a single-scenario
+    # update; a full --update-baseline replaces it and never has to parse it.
+    baseline_existed = BASELINE_FILE.exists()
+    on_disk: dict = {}
+    if baseline_existed and (args.scenario or not args.update_baseline):
+        on_disk = json.loads(BASELINE_FILE.read_text())
+    baseline: dict = {} if args.update_baseline else on_disk  # empty: skip comparison
+
+    results: dict = {}
+    failures: list[str] = []
+    render_failures: list[dict] = []
+
+    total = len(scenarios_to_run)
+    for idx, name in enumerate(scenarios_to_run, 1):
+        print(f"\n=== [{idx}/{total}] {name} (iterations={ITERATIONS}) ===")
+
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
+            bin_path = Path(tmp.name)
+
+        env_tag = f"-{PERF_ENV}" if PERF_ENV else ""
+        failed_modes: list[dict] = []  # non-empty means at least one render failed
+        try:
+            print("  profiling...")
+            _run_scenario_under_memray(name, bin_path)
+
+            peak_html = REPORTS_DIR / f"{name}{env_tag}-peak.html"
+            leaks_html = REPORTS_DIR / f"{name}{env_tag}-leaks.html"
+            temporary_html = REPORTS_DIR / f"{name}{env_tag}-temporary.html"
+            print("  generating flamegraphs (peak + leaks + temporary)...")
+            for mode, html in (("peak", peak_html),
+                               ("leaks", leaks_html),
+                               ("temporary", temporary_html)):
+                if not _generate_flamegraph(bin_path, html, mode=mode):
+                    failed_modes.append({"name": name, "mode": mode, "html": html.name})
+
+            print("  reading metrics...", flush=True)
+            metrics = _read_metrics(bin_path)
+            results[name] = metrics
+
+            print(f"  peak:   {_fmt(metrics['peak_bytes'])}")
+            print(f"  leaked: {_fmt(metrics['leaked_bytes'])}")
+            print(f"  allocs: {metrics['total_allocations']}")
+            print(f"  peak report:      {peak_html}")
+            print(f"  leaks report:     {leaks_html}")
+            print(f"  temporary report: {temporary_html}")
+
+            if baseline and name in baseline:
+                b = baseline[name]
+                for metric in ("peak_bytes", "leaked_bytes"):
+                    current = metrics[metric]
+                    base = b.get(metric, 0)
+                    limit = base * THRESHOLD
+                    if current > limit:
+                        diff_pct = (current - base) / base * 100 if base else float("inf")
+                        failures.append(
+                            f"{name}.{metric}: {_fmt(current)} > baseline {_fmt(base)}"
+                            f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)"
+                        )
+        finally:
+            if failed_modes:
+                # Keep the capture so the failed view can be re-rendered
+                # offline (with a higher --temporary-allocation-threshold)
+                # instead of re-profiling the whole scenario.
+                kept = REPORTS_DIR / f"{name}{env_tag}.bin"
+                shutil.move(str(bin_path), str(kept))
+                for fm in failed_modes:
+                    fm["bin"] = str(kept)
+                render_failures.extend(failed_modes)
+            else:
+                bin_path.unlink(missing_ok=True)
+
+    if args.update_baseline or not baseline:
+        # A single-scenario run merges its result into the entries already on
+        # disk so the others are preserved; a full run replaces the file wholesale.
+        output = dict(on_disk) if args.scenario else {}
+        output["_meta"] = _build_meta()
+        output.update(results)
+        BASELINE_FILE.write_text(json.dumps(output, indent=2))
+        verb = "Updated" if baseline_existed else "Created"
+        print(f"\n{verb} baseline: {BASELINE_FILE}")
+
+    if render_failures:
+        print("\nFLAMEGRAPH RENDERS FAILED (capture + metrics still recorded):", file=sys.stderr)
+        for r in render_failures:
+            print(f"  {r['name']} [{r['mode']}] -> {r['html']}  (capture kept: {r['bin']})", file=sys.stderr)
+        print("  Recover without re-profiling, e.g.:", file=sys.stderr)
+        # Only the threshold flag is shown: memray rejects it next to --temporary-allocations.
+        print("    python3 -m memray flamegraph <kept.bin> -o <out.html> "
+              "--temporary-allocation-threshold=10 --force", file=sys.stderr)
+
+    if failures:
+        print("\nREGRESSIONS DETECTED:", file=sys.stderr)
+        for f in failures:
+            print(f"  {f}", file=sys.stderr)
+        sys.exit(1)
+
+    print("\nAll scenarios within baseline thresholds.")
+
+
+if __name__ == "__main__":  # executed as a script or via `python -m tests.perf.run_profile`
+    main()
diff --git a/tests/perf/scenarios.py b/tests/perf/scenarios.py
new file mode 100644
index 00000000..0432aa20
--- /dev/null
+++ b/tests/perf/scenarios.py
@@ -0,0 +1,532 @@
+# Copyright 2026 Adobe. All rights reserved.
+# This file is licensed to you under the Apache License,
+# Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+# or the MIT license (http://opensource.org/licenses/MIT),
+# at your option.
+
+"""
+Plain functions (no pytest dependencies) that exercise the profiling scenarios.
+Each function is called once by run_profile.py and loops N times internally.
+"""
+
+import io
+import os
+import sys
+import threading
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from c2pa import Builder, C2paSignerInfo, Context, Reader, Signer
+
+# Fixture locations, resolved relative to this file (tests/perf -> tests/fixtures).
+FIXTURES_DIR = Path(__file__).parent.parent / "fixtures"
+READING_FIXTURES_DIR = FIXTURES_DIR / "files-for-reading-tests"
+SIGNING_FIXTURES_DIR = FIXTURES_DIR / "files-for-signing-tests"
+
+SIGNED_JPEG = FIXTURES_DIR / "C.jpg"
+CLOUD_JPEG = FIXTURES_DIR / "cloud.jpg"
+SOURCE_JPEG = FIXTURES_DIR / "A.jpg"
+SIGNING_PNG = SIGNING_FIXTURES_DIR / "sample1.png"
+
+# digitalSourceType value used by the ingredient-linking actions.
+_DST_COMPOSITE = "http://cv.iptc.org/newscodes/digitalsourcetype/compositeWithTrainedAlgorithmicMedia"
+
+# Fixed instance ids: the same id is given to an ingredient and listed in the
+# "ingredientIds" of the action that refers to it.
+_PARENT_ID = "xmp:iid:aaaaaaaa-0001-0001-0001-aaaaaaaaaaaa"
+_PLACED_ID = "xmp:iid:bbbbbbbb-0002-0002-0002-bbbbbbbbbbbb"
+_PARENT_ID2 = "xmp:iid:cccccccc-0003-0003-0003-cccccccccccc"
+_PLACED_ID2 = "xmp:iid:dddddddd-0004-0004-0004-dddddddddddd"
+_PARENT_ID3 = "xmp:iid:eeeeeeee-0005-0005-0005-eeeeeeeeeeee"
+_PLACED_ID3 = "xmp:iid:ffffffff-0006-0006-0006-ffffffffffff"
+_PLACED_ID4 = "xmp:iid:11111111-0007-0007-0007-111111111111"
+_PLACED_ID5 = "xmp:iid:22222222-0008-0008-0008-222222222222"
+
+# Manifest template. Scenarios build their own manifest by spreading this dict
+# (a shallow copy) and overriding individual keys such as "format".
+MANIFEST_BASE = {
+    "claim_generator": "perf_test",
+    "claim_generator_info": [{"name": "perf_test", "version": "0.0.1"}],
+    "format": "image/jpeg",
+    "title": "Perf Test Image",
+    "ingredients": [],
+    "assertions": [{
+        "label": "c2pa.actions",
+        "data": {"actions": [{
+            "action": "c2pa.created",
+            "digitalSourceType": "http://cv.iptc.org/newscodes/digitalsourcetype/digitalCreation",
+        }]},
+    }],
+}
+
+
+# Scenario name for progress output, set per-run by run_profile.py via the env.
+_SCENARIO = os.environ.get("PERF_SCENARIO", "")
+
+
+def _iterate(n: int):
+    """Generator over range(n) that reports progress on stderr.
+
+    A line is printed whenever the index is a multiple of max(1, n // 10),
+    plus one final 100% line once the range is exhausted, so the otherwise
+    silent memray run phase does not look hung at high iteration counts.
+    Output goes to stderr to keep it apart from anything written to stdout.
+    """
+    prefix = f"{_SCENARIO}: " if _SCENARIO else ""
+    every = max(1, n // 10)
+    for i in range(n):
+        if i % every == 0:
+            # n >= 1 whenever the loop body runs, so the division is safe.
+            print(f"  {prefix}iter {i}/{n} ({i * 100 // n}%)", file=sys.stderr, flush=True)
+        yield i
+    print(f"  {prefix}iter {n}/{n} (100%)", file=sys.stderr, flush=True)
+
+
+def _make_signer() -> Signer:
+    """Create an es256 Signer from the certificate and private-key fixtures."""
+    cert_path = FIXTURES_DIR / "es256_certs.pem"
+    key_path = FIXTURES_DIR / "es256_private.key"
+    return Signer.from_info(C2paSignerInfo(
+        alg=b"es256",
+        sign_cert=cert_path.read_bytes(),
+        private_key=key_path.read_bytes(),
+        ta_url=b"http://timestamp.digicert.com",
+    ))
+
+
+def _sign_file(path: Path, mime: str, iterations: int) -> None:
+    """Sign `path` once per iteration, handing the signer to every sign() call."""
+    signer, payload = _make_signer(), path.read_bytes()
+    definition = {**MANIFEST_BASE, "format": mime}
+    for _ in _iterate(iterations):
+        src = io.BytesIO(payload)
+        dst = io.BytesIO()
+        builder = Builder(definition)
+        builder.sign(signer, mime, src, dst)
+
+
+def _read_file(path: Path, mime: str, iterations: int) -> None:  # Reader built without a Context
+    for _ in _iterate(iterations):
+        with open(path, "rb") as f:  # the file is reopened on every iteration
+            reader = Reader(mime, f)  # no Context argument is passed
+            reader.json()  # return value is discarded
+            reader.close()
+
+
+# Context-API helpers: the Context is built once before the loop and reused on
+# every iteration, so its settings are parsed a single time. Most scenarios use
+# these. The `_legacy` jpeg/png scenarios build the Reader/Builder without a
+# Context, which re-reads thread-local settings on each construction; running a
+# legacy scenario against its `_with_context` pair isolates the settings cost.
+
+def _sign_file_context(path: Path, mime: str, iterations: int) -> None:
+    """Sign `path` once per iteration through one Context built before the loop."""
+    signer = _make_signer()
+    context = Context(signer=signer)  # signer is consumed into the context
+    payload = path.read_bytes()
+    definition = {**MANIFEST_BASE, "format": mime}
+    for _ in _iterate(iterations):
+        src = io.BytesIO(payload)
+        dst = io.BytesIO()
+        builder = Builder(definition, context=context)
+        builder.sign(mime, src, dst)  # str first arg selects the context signer (c2pa_builder_sign_context)
+
+
+def _read_file_context(path: Path, mime: str, iterations: int) -> None:
+    context = Context()  # built once with no arguments, reused by every iteration
+    for _ in _iterate(iterations):
+        with open(path, "rb") as f:  # the file is reopened on every iteration
+            reader = Reader(mime, f, manifest_data=None, context=context)
+            reader.json()  # return value is discarded
+            reader.close()
+
+
+# Parallel signing: one Context built once and shared across threads. Each
+# thread uses its own BytesIO source/dest and its own Builder per sign; the
+# Context (and its signer) is only read. This exercises Context thread-safety
+# under concurrent signing.
+
+_PARALLEL_THREADS = 10
+
+
+def _sign_parallel(path: Path, mime: str, iterations: int, *,
+                   per_thread_full: bool, launch: str) -> None:
+    """Sign from `_PARALLEL_THREADS` threads sharing one Context.
+
+    per_thread_full=False: the iteration budget is split across threads (each
+        does max(1, iterations // _PARALLEL_THREADS) signs).
+    per_thread_full=True: each thread runs the full `iterations` loop, so total
+        work is _PARALLEL_THREADS x iterations (aggregate concurrent load).
+    launch="pool": ThreadPoolExecutor(max_workers=_PARALLEL_THREADS).
+    launch="barrier" (or any other value): threads released together by a
+        Barrier so all signs start simultaneously (peak Context contention).
+    The first exception raised by a worker is re-raised here in both modes,
+    so a failed sign cannot pass as a cheap, successful profiling run.
+    """
+    signer = _make_signer()
+    context = Context(signer=signer)  # built once, shared, kept open
+    source_bytes = path.read_bytes()
+    manifest = {**MANIFEST_BASE, "format": mime}
+    per_thread = iterations if per_thread_full else max(1, iterations // _PARALLEL_THREADS)
+    errors: list = []  # exceptions raised inside worker threads
+
+    def work(barrier=None):
+        try:
+            if barrier is not None:
+                barrier.wait()  # release all threads at once
+            for _ in range(per_thread):
+                source = io.BytesIO(source_bytes)  # per-thread, never shared
+                output = io.BytesIO()
+                builder = Builder(manifest, context=context)
+                builder.sign(mime, source, output)  # str first arg selects the context signer
+        except Exception as exc:
+            errors.append(exc)
+            raise
+
+    if launch == "pool":
+        with ThreadPoolExecutor(max_workers=_PARALLEL_THREADS) as ex:
+            futures = [ex.submit(work) for _ in range(_PARALLEL_THREADS)]
+            for f in futures:
+                f.result()  # surface exceptions from worker threads
+    else:  # barrier
+        barrier = threading.Barrier(_PARALLEL_THREADS)
+        threads = [threading.Thread(target=work, args=(barrier,)) for _ in range(_PARALLEL_THREADS)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+        if errors:
+            raise errors[0]  # Thread.join() does not propagate worker exceptions
+
+
+# Reader scenarios: read the manifest from fixture files that carry one.
+
+def scenario_reader_jpeg_legacy(iterations: int = 100) -> None:  # Reader built without a Context
+    _read_file(SIGNED_JPEG, "image/jpeg", iterations)
+
+
+def scenario_reader_mp4(iterations: int = 100) -> None:  # Reader reusing one Context
+    _read_file_context(READING_FIXTURES_DIR / "video1.mp4", "video/mp4", iterations)
+
+
+def scenario_reader_wav(iterations: int = 100) -> None:  # Reader reusing one Context
+    _read_file_context(READING_FIXTURES_DIR / "sample1_signed.wav", "audio/wav", iterations)
+
+
+# Builder.sign scenarios without ingredients; all but the `_legacy` ones sign via a Context.
+
+def scenario_builder_sign_jpeg_legacy(iterations: int = 100) -> None:  # signer passed to sign(), no Context
+    _sign_file(SOURCE_JPEG, "image/jpeg", iterations)
+
+
+def scenario_builder_sign_gif(iterations: int = 100) -> None:
+    _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.gif", "image/gif", iterations)
+
+
+def scenario_builder_sign_heic(iterations: int = 100) -> None:
+    _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.heic", "image/heic", iterations)
+
+
+def scenario_builder_sign_m4a(iterations: int = 100) -> None:
+    _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.m4a", "audio/mp4", iterations)
+
+
+def scenario_builder_sign_png_legacy(iterations: int = 100) -> None:  # signer passed to sign(), no Context
+    _sign_file(SIGNING_FIXTURES_DIR / "sample1.png", "image/png", iterations)  # same file as SIGNING_PNG
+
+
+def scenario_builder_sign_webp(iterations: int = 100) -> None:
+    _sign_file_context(SIGNING_FIXTURES_DIR / "sample1.webp", "image/webp", iterations)
+
+
+def scenario_builder_sign_avi(iterations: int = 100) -> None:
+    _sign_file_context(SIGNING_FIXTURES_DIR / "test.avi", "video/x-msvideo", iterations)
+
+
+def scenario_builder_sign_mp4(iterations: int = 100) -> None:
+    _sign_file_context(SIGNING_FIXTURES_DIR / "video1.mp4", "video/mp4", iterations)
+
+
+def scenario_builder_sign_tiff(iterations: int = 100) -> None:
+    _sign_file_context(SIGNING_FIXTURES_DIR / "TUSCANY.TIF", "image/tiff", iterations)
+
+
+# Builder.sign scenarios with ingredient linking
+
+def scenario_builder_sign_jpeg_parent_of(iterations: int = 100) -> None:
+    """Sign A.jpg with C.jpg added as one parentOf ingredient linked to a c2pa.opened action."""
+    context = Context(signer=_make_signer())  # one Context reused by every iteration
+    source_bytes = SOURCE_JPEG.read_bytes()
+    ingredient_bytes = SIGNED_JPEG.read_bytes()
+    manifest = {
+        **MANIFEST_BASE,
+        "assertions": [{  # overrides the "assertions" entry of MANIFEST_BASE
+            "label": "c2pa.actions.v2",
+            "data": {"actions": [{
+                "action": "c2pa.opened",
+                "softwareAgent": {"name": "perf_test"},
+                "parameters": {"ingredientIds": [_PARENT_ID]},
+                "digitalSourceType": _DST_COMPOSITE,
+            }]},
+        }],
+    }
+    for _ in _iterate(iterations):
+        builder = Builder(manifest, context=context)
+        with io.BytesIO(ingredient_bytes) as ing:  # ingredient stream is closed before signing
+            builder.add_ingredient(
+                {"relationship": "parentOf", "instance_id": _PARENT_ID},
+                "image/jpeg", ing,
+            )
+        builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO())  # signed output is discarded
+
+
+def scenario_builder_sign_jpeg_component_of(iterations: int = 100) -> None:
+    """Sign A.jpg with C.jpg added as one componentOf ingredient linked to a c2pa.placed action."""
+    context = Context(signer=_make_signer())  # one Context reused by every iteration
+    source_bytes = SOURCE_JPEG.read_bytes()
+    ingredient_bytes = SIGNED_JPEG.read_bytes()
+    manifest = {
+        **MANIFEST_BASE,
+        "ingredients": [{"format": "image/jpeg", "relationship": "componentOf", "instance_id": _PLACED_ID}],
+        "assertions": [{  # overrides the "assertions" entry of MANIFEST_BASE
+            "label": "c2pa.actions.v2",
+            "data": {"actions": [{
+                "action": "c2pa.placed",
+                "softwareAgent": {"name": "perf_test"},
+                "parameters": {"ingredientIds": [_PLACED_ID]},
+                "digitalSourceType": _DST_COMPOSITE,
+            }]},
+        }],
+    }
+    for _ in _iterate(iterations):
+        builder = Builder(manifest, context=context)
+        with io.BytesIO(ingredient_bytes) as ing:  # ingredient stream is closed before signing
+            builder.add_ingredient(  # NOTE(review): _PLACED_ID is also declared in manifest["ingredients"] - confirm intended
+                {"relationship": "componentOf", "instance_id": _PLACED_ID},
+                "image/jpeg", ing,
+            )
+        builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO())  # signed output is discarded
+
+
+def scenario_builder_sign_jpeg_parent_and_component(iterations: int = 100) -> None:
+    """Sign A.jpg with C.jpg as parentOf and cloud.jpg as componentOf, linked to opened + placed actions."""
+    context = Context(signer=_make_signer())  # one Context reused by every iteration
+    source_bytes = SOURCE_JPEG.read_bytes()
+    parent_bytes = SIGNED_JPEG.read_bytes()
+    placed_bytes = CLOUD_JPEG.read_bytes()
+    manifest = {
+        **MANIFEST_BASE,
+        "assertions": [{  # overrides the "assertions" entry of MANIFEST_BASE
+            "label": "c2pa.actions.v2",
+            "data": {"actions": [
+                {
+                    "action": "c2pa.opened",
+                    "softwareAgent": {"name": "perf_test"},
+                    "parameters": {"ingredientIds": [_PARENT_ID2]},
+                    "digitalSourceType": _DST_COMPOSITE,
+                },
+                {
+                    "action": "c2pa.placed",
+                    "softwareAgent": {"name": "perf_test"},
+                    "parameters": {"ingredientIds": [_PLACED_ID2]},
+                    "digitalSourceType": _DST_COMPOSITE,
+                },
+            ]},
+        }],
+    }
+    for _ in _iterate(iterations):
+        builder = Builder(manifest, context=context)
+        with io.BytesIO(parent_bytes) as ing1, io.BytesIO(placed_bytes) as ing2:  # both closed before signing
+            builder.add_ingredient(
+                {"relationship": "parentOf",   "instance_id": _PARENT_ID2}, "image/jpeg", ing1,
+            )
+            builder.add_ingredient(
+                {"relationship": "componentOf", "instance_id": _PLACED_ID2}, "image/jpeg", ing2,
+            )
+        builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO())  # signed output is discarded
+
+
+def scenario_builder_sign_jpeg_parent_and_component_mixed_mime(iterations: int = 100) -> None:
+    """Sign A.jpg with C.jpg as parentOf and sample1.png as componentOf, linked to opened + placed actions."""
+    context = Context(signer=_make_signer())  # one Context reused by every iteration
+    source_bytes = SOURCE_JPEG.read_bytes()
+    parent_bytes = SIGNED_JPEG.read_bytes()
+    placed_bytes = SIGNING_PNG.read_bytes()
+    manifest = {
+        **MANIFEST_BASE,
+        "assertions": [{  # overrides the "assertions" entry of MANIFEST_BASE
+            "label": "c2pa.actions.v2",
+            "data": {"actions": [
+                {
+                    "action": "c2pa.opened",
+                    "softwareAgent": {"name": "perf_test"},
+                    "parameters": {"ingredientIds": [_PARENT_ID3]},
+                    "digitalSourceType": _DST_COMPOSITE,
+                },
+                {
+                    "action": "c2pa.placed",
+                    "softwareAgent": {"name": "perf_test"},
+                    "parameters": {"ingredientIds": [_PLACED_ID3]},
+                    "digitalSourceType": _DST_COMPOSITE,
+                },
+            ]},
+        }],
+    }
+    for _ in _iterate(iterations):
+        builder = Builder(manifest, context=context)
+        with io.BytesIO(parent_bytes) as ing1, io.BytesIO(placed_bytes) as ing2:  # both closed before signing
+            builder.add_ingredient(
+                {"relationship": "parentOf",   "instance_id": _PARENT_ID3}, "image/jpeg", ing1,
+            )
+            builder.add_ingredient(
+                {"relationship": "componentOf", "instance_id": _PLACED_ID3}, "image/png",  ing2,
+            )
+        builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO())  # signed output is discarded
+
+
+def scenario_builder_sign_jpeg_two_components_same_mime(iterations: int = 100) -> None:
+    """Sign A.jpg with C.jpg and cloud.jpg as componentOf ingredients of a single c2pa.placed action."""
+    context = Context(signer=_make_signer())  # one Context reused by every iteration
+    source_bytes = SOURCE_JPEG.read_bytes()
+    comp1_bytes = SIGNED_JPEG.read_bytes()
+    comp2_bytes = CLOUD_JPEG.read_bytes()
+    manifest = {
+        **MANIFEST_BASE,
+        "assertions": [{  # overrides the "assertions" entry of MANIFEST_BASE
+            "label": "c2pa.actions.v2",
+            "data": {"actions": [{
+                "action": "c2pa.placed",
+                "softwareAgent": {"name": "perf_test"},
+                "parameters": {"ingredientIds": [_PLACED_ID4, _PLACED_ID5]},
+                "digitalSourceType": _DST_COMPOSITE,
+            }]},
+        }],
+    }
+    for _ in _iterate(iterations):
+        builder = Builder(manifest, context=context)
+        with io.BytesIO(comp1_bytes) as ing1, io.BytesIO(comp2_bytes) as ing2:  # both closed before signing
+            builder.add_ingredient(
+                {"relationship": "componentOf", "instance_id": _PLACED_ID4}, "image/jpeg", ing1,
+            )
+            builder.add_ingredient(
+                {"relationship": "componentOf", "instance_id": _PLACED_ID5}, "image/jpeg", ing2,
+            )
+        builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO())  # signed output is discarded
+
+
+def scenario_builder_sign_jpeg_two_components_mixed_mime(iterations: int = 100) -> None:
+    """Sign A.jpg with C.jpg and sample1.png as componentOf ingredients of a single c2pa.placed action."""
+    context = Context(signer=_make_signer())  # one Context reused by every iteration
+    source_bytes = SOURCE_JPEG.read_bytes()
+    comp1_bytes = SIGNED_JPEG.read_bytes()
+    comp2_bytes = SIGNING_PNG.read_bytes()
+    manifest = {
+        **MANIFEST_BASE,
+        "assertions": [{  # overrides the "assertions" entry of MANIFEST_BASE
+            "label": "c2pa.actions.v2",
+            "data": {"actions": [{
+                "action": "c2pa.placed",
+                "softwareAgent": {"name": "perf_test"},
+                "parameters": {"ingredientIds": [_PLACED_ID4, _PLACED_ID5]},
+                "digitalSourceType": _DST_COMPOSITE,
+            }]},
+        }],
+    }
+    for _ in _iterate(iterations):
+        builder = Builder(manifest, context=context)
+        with io.BytesIO(comp1_bytes) as ing1, io.BytesIO(comp2_bytes) as ing2:  # both closed before signing
+            builder.add_ingredient(
+                {"relationship": "componentOf", "instance_id": _PLACED_ID4}, "image/jpeg", ing1,
+            )
+            builder.add_ingredient(
+                {"relationship": "componentOf", "instance_id": _PLACED_ID5}, "image/png",  ing2,
+            )
+        builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO())  # signed output is discarded
+
+
+def scenario_builder_sign_jpeg_archive_roundtrip(iterations: int = 100) -> None:
+    """Serialize a Builder to an archive, load it into a Context-bound Builder, add an ingredient, sign."""
+    context = Context(signer=_make_signer())  # one Context reused by every iteration
+    source_bytes = SOURCE_JPEG.read_bytes()
+    ingredient_bytes = SIGNED_JPEG.read_bytes()
+    for _ in _iterate(iterations):
+        archive = io.BytesIO()
+        Builder(MANIFEST_BASE).to_archive(archive)  # context-less Builder, used only to produce the archive
+        archive.seek(0)  # rewind so the archive can be read back from the start
+        # from_archive() yields a context-less Builder; to keep the Context
+        # (and its signer), build with the context first, then load the archive.
+        builder = Builder(MANIFEST_BASE, context=context).with_archive(archive)
+        with io.BytesIO(ingredient_bytes) as ing:  # ingredient stream is closed before signing
+            builder.add_ingredient(
+                {"relationship": "parentOf", "instance_id": _PARENT_ID},
+                "image/jpeg", ing,
+            )
+        builder.sign("image/jpeg", io.BytesIO(source_bytes), io.BytesIO())  # signed output is discarded
+
+
+# jpeg + png variants that go through a Context; each one is the counterpart
+# of the `_legacy` scenario with the same name, for side-by-side comparison.
+
+def scenario_builder_sign_jpeg_with_context(iterations: int = 100) -> None:  # pairs with builder_sign_jpeg_legacy
+    _sign_file_context(SOURCE_JPEG, "image/jpeg", iterations)
+
+
+def scenario_builder_sign_png_with_context(iterations: int = 100) -> None:  # pairs with builder_sign_png_legacy
+    _sign_file_context(SIGNING_PNG, "image/png", iterations)
+
+
+def scenario_reader_jpeg_with_context(iterations: int = 100) -> None:  # pairs with reader_jpeg_legacy
+    _read_file_context(SIGNED_JPEG, "image/jpeg", iterations)
+
+
+# Parallel signing variants: one shared Context across _PARALLEL_THREADS threads.
+# Only the split-budget variants are defined here: {pool, barrier} x {jpeg, png}.
+
+def scenario_builder_sign_jpeg_parallel_split_pool(iterations: int = 100) -> None:
+    _sign_parallel(SOURCE_JPEG, "image/jpeg", iterations, per_thread_full=False, launch="pool")
+
+
+def scenario_builder_sign_jpeg_parallel_split_barrier(iterations: int = 100) -> None:
+    _sign_parallel(SOURCE_JPEG, "image/jpeg", iterations, per_thread_full=False, launch="barrier")
+
+
+def scenario_builder_sign_png_parallel_split_pool(iterations: int = 100) -> None:
+    _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=False, launch="pool")
+
+
+def scenario_builder_sign_png_parallel_split_barrier(iterations: int = 100) -> None:
+    _sign_parallel(SIGNING_PNG, "image/png", iterations, per_thread_full=False, launch="barrier")
+
+
+SCENARIOS = {  # scenario name -> callable taking the iteration count
+    "reader_jpeg_legacy": scenario_reader_jpeg_legacy,
+    "reader_jpeg_with_context": scenario_reader_jpeg_with_context,
+    "reader_mp4": scenario_reader_mp4,
+    "reader_wav": scenario_reader_wav,
+    "builder_sign_jpeg_legacy": scenario_builder_sign_jpeg_legacy,
+    "builder_sign_jpeg_with_context": scenario_builder_sign_jpeg_with_context,
+    "builder_sign_png_legacy": scenario_builder_sign_png_legacy,
+    "builder_sign_png_with_context": scenario_builder_sign_png_with_context,
+    "builder_sign_jpeg_parallel_split_pool": scenario_builder_sign_jpeg_parallel_split_pool,
+    "builder_sign_jpeg_parallel_split_barrier": scenario_builder_sign_jpeg_parallel_split_barrier,
+    "builder_sign_png_parallel_split_pool": scenario_builder_sign_png_parallel_split_pool,
+    "builder_sign_png_parallel_split_barrier": scenario_builder_sign_png_parallel_split_barrier,
+    "builder_sign_gif": scenario_builder_sign_gif,
+    "builder_sign_heic": scenario_builder_sign_heic,
+    "builder_sign_m4a": scenario_builder_sign_m4a,
+    "builder_sign_webp": scenario_builder_sign_webp,
+    "builder_sign_avi": scenario_builder_sign_avi,
+    "builder_sign_mp4": scenario_builder_sign_mp4,
+    "builder_sign_tiff": scenario_builder_sign_tiff,
+    "builder_sign_jpeg_parent_of": scenario_builder_sign_jpeg_parent_of,
+    "builder_sign_jpeg_component_of": scenario_builder_sign_jpeg_component_of,
+    "builder_sign_jpeg_parent_and_component": scenario_builder_sign_jpeg_parent_and_component,
+    "builder_sign_jpeg_parent_and_component_mixed_mime": scenario_builder_sign_jpeg_parent_and_component_mixed_mime,
+    "builder_sign_jpeg_two_components_same_mime": scenario_builder_sign_jpeg_two_components_same_mime,
+    "builder_sign_jpeg_two_components_mixed_mime": scenario_builder_sign_jpeg_two_components_mixed_mime,
+    "builder_sign_jpeg_archive_roundtrip": scenario_builder_sign_jpeg_archive_roundtrip,
+}
+
+
+# Canonical scenario name list, derived from SCENARIOS so the two cannot drift.
+# (dict preserves insertion order, so this matches the dict's declaration order.)
+SCENARIO_NAMES = tuple(SCENARIOS)