diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f525644..b5bc9b8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -105,7 +105,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements-lock.txt
-          python -m pip install 'pytest>=8,<9' 'hypothesis>=6.100,<7'
+          python -m pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0' 'hypothesis>=6.100,<7'
 
       - name: Run unittest suite
         run: python -m unittest discover tests -v
@@ -114,7 +114,8 @@ jobs:
         # Pytest fixtures (tests/conftest.py) build a temp workspaceStorage and
         # exercise Flask routes via app.test_client(). Only listed files — not
         # `pytest tests/` — to avoid re-collecting unittest.TestCase classes above.
-        run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py -v --tb=short
+        # -o addopts= avoids inheriting benchmark-only options from pyproject.toml.
+        run: python -m pytest tests/test_api_search.py tests/test_api_workspaces.py tests/test_api_export.py tests/test_pdf_export.py tests/test_search_helpers.py tests/test_check_benchmark_regression.py -v --tb=short -o addopts=
 
       # ── PyInstaller desktop build (Windows only, once per workflow) ────────
       # Closes #44. Builds the onedir bundle and smoke-tests --help so the
@@ -213,3 +214,41 @@ jobs:
             --verbose \
             --redact \
             --exit-code 1
+
+  # ── Performance benchmarks: summary cache (issue #7) ───────────────────────
+  benchmarks:
+    name: Performance benchmarks (gated)
+    needs: [unittest]  # starts only after the `unittest` job has succeeded
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: "3.12"
+
+      - name: Install runtime + benchmark dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements-lock.txt
+          python -m pip install 'pytest>=8,<9' 'pytest-benchmark==4.0.0'
+
+      - name: Run summary-cache benchmarks  # the folded `>` scalar below is one pytest command
+        run: >
+          python -m pytest tests/benchmarks/
+          --benchmark-only
+          --benchmark-json=benchmark-results.json
+          --benchmark-columns=min,max,mean,stddev,rounds
+          -o addopts=
+
+      - name: Regression gate  # non-zero exit when a gated mean regresses or the inputs are invalid
+        run: python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json
+
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        if: always()  # upload the JSON even when an earlier step failed
+        with:
+          name: benchmark-results
+          path: benchmark-results.json
diff --git a/.gitignore b/.gitignore
index 5fd078f..f204306 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,5 @@ Thumbs.db
 htmlcov/
 coverage.xml
 .hypothesis/
+benchmark-results.json
+benchmarks/_raw.json
diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json
new file mode 100644
index 0000000..d664af3
--- /dev/null
+++ b/benchmarks/baselines.json
@@ -0,0 +1,15 @@
+{
+  "_note": "Gated means from ubuntu-latest CI benchmark-results.json (PR #120, run 28123677675). Refresh: pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json -o addopts=",
+  "updated": "2026-06-24T19:20:27Z",
+  "machine": "Linux",
+  "groups": {
+    "summary-cache": {
+      "test_summary_cache_hit": 6.3e-05,
+      "test_summary_cache_miss": 6.3e-05,
+      "test_fingerprint_workspace_entries[10]": 0.001844,
+      "test_fingerprint_workspace_entries[50]": 0.007759,
+      "test_fingerprint_workspace_entries[200]": 0.022231,
+      "test_summary_cache_round_trip": 0.000351
+    }
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
index a49ac40..6c37998 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,10 +31,19 @@ desktop = ["pywebview>=5.0,<6"]
 # Development tooling: testing + type checking.
 dev = [
     "pytest>=8,<9",
+    "pytest-benchmark>=4,<5",
     "mypy>=1.10,<2",
     "hypothesis>=6.100,<7",
 ]
 
+[tool.pytest.ini_options]
+pythonpath = ["."]
+addopts = "--benchmark-disable"  # pytest-benchmark option; the CI benchmark job clears it with `-o addopts=`
+testpaths = ["tests"]
+markers = [
+    "benchmark: performance benchmarks (pytest-benchmark)",
+]
+
 [project.scripts]
 # Primary CLI: export Cursor chat histories to Markdown / zip.
 # Usage: cursor-chat-export [--since all|last] [--out DIR] [--no-zip] [--help]
diff --git a/requirements-lock.txt b/requirements-lock.txt
index 4a65662..beaa107 100644
--- a/requirements-lock.txt
+++ b/requirements-lock.txt
@@ -6,7 +6,7 @@
 # Lock is generated on Linux (CI / update-lock.yml). Windows-only transitives (e.g.
 # colorama via click) are omitted — pip still installs them on Windows when needed.
 blinker==1.9.0            # via flask
-click==8.4.1              # via flask
+click==8.4.2              # via flask
 defusedxml==0.7.1         # via fpdf2
 flask==3.1.3              # via -r requirements.txt
 fonttools==4.63.0         # via fpdf2
diff --git a/scripts/check_benchmark_regression.py b/scripts/check_benchmark_regression.py
new file mode 100644
index 0000000..d2fc79c
--- /dev/null
+++ b/scripts/check_benchmark_regression.py
@@ -0,0 +1,163 @@
+"""Compare pytest-benchmark JSON output against stored baselines."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+THRESHOLD = 1.20
+
+
+class BenchmarkDataError(ValueError):
+    """Benchmark results or baselines JSON is unreadable, malformed, or lacks required fields."""
+
+
+def normalize_benchmark_name(name: str) -> str:
+    """Strip a leading ``path/to/test_mod.py::`` node prefix from *name*, if present.
+
+    Only a real module path is removed: the text before the first ``::`` must end in
+    ``.py`` and contain no ``[``, so ids like ``test_x[a.py::b]`` are returned unchanged.
+    """
+    text = str(name)
+    prefix, separator, suffix = text.partition("::")
+    is_module_prefix = bool(separator) and prefix.endswith(".py") and "[" not in prefix
+    return suffix if is_module_prefix else text
+
+
+def load_results(results_path: str | Path) -> dict[str, float]:
+    """Return ``{name: mean}`` from pytest-benchmark JSON output; BenchmarkDataError if invalid."""
+    path = Path(results_path)
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except OSError as exc:
+        raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
+    except (UnicodeDecodeError, json.JSONDecodeError) as exc:  # undecodable bytes are bad JSON too
+        raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
+    try:
+        benchmarks = data["benchmarks"]
+    except (KeyError, TypeError) as exc:
+        raise BenchmarkDataError(f"{path} missing top-level 'benchmarks' array") from exc
+    if not isinstance(benchmarks, list):
+        raise BenchmarkDataError(f"{path} 'benchmarks' must be an array")
+
+    results: dict[str, float] = {}
+    for index, entry in enumerate(benchmarks):
+        where = f"{path} benchmarks[{index}]"
+        if not isinstance(entry, dict):
+            raise BenchmarkDataError(f"{where} must be an object")
+        try:
+            raw_name = entry["name"]
+            mean = float(entry["stats"]["mean"])
+        except (KeyError, TypeError, ValueError) as exc:
+            raise BenchmarkDataError(f"{where} missing 'name' or 'stats.mean'") from exc
+        name = normalize_benchmark_name(str(raw_name))  # results and baselines share one key form
+        if name in results:
+            raise BenchmarkDataError(f"{path} duplicate benchmark name {name!r}")
+        results[name] = mean
+    return results
+
+
+def load_baseline_means(baselines_path: str | Path) -> dict[str, float]:
+    """Return ``{benchmark name: mean}`` from the baselines file; BenchmarkDataError if invalid."""
+    path = Path(baselines_path)
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except OSError as exc:
+        raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
+    except (UnicodeDecodeError, json.JSONDecodeError) as exc:  # undecodable bytes are bad JSON too
+        raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
+    if not isinstance(data, dict):
+        raise BenchmarkDataError(f"{path} root value must be an object")
+    if "groups" not in data:
+        raise BenchmarkDataError(f"{path} missing required 'groups' key")
+    groups = data["groups"]
+    if not isinstance(groups, dict):
+        raise BenchmarkDataError(f"{path} 'groups' must be an object")
+
+    means: dict[str, float] = {}
+    for group_name, value in groups.items():
+        where = f"{path} groups[{group_name!r}]"
+        if not isinstance(value, dict):
+            raise BenchmarkDataError(f"{where} must be an object of benchmark means")
+        for name, mean in value.items():
+            bench_name = normalize_benchmark_name(str(name))
+            if bench_name in means:
+                raise BenchmarkDataError(
+                    f"{path} duplicate benchmark name {bench_name!r} across groups"
+                )
+            try:
+                parsed = float(mean)
+            except (TypeError, ValueError) as exc:
+                raise BenchmarkDataError(f"{where}[{name!r}] is not a numeric mean") from exc
+            if not 0 <= parsed < float("inf"):  # NaN, inf or negative would never trip the gate
+                raise BenchmarkDataError(f"{where}[{name!r}] must be a finite, non-negative mean")
+            means[bench_name] = parsed
+    return means
+
+
+def check_regression(
+    results_path: str | Path,
+    baselines_path: str | Path,
+    *,
+    threshold: float = THRESHOLD,
+) -> int:
+    """Compare current means with baselines, printing one status line per benchmark.
+
+    Returns 1 if a baselined benchmark is missing from the results or exceeds ``threshold``
+    times its baseline, else 0; unbaselined results and zero baselines only warn.
+    """
+    current_means = load_results(results_path)
+    baseline_means = load_baseline_means(baselines_path)
+
+    regressed: list[str] = []
+    absent: list[str] = []
+    for name, base in baseline_means.items():
+        if name not in current_means:
+            print(f"FAIL: no current result for gated baseline {name!r}")
+            absent.append(name)
+        elif base == 0:
+            print(f"WARN: baseline for {name!r} is zero; skipping ratio check")
+        else:
+            cur = current_means[name]
+            ratio = cur / base
+            tag = "FAIL" if ratio > threshold else "ok"
+            print(f"[{tag}] {name}: {cur:.6f}s vs {base:.6f}s ({ratio:.2f}x)")
+            if tag == "FAIL":
+                regressed.append(name)
+    for name in current_means:
+        if name not in baseline_means:
+            print(f"WARN: {name!r} has no baseline yet; not gated")
+
+    if regressed:
+        print(f"\nREGRESSION: {len(regressed)} benchmark(s) exceeded {threshold:.0%}")
+    if absent:
+        print(f"\nMISSING: {len(absent)} gated benchmark(s) absent from current results")
+    return 1 if regressed or absent else 0
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Command-line entry point; returns the process exit code.
+
+    0 or 1 comes from check_regression; 2 means a BenchmarkDataError was printed to stderr.
+    """
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument("results_path", help="pytest-benchmark --benchmark-json output")
+    cli.add_argument("baselines_path", help="path to benchmarks/baselines.json")
+    cli.add_argument(
+        "--threshold",
+        type=float,
+        default=THRESHOLD,
+        help="fail when current mean exceeds baseline by more than this ratio (default: 1.20)",
+    )
+    opts = cli.parse_args(argv)
+    try:
+        return check_regression(opts.results_path, opts.baselines_path, threshold=opts.threshold)
+    except BenchmarkDataError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        return 2
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py
new file mode 100644
index 0000000..25d9234
--- /dev/null
+++ b/tests/benchmarks/conftest.py
@@ -0,0 +1,88 @@
+"""Synthetic workspace trees for summary-cache performance benchmarks."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from services import summary_cache
+from services.summary_cache import fingerprint_workspace_storage
+
+
+def make_workspace_entries(workspace_root: Path, count: int) -> list[dict[str, Any]]:
+    """Create *count* ``ws_NNNN`` directories under *workspace_root* and describe them.
+
+    Each directory receives a ``state.vscdb`` stub and a ``workspace.json``; the returned
+    dicts carry the directory name and the path to that ``workspace.json``.
+    """
+
+    def _create(index: int) -> dict[str, Any]:
+        label = f"ws_{index:04d}"
+        folder = workspace_root / label
+        folder.mkdir(parents=True, exist_ok=True)
+        folder.joinpath("state.vscdb").write_bytes(b"bench")
+        manifest = folder / "workspace.json"
+        manifest.write_text('{"folder": "/bench"}', encoding="utf-8")
+        return {"name": label, "workspaceJsonPath": str(manifest)}
+
+    return [_create(index) for index in range(count)]
+
+
+@pytest.fixture
+def summary_cache_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    """Point ``services.summary_cache`` at a throwaway cache directory for one test.
+
+    Monkeypatches ``CACHE_DIR`` (also used by tab-summary paths via ``_tab_summaries_path``),
+    ``PROJECTS_CACHE_FILE`` and ``COMPOSER_MAP_CACHE_FILE`` to paths under the returned dir.
+    """
+    cache_dir = tmp_path / "cache"
+    cache_dir.mkdir()
+    redirected = {
+        "CACHE_DIR": cache_dir,
+        "PROJECTS_CACHE_FILE": cache_dir / "projects.json",
+        "COMPOSER_MAP_CACHE_FILE": cache_dir / "composer-id-to-ws.json",
+    }
+    for attribute, target in redirected.items():
+        monkeypatch.setattr(summary_cache, attribute, target)
+    return cache_dir
+
+
+@pytest.fixture
+def sample_projects() -> list[dict[str, Any]]:
+    """One minimal project record used as the cached payload in the benchmarks."""
+    project: dict[str, Any] = {
+        "id": "ws_0000",
+        "name": "Bench Project",
+        "conversationCount": 3,
+        "lastModified": "2026-06-24T00:00:00Z",
+    }
+    return [project]
+
+
+@pytest.fixture
+def synthetic_workspace(tmp_path: Path, request: pytest.FixtureRequest) -> tuple[str, list[dict[str, Any]]]:
+    """Workspace path + entries. Entry count is this fixture's indirect param (default 10)."""
+    count = getattr(request, "param", 10)  # falls back to 10 when the request carries no ``param``
+    workspace_root = tmp_path / "workspaceStorage"
+    workspace_root.mkdir()
+    entries = make_workspace_entries(workspace_root, count)
+    return str(workspace_root), entries
+
+
+@pytest.fixture
+def workspace_fingerprint(synthetic_workspace: tuple[str, list[dict[str, Any]]]) -> dict[str, Any]:
+    """Fingerprint of the synthetic workspace.
+
+    Computed by ``fingerprint_workspace_storage`` with no global DB path and no rules.
+    """
+    root, entry_list = synthetic_workspace
+    options: dict[str, Any] = {"global_db_path": None, "rules": []}
+    return fingerprint_workspace_storage(root, entry_list, **options)
+
+
+@pytest.fixture
+def stale_fingerprint(workspace_fingerprint: dict[str, Any]) -> dict[str, Any]:
+    """Copy of the workspace fingerprint with ``rules_digest`` swapped for a dummy value."""
+    return dict(workspace_fingerprint, rules_digest="deadbeefdeadbeef")
diff --git a/tests/benchmarks/test_summary_cache_bench.py b/tests/benchmarks/test_summary_cache_bench.py
new file mode 100644
index 0000000..b9a4595
--- /dev/null
+++ b/tests/benchmarks/test_summary_cache_bench.py
@@ -0,0 +1,74 @@
+"""pytest-benchmark coverage for services/summary_cache.py hot paths."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from services.summary_cache import (
+    fingerprint_workspace_storage,
+    get_cached_projects,
+    set_cached_projects,
+)
+
+
+@pytest.mark.benchmark(group="summary-cache")
+def test_summary_cache_hit(
+    benchmark,
+    summary_cache_dir: Path,  # requested for its monkeypatching side effect; value unused
+    workspace_fingerprint: dict[str, Any],
+    sample_projects: list[dict[str, Any]],
+) -> None:
+    set_cached_projects(workspace_fingerprint, sample_projects, [])  # seeded once, not timed
+    benchmark(get_cached_projects, workspace_fingerprint)  # NOTE(review): result unchecked — confirm a hit
+
+
+@pytest.mark.benchmark(group="summary-cache")
+def test_summary_cache_miss(
+    benchmark,
+    summary_cache_dir: Path,  # requested for its monkeypatching side effect; value unused
+    workspace_fingerprint: dict[str, Any],
+    stale_fingerprint: dict[str, Any],
+    sample_projects: list[dict[str, Any]],
+) -> None:
+    set_cached_projects(workspace_fingerprint, sample_projects, [])  # cache stores the real fingerprint
+    benchmark(get_cached_projects, stale_fingerprint)  # NOTE(review): result unchecked — confirm a miss
+
+
+@pytest.mark.benchmark(group="summary-cache")
+@pytest.mark.parametrize(
+    "synthetic_workspace",
+    [10, 50, 200],  # workspace entry counts, delivered to the fixture as ``request.param``
+    indirect=True,
+)
+def test_fingerprint_workspace_entries(
+    benchmark,
+    synthetic_workspace: tuple[str, list[dict[str, Any]]],
+) -> None:
+    workspace_path, entries = synthetic_workspace
+    benchmark(  # only the fingerprint call is timed; the tree was built by the fixture
+        fingerprint_workspace_storage,
+        workspace_path,
+        entries,
+        global_db_path=None,
+        rules=[],
+    )
+
+
+@pytest.mark.benchmark(group="summary-cache")
+def test_summary_cache_round_trip(
+    benchmark,
+    summary_cache_dir: Path,
+    workspace_fingerprint: dict[str, Any],
+    sample_projects: list[dict[str, Any]],
+) -> None:
+    """Time one write of the projects cache immediately followed by a read of it."""
+
+    def write_then_read() -> None:
+        set_cached_projects(workspace_fingerprint, sample_projects, [])
+        get_cached_projects(workspace_fingerprint)
+
+    # Both calls sit inside the timed callable, so the figure covers write + read.
+    benchmark(write_then_read)
diff --git a/tests/test_check_benchmark_regression.py b/tests/test_check_benchmark_regression.py
new file mode 100644
index 0000000..8de10a8
--- /dev/null
+++ b/tests/test_check_benchmark_regression.py
@@ -0,0 +1,215 @@
+"""Tests for scripts/check_benchmark_regression.py."""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from scripts.check_benchmark_regression import (
+    BenchmarkDataError,
+    check_regression,
+    load_baseline_means,
+    load_results,
+    normalize_benchmark_name,
+)
+
+GATED_BENCH = "test_summary_cache_hit"
+
+
+def _write_results(path, benchmarks: list[dict]) -> None:
+    """Write *benchmarks* to *path* as a pytest-benchmark-style ``{"benchmarks": [...]}`` file."""
+    document = {"benchmarks": benchmarks}
+    serialized = json.dumps(document, indent=2)
+    path.write_text(serialized, encoding="utf-8")
+
+
+def _write_baselines(path, groups: dict[str, dict[str, float]]) -> None:
+    """Write *groups* to *path* wrapped in the baselines file's ``{"groups": {...}}`` layout."""
+    document = {"groups": groups}
+    serialized = json.dumps(document, indent=2)
+    path.write_text(serialized, encoding="utf-8")
+
+
+def test_normalize_benchmark_name_strips_module_prefix() -> None:
+    short = "test_summary_cache_hit"
+    full = f"tests/benchmarks/test_summary_cache_bench.py::{short}"
+    assert (normalize_benchmark_name(full), normalize_benchmark_name(short)) == (short, short)
+
+
+def test_normalize_benchmark_name_preserves_colons_in_param_values() -> None:
+    """``::`` inside a parametrization id survives, with or without a module prefix."""
+    param_id = "test_x[param::v]"
+    for candidate in (param_id, f"tests/benchmarks/test_x.py::{param_id}"):
+        assert normalize_benchmark_name(candidate) == param_id
+
+
+def test_load_results_normalizes_full_node_id(tmp_path) -> None:
+    """Results recorded under a full node id are retrievable by the short test name."""
+    results_file = tmp_path / "results.json"
+    # The entry is stored under the module-qualified node id.
+    entry = {
+        "name": "tests/benchmarks/test_summary_cache_bench.py::test_summary_cache_hit",
+        "stats": {"mean": 0.0001},
+    }
+    _write_results(results_file, [entry])
+
+    loaded = load_results(results_file)
+
+    assert loaded["test_summary_cache_hit"] == pytest.approx(0.0001)
+
+
+def test_missing_baseline_warns_without_failing(
+    tmp_path, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """A result with no baseline only triggers a warning; the gate still returns 0."""
+    current_path = tmp_path / "results.json"
+    baseline_path = tmp_path / "baselines.json"
+    # One gated benchmark sitting at its baseline plus one that has no baseline yet.
+    current = [
+        {"name": "test_new_bench", "stats": {"mean": 0.01}},
+        {"name": GATED_BENCH, "stats": {"mean": 0.0001}},
+    ]
+    gated_baseline = {"summary-cache": {GATED_BENCH: 0.0001}}
+    _write_results(current_path, current)
+    _write_baselines(baseline_path, gated_baseline)
+
+    exit_code = check_regression(current_path, baseline_path)
+    printed = capsys.readouterr().out
+
+    assert exit_code == 0
+    assert "WARN: 'test_new_bench' has no baseline yet" in printed
+
+
+def test_regression_over_threshold_fails(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """A mean at 1.25x its baseline trips the default 1.20 threshold and is reported."""
+    current_path = tmp_path / "results.json"
+    baseline_path = tmp_path / "baselines.json"
+    # 0.00025s measured against a 0.0002s baseline.
+    slower = [{"name": GATED_BENCH, "stats": {"mean": 0.00025}}]
+    gated_baseline = {"summary-cache": {GATED_BENCH: 0.0002}}
+    _write_results(current_path, slower)
+    _write_baselines(baseline_path, gated_baseline)
+
+    exit_code = check_regression(current_path, baseline_path)
+    printed = capsys.readouterr().out
+
+    assert exit_code == 1
+    assert "REGRESSION" in printed
+
+
+def test_within_threshold_passes(tmp_path) -> None:
+    """A mean at 1.10x its baseline stays under the default threshold: exit code 0."""
+    current_path = tmp_path / "results.json"
+    baseline_path = tmp_path / "baselines.json"
+    # 0.00022s measured against a 0.0002s baseline.
+    measured = [{"name": GATED_BENCH, "stats": {"mean": 0.00022}}]
+    gated_baseline = {"summary-cache": {GATED_BENCH: 0.0002}}
+    _write_results(current_path, measured)
+    _write_baselines(baseline_path, gated_baseline)
+
+    exit_code = check_regression(current_path, baseline_path)
+
+    assert exit_code == 0
+
+
+def test_load_results_rejects_malformed_json(tmp_path) -> None:
+    broken_file = tmp_path / "bad.json"
+    broken_file.write_text("{not json", encoding="utf-8")
+    with pytest.raises(BenchmarkDataError, match="invalid JSON"):  # unparsable text on disk
+        load_results(broken_file)
+
+
+def test_load_results_requires_benchmarks_array(tmp_path) -> None:
+    empty_object = tmp_path / "results.json"
+    empty_object.write_text("{}", encoding="utf-8")
+    with pytest.raises(BenchmarkDataError, match="'benchmarks' array"):  # key is absent entirely
+        load_results(empty_object)
+
+
+def test_load_results_rejects_missing_file(tmp_path) -> None:
+    with pytest.raises(BenchmarkDataError, match="cannot read"):  # OSError is wrapped, not propagated
+        load_results(tmp_path / "missing.json")  # this file is never created
+
+
+def test_zero_baseline_skips_ratio_check(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """A baseline of exactly 0 cannot form a ratio: it is skipped with a warning, exit 0."""
+    current_path = tmp_path / "results.json"
+    baseline_path = tmp_path / "baselines.json"
+    measured = [{"name": GATED_BENCH, "stats": {"mean": 0.00025}}]
+    zero_baseline = {"summary-cache": {GATED_BENCH: 0.0}}
+    _write_results(current_path, measured)
+    _write_baselines(baseline_path, zero_baseline)
+
+    exit_code = check_regression(current_path, baseline_path)
+    printed = capsys.readouterr().out
+
+    assert exit_code == 0
+    assert f"baseline for '{GATED_BENCH}' is zero" in printed
+
+
+def test_exactly_at_threshold_passes(tmp_path) -> None:
+    """A ratio equal to the threshold (0.00024 vs 0.0002) is not a regression."""
+    current_path = tmp_path / "results.json"
+    baseline_path = tmp_path / "baselines.json"
+    # Only ratios strictly greater than the threshold count as regressions.
+    measured = [{"name": GATED_BENCH, "stats": {"mean": 0.00024}}]
+    gated_baseline = {"summary-cache": {GATED_BENCH: 0.0002}}
+    _write_results(current_path, measured)
+    _write_baselines(baseline_path, gated_baseline)
+
+    exit_code = check_regression(current_path, baseline_path)
+
+    assert exit_code == 0
+
+
+def test_missing_current_result_fails(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """A baselined benchmark that produced no result fails the gate and is listed as missing."""
+    current_path = tmp_path / "results.json"
+    baseline_path = tmp_path / "baselines.json"
+    _write_results(current_path, [])
+    _write_baselines(baseline_path, {"summary-cache": {GATED_BENCH: 0.0002}})
+
+    exit_code = check_regression(current_path, baseline_path)
+    printed = capsys.readouterr().out
+
+    assert exit_code == 1
+    for expected in ("MISSING", "no current result for gated baseline"):
+        assert expected in printed
+
+
+def test_main_reports_benchmark_data_error(tmp_path, capsys: pytest.CaptureFixture[str]) -> None:
+    """``main`` turns BenchmarkDataError into exit code 2 plus an ``ERROR:`` line on stderr."""
+    from scripts.check_benchmark_regression import main
+
+    invalid_results = tmp_path / "bad.json"
+    invalid_results.write_text("{}", encoding="utf-8")
+    baseline_path = tmp_path / "baselines.json"
+    _write_baselines(baseline_path, {"summary-cache": {GATED_BENCH: 0.0002}})
+    assert main([str(invalid_results), str(baseline_path)]) == 2
+    assert "ERROR:" in capsys.readouterr().err
+
+
+def test_duplicate_baseline_name_raises(tmp_path) -> None:
+    """The same benchmark name under two groups is rejected with BenchmarkDataError."""
+    baseline_path = tmp_path / "baselines.json"
+    # Both groups gate the same benchmark name.
+    conflicting_groups = {
+        "summary-cache": {GATED_BENCH: 0.0002},
+        "export": {GATED_BENCH: 0.0003},
+    }
+    _write_baselines(baseline_path, conflicting_groups)
+
+    with pytest.raises(BenchmarkDataError, match="duplicate benchmark name"):
+        load_baseline_means(baseline_path)
+
+
+def test_load_baseline_means_rejects_non_dict_group(tmp_path) -> None:
+    """A group whose value is a string instead of an object is rejected."""
+    baseline_path = tmp_path / "baselines.json"
+    # Written directly: _write_baselines is annotated for dict-valued groups only.
+    malformed = json.dumps({"groups": {"summary-cache": "not-a-dict"}})
+    baseline_path.write_text(malformed, encoding="utf-8")
+
+    with pytest.raises(BenchmarkDataError, match="must be an object"):
+        load_baseline_means(baseline_path)