From 1e9285ce98da0732f7716ded0d6e9ceb1ab20b6b Mon Sep 17 00:00:00 2001
From: Ryan Auld <RyAuld@microsoft.com>
Date: Mon, 15 Jun 2026 09:58:33 -0700
Subject: [PATCH 1/6] Fix benchmark cache path: use directory instead of file

The Cache@2 ADO task requires a directory path, but .perf.baseline was a
file. Move baseline storage into a .perf-baseline/ directory so the cache
task can save/restore it correctly.

- Change cache path to $(System.DefaultWorkingDirectory)/.perf-baseline
- Update test_benchmark.py to store baseline in .perf-baseline/data
- Ensure directory exists with mkdir -p before running benchmarks
- Update .gitignore accordingly

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .gitignore              | 2 +-
 azure-pipelines.yml     | 3 ++-
 tests/test_benchmark.py | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1af10eff..862aea53 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,7 +61,7 @@ tests/config.json
 msal_cache.bin
 
 .env
-.perf.baseline
+.perf-baseline/
 
 *.pfx
 .vscode/settings.json
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 8aef40a4..f62c20cc 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -105,9 +105,10 @@ extends:
           displayName: 'Restore performance baseline cache'
           inputs:
             key: 'perf-baseline | "$(Agent.OS)" | tests/test_benchmark.py'
-            path: .perf.baseline
+            path: $(System.DefaultWorkingDirectory)/.perf-baseline
 
         - bash: |
+            mkdir -p .perf-baseline
             pytest --benchmark-only --benchmark-json benchmark.json --log-cli-level INFO tests/test_benchmark.py
           displayName: 'Run benchmarks'
 
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 9aaeac05..2f6d8efc 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -2,7 +2,7 @@
 from perf_baseline import Baseline
 
 
-baseline = Baseline(".perf.baseline", threshold=1.5)  # Up to 1.5x slower than baseline
+baseline = Baseline(".perf-baseline/data", threshold=1.5)  # Up to 1.5x slower than baseline
 
 # Here come benchmark test cases, powered by pytest-benchmark
 # Func names will become diag names.

From efd4cd1900fa193f9e8c6b5d8a88435349c6efb8 Mon Sep 17 00:00:00 2001
From: Ryan Auld <RyAuld@microsoft.com>
Date: Mon, 15 Jun 2026 10:17:09 -0700
Subject: [PATCH 2/6] Add forceBenchmarks pipeline variable to allow manual
 benchmark runs

Allows benchmarks to run on non-dev branches by setting the pipeline
variable forceBenchmarks=true when queuing a manual build.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 azure-pipelines.yml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index f62c20cc..198e4061 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -74,11 +74,16 @@ extends:
       condition: |
         and(
           succeeded('E2ETests'),
-          eq(variables['Build.SourceBranch'], 'refs/heads/dev'),
           or(
-            eq(variables['Build.Reason'], 'IndividualCI'),
-            eq(variables['Build.Reason'], 'BatchedCI'),
-            eq(variables['Build.Reason'], 'Manual')
+            eq(variables['forceBenchmarks'], 'true'),
+            and(
+              eq(variables['Build.SourceBranch'], 'refs/heads/dev'),
+              or(
+                eq(variables['Build.Reason'], 'IndividualCI'),
+                eq(variables['Build.Reason'], 'BatchedCI'),
+                eq(variables['Build.Reason'], 'Manual')
+              )
+            )
           )
         )
       jobs:

From d9d3884f05850aac105065c09a2239065de53480 Mon Sep 17 00:00:00 2001
From: Ryan Auld <RyAuld@microsoft.com>
Date: Mon, 15 Jun 2026 12:01:16 -0700
Subject: [PATCH 3/6] Address PR review: ensure .perf-baseline dir exists in
 all contexts

- Add os.makedirs in test_benchmark.py so local runs don't fail
- Use absolute path in mkdir -p to align with Cache@2 path

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 azure-pipelines.yml     | 2 +-
 tests/test_benchmark.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 198e4061..dda62863 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -113,7 +113,7 @@ extends:
             path: $(System.DefaultWorkingDirectory)/.perf-baseline
 
         - bash: |
-            mkdir -p .perf-baseline
+            mkdir -p $(System.DefaultWorkingDirectory)/.perf-baseline
             pytest --benchmark-only --benchmark-json benchmark.json --log-cli-level INFO tests/test_benchmark.py
           displayName: 'Run benchmarks'
 
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 2f6d8efc..ab74ea82 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -2,6 +2,8 @@
 from perf_baseline import Baseline
 
 
+import os
+os.makedirs(".perf-baseline", exist_ok=True)
 baseline = Baseline(".perf-baseline/data", threshold=1.5)  # Up to 1.5x slower than baseline
 
 # Here come benchmark test cases, powered by pytest-benchmark

From 0ea7fb156410cd512e6a24013f57fa47bd0a335a Mon Sep 17 00:00:00 2001
From: Ryan Auld <RyAuld@microsoft.com>
Date: Mon, 15 Jun 2026 12:09:37 -0700
Subject: [PATCH 4/6] Address review: anchor baseline path to repo root, fix
 comment

- Use __file__-based path resolution so baseline works from any CWD
- Move stdlib imports before third-party imports
- Update pipeline comment to reflect forceBenchmarks behavior

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 azure-pipelines.yml     |  2 +-
 tests/test_benchmark.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index dda62863..e58d330b 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -70,7 +70,7 @@ extends:
     - stage: Benchmark
       displayName: 'Run benchmarks'
       dependsOn: E2ETests
-      # Only run on post-merge pushes to dev - not on PRs or scheduled runs.
+      # Run on post-merge pushes to dev, or manually when forceBenchmarks=true.
       condition: |
         and(
           succeeded('E2ETests'),
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index ab74ea82..6eeaae47 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -1,10 +1,14 @@
+import os
+from pathlib import Path
+
 from tests.simulator import ClientCredentialGrantSimulator as CcaTester
 from perf_baseline import Baseline
 
 
-import os
-os.makedirs(".perf-baseline", exist_ok=True)
-baseline = Baseline(".perf-baseline/data", threshold=1.5)  # Up to 1.5x slower than baseline
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+_BASELINE_DIR = _REPO_ROOT / ".perf-baseline"
+os.makedirs(_BASELINE_DIR, exist_ok=True)
+baseline = Baseline(str(_BASELINE_DIR / "data"), threshold=1.5)  # Up to 1.5x slower than baseline
 
 # Here come benchmark test cases, powered by pytest-benchmark
 # Func names will become diag names.

From 56676811c826a3e782d0c11ea625a8997eb2e30c Mon Sep 17 00:00:00 2001
From: Ryan Auld <RyAuld@microsoft.com>
Date: Mon, 15 Jun 2026 12:49:01 -0700
Subject: [PATCH 5/6] Add 5-min step timeout to pytest to fail fast on 3.14
 stalls

Python 3.14 intermittently stalls during test execution, consuming the
full 30-min job timeout. Add a 5-minute timeoutInMinutes on the pytest
step so stalls are caught quickly instead of wasting agent time.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .Pipelines/template-pipeline-stages.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.Pipelines/template-pipeline-stages.yml b/.Pipelines/template-pipeline-stages.yml
index d62d670f..1bfee6a3 100644
--- a/.Pipelines/template-pipeline-stages.yml
+++ b/.Pipelines/template-pipeline-stages.yml
@@ -128,6 +128,10 @@ stages:
           --deselect tests/test_cryptography.py::CryptographyTestCase::test_should_be_run_with_latest_version_of_cryptography \
           2>&1 | tee test-results/pytest-unit.log
       displayName: 'Run pytest (unit)'
+      # Python 3.14 intermittently stalls during test execution (likely due to
+      # missing pre-built wheels forcing source compilation or runtime changes).
+      # Cap at 5 minutes to fail fast instead of consuming the full 30-min job timeout.
+      timeoutInMinutes: 5
       env:
         # Force unbuffered stdout so ADO logs stream in real time through the tee pipe.
         PYTHONUNBUFFERED: '1'

From 4308dfa90a0b79c9a33bd91652c96dfba4229154 Mon Sep 17 00:00:00 2001
From: Ryan Auld <RyAuld@microsoft.com>
Date: Mon, 15 Jun 2026 13:04:36 -0700
Subject: [PATCH 6/6] Address review: scope forceBenchmarks to Manual builds,
 add step timeout

- Require Build.Reason=Manual for forceBenchmarks override (case-insensitive)
- Keep the 5-min timeoutInMinutes on the pytest step; drop its comment
- Normalize spacing in the Python39 matrix entry

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .Pipelines/template-pipeline-stages.yml | 5 +----
 azure-pipelines.yml                     | 7 +++++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.Pipelines/template-pipeline-stages.yml b/.Pipelines/template-pipeline-stages.yml
index 1bfee6a3..b763f01a 100644
--- a/.Pipelines/template-pipeline-stages.yml
+++ b/.Pipelines/template-pipeline-stages.yml
@@ -94,7 +94,7 @@ stages:
       ob_outputDirectory: '$(Build.ArtifactStagingDirectory)'
     strategy:
       matrix:
-        Python39:  { python.version: '3.9'  }
+        Python39:  { python.version: '3.9' }
         Python310: { python.version: '3.10' }
         Python311: { python.version: '3.11' }
         Python312: { python.version: '3.12' }
@@ -128,9 +128,6 @@ stages:
           --deselect tests/test_cryptography.py::CryptographyTestCase::test_should_be_run_with_latest_version_of_cryptography \
           2>&1 | tee test-results/pytest-unit.log
       displayName: 'Run pytest (unit)'
-      # Python 3.14 intermittently stalls during test execution (likely due to
-      # missing pre-built wheels forcing source compilation or runtime changes).
-      # Cap at 5 minutes to fail fast instead of consuming the full 30-min job timeout.
       timeoutInMinutes: 5
       env:
         # Force unbuffered stdout so ADO logs stream in real time through the tee pipe.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index e58d330b..58b821f2 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -70,12 +70,15 @@ extends:
     - stage: Benchmark
       displayName: 'Run benchmarks'
       dependsOn: E2ETests
-      # Run on post-merge pushes to dev, or manually when forceBenchmarks=true.
+      # Run on post-merge pushes to dev, or on manual builds when forceBenchmarks=true.
       condition: |
         and(
           succeeded('E2ETests'),
           or(
-            eq(variables['forceBenchmarks'], 'true'),
+            and(
+              eq(variables['Build.Reason'], 'Manual'),
+              in(variables['forceBenchmarks'], 'true', 'True')
+            ),
             and(
               eq(variables['Build.SourceBranch'], 'refs/heads/dev'),
               or(