From 1e9285ce98da0732f7716ded0d6e9ceb1ab20b6b Mon Sep 17 00:00:00 2001 From: Ryan Auld Date: Mon, 15 Jun 2026 09:58:33 -0700 Subject: [PATCH 1/6] Fix benchmark cache path: use directory instead of file The Cache@2 ADO task requires a directory path, but .perf.baseline was a file. Move baseline storage into a .perf-baseline/ directory so the cache task can save/restore it correctly. - Change cache path to $(System.DefaultWorkingDirectory)/.perf-baseline - Update test_benchmark.py to store baseline in .perf-baseline/data - Ensure directory exists with mkdir -p before running benchmarks - Update .gitignore accordingly Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitignore | 2 +- azure-pipelines.yml | 3 ++- tests/test_benchmark.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 1af10eff..862aea53 100644 --- a/.gitignore +++ b/.gitignore @@ -61,7 +61,7 @@ tests/config.json msal_cache.bin .env -.perf.baseline +.perf-baseline/ *.pfx .vscode/settings.json diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8aef40a4..f62c20cc 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -105,9 +105,10 @@ extends: displayName: 'Restore performance baseline cache' inputs: key: 'perf-baseline | "$(Agent.OS)" | tests/test_benchmark.py' - path: .perf.baseline + path: $(System.DefaultWorkingDirectory)/.perf-baseline - bash: | + mkdir -p .perf-baseline pytest --benchmark-only --benchmark-json benchmark.json --log-cli-level INFO tests/test_benchmark.py displayName: 'Run benchmarks' diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 9aaeac05..2f6d8efc 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -2,7 +2,7 @@ from perf_baseline import Baseline -baseline = Baseline(".perf.baseline", threshold=1.5) # Up to 1.5x slower than baseline +baseline = Baseline(".perf-baseline/data", threshold=1.5) # Up to 1.5x slower than baseline # Here come benchmark test cases, powered by pytest-benchmark # 
Func names will become diag names. From efd4cd1900fa193f9e8c6b5d8a88435349c6efb8 Mon Sep 17 00:00:00 2001 From: Ryan Auld Date: Mon, 15 Jun 2026 10:17:09 -0700 Subject: [PATCH 2/6] Add forceBenchmarks pipeline variable to allow manual benchmark runs Allows benchmarks to run on non-dev branches by setting the pipeline variable forceBenchmarks=true when queuing a manual build. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- azure-pipelines.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f62c20cc..198e4061 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -74,11 +74,16 @@ extends: condition: | and( succeeded('E2ETests'), - eq(variables['Build.SourceBranch'], 'refs/heads/dev'), or( - eq(variables['Build.Reason'], 'IndividualCI'), - eq(variables['Build.Reason'], 'BatchedCI'), - eq(variables['Build.Reason'], 'Manual') + eq(variables['forceBenchmarks'], 'true'), + and( + eq(variables['Build.SourceBranch'], 'refs/heads/dev'), + or( + eq(variables['Build.Reason'], 'IndividualCI'), + eq(variables['Build.Reason'], 'BatchedCI'), + eq(variables['Build.Reason'], 'Manual') + ) + ) ) ) jobs: From d9d3884f05850aac105065c09a2239065de53480 Mon Sep 17 00:00:00 2001 From: Ryan Auld Date: Mon, 15 Jun 2026 12:01:16 -0700 Subject: [PATCH 3/6] Address PR review: ensure .perf-baseline dir exists in all contexts - Add os.makedirs in test_benchmark.py so local runs don't fail - Use absolute path in mkdir -p to align with Cache@2 path Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- azure-pipelines.yml | 2 +- tests/test_benchmark.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 198e4061..dda62863 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -113,7 +113,7 @@ extends: path: $(System.DefaultWorkingDirectory)/.perf-baseline - bash: | - mkdir -p .perf-baseline + mkdir -p 
$(System.DefaultWorkingDirectory)/.perf-baseline pytest --benchmark-only --benchmark-json benchmark.json --log-cli-level INFO tests/test_benchmark.py displayName: 'Run benchmarks' diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 2f6d8efc..ab74ea82 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -2,6 +2,8 @@ from perf_baseline import Baseline +import os +os.makedirs(".perf-baseline", exist_ok=True) baseline = Baseline(".perf-baseline/data", threshold=1.5) # Up to 1.5x slower than baseline # Here come benchmark test cases, powered by pytest-benchmark From 0ea7fb156410cd512e6a24013f57fa47bd0a335a Mon Sep 17 00:00:00 2001 From: Ryan Auld Date: Mon, 15 Jun 2026 12:09:37 -0700 Subject: [PATCH 4/6] Address review: anchor baseline path to repo root, fix comment - Use __file__-based path resolution so baseline works from any CWD - Move stdlib imports before third-party imports - Update pipeline comment to reflect forceBenchmarks behavior Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- azure-pipelines.yml | 2 +- tests/test_benchmark.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index dda62863..e58d330b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -70,7 +70,7 @@ extends: - stage: Benchmark displayName: 'Run benchmarks' dependsOn: E2ETests - # Only run on post-merge pushes to dev - not on PRs or scheduled runs. + # Run on post-merge pushes to dev, or manually when forceBenchmarks=true. 
condition: | and( succeeded('E2ETests'), diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index ab74ea82..6eeaae47 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,10 +1,14 @@ +import os +from pathlib import Path + from tests.simulator import ClientCredentialGrantSimulator as CcaTester from perf_baseline import Baseline -import os -os.makedirs(".perf-baseline", exist_ok=True) -baseline = Baseline(".perf-baseline/data", threshold=1.5) # Up to 1.5x slower than baseline +_REPO_ROOT = Path(__file__).resolve().parent.parent +_BASELINE_DIR = _REPO_ROOT / ".perf-baseline" +os.makedirs(_BASELINE_DIR, exist_ok=True) +baseline = Baseline(str(_BASELINE_DIR / "data"), threshold=1.5) # Up to 1.5x slower than baseline # Here come benchmark test cases, powered by pytest-benchmark # Func names will become diag names. From 56676811c826a3e782d0c11ea625a8997eb2e30c Mon Sep 17 00:00:00 2001 From: Ryan Auld Date: Mon, 15 Jun 2026 12:49:01 -0700 Subject: [PATCH 5/6] Add 5-min step timeout to pytest to fail fast on 3.14 stalls Python 3.14 intermittently stalls during test execution, consuming the full 30-min job timeout. Add a 5-minute timeoutInMinutes on the pytest step so stalls are caught quickly instead of wasting agent time. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .Pipelines/template-pipeline-stages.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.Pipelines/template-pipeline-stages.yml b/.Pipelines/template-pipeline-stages.yml index d62d670f..1bfee6a3 100644 --- a/.Pipelines/template-pipeline-stages.yml +++ b/.Pipelines/template-pipeline-stages.yml @@ -128,6 +128,10 @@ stages: --deselect tests/test_cryptography.py::CryptographyTestCase::test_should_be_run_with_latest_version_of_cryptography \ 2>&1 | tee test-results/pytest-unit.log displayName: 'Run pytest (unit)' + # Python 3.14 intermittently stalls during test execution (likely due to + # missing pre-built wheels forcing source compilation or runtime changes). + # Cap at 5 minutes to fail fast instead of consuming the full 30-min job timeout. + timeoutInMinutes: 5 env: # Force unbuffered stdout so ADO logs stream in real time through the tee pipe. PYTHONUNBUFFERED: '1' From 4308dfa90a0b79c9a33bd91652c96dfba4229154 Mon Sep 17 00:00:00 2001 From: Ryan Auld Date: Mon, 15 Jun 2026 13:04:36 -0700 Subject: [PATCH 6/6] Address review: scope forceBenchmarks to Manual builds, add step timeout - Require Build.Reason=Manual for forceBenchmarks override (case-insensitive) - Add 5-min timeoutInMinutes on pytest step to catch stalls early Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .Pipelines/template-pipeline-stages.yml | 5 +---- azure-pipelines.yml | 7 +++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.Pipelines/template-pipeline-stages.yml b/.Pipelines/template-pipeline-stages.yml index 1bfee6a3..b763f01a 100644 --- a/.Pipelines/template-pipeline-stages.yml +++ b/.Pipelines/template-pipeline-stages.yml @@ -94,7 +94,7 @@ stages: ob_outputDirectory: '$(Build.ArtifactStagingDirectory)' strategy: matrix: - Python39: { python.version: '3.9' } + Python39: { python.version: '3.9' } Python310: { python.version: '3.10' } Python311: { python.version: '3.11' } 
Python312: { python.version: '3.12' } @@ -128,9 +128,6 @@ stages: --deselect tests/test_cryptography.py::CryptographyTestCase::test_should_be_run_with_latest_version_of_cryptography \ 2>&1 | tee test-results/pytest-unit.log displayName: 'Run pytest (unit)' - # Python 3.14 intermittently stalls during test execution (likely due to - # missing pre-built wheels forcing source compilation or runtime changes). - # Cap at 5 minutes to fail fast instead of consuming the full 30-min job timeout. timeoutInMinutes: 5 env: # Force unbuffered stdout so ADO logs stream in real time through the tee pipe. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index e58d330b..58b821f2 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -70,12 +70,15 @@ extends: - stage: Benchmark displayName: 'Run benchmarks' dependsOn: E2ETests - # Run on post-merge pushes to dev, or manually when forceBenchmarks=true. + # Run on post-merge pushes to dev, or on manual builds when forceBenchmarks=true. condition: | and( succeeded('E2ETests'), or( - eq(variables['forceBenchmarks'], 'true'), + and( + eq(variables['Build.Reason'], 'Manual'), + in(variables['forceBenchmarks'], 'true', 'True') + ), and( eq(variables['Build.SourceBranch'], 'refs/heads/dev'), or(