From 1c67d3ca30b1c8dd2614b61babb35382c34861ca Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Fri, 29 May 2026 09:36:20 -0400 Subject: [PATCH 1/4] adding memory stats to regression benchmarks --- .../jvector/example/AutoBenchYAML.java | 9 +- .../example/util/BenchmarkSummarizer.java | 85 +++++++++++++------ visualize_benchmarks.py | 4 +- 3 files changed, 68 insertions(+), 30 deletions(-) diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java index 6e805d03a..22d672cfd 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java @@ -181,7 +181,8 @@ public static void main(String[] args) throws IOException { // Write CSV data try (FileWriter writer = new FileWriter(outputFile)) { // Write CSV header - writer.write("dataset,QPS,QPS StdDev,Mean Latency,Recall@10,Index Construction Time,Avg Nodes Visited\n"); + writer.write("dataset,QPS,QPS StdDev,Mean Latency,Recall@10,Index Construction Time,Avg Nodes Visited," + + "Build Heap Used (MB),Build Off-Heap (MB),Search Heap Used (MB),Search Off-Heap (MB)\n"); // Write one row per dataset with average metrics for (Map.Entry entry : statsByDataset.entrySet()) { @@ -194,7 +195,11 @@ public static void main(String[] args) throws IOException { writer.write(datasetStats.getAvgLatency() + ","); writer.write(datasetStats.getAvgRecall() + ","); writer.write(datasetStats.getIndexConstruction() + ","); - writer.write(datasetStats.getAvgNodesVisited() + "\n"); + writer.write(datasetStats.getAvgNodesVisited() + ","); + writer.write(datasetStats.getAvgBuildHeapMB() + ","); + writer.write(datasetStats.getAvgBuildOffHeapMB() + ","); + writer.write(datasetStats.getAvgSearchHeapMB() + ","); + writer.write(datasetStats.getAvgSearchOffHeapMB() + "\n"); } } diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java index 60b9a80f1..93462dc96 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java @@ -37,8 +37,15 @@ public static class SummaryStats { private final int totalConfigurations; private final double qpsStdDev; private final double avgNodesVisited; + private final double avgBuildHeapMB; + private final double avgBuildOffHeapMB; + private final double avgSearchHeapMB; + private final double avgSearchOffHeapMB; - public SummaryStats(double avgRecall, double avgQps, double avgLatency, double indexConstruction, int totalConfigurations, double qpsStdDev, double avgNodesVisited) { + public SummaryStats(double avgRecall, double avgQps, double avgLatency, double indexConstruction, + int totalConfigurations, double qpsStdDev, double avgNodesVisited, + double avgBuildHeapMB, double avgBuildOffHeapMB, + double avgSearchHeapMB, double avgSearchOffHeapMB) { this.avgRecall = avgRecall; this.avgQps = avgQps; this.avgLatency = avgLatency; @@ -46,29 +53,23 @@ public SummaryStats(double avgRecall, double avgQps, double avgLatency, double i this.totalConfigurations = totalConfigurations; this.qpsStdDev = qpsStdDev; this.avgNodesVisited = avgNodesVisited; + this.avgBuildHeapMB = avgBuildHeapMB; + this.avgBuildOffHeapMB = avgBuildOffHeapMB; + this.avgSearchHeapMB = avgSearchHeapMB; + this.avgSearchOffHeapMB = avgSearchOffHeapMB; } - public double getAvgRecall() { - return avgRecall; - } - - public double getAvgQps() { - return avgQps; - } - - public double getAvgLatency() { - return avgLatency; - } - + public double getAvgRecall() { return avgRecall; } + public double getAvgQps() { return avgQps; } + public double getAvgLatency() { return avgLatency; } public double getIndexConstruction() { return indexConstruction; } - - public int getTotalConfigurations() { - return totalConfigurations; - } - + public int getTotalConfigurations() { return totalConfigurations; } public double getQpsStdDev() { return qpsStdDev; } - public double getAvgNodesVisited() { return avgNodesVisited; } + public double getAvgBuildHeapMB() { return avgBuildHeapMB; } + public double getAvgBuildOffHeapMB() { return avgBuildOffHeapMB; } + public double getAvgSearchHeapMB() { return avgSearchHeapMB; } + public double getAvgSearchOffHeapMB() { return avgSearchOffHeapMB; } @Override public String toString() { @@ -78,8 +79,11 @@ public String toString() { " Average QPS: %.2f (± %.2f)%n" + " Average Latency: %.2f ms%n" + " Index Construction Time: %.2f%n" + - " Average Nodes Visited: %.2f", - totalConfigurations, avgRecall, avgQps, qpsStdDev, avgLatency, indexConstruction, avgNodesVisited); + " Average Nodes Visited: %.2f%n" + + " Build Heap Used: %.1f MB Build Off-Heap: %.1f MB%n" + + " Search Heap Used: %.1f MB Search Off-Heap: %.1f MB", + totalConfigurations, avgRecall, avgQps, qpsStdDev, avgLatency, indexConstruction, + avgNodesVisited, avgBuildHeapMB, avgBuildOffHeapMB, avgSearchHeapMB, avgSearchOffHeapMB); } } @@ -99,23 +103,31 @@ public static SummaryStats summarize(List results) { double indexConstruction = 0; double totalQpsStdDev = 0; double totalNodesVisited = 0; - + double totalBuildHeapMB = 0; + double totalBuildOffHeapMB = 0; + double totalSearchHeapMB = 0; + double totalSearchOffHeapMB = 0; + int recallCount = 0; int qpsCount = 0; int latencyCount = 0; int qpsStdDevCount = 0; int nodesVisitedCount = 0; + int buildHeapCount = 0; + int buildOffHeapCount = 0; + int searchHeapCount = 0; + int searchOffHeapCount = 0; for (BenchResult result : results) { if (result.metrics == null) continue; - + // Extract recall metrics (format is "Recall@N" where N is the topK value) Double recall = extractRecallMetric(result.metrics); if (recall != null) { totalRecall += recall; recallCount++; } - + // Extract QPS metric Double qps = extractQpsMetric(result.metrics); if (qps != null) { @@ -129,7 +141,7 @@ public static SummaryStats summarize(List results) { totalQpsStdDev += qpsStdDev; qpsStdDevCount++; } - + // Extract latency metric (format is "Mean Latency (ms)") Double latency = extractLatencyMetric(result.metrics); if (latency != null) { @@ -145,6 +157,19 @@ public static SummaryStats summarize(List results) { totalNodesVisited += nodesVisited; nodesVisitedCount++; } + + // Extract memory metrics + Double buildHeap = extractMetric(result.metrics, "Heap Memory Used (MB)"); + if (buildHeap != null) { totalBuildHeapMB += buildHeap; buildHeapCount++; } + + Double buildOffHeap = extractMetric(result.metrics, "Total Off-Heap (MB)"); + if (buildOffHeap != null) { totalBuildOffHeapMB += buildOffHeap; buildOffHeapCount++; } + + Double searchHeap = extractMetric(result.metrics, "Max heap usage (MB)"); + if (searchHeap != null) { totalSearchHeapMB += searchHeap; searchHeapCount++; } + + Double searchOffHeap = extractMetric(result.metrics, "Max offheap usage (MB)"); + if (searchOffHeap != null) { totalSearchOffHeapMB += searchOffHeap; searchOffHeapCount++; } } // Calculate averages, handling cases where some metrics might not be present @@ -153,11 +178,17 @@ public static SummaryStats summarize(List results) { double avgLatency = latencyCount > 0 ? totalLatency / latencyCount : 0; double avgQpsStdDev = qpsStdDevCount > 0 ? totalQpsStdDev / qpsStdDevCount : 0; double avgNodesVisited = nodesVisitedCount > 0 ? totalNodesVisited / nodesVisitedCount : 0; - + double avgBuildHeapMB = buildHeapCount > 0 ? totalBuildHeapMB / buildHeapCount : 0; + double avgBuildOffHeapMB = buildOffHeapCount > 0 ? totalBuildOffHeapMB / buildOffHeapCount : 0; + double avgSearchHeapMB = searchHeapCount > 0 ? totalSearchHeapMB / searchHeapCount : 0; + double avgSearchOffHeapMB = searchOffHeapCount > 0 ? totalSearchOffHeapMB / searchOffHeapCount : 0; + // Count total valid configurations as the maximum count of any metric int totalConfigurations = Math.max(Math.max(recallCount, qpsCount), latencyCount); - return new SummaryStats(avgRecall, avgQps, avgLatency, indexConstruction, totalConfigurations, avgQpsStdDev, avgNodesVisited); + return new SummaryStats(avgRecall, avgQps, avgLatency, indexConstruction, totalConfigurations, + avgQpsStdDev, avgNodesVisited, + avgBuildHeapMB, avgBuildOffHeapMB, avgSearchHeapMB, avgSearchOffHeapMB); } private static Double extractIndexConstructionMetric(Map metrics) { diff --git a/visualize_benchmarks.py b/visualize_benchmarks.py index 903c3448a..dbcd6fb8b 100644 --- a/visualize_benchmarks.py +++ b/visualize_benchmarks.py @@ -30,7 +30,9 @@ # Define metrics where higher values are better and lower values are better HIGHER_IS_BETTER = ["QPS", "Recall@10"] -LOWER_IS_BETTER = ["Mean Latency", "Index Build Time", "Average Nodes Visited"] +LOWER_IS_BETTER = ["Mean Latency", "Index Build Time", "Average Nodes Visited", + "Build Heap Used (MB)", "Build Off-Heap (MB)", + "Search Heap Used (MB)", "Search Off-Heap (MB)"] class BenchmarkData: From d916dbde6367ca1bb76c53d5e772ecfd8a495170 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Fri, 29 May 2026 10:28:15 -0400 Subject: [PATCH 2/4] fixed tests --- .../jvector/example/util/BenchmarkSummarizer.java | 2 +- .../main/resources/compare_benchmark_iterations.py | 0 .../src/main/resources/visualize_benchmarks.py | 0 .../example/util/BenchmarkSummarizerTest.java | 11 ++++++----- .../jvector/example/util/SummarizerTest.java | 14 +++++++++----- 5 files changed, 16 insertions(+), 11 deletions(-) rename compare_benchmark_iterations.py => jvector-examples/src/main/resources/compare_benchmark_iterations.py (100%) rename visualize_benchmarks.py => jvector-examples/src/main/resources/visualize_benchmarks.py (100%) diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java index 93462dc96..c3c89800b 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java @@ -94,7 +94,7 @@ public String toString() { */ public static SummaryStats summarize(List results) { if (results == null || results.isEmpty()) { - return new SummaryStats(0, 0, 0, 0, 0, 0, 0); + return new SummaryStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } double totalRecall = 0; diff --git a/compare_benchmark_iterations.py b/jvector-examples/src/main/resources/compare_benchmark_iterations.py similarity index 100% rename from compare_benchmark_iterations.py rename to jvector-examples/src/main/resources/compare_benchmark_iterations.py diff --git a/visualize_benchmarks.py b/jvector-examples/src/main/resources/visualize_benchmarks.py similarity index 100% rename from visualize_benchmarks.py rename to jvector-examples/src/main/resources/visualize_benchmarks.py diff --git a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java index 668f573ac..c66fa6833 100644 --- a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java +++ b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java @@ -115,8 +115,7 @@ public void testSummarizeWithNullList() { @Test public void testSummaryStatsToString() { // Create a SummaryStats instance - SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100) -; + SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100, 0, 0, 0, 0); // Verify toString output String expected = String.format( "Benchmark Summary (across %d configurations):%n" + @@ -124,9 +123,11 @@ public void testSummaryStatsToString() { " Average QPS: %.2f (± %.2f)%n" + " Average Latency: %.2f ms%n" + " Index Construction Time: %.2f%n" + - " Average Nodes Visited: %.2f", - 4, 0.85, 1200.0, 0.2, 5.2, 1000000.00, 100.00); - + " Average Nodes Visited: %.2f%n" + + " Build Heap Used: %.1f MB Build Off-Heap: %.1f MB%n" + + " Search Heap Used: %.1f MB Search Off-Heap: %.1f MB", + 4, 0.85, 1200.0, 0.2, 5.2, 1000000.00, 100.00, 0.0, 0.0, 0.0, 0.0); + assertEquals(expected, stats.toString()); } diff --git a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java index c3d698bab..66e5547ab 100644 --- a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java +++ b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java @@ -121,16 +121,20 @@ private static void testSummaryStatsToString() { System.out.println("\nTest: SummaryStats toString method"); // Create a SummaryStats instance - SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100); - + SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100, 0, 0, 0, 0); + // Verify toString output String expected = String.format( "Benchmark Summary (across %d configurations):%n" + " Average Recall@k: %.4f%n" + " Average QPS: %.2f (± %.2f)%n" + - " Average Latency: %.2f ms", - 4, 0.85, 1200.0, 0.0, 5.2); - + " Average Latency: %.2f ms%n" + + " Index Construction Time: %.2f%n" + + " Average Nodes Visited: %.2f%n" + + " Build Heap Used: %.1f MB Build Off-Heap: %.1f MB%n" + + " Search Heap Used: %.1f MB Search Off-Heap: %.1f MB", + 4, 0.85, 1200.0, 0.2, 5.2, 1000000.00, 100.00, 0.0, 0.0, 0.0, 0.0); + assertEquals("toString output", expected, stats.toString()); } From f49e6572311cd03f7622ebecbb156804b9942b9f Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Fri, 29 May 2026 10:39:42 -0400 Subject: [PATCH 3/4] fixed script file path --- .github/workflows/run-bench.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-bench.yml b/.github/workflows/run-bench.yml index 4026e3707..fcdff2e06 100644 --- a/.github/workflows/run-bench.yml +++ b/.github/workflows/run-bench.yml @@ -289,7 +289,7 @@ jobs: OUTPUT_DIR="benchmark_reports" # Run the visualization script with all files, default threshold (5.0) - python visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}" + python jvector-examples/src/resources/visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}" - name: Upload visualization artifacts uses: actions/upload-artifact@v4 From 4ac092e59b66041727e8cb6a4b16ca7f281d3c3a Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Fri, 29 May 2026 10:50:17 -0400 Subject: [PATCH 4/4] fixed script file path --- .github/workflows/run-bench.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-bench.yml b/.github/workflows/run-bench.yml index fcdff2e06..1b9533cad 100644 --- a/.github/workflows/run-bench.yml +++ b/.github/workflows/run-bench.yml @@ -289,7 +289,7 @@ jobs: OUTPUT_DIR="benchmark_reports" # Run the visualization script with all files, default threshold (5.0) - python jvector-examples/src/resources/visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}" + python jvector-examples/src/main/resources/visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}" - name: Upload visualization artifacts uses: actions/upload-artifact@v4