From a65bcf8462116b4570e593c5b466db16fa3bb8b8 Mon Sep 17 00:00:00 2001 From: shreejaykurhade Date: Fri, 10 Apr 2026 03:18:04 +0530 Subject: [PATCH 1/9] Auto-tune Embedding Model Parameters & Add Benchmarking Tool --- src/typeagent/aitools/vectorbase.py | 30 +++++- tools/benchmark_embeddings.py | 157 ++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 tools/benchmark_embeddings.py diff --git a/src/typeagent/aitools/vectorbase.py b/src/typeagent/aitools/vectorbase.py index 63e2e77a..d4f84c21 100644 --- a/src/typeagent/aitools/vectorbase.py +++ b/src/typeagent/aitools/vectorbase.py @@ -34,11 +34,35 @@ def __init__( max_matches: int | None = None, batch_size: int | None = None, ): - self.min_score = min_score if min_score is not None else 0.85 - self.max_matches = max_matches if max_matches and max_matches >= 1 else None - self.batch_size = batch_size if batch_size and batch_size >= 1 else 8 self.embedding_model = embedding_model or create_embedding_model() + # Default fallback values + default_min_score = 0.85 + default_max_matches = None + + # Determine optimal parameters automatically for well-known models. + # Format: (min_score, max_matches) + # Note: text-embedding-3 models produce structurally lower cosine scores than older models + # and typically perform best in the 0.3 - 0.5 range for relevance filtering. 
+ MODEL_DEFAULTS = { + "text-embedding-3-large": (0.30, 20), + "text-embedding-3-small": (0.35, 20), + "text-embedding-ada-002": (0.75, 20), + } + + # Check if the model_name matches any known ones + model_name = getattr(self.embedding_model, 'model_name', "") + + if model_name: + for known_model, defaults in MODEL_DEFAULTS.items(): + if known_model in model_name: + default_min_score, default_max_matches = defaults + break + + self.min_score = min_score if min_score is not None else default_min_score + self.max_matches = max_matches if max_matches is not None else default_max_matches + self.batch_size = batch_size if batch_size and batch_size >= 1 else 8 + class VectorBase: settings: TextEmbeddingIndexSettings diff --git a/tools/benchmark_embeddings.py b/tools/benchmark_embeddings.py new file mode 100644 index 00000000..2d6fc3a7 --- /dev/null +++ b/tools/benchmark_embeddings.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Utility script to benchmark different TextEmbeddingIndexSettings parameters. 
+ +Usage: + uv run python tools/benchmark_embeddings.py [--model provider:model] +""" + +import argparse +import asyncio +import json +import logging +from pathlib import Path +from statistics import mean +import sys +from typing import Any + +from typeagent.aitools.model_adapters import create_embedding_model +from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase + + +async def run_benchmark(model_spec: str | None) -> None: + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + + # Paths + script_dir = Path(__file__).resolve().parent + repo_root = script_dir.parent + index_data_path = repo_root / "tests" / "testdata" / "Episode_53_AdrianTchaikovsky_index_data.json" + search_data_path = repo_root / "tests" / "testdata" / "Episode_53_Search_results.json" + + logger.info(f"Loading index data from {index_data_path}") + try: + with open(index_data_path, "r", encoding="utf-8") as f: + index_json = json.load(f) + except Exception as e: + logger.error(f"Failed to load index data: {e}") + return + + messages = index_json.get("messages", []) + message_texts = [" ".join(m.get("textChunks", [])) for m in messages] + + logger.info(f"Loading search queries from {search_data_path}") + try: + with open(search_data_path, "r", encoding="utf-8") as f: + search_json = json.load(f) + except Exception as e: + logger.error(f"Failed to load search queries: {e}") + return + + # Filter out ones without results or expected matches + queries = [] + for item in search_json: + search_text = item.get("searchText") + results = item.get("results", []) + if not results: + continue + expected = results[0].get("messageMatches", []) + if not expected: + continue + queries.append((search_text, expected)) + + logger.info(f"Found {len(message_texts)} messages to embed.") + logger.info(f"Found {len(queries)} queries with expected matches to test.") + + try: + if model_spec == "test:fake": + from typeagent.aitools.model_adapters import 
create_test_embedding_model + model = create_test_embedding_model(embedding_size=384) + else: + model = create_embedding_model(model_spec) + except Exception as e: + logger.error(f"Failed to create embedding model: {e}") + logger.info("Are your environment variables (e.g. OPENAI_API_KEY) set?") + return + settings = TextEmbeddingIndexSettings(model) + vbase = VectorBase(settings) + + logger.info("Computing embeddings for messages (this may take some time...)") + # Batch the embeddings + batch_size = 50 + for i in range(0, len(message_texts), batch_size): + batch = message_texts[i : i + batch_size] + await vbase.add_keys(batch) + print(f" ... embedded {min(i + batch_size, len(message_texts))}/{len(message_texts)}") + + logger.info("Computing embeddings for queries...") + query_texts = [q[0] for q in queries] + query_embeddings = await model.get_embeddings(query_texts) + + # Grid search config + min_scores_to_test = [0.70, 0.75, 0.80, 0.85, 0.90, 0.95] + max_hits_to_test = [5, 10, 15, 20] + + logger.info(f"Starting grid search over model: {model.model_name}") + print("-" * 65) + print(f"{'Min Score':<12} | {'Max Hits':<10} | {'Hit Rate (%)':<15} | {'MRR':<10}") + print("-" * 65) + + best_mrr = -1.0 + best_config = None + + for ms in min_scores_to_test: + for mh in max_hits_to_test: + hits = 0 + reciprocal_ranks = [] + + for (query_text, expected_indices), q_emb in zip(queries, query_embeddings): + scored_results = vbase.fuzzy_lookup_embedding(q_emb, max_hits=mh, min_score=ms) + retrieved_indices = [sr.item for sr in scored_results] + + # Check if any of the expected items are in the retrieved answers + rank = -1 + for r_idx, retrieved in enumerate(retrieved_indices): + if retrieved in expected_indices: + rank = r_idx + 1 + break + + if rank > 0: + hits += 1 + reciprocal_ranks.append(1.0 / rank) + else: + reciprocal_ranks.append(0.0) + + hit_rate = (hits / len(queries)) * 100 + mrr = mean(reciprocal_ranks) + + print(f"{ms:<12.2f} | {mh:<10d} | {hit_rate:<15.2f} | 
{mrr:<10.4f}") + + if mrr > best_mrr: + best_mrr = mrr + best_config = (ms, mh) + + print("-" * 65) + if best_config: + logger.info(f"Optimal parameters found: min_score={best_config[0]}, max_hits={best_config[1]} (MRR={best_mrr:.4f})") + else: + logger.info("Could not determine optimal parameters (no hits).") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Benchmark embedding model parameters.") + parser.add_argument( + "--model", + type=str, + default=None, + help="Provider and model name, e.g. 'openai:text-embedding-3-small'", + ) + args = parser.parse_args() + asyncio.run(run_benchmark(args.model)) + + +if __name__ == "__main__": + main() From b95d94f853991c3b47dd3636d232da253be87f85 Mon Sep 17 00:00:00 2001 From: shreejaykurhade Date: Fri, 10 Apr 2026 04:18:36 +0530 Subject: [PATCH 2/9] update --- tools/benchmark_embeddings.py | 223 ++++++++++++++++++++++++++++++++-- 1 file changed, 213 insertions(+), 10 deletions(-) diff --git a/tools/benchmark_embeddings.py b/tools/benchmark_embeddings.py index 2d6fc3a7..ee77c215 100644 --- a/tools/benchmark_embeddings.py +++ b/tools/benchmark_embeddings.py @@ -5,6 +5,20 @@ """ Utility script to benchmark different TextEmbeddingIndexSettings parameters. +Uses the Adrian Tchaikovsky podcast dataset (Episode 53) which contains: +- Index data: ~96 messages from the podcast conversation +- Search results: Queries with expected messageMatches (ground truth for retrieval) +- Answer results: Curated Q&A pairs with expected answers (ground truth for Q&A quality) + +The benchmark evaluates embedding model retrieval quality using: +1. Search-based evaluation: Compares fuzzy_lookup results against expected messageMatches +2. 
Answer-based evaluation: Tests if queries from the Answer dataset retrieve messages + that contain the expected answer content (substring matching) + +Metrics: +- Hit Rate: Percentage of queries where at least one expected result was retrieved +- MRR (Mean Reciprocal Rank): Average of 1/rank of the first relevant result + Usage: uv run python tools/benchmark_embeddings.py [--model provider:model] """ @@ -31,7 +45,9 @@ async def run_benchmark(model_spec: str | None) -> None: repo_root = script_dir.parent index_data_path = repo_root / "tests" / "testdata" / "Episode_53_AdrianTchaikovsky_index_data.json" search_data_path = repo_root / "tests" / "testdata" / "Episode_53_Search_results.json" + answer_data_path = repo_root / "tests" / "testdata" / "Episode_53_Answer_results.json" + # ── Load index data (messages to embed) ── logger.info(f"Loading index data from {index_data_path}") try: with open(index_data_path, "r", encoding="utf-8") as f: @@ -43,6 +59,7 @@ async def run_benchmark(model_spec: str | None) -> None: messages = index_json.get("messages", []) message_texts = [" ".join(m.get("textChunks", [])) for m in messages] + # ── Load search queries (ground truth: messageMatches) ── logger.info(f"Loading search queries from {search_data_path}") try: with open(search_data_path, "r", encoding="utf-8") as f: @@ -52,7 +69,7 @@ async def run_benchmark(model_spec: str | None) -> None: return # Filter out ones without results or expected matches - queries = [] + search_queries: list[tuple[str, list[int]]] = [] for item in search_json: search_text = item.get("searchText") results = item.get("results", []) @@ -61,11 +78,30 @@ async def run_benchmark(model_spec: str | None) -> None: expected = results[0].get("messageMatches", []) if not expected: continue - queries.append((search_text, expected)) + search_queries.append((search_text, expected)) + + # ── Load answer results (Q&A ground truth from Adrian Tchaikovsky dataset) ── + answer_queries: list[tuple[str, str, bool]] = [] # 
(question, answer, hasNoAnswer) + logger.info(f"Loading answer results from {answer_data_path}") + try: + with open(answer_data_path, "r", encoding="utf-8") as f: + answer_json = json.load(f) + for item in answer_json: + question = item.get("question", "") + answer = item.get("answer", "") + has_no_answer = item.get("hasNoAnswer", False) + if question and answer: + answer_queries.append((question, answer, has_no_answer)) + logger.info(f"Found {len(answer_queries)} answer Q&A pairs " + f"({sum(1 for _, _, h in answer_queries if not h)} with answers, " + f"{sum(1 for _, _, h in answer_queries if h)} with no-answer).") + except Exception as e: + logger.warning(f"Failed to load answer results (continuing without): {e}") logger.info(f"Found {len(message_texts)} messages to embed.") - logger.info(f"Found {len(queries)} queries with expected matches to test.") + logger.info(f"Found {len(search_queries)} search queries with expected matches.") + # ── Create embedding model and index ── try: if model_spec == "test:fake": from typeagent.aitools.model_adapters import create_test_embedding_model @@ -87,16 +123,30 @@ async def run_benchmark(model_spec: str | None) -> None: await vbase.add_keys(batch) print(f" ... 
embedded {min(i + batch_size, len(message_texts))}/{len(message_texts)}") - logger.info("Computing embeddings for queries...") - query_texts = [q[0] for q in queries] - query_embeddings = await model.get_embeddings(query_texts) + # ── Compute query embeddings ── + logger.info("Computing embeddings for search queries...") + search_query_texts = [q[0] for q in search_queries] + search_query_embeddings = await model.get_embeddings(search_query_texts) + + answer_query_embeddings = None + if answer_queries: + logger.info("Computing embeddings for answer queries...") + answer_query_texts = [q[0] for q in answer_queries] + answer_query_embeddings = await model.get_embeddings(answer_query_texts) + + # ────────────────────────────────────────────────────────────────────── + # Section 1: Grid Search using Search Results (messageMatches) + # ────────────────────────────────────────────────────────────────────── # Grid search config min_scores_to_test = [0.70, 0.75, 0.80, 0.85, 0.90, 0.95] max_hits_to_test = [5, 10, 15, 20] logger.info(f"Starting grid search over model: {model.model_name}") - print("-" * 65) + print() + print("=" * 72) + print(" SEARCH RESULTS BENCHMARK (messageMatches ground truth)") + print("=" * 72) print(f"{'Min Score':<12} | {'Max Hits':<10} | {'Hit Rate (%)':<15} | {'MRR':<10}") print("-" * 65) @@ -108,7 +158,7 @@ async def run_benchmark(model_spec: str | None) -> None: hits = 0 reciprocal_ranks = [] - for (query_text, expected_indices), q_emb in zip(queries, query_embeddings): + for (query_text, expected_indices), q_emb in zip(search_queries, search_query_embeddings): scored_results = vbase.fuzzy_lookup_embedding(q_emb, max_hits=mh, min_score=ms) retrieved_indices = [sr.item for sr in scored_results] @@ -125,7 +175,7 @@ async def run_benchmark(model_spec: str | None) -> None: else: reciprocal_ranks.append(0.0) - hit_rate = (hits / len(queries)) * 100 + hit_rate = (hits / len(search_queries)) * 100 mrr = mean(reciprocal_ranks) print(f"{ms:<12.2f} | 
{mh:<10d} | {hit_rate:<15.2f} | {mrr:<10.4f}") @@ -136,10 +186,163 @@ async def run_benchmark(model_spec: str | None) -> None: print("-" * 65) if best_config: - logger.info(f"Optimal parameters found: min_score={best_config[0]}, max_hits={best_config[1]} (MRR={best_mrr:.4f})") + logger.info(f"Search benchmark optimal: min_score={best_config[0]}, " + f"max_hits={best_config[1]} (MRR={best_mrr:.4f})") else: logger.info("Could not determine optimal parameters (no hits).") + # ────────────────────────────────────────────────────────────────────── + # Section 2: Answer Results Benchmark (Adrian Tchaikovsky Q&A pairs) + # ────────────────────────────────────────────────────────────────────── + + if answer_queries and answer_query_embeddings is not None: + print() + print("=" * 72) + print(" ANSWER RESULTS BENCHMARK (Adrian Tchaikovsky Q&A ground truth)") + print("=" * 72) + print() + + # For each answer query, check if retrieved messages contain key terms + # from the expected answer. This is a content-based relevance check. + # + # We split answers with hasNoAnswer=True vs False to evaluate separately. 
+ + answerable = [(q, a, emb) for (q, a, h), emb + in zip(answer_queries, answer_query_embeddings) if not h] + unanswerable = [(q, a, emb) for (q, a, h), emb + in zip(answer_queries, answer_query_embeddings) if h] + + print(f"Answerable queries: {len(answerable)}") + print(f"Unanswerable queries (hasNoAnswer=True): {len(unanswerable)}") + print() + + # Extract key terms from expected answers for content matching + def extract_answer_keywords(answer_text: str) -> list[str]: + """Extract distinctive keywords/phrases from an answer for matching.""" + # Look for quoted items, proper nouns, and distinctive phrases + keywords = [] + # Extract quoted phrases + import re + quoted = re.findall(r"'([^']+)'", answer_text) + keywords.extend(quoted) + quoted2 = re.findall(r'"([^"]+)"', answer_text) + keywords.extend(quoted2) + + # Extract proper-noun-like terms (capitalized words that aren't sentence starters) + # and key named entities from the Adrian Tchaikovsky dataset + known_entities = [ + "Adrian Tchaikovsky", "Tchaikovsky", "Kevin Scott", "Christina Warren", + "Children of Time", "Children of Ruin", "Children of Memory", + "Shadows of the Apt", "Empire in Black and Gold", + "Final Architecture", "Lords of Uncreation", + "Dragonlance Chronicles", "Skynet", "Portids", "Corvids", + "University of Reading", "Magnus Carlsen", "Warhammer", + "Asimov", "Peter Watts", "William Gibson", "Iain Banks", + "Peter Hamilton", "Arthur C. 
Clarke", "Profiles of the Future", + "Dune", "Brave New World", "Iron Sunrise", "Wall-E", + "George RR Martin", "Alastair Reynolds", "Ovid", + "zoology", "psychology", "spiders", "arachnids", "insects", + ] + for entity in known_entities: + if entity.lower() in answer_text.lower(): + keywords.append(entity) + + return keywords + + # Run answer benchmark with the best config from search benchmark + if best_config: + eval_min_score, eval_max_hits = best_config + else: + eval_min_score, eval_max_hits = 0.80, 10 + + print(f"Using parameters: min_score={eval_min_score}, max_hits={eval_max_hits}") + print("-" * 72) + print(f"{'#':<4} | {'Question':<45} | {'Keywords Found':<14} | {'Msgs':<5}") + print("-" * 72) + + answer_hits = 0 + answer_keyword_scores: list[float] = [] + + for idx, (question, answer, q_emb) in enumerate(answerable, 1): + scored_results = vbase.fuzzy_lookup_embedding( + q_emb, max_hits=eval_max_hits, min_score=eval_min_score + ) + retrieved_indices = [sr.item for sr in scored_results] + + # Concatenate the text of all retrieved messages + retrieved_text = " ".join( + message_texts[i] for i in retrieved_indices if i < len(message_texts) + ) + + # Check how many answer keywords appear in retrieved text + keywords = extract_answer_keywords(answer) + if keywords: + found = sum( + 1 for kw in keywords + if kw.lower() in retrieved_text.lower() + ) + keyword_score = found / len(keywords) + else: + # No keywords extracted — just check if we retrieved anything + keyword_score = 1.0 if retrieved_indices else 0.0 + + if keyword_score > 0: + answer_hits += 1 + answer_keyword_scores.append(keyword_score) + + q_display = question[:42] + "..." 
if len(question) > 45 else question + kw_display = f"{int(keyword_score * 100):>3}%" + if keywords: + kw_display += f" ({sum(1 for kw in keywords if kw.lower() in retrieved_text.lower())}/{len(keywords)})" + print(f"{idx:<4} | {q_display:<45} | {kw_display:<14} | {len(retrieved_indices):<5}") + + print("-" * 72) + + if answerable: + answer_hit_rate = (answer_hits / len(answerable)) * 100 + avg_keyword_score = mean(answer_keyword_scores) * 100 + print(f"Answer Hit Rate: {answer_hit_rate:.1f}% " + f"({answer_hits}/{len(answerable)} queries found relevant content)") + print(f"Avg Keyword Coverage: {avg_keyword_score:.1f}%") + + # Evaluate unanswerable queries — ideally these should retrieve fewer/no results + if unanswerable: + print() + print("-" * 72) + print("Unanswerable queries (should ideally retrieve less relevant content):") + print("-" * 72) + false_positive_count = 0 + for question, answer, q_emb in unanswerable: + scored_results = vbase.fuzzy_lookup_embedding( + q_emb, max_hits=eval_max_hits, min_score=eval_min_score + ) + n_results = len(scored_results) + avg_score = mean(sr.score for sr in scored_results) if scored_results else 0.0 + q_display = question[:55] + "..." 
if len(question) > 58 else question + flag = "[!]" if n_results > 3 else "[ok]" + if n_results > 3: + false_positive_count += 1 + print(f" {flag} {q_display:<58} | {n_results:>3} results (avg={avg_score:.3f})") + print(f"\nFalse positives (>3 results): {false_positive_count}/{len(unanswerable)}") + + # ── Summary ── + print() + print("=" * 72) + print(" SUMMARY") + print("=" * 72) + print(f"Model: {model.model_name}") + print(f"Messages indexed: {len(message_texts)}") + print(f"Search queries tested: {len(search_queries)}") + if best_config: + print(f"Best search params: min_score={best_config[0]}, max_hits={best_config[1]}") + print(f"Best search MRR: {best_mrr:.4f}") + if answer_queries: + print(f"Answer queries tested: {len(answerable)} answerable, {len(unanswerable)} unanswerable") + if answerable: + print(f"Answer hit rate: {answer_hit_rate:.1f}%") + print(f"Keyword coverage: {avg_keyword_score:.1f}%") + print("=" * 72) + def main() -> None: parser = argparse.ArgumentParser(description="Benchmark embedding model parameters.") From 9a3faebea281be1cfac25dd2a4f1b21a16296015 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Thu, 9 Apr 2026 22:45:02 -0500 Subject: [PATCH 3/9] Bump uv_build upper bound to <0.11.0 uv 0.10.x is current; the <0.10.0 constraint caused build warnings. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 01470ecf..1339e34e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["uv_build>=0.9.10,<0.10.0"] +requires = ["uv_build>=0.9.10,<0.11.0"] build-backend = "uv_build" [project] From bc5b3197d48c8452f782b795ab61b05f06a35507 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 02:24:57 -0500 Subject: [PATCH 4/9] Optimize fuzzy_lookup_embedding with numpy vectorized ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace Python-level list comprehension + sort with numpy operations: - No-predicate path: np.flatnonzero for score filtering, np.argpartition for O(n) top-k selection — avoids building ScoredInt for every vector - Predicate path: numpy pre-filters by score, applies predicate only to candidates above threshold - Subset lookup: numpy fancy indexing computes dot products only for subset indices instead of delegating to full-vector scan with predicate --- src/typeagent/aitools/vectorbase.py | 65 +++++++++++---- tests/benchmarks/test_benchmark_vectorbase.py | 83 +++++++++++++++++++ 2 files changed, 133 insertions(+), 15 deletions(-) create mode 100644 tests/benchmarks/test_benchmark_vectorbase.py diff --git a/src/typeagent/aitools/vectorbase.py b/src/typeagent/aitools/vectorbase.py index 63e2e77a..4ca8be06 100644 --- a/src/typeagent/aitools/vectorbase.py +++ b/src/typeagent/aitools/vectorbase.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from collections.abc import Callable, Iterable +from collections.abc import Callable from dataclasses import dataclass import numpy as np @@ -132,17 +132,35 @@ def fuzzy_lookup_embedding( min_score = 0.0 if len(self._vectors) == 0: return [] - # This line does most of the work: - scores: Iterable[float] = np.dot(self._vectors, embedding) - scored_ordinals = [ - ScoredInt(i, score) - for i, score in enumerate(scores) - if score >= min_score and (predicate is None or predicate(i)) - ] - scored_ordinals.sort(key=lambda x: x.score, reverse=True) - return scored_ordinals[:max_hits] + scores = np.dot(self._vectors, embedding) + + if predicate is None: + # Fast numpy path: filter and top-k without Python-level iteration. + indices = np.flatnonzero(scores >= min_score) + if len(indices) == 0: + return [] + filtered_scores = scores[indices] + if len(indices) <= max_hits: + order = np.argsort(filtered_scores)[::-1] + else: + # argpartition is O(n) vs O(n log n) for full sort. + top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:] + order = top_k[np.argsort(filtered_scores[top_k])[::-1]] + return [ + ScoredInt(int(indices[i]), float(filtered_scores[i])) for i in order + ] + else: + # Predicate path: pre-filter by score in numpy, then apply predicate + # only to candidates that pass the score threshold. + candidates = np.flatnonzero(scores >= min_score) + scored_ordinals = [ + ScoredInt(int(i), float(scores[i])) + for i in candidates + if predicate(int(i)) + ] + scored_ordinals.sort(key=lambda x: x.score, reverse=True) + return scored_ordinals[:max_hits] - # TODO: Make this and fuzzy_lookup_embedding() more similar. 
def fuzzy_lookup_embedding_in_subset( self, embedding: NormalizedEmbedding, @@ -150,10 +168,27 @@ def fuzzy_lookup_embedding_in_subset( max_hits: int | None = None, min_score: float | None = None, ) -> list[ScoredInt]: - ordinals_set = set(ordinals_of_subset) - return self.fuzzy_lookup_embedding( - embedding, max_hits, min_score, lambda i: i in ordinals_set - ) + if max_hits is None: + max_hits = 10 + if min_score is None: + min_score = 0.0 + if not ordinals_of_subset or len(self._vectors) == 0: + return [] + # Compute dot products only for the subset instead of all vectors. + subset = np.asarray(ordinals_of_subset) + scores = np.dot(self._vectors[subset], embedding) + indices = np.flatnonzero(scores >= min_score) + if len(indices) == 0: + return [] + filtered_scores = scores[indices] + if len(indices) <= max_hits: + order = np.argsort(filtered_scores)[::-1] + else: + top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:] + order = top_k[np.argsort(filtered_scores[top_k])[::-1]] + return [ + ScoredInt(int(subset[indices[i]]), float(filtered_scores[i])) for i in order + ] async def fuzzy_lookup( self, diff --git a/tests/benchmarks/test_benchmark_vectorbase.py b/tests/benchmarks/test_benchmark_vectorbase.py new file mode 100644 index 00000000..b61859a8 --- /dev/null +++ b/tests/benchmarks/test_benchmark_vectorbase.py @@ -0,0 +1,83 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Benchmarks for VectorBase fuzzy lookup methods. + +Measures fuzzy_lookup_embedding and fuzzy_lookup_embedding_in_subset +with varying vector counts and result sizes. 
+""" + +import numpy as np +import pytest + +from typeagent.aitools.model_adapters import create_test_embedding_model +from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase + +EMBEDDING_DIM = 384 # Typical small embedding model dimension + + +def make_populated_vector_base(n_vectors: int) -> tuple[VectorBase, np.ndarray]: + """Create a VectorBase with n_vectors random normalized embeddings.""" + settings = TextEmbeddingIndexSettings(create_test_embedding_model()) + vb = VectorBase(settings) + rng = np.random.default_rng(42) + embeddings = rng.standard_normal((n_vectors, EMBEDDING_DIM)).astype(np.float32) + # Normalize to unit vectors (as the real pipeline does). + norms = np.linalg.norm(embeddings, axis=1, keepdims=True) + embeddings = embeddings / norms + vb.add_embeddings(None, embeddings) + # Query vector: also normalized. + query = rng.standard_normal(EMBEDDING_DIM).astype(np.float32) + query = query / np.linalg.norm(query) + return vb, query + + +# --- fuzzy_lookup_embedding --- + + +@pytest.mark.asyncio +async def test_benchmark_fuzzy_lookup_1k(async_benchmark): + vb, query = make_populated_vector_base(1_000) + + async def target(): + vb.fuzzy_lookup_embedding(query, max_hits=10, min_score=0.0) + + await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) + + +@pytest.mark.asyncio +async def test_benchmark_fuzzy_lookup_10k(async_benchmark): + vb, query = make_populated_vector_base(10_000) + + async def target(): + vb.fuzzy_lookup_embedding(query, max_hits=10, min_score=0.0) + + await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) + + +@pytest.mark.asyncio +async def test_benchmark_fuzzy_lookup_10k_with_predicate(async_benchmark): + vb, query = make_populated_vector_base(10_000) + # Predicate that accepts ~50% of indices. 
+ even_only = lambda i: i % 2 == 0 + + async def target(): + vb.fuzzy_lookup_embedding( + query, max_hits=10, min_score=0.0, predicate=even_only + ) + + await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) + + +# --- fuzzy_lookup_embedding_in_subset --- + + +@pytest.mark.asyncio +async def test_benchmark_fuzzy_lookup_subset_1k_of_10k(async_benchmark): + vb, query = make_populated_vector_base(10_000) + subset = list(range(0, 10_000, 10)) # 1000 indices + + async def target(): + vb.fuzzy_lookup_embedding_in_subset(query, subset, max_hits=10, min_score=0.0) + + await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) From 921ed9bc8e13740c6a118a4fccc84b1c08583b32 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 12:57:47 -0500 Subject: [PATCH 5/9] Revert "Optimize fuzzy_lookup_embedding with numpy vectorized ops" This reverts commit bc5b3197d48c8452f782b795ab61b05f06a35507. --- src/typeagent/aitools/vectorbase.py | 65 ++++----------- tests/benchmarks/test_benchmark_vectorbase.py | 83 ------------------- 2 files changed, 15 insertions(+), 133 deletions(-) delete mode 100644 tests/benchmarks/test_benchmark_vectorbase.py diff --git a/src/typeagent/aitools/vectorbase.py b/src/typeagent/aitools/vectorbase.py index 267b576a..d4f84c21 100644 --- a/src/typeagent/aitools/vectorbase.py +++ b/src/typeagent/aitools/vectorbase.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from collections.abc import Callable +from collections.abc import Callable, Iterable from dataclasses import dataclass import numpy as np @@ -156,35 +156,17 @@ def fuzzy_lookup_embedding( min_score = 0.0 if len(self._vectors) == 0: return [] - scores = np.dot(self._vectors, embedding) - - if predicate is None: - # Fast numpy path: filter and top-k without Python-level iteration. 
- indices = np.flatnonzero(scores >= min_score) - if len(indices) == 0: - return [] - filtered_scores = scores[indices] - if len(indices) <= max_hits: - order = np.argsort(filtered_scores)[::-1] - else: - # argpartition is O(n) vs O(n log n) for full sort. - top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:] - order = top_k[np.argsort(filtered_scores[top_k])[::-1]] - return [ - ScoredInt(int(indices[i]), float(filtered_scores[i])) for i in order - ] - else: - # Predicate path: pre-filter by score in numpy, then apply predicate - # only to candidates that pass the score threshold. - candidates = np.flatnonzero(scores >= min_score) - scored_ordinals = [ - ScoredInt(int(i), float(scores[i])) - for i in candidates - if predicate(int(i)) - ] - scored_ordinals.sort(key=lambda x: x.score, reverse=True) - return scored_ordinals[:max_hits] + # This line does most of the work: + scores: Iterable[float] = np.dot(self._vectors, embedding) + scored_ordinals = [ + ScoredInt(i, score) + for i, score in enumerate(scores) + if score >= min_score and (predicate is None or predicate(i)) + ] + scored_ordinals.sort(key=lambda x: x.score, reverse=True) + return scored_ordinals[:max_hits] + # TODO: Make this and fuzzy_lookup_embedding() more similar. def fuzzy_lookup_embedding_in_subset( self, embedding: NormalizedEmbedding, @@ -192,27 +174,10 @@ def fuzzy_lookup_embedding_in_subset( max_hits: int | None = None, min_score: float | None = None, ) -> list[ScoredInt]: - if max_hits is None: - max_hits = 10 - if min_score is None: - min_score = 0.0 - if not ordinals_of_subset or len(self._vectors) == 0: - return [] - # Compute dot products only for the subset instead of all vectors. 
- subset = np.asarray(ordinals_of_subset) - scores = np.dot(self._vectors[subset], embedding) - indices = np.flatnonzero(scores >= min_score) - if len(indices) == 0: - return [] - filtered_scores = scores[indices] - if len(indices) <= max_hits: - order = np.argsort(filtered_scores)[::-1] - else: - top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:] - order = top_k[np.argsort(filtered_scores[top_k])[::-1]] - return [ - ScoredInt(int(subset[indices[i]]), float(filtered_scores[i])) for i in order - ] + ordinals_set = set(ordinals_of_subset) + return self.fuzzy_lookup_embedding( + embedding, max_hits, min_score, lambda i: i in ordinals_set + ) async def fuzzy_lookup( self, diff --git a/tests/benchmarks/test_benchmark_vectorbase.py b/tests/benchmarks/test_benchmark_vectorbase.py deleted file mode 100644 index b61859a8..00000000 --- a/tests/benchmarks/test_benchmark_vectorbase.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Benchmarks for VectorBase fuzzy lookup methods. - -Measures fuzzy_lookup_embedding and fuzzy_lookup_embedding_in_subset -with varying vector counts and result sizes. -""" - -import numpy as np -import pytest - -from typeagent.aitools.model_adapters import create_test_embedding_model -from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase - -EMBEDDING_DIM = 384 # Typical small embedding model dimension - - -def make_populated_vector_base(n_vectors: int) -> tuple[VectorBase, np.ndarray]: - """Create a VectorBase with n_vectors random normalized embeddings.""" - settings = TextEmbeddingIndexSettings(create_test_embedding_model()) - vb = VectorBase(settings) - rng = np.random.default_rng(42) - embeddings = rng.standard_normal((n_vectors, EMBEDDING_DIM)).astype(np.float32) - # Normalize to unit vectors (as the real pipeline does). 
- norms = np.linalg.norm(embeddings, axis=1, keepdims=True) - embeddings = embeddings / norms - vb.add_embeddings(None, embeddings) - # Query vector: also normalized. - query = rng.standard_normal(EMBEDDING_DIM).astype(np.float32) - query = query / np.linalg.norm(query) - return vb, query - - -# --- fuzzy_lookup_embedding --- - - -@pytest.mark.asyncio -async def test_benchmark_fuzzy_lookup_1k(async_benchmark): - vb, query = make_populated_vector_base(1_000) - - async def target(): - vb.fuzzy_lookup_embedding(query, max_hits=10, min_score=0.0) - - await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) - - -@pytest.mark.asyncio -async def test_benchmark_fuzzy_lookup_10k(async_benchmark): - vb, query = make_populated_vector_base(10_000) - - async def target(): - vb.fuzzy_lookup_embedding(query, max_hits=10, min_score=0.0) - - await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) - - -@pytest.mark.asyncio -async def test_benchmark_fuzzy_lookup_10k_with_predicate(async_benchmark): - vb, query = make_populated_vector_base(10_000) - # Predicate that accepts ~50% of indices. 
- even_only = lambda i: i % 2 == 0 - - async def target(): - vb.fuzzy_lookup_embedding( - query, max_hits=10, min_score=0.0, predicate=even_only - ) - - await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) - - -# --- fuzzy_lookup_embedding_in_subset --- - - -@pytest.mark.asyncio -async def test_benchmark_fuzzy_lookup_subset_1k_of_10k(async_benchmark): - vb, query = make_populated_vector_base(10_000) - subset = list(range(0, 10_000, 10)) # 1000 indices - - async def target(): - vb.fuzzy_lookup_embedding_in_subset(query, subset, max_hits=10, min_score=0.0) - - await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) From 72d4fcb153aca737e2025e1f24eeef90c0e4ce42 Mon Sep 17 00:00:00 2001 From: shreejaykurhade Date: Sat, 11 Apr 2026 23:08:34 +0530 Subject: [PATCH 6/9] update --- .../20260411T163642Z/metadata.json | 26 + .../run_01.json | 255 ++++++ .../run_02.json | 255 ++++++ .../run_03.json | 255 ++++++ .../run_04.json | 255 ++++++ .../run_05.json | 255 ++++++ .../run_06.json | 255 ++++++ .../run_07.json | 255 ++++++ .../run_08.json | 255 ++++++ .../run_09.json | 255 ++++++ .../run_10.json | 255 ++++++ .../run_11.json | 255 ++++++ .../run_12.json | 255 ++++++ .../run_13.json | 255 ++++++ .../run_14.json | 255 ++++++ .../run_15.json | 255 ++++++ .../run_16.json | 255 ++++++ .../run_17.json | 255 ++++++ .../run_18.json | 255 ++++++ .../run_19.json | 255 ++++++ .../run_20.json | 255 ++++++ .../run_21.json | 255 ++++++ .../run_22.json | 255 ++++++ .../run_23.json | 255 ++++++ .../run_24.json | 255 ++++++ .../run_25.json | 255 ++++++ .../run_26.json | 255 ++++++ .../run_27.json | 255 ++++++ .../run_28.json | 255 ++++++ .../run_29.json | 255 ++++++ .../run_30.json | 255 ++++++ .../summary.json | 261 ++++++ .../20260411T163642Z/summary.json | 263 ++++++ benchmark_results/20260411T163642Z/summary.md | 5 + .../20260411T170901Z/metadata.json | 26 + .../run_01.json | 255 ++++++ .../run_02.json | 255 ++++++ .../run_03.json | 255 ++++++ .../run_04.json 
| 255 ++++++ .../run_05.json | 255 ++++++ .../run_06.json | 255 ++++++ .../run_07.json | 255 ++++++ .../run_08.json | 255 ++++++ .../run_09.json | 255 ++++++ .../run_10.json | 255 ++++++ .../run_11.json | 255 ++++++ .../run_12.json | 255 ++++++ .../run_13.json | 255 ++++++ .../run_14.json | 255 ++++++ .../run_15.json | 255 ++++++ .../run_16.json | 255 ++++++ .../run_17.json | 255 ++++++ .../run_18.json | 255 ++++++ .../run_19.json | 255 ++++++ .../run_20.json | 255 ++++++ .../run_21.json | 255 ++++++ .../run_22.json | 255 ++++++ .../run_23.json | 255 ++++++ .../run_24.json | 255 ++++++ .../run_25.json | 255 ++++++ .../run_26.json | 255 ++++++ .../run_27.json | 255 ++++++ .../run_28.json | 255 ++++++ .../run_29.json | 255 ++++++ .../run_30.json | 255 ++++++ .../summary.json | 261 ++++++ .../20260411T170901Z/summary.json | 263 ++++++ benchmark_results/20260411T170901Z/summary.md | 5 + .../20260411T171331Z/metadata.json | 26 + .../run_01.json | 255 ++++++ .../run_02.json | 255 ++++++ .../run_03.json | 255 ++++++ .../run_04.json | 255 ++++++ .../run_05.json | 255 ++++++ .../run_06.json | 255 ++++++ .../run_07.json | 255 ++++++ .../run_08.json | 255 ++++++ .../run_09.json | 255 ++++++ .../run_10.json | 255 ++++++ .../run_11.json | 255 ++++++ .../run_12.json | 255 ++++++ .../run_13.json | 255 ++++++ .../run_14.json | 255 ++++++ .../run_15.json | 255 ++++++ .../run_16.json | 255 ++++++ .../run_17.json | 255 ++++++ .../run_18.json | 255 ++++++ .../run_19.json | 255 ++++++ .../run_20.json | 255 ++++++ .../run_21.json | 255 ++++++ .../run_22.json | 255 ++++++ .../run_23.json | 255 ++++++ .../run_24.json | 255 ++++++ .../run_25.json | 255 ++++++ .../run_26.json | 255 ++++++ .../run_27.json | 255 ++++++ .../run_28.json | 255 ++++++ .../run_29.json | 255 ++++++ .../run_30.json | 255 ++++++ .../summary.json | 261 ++++++ .../20260411T171331Z/summary.json | 263 ++++++ benchmark_results/20260411T171331Z/summary.md | 5 + .../20260411T172116Z/metadata.json | 28 + .../run_01.json | 
255 ++++++ .../run_02.json | 255 ++++++ .../run_03.json | 255 ++++++ .../run_04.json | 255 ++++++ .../run_05.json | 255 ++++++ .../run_06.json | 255 ++++++ .../run_07.json | 255 ++++++ .../run_08.json | 255 ++++++ .../run_09.json | 255 ++++++ .../run_10.json | 255 ++++++ .../run_11.json | 255 ++++++ .../run_12.json | 255 ++++++ .../run_13.json | 255 ++++++ .../run_14.json | 255 ++++++ .../run_15.json | 255 ++++++ .../run_16.json | 255 ++++++ .../run_17.json | 255 ++++++ .../run_18.json | 255 ++++++ .../run_19.json | 255 ++++++ .../run_20.json | 255 ++++++ .../run_21.json | 255 ++++++ .../run_22.json | 255 ++++++ .../run_23.json | 255 ++++++ .../run_24.json | 255 ++++++ .../run_25.json | 255 ++++++ .../run_26.json | 255 ++++++ .../run_27.json | 255 ++++++ .../run_28.json | 255 ++++++ .../run_29.json | 255 ++++++ .../run_30.json | 255 ++++++ .../summary.json | 261 ++++++ .../run_01.json | 255 ++++++ .../run_02.json | 255 ++++++ .../run_03.json | 255 ++++++ .../run_04.json | 255 ++++++ .../run_05.json | 255 ++++++ .../run_06.json | 255 ++++++ .../run_07.json | 255 ++++++ .../run_08.json | 255 ++++++ .../run_09.json | 255 ++++++ .../run_10.json | 255 ++++++ .../run_11.json | 255 ++++++ .../run_12.json | 255 ++++++ .../run_13.json | 255 ++++++ .../run_14.json | 255 ++++++ .../run_15.json | 255 ++++++ .../run_16.json | 255 ++++++ .../run_17.json | 255 ++++++ .../run_18.json | 255 ++++++ .../run_19.json | 255 ++++++ .../run_20.json | 255 ++++++ .../run_21.json | 255 ++++++ .../run_22.json | 255 ++++++ .../run_23.json | 255 ++++++ .../run_24.json | 255 ++++++ .../run_25.json | 255 ++++++ .../run_26.json | 255 ++++++ .../run_27.json | 255 ++++++ .../run_28.json | 255 ++++++ .../run_29.json | 255 ++++++ .../run_30.json | 255 ++++++ .../summary.json | 261 ++++++ .../run_01.json | 255 ++++++ .../run_02.json | 255 ++++++ .../run_03.json | 255 ++++++ .../run_04.json | 255 ++++++ .../run_05.json | 255 ++++++ .../run_06.json | 255 ++++++ .../run_07.json | 255 ++++++ 
.../run_08.json | 255 ++++++ .../run_09.json | 255 ++++++ .../run_10.json | 255 ++++++ .../run_11.json | 255 ++++++ .../run_12.json | 255 ++++++ .../run_13.json | 255 ++++++ .../run_14.json | 255 ++++++ .../run_15.json | 255 ++++++ .../run_16.json | 255 ++++++ .../run_17.json | 255 ++++++ .../run_18.json | 255 ++++++ .../run_19.json | 255 ++++++ .../run_20.json | 255 ++++++ .../run_21.json | 255 ++++++ .../run_22.json | 255 ++++++ .../run_23.json | 255 ++++++ .../run_24.json | 255 ++++++ .../run_25.json | 255 ++++++ .../run_26.json | 255 ++++++ .../run_27.json | 255 ++++++ .../run_28.json | 255 ++++++ .../run_29.json | 255 ++++++ .../run_30.json | 255 ++++++ .../summary.json | 261 ++++++ .../20260411T172116Z/summary.json | 785 ++++++++++++++++++ benchmark_results/20260411T172116Z/summary.md | 7 + src/typeagent/aitools/vectorbase.py | 73 +- tools/benchmark_embeddings.py | 530 +++++------- tools/repeat_embedding_benchmarks.py | 320 +++++++ 201 files changed, 49749 insertions(+), 342 deletions(-) create mode 100644 benchmark_results/20260411T163642Z/metadata.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json create mode 100644 
benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json create mode 100644 
benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json create mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json create mode 100644 benchmark_results/20260411T163642Z/summary.json create mode 100644 benchmark_results/20260411T163642Z/summary.md create mode 100644 benchmark_results/20260411T170901Z/metadata.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json create mode 100644 
benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json create mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json create mode 100644 benchmark_results/20260411T170901Z/summary.json create mode 100644 benchmark_results/20260411T170901Z/summary.md create mode 100644 benchmark_results/20260411T171331Z/metadata.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json create mode 100644 
benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json create mode 100644 
benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json create mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json create mode 100644 benchmark_results/20260411T171331Z/summary.json create mode 100644 benchmark_results/20260411T171331Z/summary.md create mode 100644 benchmark_results/20260411T172116Z/metadata.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json create mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json create mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json create mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json create mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json create mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json create mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json create mode 100644 benchmark_results/20260411T172116Z/summary.json create mode 100644 benchmark_results/20260411T172116Z/summary.md create mode 100644 tools/repeat_embedding_benchmarks.py diff --git a/benchmark_results/20260411T163642Z/metadata.json b/benchmark_results/20260411T163642Z/metadata.json new file mode 100644 index 00000000..aeb8c76c --- /dev/null +++ b/benchmark_results/20260411T163642Z/metadata.json @@ -0,0 +1,26 @@ +{ + "created_at_utc": "20260411T163642Z", + "runs_per_model": 30, + "models": [ + "openai:text-embedding-ada-002" + ], + "min_scores": [ + 0.25, + 0.3, + 0.35, + 0.4, + 0.5, + 0.6, + 0.7, + 0.75, + 0.8, + 0.85 + ], + "max_hits_values": [ + 5, + 10, + 15, + 20 + ], + "batch_size": 16 +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json new file mode 100644 index 00000000..f42ed99c --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json @@ -0,0 +1,255 @@ +{ + "run_index": 1, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + 
"mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json new file mode 100644 index 00000000..91d93052 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json @@ -0,0 +1,255 @@ +{ + "run_index": 2, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + 
} + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json new file mode 100644 index 00000000..c612d42d --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json @@ -0,0 +1,255 @@ +{ + "run_index": 3, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json new file mode 100644 index 00000000..9221c946 --- /dev/null +++ 
b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json @@ -0,0 +1,255 @@ +{ + "run_index": 4, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 
0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 
0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json new file mode 100644 index 00000000..a91328fa --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json @@ -0,0 +1,255 @@ +{ + "run_index": 5, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + 
"max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + 
"max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + 
"max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json new file mode 100644 index 00000000..5a205dd3 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json @@ -0,0 +1,255 @@ +{ + "run_index": 6, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json new file mode 100644 index 00000000..45e7a7df --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json @@ -0,0 +1,255 @@ +{ + "run_index": 7, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 
+ }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + 
{ + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json new file mode 100644 index 00000000..6db6beff --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json @@ -0,0 +1,255 @@ +{ + "run_index": 8, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + 
"max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 
20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json new file mode 100644 index 00000000..8c45ea9b --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json @@ -0,0 +1,255 @@ +{ + "run_index": 9, 
+ "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + 
"mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json new file mode 100644 index 00000000..723a31ee --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json @@ -0,0 +1,255 @@ +{ + "run_index": 10, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + 
"min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json new file mode 100644 index 00000000..c42d84a5 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json @@ -0,0 +1,255 @@ +{ + "run_index": 11, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 
0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json new file mode 100644 index 00000000..6f2c60b5 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json @@ -0,0 +1,255 @@ +{ + "run_index": 12, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 
0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 
0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git 
a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json new file mode 100644 index 00000000..29c6deb1 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json @@ -0,0 +1,255 @@ +{ + "run_index": 13, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, 
+ { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json new file mode 100644 index 00000000..3d4286d3 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json @@ -0,0 +1,255 @@ +{ + "run_index": 14, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": 
"text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + 
"hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + 
"hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json new file mode 100644 index 00000000..dacac5e0 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json @@ -0,0 +1,255 @@ +{ + "run_index": 15, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + 
"mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json new file mode 100644 index 00000000..d22e1dc7 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json @@ -0,0 +1,255 @@ +{ + "run_index": 16, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json new file mode 100644 index 00000000..2c9c7a35 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json @@ -0,0 +1,255 @@ +{ + "run_index": 17, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json 
b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json new file mode 100644 index 00000000..d248c4b3 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json @@ -0,0 +1,255 @@ +{ + "run_index": 18, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + 
"mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json new file mode 100644 index 00000000..60a555e6 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json @@ -0,0 +1,255 @@ +{ + "run_index": 19, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + 
"min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + 
"min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json new file mode 100644 index 00000000..f313eb1f --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json @@ -0,0 +1,255 @@ +{ + "run_index": 20, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 
47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json new file mode 100644 index 00000000..88b2fc2a --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json @@ -0,0 +1,255 @@ +{ + "run_index": 21, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 
0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 
0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json new file mode 100644 index 00000000..8919b4eb --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json @@ -0,0 +1,255 @@ +{ + "run_index": 22, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + 
"max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + 
"max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json new file mode 100644 index 00000000..a313722a 
--- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json @@ -0,0 +1,255 @@ +{ + "run_index": 23, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + 
"mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + 
"mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json new file mode 100644 index 00000000..fb919a66 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json @@ -0,0 +1,255 @@ +{ + "run_index": 24, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + 
"min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + 
"min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + 
"min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json new file mode 100644 index 00000000..af8bb9bb --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json @@ -0,0 +1,255 @@ +{ + "run_index": 25, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 
91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 
91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json new file mode 100644 index 00000000..ec9fa263 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json @@ -0,0 +1,255 @@ +{ + "run_index": 26, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 
0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 
0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, 
+ "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json new file mode 100644 index 00000000..ffa32277 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json @@ -0,0 +1,255 @@ +{ + "run_index": 27, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + 
"max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 
20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json new file mode 100644 index 00000000..a8548590 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json @@ -0,0 +1,255 @@ +{ + "run_index": 28, 
+ "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + 
"mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json new file mode 100644 index 00000000..dffe68de --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json @@ -0,0 +1,255 @@ +{ + "run_index": 29, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + 
"min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { 
+ "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6333747927031509 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6343698175787728 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6343698175787728 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { 
+ "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json new file mode 100644 index 00000000..df9701d3 --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json @@ -0,0 +1,255 @@ +{ + "run_index": 30, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 
0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json new file mode 100644 index 00000000..948925bb --- /dev/null +++ b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json @@ -0,0 +1,261 @@ +{ + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.35, + 
"max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 
0.7, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 80.59701492537313, + "mean_mrr": 0.6336324330727315 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.6346274579483534 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.6346274579483534 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/summary.json b/benchmark_results/20260411T163642Z/summary.json new file mode 100644 index 00000000..6f12d3bc --- /dev/null +++ 
b/benchmark_results/20260411T163642Z/summary.json @@ -0,0 +1,263 @@ +[ + { + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + 
"mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7493040748637763 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 80.59701492537313, + "mean_mrr": 0.6336324330727315 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.6346274579483534 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 
82.08955223880598, + "mean_mrr": 0.6346274579483534 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514472053651158 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } + } +] \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/summary.md b/benchmark_results/20260411T163642Z/summary.md new file mode 100644 index 00000000..fc0d054c --- /dev/null +++ b/benchmark_results/20260411T163642Z/summary.md @@ -0,0 +1,5 @@ +# Repeated Embedding Benchmark Summary + +| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR | +| --- | ---: | ---: | ---: | ---: | ---: | +| text-embedding-ada-002 | 30 | 0.25 | 15 | 98.51 | 0.7514 | diff --git a/benchmark_results/20260411T170901Z/metadata.json b/benchmark_results/20260411T170901Z/metadata.json new file mode 100644 index 00000000..b90ccd56 --- /dev/null +++ b/benchmark_results/20260411T170901Z/metadata.json @@ -0,0 +1,26 @@ +{ + "created_at_utc": "20260411T170901Z", + "runs_per_model": 30, + "models": [ + "openai:text-embedding-3-small" + ], + 
"min_scores": [ + 0.25, + 0.3, + 0.35, + 0.4, + 0.5, + 0.6, + 0.7, + 0.75, + 0.8, + 0.85 + ], + "max_hits_values": [ + 5, + 10, + 15, + 20 + ], + "batch_size": 16 +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json new file mode 100644 index 00000000..480efa93 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json @@ -0,0 +1,255 @@ +{ + "run_index": 1, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, 
+ "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + 
"hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json new file mode 100644 index 00000000..9b716fa9 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json @@ -0,0 +1,255 @@ +{ + "run_index": 2, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 
0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + 
"max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json new file mode 100644 index 00000000..95ba901c --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json @@ -0,0 +1,255 @@ +{ + "run_index": 3, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 
0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 
2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json new file mode 100644 index 00000000..9918f19e 
--- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json @@ -0,0 +1,255 @@ +{ + "run_index": 4, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + 
"mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json new file mode 100644 index 00000000..7e10c385 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json @@ -0,0 +1,255 @@ +{ + "run_index": 5, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { 
+ "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 
2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], 
+ "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json new file mode 100644 index 00000000..6b8a6ac9 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json @@ -0,0 +1,255 @@ +{ + "run_index": 6, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 
67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json new file mode 100644 index 00000000..80fde7a0 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json @@ -0,0 +1,255 @@ +{ + "run_index": 7, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + 
{ + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + 
"hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json new file mode 100644 index 00000000..3fb706a0 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json @@ -0,0 +1,255 @@ +{ + "run_index": 8, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 
0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json new file mode 100644 index 00000000..8d96e4e1 --- /dev/null +++ 
b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json @@ -0,0 +1,255 @@ +{ + "run_index": 9, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, 
+ { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + 
}, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json new file mode 100644 index 00000000..02605a05 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json @@ -0,0 +1,255 @@ +{ + "run_index": 10, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + 
"hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + 
"min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json new file mode 100644 index 00000000..32fc0de5 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json @@ -0,0 +1,255 @@ +{ + "run_index": 11, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json new file mode 100644 index 00000000..7fe61cc8 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json @@ -0,0 +1,255 @@ +{ + "run_index": 12, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + 
{ + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + 
"hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json new file mode 100644 index 00000000..24d9b86c --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json @@ -0,0 +1,255 @@ +{ + "run_index": 13, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 
0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json new file mode 100644 index 00000000..fc5f030a --- /dev/null +++ 
b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json @@ -0,0 +1,255 @@ +{ + "run_index": 14, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + 
}, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 
+ }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json new file mode 100644 index 00000000..bb87e5c8 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json @@ -0,0 +1,255 @@ +{ + "run_index": 15, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 
5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + 
"min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json new file mode 100644 index 00000000..af78a67d --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json @@ -0,0 +1,255 @@ +{ + "run_index": 16, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json new file mode 100644 index 00000000..91775a17 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json @@ -0,0 +1,255 @@ +{ + "run_index": 17, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + 
{ + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + 
"hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json new file mode 100644 index 00000000..4a260259 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json @@ -0,0 +1,255 @@ +{ + "run_index": 18, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 
0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json new file mode 100644 index 00000000..be6cfab3 --- /dev/null +++ 
b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json @@ -0,0 +1,255 @@ +{ + "run_index": 19, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + 
}, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 
+ }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json new file mode 100644 index 00000000..2d768925 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json @@ -0,0 +1,255 @@ +{ + "run_index": 20, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 
5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + 
"min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json new file mode 100644 index 00000000..bab7604d --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json @@ -0,0 +1,255 @@ +{ + "run_index": 21, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json new file mode 100644 index 00000000..9ad59995 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json @@ -0,0 +1,255 @@ +{ + "run_index": 22, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + 
{ + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + 
"hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json new file mode 100644 index 00000000..3fb6461c --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json @@ -0,0 +1,255 @@ +{ + "run_index": 23, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 
0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json new file mode 100644 index 00000000..1e6ae3a4 --- /dev/null +++ 
b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json @@ -0,0 +1,255 @@ +{ + "run_index": 24, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + 
}, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 
+ }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json new file mode 100644 index 00000000..3cb6dcbc --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json @@ -0,0 +1,255 @@ +{ + "run_index": 25, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 
5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + 
"min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json new file mode 100644 index 00000000..04c9d790 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json @@ -0,0 +1,255 @@ +{ + "run_index": 26, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json new file mode 100644 index 00000000..7c28c7ce --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json @@ -0,0 +1,255 @@ +{ + "run_index": 27, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + 
{ + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + 
"hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json new file mode 100644 index 00000000..5080c761 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json @@ -0,0 +1,255 @@ +{ + "run_index": 28, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 
0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json new file mode 100644 index 00000000..17566b00 --- /dev/null +++ 
b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json @@ -0,0 +1,255 @@ +{ + "run_index": 29, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + 
}, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 
+ }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json new file mode 100644 index 00000000..088ae095 --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json @@ -0,0 +1,255 @@ +{ + "run_index": 30, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 
5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + 
"min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json new file mode 100644 index 00000000..9935ddaa --- /dev/null +++ b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json @@ -0,0 +1,261 @@ +{ + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.67987818261633 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 73.13432835820896, + "mean_mrr": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6037587796312555 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 65.67164179104478, + "mean_mrr": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, 
+ "max_hits": 20, + "mean_hit_rate": 68.65671641791045, + "mean_mrr": 0.5392065408252853 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + 
{ + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.67987818261633 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "20": 30 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/summary.json b/benchmark_results/20260411T170901Z/summary.json new file mode 100644 index 00000000..7492220f --- /dev/null +++ b/benchmark_results/20260411T170901Z/summary.json @@ -0,0 +1,263 @@ +[ + { + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + 
"mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.67987818261633 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 73.13432835820896, + "mean_mrr": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6037587796312555 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 65.67164179104478, + "mean_mrr": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 68.65671641791045, + "mean_mrr": 0.5392065408252853 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 
0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.67987818261633 + }, + "best_min_score_counts": { + "0.25": 30 
+ }, + "best_max_hits_counts": { + "20": 30 + } + } +] \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/summary.md b/benchmark_results/20260411T170901Z/summary.md new file mode 100644 index 00000000..e3fe3102 --- /dev/null +++ b/benchmark_results/20260411T170901Z/summary.md @@ -0,0 +1,5 @@ +# Repeated Embedding Benchmark Summary + +| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR | +| --- | ---: | ---: | ---: | ---: | ---: | +| text-embedding-3-small | 30 | 0.25 | 20 | 88.06 | 0.6799 | diff --git a/benchmark_results/20260411T171331Z/metadata.json b/benchmark_results/20260411T171331Z/metadata.json new file mode 100644 index 00000000..c80d2a95 --- /dev/null +++ b/benchmark_results/20260411T171331Z/metadata.json @@ -0,0 +1,26 @@ +{ + "created_at_utc": "20260411T171331Z", + "runs_per_model": 30, + "models": [ + "openai:text-embedding-3-large" + ], + "min_scores": [ + 0.25, + 0.3, + 0.35, + 0.4, + 0.5, + 0.6, + 0.7, + 0.75, + 0.8, + 0.85 + ], + "max_hits_values": [ + 5, + 10, + 15, + 20 + ], + "batch_size": 16 +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json new file mode 100644 index 00000000..3cb92bc5 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json @@ -0,0 +1,255 @@ +{ + "run_index": 1, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 
+ }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 
0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 
0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json new file mode 100644 index 00000000..dce2186d --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json @@ -0,0 +1,255 @@ +{ + "run_index": 2, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 
0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + 
"hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json new file mode 100644 index 00000000..659629c6 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json @@ -0,0 +1,255 @@ +{ + "run_index": 3, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 
0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + 
"mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json new file mode 100644 index 00000000..646c1b2e --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json @@ -0,0 +1,255 @@ +{ + "run_index": 4, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + 
"mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, 
+ "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json new file mode 100644 index 00000000..70a071a5 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json @@ -0,0 +1,255 @@ +{ + 
"run_index": 5, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + 
"mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json new file mode 100644 index 00000000..56ef83c1 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json @@ -0,0 +1,255 @@ +{ + "run_index": 6, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json 
b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json new file mode 100644 index 00000000..c195bb16 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json @@ -0,0 +1,255 @@ +{ + "run_index": 7, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + 
"mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + 
"min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json new file mode 100644 index 00000000..cd8598d2 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json @@ -0,0 +1,255 @@ +{ + "run_index": 8, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + 
"mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 
22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + 
"best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json new file mode 100644 index 00000000..251bf9f5 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json @@ -0,0 +1,255 @@ +{ + "run_index": 9, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 
61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json new file mode 100644 index 00000000..4526d50b --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json @@ -0,0 +1,255 @@ +{ + "run_index": 10, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6266169154228856 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 
74.6268656716418, + "mean_reciprocal_rank": 0.6266169154228856 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6292175486205337 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6292175486205337 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.559452736318408 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.559452736318408 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5620533695160561 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5620533695160561 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5186567164179104 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5186567164179104 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5199004975124378 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5199004975124378 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4626865671641791 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4626865671641791 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4639303482587065 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4639303482587065 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19776119402985073 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 
22.388059701492537, + "mean_reciprocal_rank": 0.19776119402985073 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19776119402985073 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19776119402985073 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, 
+ "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6292175486205337 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json new file mode 100644 index 00000000..e6570cd6 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json @@ -0,0 +1,255 @@ +{ + "run_index": 11, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 
59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json new file mode 100644 index 00000000..57252912 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json @@ -0,0 +1,255 @@ +{ + "run_index": 12, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": 
"text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, 
+ "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json new file mode 100644 index 00000000..efe0209f --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json @@ -0,0 +1,255 @@ +{ + "run_index": 13, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + 
"hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, 
+ { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json new file mode 100644 
index 00000000..7b4e9b26 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json @@ -0,0 +1,255 @@ +{ + "run_index": 14, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + 
"hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + 
"min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json new file mode 100644 index 00000000..e27a93b4 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json @@ -0,0 +1,255 @@ +{ + "run_index": 15, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + 
"hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + 
"mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json new file mode 100644 index 00000000..12ee964a --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json @@ -0,0 +1,255 @@ +{ + "run_index": 16, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + 
"max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + 
"max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json new file mode 100644 index 00000000..99038ee8 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json @@ -0,0 +1,255 @@ +{ + "run_index": 17, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + 
"max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 
0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + 
"max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json new file mode 100644 index 00000000..94f69a50 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json @@ -0,0 +1,255 @@ +{ + "run_index": 18, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, 
+ "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json new file mode 100644 index 00000000..2881b67b --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json @@ -0,0 +1,255 @@ +{ + "run_index": 19, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + 
"max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + 
"max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json new file mode 100644 index 00000000..0d223549 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json @@ -0,0 +1,255 @@ +{ + "run_index": 20, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 
0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + 
{ + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json new file mode 100644 index 00000000..5b3d8df7 --- /dev/null +++ 
b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json @@ -0,0 +1,255 @@ +{ + "run_index": 21, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + 
"mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, 
+ "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json new file mode 100644 index 00000000..f93cfa38 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json @@ -0,0 +1,255 @@ +{ + "run_index": 22, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + 
} +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json new file mode 100644 index 00000000..d901ca6c --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json @@ -0,0 +1,255 @@ +{ + "run_index": 23, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + 
"mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json new file mode 100644 index 00000000..7ac39e82 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json @@ -0,0 +1,255 @@ +{ + "run_index": 24, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json new file mode 100644 index 00000000..f7bdf62a --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json @@ -0,0 +1,255 @@ +{ + "run_index": 25, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + 
"hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 
0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json new file mode 100644 index 00000000..585ee336 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json @@ -0,0 +1,255 @@ +{ + "run_index": 26, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + 
"hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, 
+ { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json new file mode 100644 index 00000000..88c17b30 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json @@ -0,0 +1,255 @@ +{ + "run_index": 27, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + 
"hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json new file mode 100644 index 00000000..98eb25c9 --- /dev/null +++ 
b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json @@ -0,0 +1,255 @@ +{ + "run_index": 28, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + 
"mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, 
+ "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json new file mode 100644 index 00000000..344d33d5 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json @@ -0,0 +1,255 @@ +{ + "run_index": 29, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + 
} +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json new file mode 100644 index 00000000..3fa07e44 --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json @@ -0,0 +1,255 @@ +{ + "run_index": 30, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + 
"mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json new file mode 100644 index 00000000..e98c3e2b --- /dev/null +++ b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json @@ -0,0 +1,261 @@ +{ + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6242122719734661 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6242122719734661 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 
77.61194029850746, + "mean_mrr": 0.6268129051711141 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6268129051711141 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5570480928689884 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5570480928689884 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5596487260666365 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5596487260666365 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 0.5162520729684908 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 0.5162520729684908 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174958540630182 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174958540630182 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.46028192371475957 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.46028192371475957 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4615257048092869 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4615257048092869 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.5, + "max_hits": 20, + 
"mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6268129051711141 + 
}, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/summary.json b/benchmark_results/20260411T171331Z/summary.json new file mode 100644 index 00000000..79ad0d22 --- /dev/null +++ b/benchmark_results/20260411T171331Z/summary.json @@ -0,0 +1,263 @@ +[ + { + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6242122719734661 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6242122719734661 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6268129051711141 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6268129051711141 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5570480928689884 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5570480928689884 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5596487260666365 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5596487260666365 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 0.5162520729684908 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 0.5162520729684908 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174958540630182 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174958540630182 + }, + { + "min_score": 
0.4, + "max_hits": 5, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.46028192371475957 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.46028192371475957 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4615257048092869 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4615257048092869 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19535655058043117 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + 
}, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6268129051711141 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } + } +] \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/summary.md b/benchmark_results/20260411T171331Z/summary.md new file mode 100644 index 00000000..e4ceadb6 --- /dev/null +++ b/benchmark_results/20260411T171331Z/summary.md @@ -0,0 +1,5 @@ +# Repeated Embedding Benchmark Summary + +| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR | +| --- | ---: | ---: | ---: | ---: | ---: | +| text-embedding-3-large | 30 | 0.25 | 15 | 77.61 | 0.6268 | diff --git a/benchmark_results/20260411T172116Z/metadata.json b/benchmark_results/20260411T172116Z/metadata.json new file mode 100644 index 00000000..1bd0005a --- /dev/null +++ b/benchmark_results/20260411T172116Z/metadata.json @@ -0,0 +1,28 @@ +{ + "created_at_utc": "20260411T172116Z", + "runs_per_model": 30, + "models": [ + "openai:text-embedding-3-small", + "openai:text-embedding-3-large", + "openai:text-embedding-ada-002" + ], + "min_scores": [ + 0.25, + 0.3, + 0.35, + 0.4, + 0.5, + 0.6, + 0.7, + 0.75, + 0.8, + 0.85 + ], + 
"max_hits_values": [ + 5, + 10, + 15, + 20 + ], + "batch_size": 16 +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json new file mode 100644 index 00000000..3cb92bc5 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json @@ -0,0 +1,255 @@ +{ + "run_index": 1, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + 
"min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + 
"min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json new file mode 100644 index 00000000..dce2186d --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json @@ -0,0 +1,255 @@ +{ + "run_index": 2, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + 
"min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + 
{ + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 
0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json new file mode 100644 index 00000000..659629c6 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json @@ -0,0 +1,255 @@ +{ + "run_index": 3, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 
0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, 
+ "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json new file mode 100644 index 00000000..646c1b2e --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json @@ -0,0 +1,255 @@ +{ + "run_index": 4, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + 
"max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + 
"max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json new file mode 100644 index 00000000..70a071a5 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json @@ -0,0 +1,255 @@ +{ + "run_index": 5, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 
0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + 
{ + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json new file mode 100644 index 00000000..56ef83c1 --- /dev/null +++ 
b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json @@ -0,0 +1,255 @@ +{ + "run_index": 6, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + 
"mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, 
+ "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json new file mode 100644 index 00000000..c195bb16 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json @@ -0,0 +1,255 @@ +{ + "run_index": 7, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + 
} +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json new file mode 100644 index 00000000..cd8598d2 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json @@ -0,0 +1,255 @@ +{ + "run_index": 8, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + 
"mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json new file mode 100644 index 00000000..251bf9f5 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json @@ -0,0 +1,255 @@ +{ + "run_index": 9, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json new file mode 100644 index 00000000..152979ff --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json @@ -0,0 +1,255 @@ +{ + "run_index": 10, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + 
"hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 
0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json new file mode 100644 index 00000000..e6570cd6 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json @@ -0,0 +1,255 @@ +{ + "run_index": 11, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + 
"hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, 
+ { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json new file mode 100644 index 00000000..57252912 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json @@ -0,0 +1,255 @@ +{ + "run_index": 12, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + 
"hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json new file mode 100644 index 00000000..efe0209f --- /dev/null +++ 
b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json @@ -0,0 +1,255 @@ +{ + "run_index": 13, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + 
"mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, 
+ "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json new file mode 100644 index 00000000..7b4e9b26 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json @@ -0,0 +1,255 @@ +{ + "run_index": 14, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + 
} +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json new file mode 100644 index 00000000..e27a93b4 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json @@ -0,0 +1,255 @@ +{ + "run_index": 15, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + 
"mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json new file mode 100644 index 00000000..12ee964a --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json @@ -0,0 +1,255 @@ +{ + "run_index": 16, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json new file mode 100644 index 00000000..99038ee8 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json @@ -0,0 +1,255 @@ +{ + "run_index": 17, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + 
"hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 
0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json new file mode 100644 index 00000000..94f69a50 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json @@ -0,0 +1,255 @@ +{ + "run_index": 18, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + 
"hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, 
+ { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json new file mode 100644 index 00000000..2881b67b --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json @@ -0,0 +1,255 @@ +{ + "run_index": 19, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + 
"hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json new file mode 100644 index 00000000..0d223549 --- /dev/null +++ 
b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json @@ -0,0 +1,255 @@ +{ + "run_index": 20, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + 
"mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, 
+ "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json new file mode 100644 index 00000000..5b3d8df7 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json @@ -0,0 +1,255 @@ +{ + "run_index": 21, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + 
} +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json new file mode 100644 index 00000000..f93cfa38 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json @@ -0,0 +1,255 @@ +{ + "run_index": 22, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + 
"mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json new file mode 100644 index 00000000..d901ca6c --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json @@ -0,0 +1,255 @@ +{ + "run_index": 23, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json new file mode 100644 index 00000000..7ac39e82 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json @@ -0,0 +1,255 @@ +{ + "run_index": 24, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + 
"hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 
0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json new file mode 100644 index 00000000..f7bdf62a --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json @@ -0,0 +1,255 @@ +{ + "run_index": 25, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + 
"hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, 
+ { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json new file mode 100644 index 00000000..585ee336 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json @@ -0,0 +1,255 @@ +{ + "run_index": 26, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + 
"hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json new file mode 100644 index 00000000..88c17b30 --- /dev/null +++ 
b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json @@ -0,0 +1,255 @@ +{ + "run_index": 27, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + 
"mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, 
+ "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json new file mode 100644 index 00000000..98eb25c9 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json @@ -0,0 +1,255 @@ +{ + "run_index": 28, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + 
"mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + 
} +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json new file mode 100644 index 00000000..344d33d5 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json @@ -0,0 +1,255 @@ +{ + "run_index": 29, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + 
"mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json new file mode 100644 index 00000000..3fa07e44 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json @@ -0,0 +1,255 @@ +{ + "run_index": 30, + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 74.6268656716418, + "mean_reciprocal_rank": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 
77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 70.1492537313433, + "mean_reciprocal_rank": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 59.70149253731343, + "mean_reciprocal_rank": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 61.19402985074627, + "mean_reciprocal_rank": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 52.23880597014925, + "mean_reciprocal_rank": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + 
"hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 22.388059701492537, + "mean_reciprocal_rank": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6267299864314789 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json new file mode 100644 index 00000000..f805217a --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json @@ -0,0 +1,261 @@ +{ + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 
0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + 
"mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6267299864314789 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json new file mode 100644 index 00000000..306a8110 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json @@ -0,0 +1,255 @@ +{ + "run_index": 1, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + 
"mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + 
"min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 
+ }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json new file mode 100644 index 00000000..7f6ad4d1 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json @@ -0,0 +1,255 @@ +{ + "run_index": 2, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 
0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json new file mode 100644 index 00000000..93e7551d --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json @@ -0,0 +1,255 @@ +{ + "run_index": 
3, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { 
+ "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + 
"min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json new file mode 100644 index 00000000..83612db9 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json @@ -0,0 +1,255 @@ +{ + "run_index": 4, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 
0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 
2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 
0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json new file mode 100644 index 00000000..7e10c385 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json @@ -0,0 +1,255 @@ +{ + "run_index": 5, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 
68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + 
}, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json new file mode 100644 index 00000000..5694312b --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json @@ -0,0 +1,255 @@ +{ + "run_index": 6, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + 
{ + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 
17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + 
"hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json new file mode 100644 index 00000000..80fde7a0 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json @@ -0,0 +1,255 @@ +{ + "run_index": 7, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 
65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json new file mode 100644 index 00000000..c0bf202c --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json @@ -0,0 +1,255 @@ +{ + "run_index": 8, + "model_spec": 
"openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 
0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + 
"max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json new file mode 100644 index 00000000..8f8cc815 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json @@ -0,0 +1,255 @@ +{ + "run_index": 9, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, 
+ "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No 
newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json new file mode 100644 index 00000000..ffaa9eca --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json @@ -0,0 +1,255 @@ +{ + "run_index": 10, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + 
"mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 
0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json new file mode 100644 index 00000000..58a0e15f --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json @@ -0,0 +1,255 @@ +{ + "run_index": 11, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 
0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 
17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + 
"hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json new file mode 100644 index 00000000..7fe61cc8 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json @@ -0,0 +1,255 @@ +{ + "run_index": 12, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 
65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json new file mode 100644 index 00000000..d60ca1d2 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json @@ -0,0 +1,255 @@ +{ + "run_index": 13, + "model_spec": 
"openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 
0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + 
"max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json new file mode 100644 index 00000000..fc5f030a --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json @@ -0,0 +1,255 @@ +{ + "run_index": 14, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 
10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No 
newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json new file mode 100644 index 00000000..bb87e5c8 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json @@ -0,0 +1,255 @@ +{ + "run_index": 15, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + 
"mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 
0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json new file mode 100644 index 00000000..31c2f1f1 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json @@ -0,0 +1,255 @@ +{ + "run_index": 16, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 
0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 
17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + 
"hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json new file mode 100644 index 00000000..91775a17 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json @@ -0,0 +1,255 @@ +{ + "run_index": 17, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 
65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json new file mode 100644 index 00000000..a71856e9 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json @@ -0,0 +1,255 @@ +{ + "run_index": 18, + "model_spec": 
"openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 
0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + 
"max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json new file mode 100644 index 00000000..1d16bf56 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json @@ -0,0 +1,255 @@ +{ + "run_index": 19, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 
10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No 
newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json new file mode 100644 index 00000000..2d768925 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json @@ -0,0 +1,255 @@ +{ + "run_index": 20, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + 
"mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 
0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json new file mode 100644 index 00000000..0d6641d9 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json @@ -0,0 +1,255 @@ +{ + "run_index": 21, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 
0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 
17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + 
"hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json new file mode 100644 index 00000000..0cd7e68a --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json @@ -0,0 +1,255 @@ +{ + "run_index": 22, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 
65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json new file mode 100644 index 00000000..8a428bd4 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json @@ -0,0 +1,255 @@ +{ + "run_index": 23, + "model_spec": 
"openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 
0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + 
"max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json new file mode 100644 index 00000000..7b5dea71 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json @@ -0,0 +1,255 @@ +{ + "run_index": 24, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, 
+ "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No 
newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json new file mode 100644 index 00000000..3cb6dcbc --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json @@ -0,0 +1,255 @@ +{ + "run_index": 25, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + 
"mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 
0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json new file mode 100644 index 00000000..04c9d790 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json @@ -0,0 +1,255 @@ +{ + "run_index": 26, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 
0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 
17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + 
"hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json new file mode 100644 index 00000000..7c28c7ce --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json @@ -0,0 +1,255 @@ +{ + "run_index": 27, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 
65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { 
+ "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json new file mode 100644 index 00000000..5080c761 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json @@ -0,0 +1,255 @@ +{ + "run_index": 28, + "model_spec": 
"openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037935323383085 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5392412935323383 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, 
+ "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 
10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.679912935323383 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json new file mode 100644 index 00000000..17566b00 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json @@ -0,0 +1,255 @@ +{ + "run_index": 29, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + 
"hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + "mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + 
"mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No 
newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json new file mode 100644 index 00000000..088ae095 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json @@ -0,0 +1,255 @@ +{ + "run_index": 30, + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 86.56716417910447, + "mean_reciprocal_rank": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 73.13432835820896, + "mean_reciprocal_rank": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 77.61194029850746, + "mean_reciprocal_rank": 0.6037386596429617 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 65.67164179104478, + "mean_reciprocal_rank": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 67.16417910447761, + "mean_reciprocal_rank": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 68.65671641791045, + 
"mean_reciprocal_rank": 0.5391864208369915 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 53.73134328358209, + "mean_reciprocal_rank": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 17.91044776119403, + "mean_reciprocal_rank": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 2.9850746268656714, + "mean_reciprocal_rank": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 
0.75, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 88.05970149253731, + "mean_reciprocal_rank": 0.6798580626280363 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json new file mode 100644 index 00000000..caf1e862 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json @@ -0,0 +1,261 @@ +{ + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, 
+ "max_hits": 15, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.6798726953467954 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 73.13432835820896, + "mean_mrr": 0.5985074626865672 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6037532923617208 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 65.67164179104478, + "mean_mrr": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 68.65671641791045, + "mean_mrr": 0.5392010535557507 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 
17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + 
"min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.6798726953467954 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "20": 30 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json new file mode 100644 index 00000000..f42ed99c --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json @@ -0,0 +1,255 @@ +{ + "run_index": 1, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json new file mode 100644 index 
00000000..91d93052 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json @@ -0,0 +1,255 @@ +{ + "run_index": 2, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 
80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json new file mode 100644 index 00000000..c612d42d --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json @@ -0,0 +1,255 @@ +{ + "run_index": 3, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 
0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 
0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 
0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json new file mode 100644 index 00000000..9221c946 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json @@ -0,0 +1,255 @@ +{ + "run_index": 4, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + 
"max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + 
"max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json new file mode 100644 index 00000000..a91328fa --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json @@ -0,0 +1,255 @@ +{ + "run_index": 5, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + 
"hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json new file mode 100644 index 00000000..5a205dd3 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json @@ -0,0 +1,255 @@ +{ + "run_index": 6, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { 
+ "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json new file mode 100644 index 00000000..45e7a7df --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json @@ -0,0 
+1,255 @@ +{ + "run_index": 7, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 
82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json new file mode 100644 index 00000000..6db6beff --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json @@ -0,0 +1,255 @@ +{ + "run_index": 8, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 
0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 
0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 
0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json new file mode 100644 index 00000000..8c45ea9b --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json @@ -0,0 +1,255 @@ +{ + "run_index": 9, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + 
"max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + 
"max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json new file mode 100644 index 00000000..723a31ee --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json @@ -0,0 +1,255 @@ +{ + "run_index": 10, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git 
a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json new file mode 100644 index 00000000..c42d84a5 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json @@ -0,0 +1,255 @@ +{ + "run_index": 11, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, 
+ { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json new file mode 100644 index 00000000..6f2c60b5 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json @@ -0,0 +1,255 @@ +{ + "run_index": 12, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": 
"text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + 
"hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + 
"hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json new file mode 100644 index 00000000..29c6deb1 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json @@ -0,0 +1,255 @@ +{ + "run_index": 13, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + 
"mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json new file mode 100644 index 00000000..3d4286d3 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json @@ -0,0 +1,255 @@ +{ + "run_index": 14, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + 
"min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json new file mode 100644 index 00000000..dacac5e0 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json @@ -0,0 +1,255 @@ +{ + "run_index": 15, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json 
b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json new file mode 100644 index 00000000..d22e1dc7 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json @@ -0,0 +1,255 @@ +{ + "run_index": 16, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + 
"mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json new file mode 100644 index 00000000..2c9c7a35 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json @@ -0,0 +1,255 @@ +{ + "run_index": 17, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + 
"min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + 
"min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json new file mode 100644 index 00000000..d248c4b3 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json @@ -0,0 +1,255 @@ +{ + "run_index": 18, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 
47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json new file mode 100644 index 00000000..60a555e6 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json @@ -0,0 +1,255 @@ +{ + "run_index": 19, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 
0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 
0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + 
"mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json new file mode 100644 index 00000000..aa9db928 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json @@ -0,0 +1,255 @@ +{ + "run_index": 20, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.35, + 
"max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.7, + 
"max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6333747927031509 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6343698175787728 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6343698175787728 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json new file mode 100644 index 
00000000..88b2fc2a --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json @@ -0,0 +1,255 @@ +{ + "run_index": 21, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 
95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 
80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json new file mode 100644 index 00000000..8919b4eb --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json @@ -0,0 +1,255 @@ +{ + "run_index": 22, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 
0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 
0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 
0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json new file mode 100644 index 00000000..a313722a --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json @@ -0,0 +1,255 @@ +{ + "run_index": 23, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + 
"max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + 
"max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + 
"max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json new file mode 100644 index 00000000..fb919a66 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json @@ -0,0 +1,255 @@ +{ + "run_index": 24, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + 
"mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + 
"hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json new file mode 100644 index 00000000..af8bb9bb --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json @@ -0,0 +1,255 @@ +{ + "run_index": 25, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { 
+ "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json new file mode 100644 index 00000000..ec9fa263 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json @@ -0,0 
+1,255 @@ +{ + "run_index": 26, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 
98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 
82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json new file mode 100644 index 00000000..ffa32277 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json @@ -0,0 +1,255 @@ +{ + "run_index": 27, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 
0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 
0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 
0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json new file mode 100644 index 00000000..a8548590 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json @@ -0,0 +1,255 @@ +{ + "run_index": 28, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + 
"max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + 
"max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 
0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json new file mode 100644 index 00000000..dffe68de --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json @@ -0,0 +1,255 @@ +{ + "run_index": 29, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + 
"mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7490464344941957 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + }, + { + "min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6333747927031509 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6343698175787728 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6343698175787728 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.7511895649955351 + } +} \ No newline at end of file diff --git 
a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json new file mode 100644 index 00000000..df9701d3 --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json @@ -0,0 +1,255 @@ +{ + "run_index": 30, + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "message_count": 106, + "query_count": 67, + "rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.25, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.3, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.3, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.35, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.35, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, 
+ { + "min_score": 0.4, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.4, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.4, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.5, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.5, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.6, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.6, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 5, + "hit_rate": 91.04477611940298, + "mean_reciprocal_rank": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "hit_rate": 95.52238805970148, + "mean_reciprocal_rank": 0.7493129590144515 + }, + { + "min_score": 0.7, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + "min_score": 0.7, + "max_hits": 20, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + }, + { + 
"min_score": 0.75, + "max_hits": 5, + "hit_rate": 76.11940298507463, + "mean_reciprocal_rank": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "hit_rate": 80.59701492537313, + "mean_reciprocal_rank": 0.6336413172234068 + }, + { + "min_score": 0.75, + "max_hits": 15, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.75, + "max_hits": 20, + "hit_rate": 82.08955223880598, + "mean_reciprocal_rank": 0.6346363420990286 + }, + { + "min_score": 0.8, + "max_hits": 5, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "hit_rate": 47.76119402985074, + "mean_reciprocal_rank": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "hit_rate": 0.0, + "mean_reciprocal_rank": 0.0 + } + ], + "best_row": { + "min_score": 0.25, + "max_hits": 15, + "hit_rate": 98.50746268656717, + "mean_reciprocal_rank": 0.751456089515791 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json new file mode 100644 index 00000000..1476d1cc --- /dev/null +++ b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json @@ -0,0 +1,261 @@ +{ + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + 
"run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.5, 
+ "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 80.59701492537313, + "mean_mrr": 0.6336235489220564 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.6346185737976783 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.6346185737976783 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 
0.8, + "max_hits": 10, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } +} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/summary.json b/benchmark_results/20260411T172116Z/summary.json new file mode 100644 index 00000000..264f31cd --- /dev/null +++ b/benchmark_results/20260411T172116Z/summary.json @@ -0,0 +1,785 @@ +[ + { + "model_spec": "openai:text-embedding-3-small", + "resolved_model_name": "text-embedding-3-small", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.673134328358209 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 86.56716417910447, + "mean_mrr": 0.6789800995024876 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.6798726953467954 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 73.13432835820896, + "mean_mrr": 0.5985074626865672 + }, + { + 
"min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.6028606965174129 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6037532923617208 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 65.67164179104478, + "mean_mrr": 0.5358208955223881 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5383084577114428 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 68.65671641791045, + "mean_mrr": 0.5392010535557507 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.45 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 17.91044776119403, + "mean_mrr": 0.1417910447761194 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 15, + 
"mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 2.9850746268656714, + "mean_mrr": 0.029850746268656716 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 88.05970149253731, + "mean_mrr": 0.6798726953467954 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "20": 30 + } + }, + { + "model_spec": "openai:text-embedding-3-large", + "resolved_model_name": "text-embedding-3-large", + "run_count": 30, + 
"message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 74.6268656716418, + "mean_mrr": 0.6241293532338309 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6267299864314789 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6267299864314789 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 67.16417910447761, + "mean_mrr": 0.5569651741293532 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5595658073270013 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 70.1492537313433, + "mean_mrr": 0.5595658073270013 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 59.70149253731343, + "mean_mrr": 0.5161691542288557 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174129353233831 + }, + { + "min_score": 0.35, + "max_hits": 20, + "mean_hit_rate": 61.19402985074627, + "mean_mrr": 0.5174129353233831 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 52.23880597014925, + "mean_mrr": 0.4601990049751244 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4614427860696517 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 53.73134328358209, + "mean_mrr": 0.4614427860696517 + }, + { + "min_score": 0.5, + "max_hits": 5, + 
"mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 22.388059701492537, + "mean_mrr": 0.19527363184079602 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + 
"mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 77.61194029850746, + "mean_mrr": 0.6267299864314789 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } + }, + { + "model_spec": "openai:text-embedding-ada-002", + "resolved_model_name": "text-embedding-ada-002", + "run_count": 30, + "message_count": 106, + "query_count": 67, + "candidate_rows": [ + { + "min_score": 0.25, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.25, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.25, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.3, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.3, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.3, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.3, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.35, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.35, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.35, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.35, + "max_hits": 20, + 
"mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.4, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.4, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.4, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.4, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.5, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.5, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.5, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.5, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.6, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.6, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.6, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.6, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.7, + "max_hits": 5, + "mean_hit_rate": 91.04477611940298, + "mean_mrr": 0.7430348258706467 + }, + { + "min_score": 0.7, + "max_hits": 10, + "mean_hit_rate": 95.52238805970148, + "mean_mrr": 0.7492951907131011 + }, + { + "min_score": 0.7, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.7, + "max_hits": 20, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + { + "min_score": 0.75, + "max_hits": 5, 
+ "mean_hit_rate": 76.11940298507463, + "mean_mrr": 0.627363184079602 + }, + { + "min_score": 0.75, + "max_hits": 10, + "mean_hit_rate": 80.59701492537313, + "mean_mrr": 0.6336235489220564 + }, + { + "min_score": 0.75, + "max_hits": 15, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.6346185737976783 + }, + { + "min_score": 0.75, + "max_hits": 20, + "mean_hit_rate": 82.08955223880598, + "mean_mrr": 0.6346185737976783 + }, + { + "min_score": 0.8, + "max_hits": 5, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 10, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 15, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.8, + "max_hits": 20, + "mean_hit_rate": 47.76119402985074, + "mean_mrr": 0.4017412935323383 + }, + { + "min_score": 0.85, + "max_hits": 5, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 10, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 15, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + }, + { + "min_score": 0.85, + "max_hits": 20, + "mean_hit_rate": 0.0, + "mean_mrr": 0.0 + } + ], + "recommended_row": { + "min_score": 0.25, + "max_hits": 15, + "mean_hit_rate": 98.50746268656717, + "mean_mrr": 0.7514383212144407 + }, + "best_min_score_counts": { + "0.25": 30 + }, + "best_max_hits_counts": { + "15": 30 + } + } +] \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/summary.md b/benchmark_results/20260411T172116Z/summary.md new file mode 100644 index 00000000..b9254232 --- /dev/null +++ b/benchmark_results/20260411T172116Z/summary.md @@ -0,0 +1,7 @@ +# Repeated Embedding Benchmark Summary + +| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR | +| --- | ---: | ---: | ---: | ---: | ---: | +| text-embedding-3-small | 30 | 0.25 | 20 | 88.06 | 
0.6799 | +| text-embedding-3-large | 30 | 0.25 | 15 | 77.61 | 0.6267 | +| text-embedding-ada-002 | 30 | 0.25 | 15 | 98.51 | 0.7514 | diff --git a/src/typeagent/aitools/vectorbase.py b/src/typeagent/aitools/vectorbase.py index d4f84c21..1bec469a 100644 --- a/src/typeagent/aitools/vectorbase.py +++ b/src/typeagent/aitools/vectorbase.py @@ -13,6 +13,25 @@ ) from .model_adapters import create_embedding_model +DEFAULT_MIN_SCORE = 0.25 + +# Empirical defaults for built-in OpenAI embedding models. +# These values come from repeated runs of the Adrian Tchaikovsky Episode 53 +# search benchmark in `tools/benchmark_embeddings.py`, with raw outputs stored +# under `benchmark_results/`. +# They are intended as repository defaults for known models, not universal +# truths; callers can always override `min_score` explicitly for their own use +# cases or models. +MODEL_DEFAULT_MIN_SCORES: dict[str, float] = { + "text-embedding-3-large": 0.25, + "text-embedding-3-small": 0.25, + "text-embedding-ada-002": 0.25, +} + + +def get_default_min_score(model_name: str) -> float: + return MODEL_DEFAULT_MIN_SCORES.get(model_name, DEFAULT_MIN_SCORE) + @dataclass class ScoredInt: @@ -35,32 +54,10 @@ def __init__( batch_size: int | None = None, ): self.embedding_model = embedding_model or create_embedding_model() - - # Default fallback values - default_min_score = 0.85 - default_max_matches = None - - # Determine optimal parameters automatically for well-known models. - # Format: (min_score, max_matches) - # Note: text-embedding-3 models produce structurally lower cosine scores than older models - # and typically perform best in the 0.3 - 0.5 range for relevance filtering. 
- MODEL_DEFAULTS = { - "text-embedding-3-large": (0.30, 20), - "text-embedding-3-small": (0.35, 20), - "text-embedding-ada-002": (0.75, 20), - } - - # Check if the model_name matches any known ones - model_name = getattr(self.embedding_model, 'model_name', "") - - if model_name: - for known_model, defaults in MODEL_DEFAULTS.items(): - if known_model in model_name: - default_min_score, default_max_matches = defaults - break - + model_name = getattr(self.embedding_model, "model_name", "") + default_min_score = get_default_min_score(model_name) self.min_score = min_score if min_score is not None else default_min_score - self.max_matches = max_matches if max_matches is not None else default_max_matches + self.max_matches = max_matches # None means no limit self.batch_size = batch_size if batch_size and batch_size >= 1 else 8 @@ -166,7 +163,6 @@ def fuzzy_lookup_embedding( scored_ordinals.sort(key=lambda x: x.score, reverse=True) return scored_ordinals[:max_hits] - # TODO: Make this and fuzzy_lookup_embedding() more similar. 
def fuzzy_lookup_embedding_in_subset( self, embedding: NormalizedEmbedding, @@ -174,10 +170,25 @@ def fuzzy_lookup_embedding_in_subset( max_hits: int | None = None, min_score: float | None = None, ) -> list[ScoredInt]: - ordinals_set = set(ordinals_of_subset) - return self.fuzzy_lookup_embedding( - embedding, max_hits, min_score, lambda i: i in ordinals_set - ) + if max_hits is None: + max_hits = 10 + if min_score is None: + min_score = 0.0 + if len(self._vectors) == 0 or not ordinals_of_subset: + return [] + + subset_ordinals = np.fromiter(set(ordinals_of_subset), dtype=np.intp) + if len(subset_ordinals) == 0: + return [] + + scores: Iterable[float] = np.dot(self._vectors[subset_ordinals], embedding) + scored_ordinals = [ + ScoredInt(int(ordinal), score) + for ordinal, score in zip(subset_ordinals, scores) + if score >= min_score + ] + scored_ordinals.sort(key=lambda x: x.score, reverse=True) + return scored_ordinals[:max_hits] async def fuzzy_lookup( self, @@ -226,7 +237,7 @@ def deserialize(self, data: NormalizedEmbeddings | None) -> None: return if self._embedding_size == 0: if data.ndim < 2 or data.shape[0] == 0: - # Empty data — can't determine size; just clear. + # Empty data can't determine size; just clear. self.clear() return self._set_embedding_size(data.shape[1]) diff --git a/tools/benchmark_embeddings.py b/tools/benchmark_embeddings.py index ee77c215..4358ea31 100644 --- a/tools/benchmark_embeddings.py +++ b/tools/benchmark_embeddings.py @@ -1,359 +1,267 @@ -#!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -""" -Utility script to benchmark different TextEmbeddingIndexSettings parameters. 
- -Uses the Adrian Tchaikovsky podcast dataset (Episode 53) which contains: -- Index data: ~96 messages from the podcast conversation -- Search results: Queries with expected messageMatches (ground truth for retrieval) -- Answer results: Curated Q&A pairs with expected answers (ground truth for Q&A quality) +"""Benchmark retrieval settings for known embedding models. -The benchmark evaluates embedding model retrieval quality using: -1. Search-based evaluation: Compares fuzzy_lookup results against expected messageMatches -2. Answer-based evaluation: Tests if queries from the Answer dataset retrieve messages - that contain the expected answer content (substring matching) +This script evaluates the Adrian Tchaikovsky Episode 53 search dataset in +`tests/testdata/` and reports retrieval quality for combinations of +`min_score` and `max_hits`. -Metrics: -- Hit Rate: Percentage of queries where at least one expected result was retrieved -- MRR (Mean Reciprocal Rank): Average of 1/rank of the first relevant result +The benchmark is intentionally narrow: +- It only measures retrieval against `messageMatches` ground truth. +- It is meant to help choose repository defaults for known models. +- In practice, `min_score` is the primary library default this informs. +- It does not prove universal "best" settings for every dataset. 
Usage: - uv run python tools/benchmark_embeddings.py [--model provider:model] + uv run python tools/benchmark_embeddings.py + uv run python tools/benchmark_embeddings.py --model openai:text-embedding-3-small """ import argparse import asyncio +from dataclasses import dataclass import json -import logging from pathlib import Path from statistics import mean -import sys -from typing import Any +from dotenv import load_dotenv + +from typeagent.aitools.embeddings import IEmbeddingModel, NormalizedEmbeddings from typeagent.aitools.model_adapters import create_embedding_model from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase +DEFAULT_MIN_SCORES = [0.25, 0.30, 0.35, 0.40, 0.50, 0.60, 0.70, 0.75, 0.80, 0.85] +DEFAULT_MAX_HITS = [5, 10, 15, 20] +DATA_DIR = Path("tests") / "testdata" +INDEX_DATA_PATH = DATA_DIR / "Episode_53_AdrianTchaikovsky_index_data.json" +SEARCH_RESULTS_PATH = DATA_DIR / "Episode_53_Search_results.json" + + +@dataclass +class SearchQueryCase: + query: str + expected_matches: list[int] + + +@dataclass +class SearchMetrics: + hit_rate: float + mean_reciprocal_rank: float + + +@dataclass +class BenchmarkRow: + min_score: float + max_hits: int + metrics: SearchMetrics + + +def parse_float_list(raw: str | None) -> list[float]: + if raw is None: + return DEFAULT_MIN_SCORES + values = [float(item.strip()) for item in raw.split(",") if item.strip()] + if not values: + raise ValueError("--min-scores must contain at least one value") + return values + -async def run_benchmark(model_spec: str | None) -> None: - logging.basicConfig(level=logging.INFO) - logger = logging.getLogger(__name__) - - # Paths - script_dir = Path(__file__).resolve().parent - repo_root = script_dir.parent - index_data_path = repo_root / "tests" / "testdata" / "Episode_53_AdrianTchaikovsky_index_data.json" - search_data_path = repo_root / "tests" / "testdata" / "Episode_53_Search_results.json" - answer_data_path = repo_root / "tests" / "testdata" / 
"Episode_53_Answer_results.json" - - # ── Load index data (messages to embed) ── - logger.info(f"Loading index data from {index_data_path}") - try: - with open(index_data_path, "r", encoding="utf-8") as f: - index_json = json.load(f) - except Exception as e: - logger.error(f"Failed to load index data: {e}") - return - - messages = index_json.get("messages", []) - message_texts = [" ".join(m.get("textChunks", [])) for m in messages] - - # ── Load search queries (ground truth: messageMatches) ── - logger.info(f"Loading search queries from {search_data_path}") - try: - with open(search_data_path, "r", encoding="utf-8") as f: - search_json = json.load(f) - except Exception as e: - logger.error(f"Failed to load search queries: {e}") - return - - # Filter out ones without results or expected matches - search_queries: list[tuple[str, list[int]]] = [] - for item in search_json: +def parse_int_list(raw: str | None) -> list[int]: + if raw is None: + return DEFAULT_MAX_HITS + values = [int(item.strip()) for item in raw.split(",") if item.strip()] + if not values: + raise ValueError("--max-hits must contain at least one value") + if any(value <= 0 for value in values): + raise ValueError("--max-hits values must be positive integers") + return values + + +def load_message_texts(repo_root: Path) -> list[str]: + index_data = json.loads((repo_root / INDEX_DATA_PATH).read_text(encoding="utf-8")) + messages = index_data["messages"] + return [" ".join(message.get("textChunks", [])) for message in messages] + + +def load_search_queries(repo_root: Path) -> list[SearchQueryCase]: + search_data = json.loads( + (repo_root / SEARCH_RESULTS_PATH).read_text(encoding="utf-8") + ) + cases: list[SearchQueryCase] = [] + for item in search_data: search_text = item.get("searchText") results = item.get("results", []) - if not results: + if not search_text or not results: continue - expected = results[0].get("messageMatches", []) - if not expected: + expected_matches = 
results[0].get("messageMatches", []) + if not expected_matches: continue - search_queries.append((search_text, expected)) - - # ── Load answer results (Q&A ground truth from Adrian Tchaikovsky dataset) ── - answer_queries: list[tuple[str, str, bool]] = [] # (question, answer, hasNoAnswer) - logger.info(f"Loading answer results from {answer_data_path}") - try: - with open(answer_data_path, "r", encoding="utf-8") as f: - answer_json = json.load(f) - for item in answer_json: - question = item.get("question", "") - answer = item.get("answer", "") - has_no_answer = item.get("hasNoAnswer", False) - if question and answer: - answer_queries.append((question, answer, has_no_answer)) - logger.info(f"Found {len(answer_queries)} answer Q&A pairs " - f"({sum(1 for _, _, h in answer_queries if not h)} with answers, " - f"{sum(1 for _, _, h in answer_queries if h)} with no-answer).") - except Exception as e: - logger.warning(f"Failed to load answer results (continuing without): {e}") - - logger.info(f"Found {len(message_texts)} messages to embed.") - logger.info(f"Found {len(search_queries)} search queries with expected matches.") - - # ── Create embedding model and index ── - try: - if model_spec == "test:fake": - from typeagent.aitools.model_adapters import create_test_embedding_model - model = create_test_embedding_model(embedding_size=384) + cases.append(SearchQueryCase(search_text, expected_matches)) + return cases + + +async def build_vector_base( + model_spec: str | None, + message_texts: list[str], + batch_size: int, +) -> tuple[IEmbeddingModel, VectorBase]: + model = create_embedding_model(model_spec) + settings = TextEmbeddingIndexSettings( + embedding_model=model, + min_score=0.0, + max_matches=None, + batch_size=batch_size, + ) + vector_base = VectorBase(settings) + + for start in range(0, len(message_texts), batch_size): + batch = message_texts[start : start + batch_size] + await vector_base.add_keys(batch) + + return model, vector_base + + +def 
evaluate_search_queries( + vector_base: VectorBase, + query_cases: list[SearchQueryCase], + query_embeddings: NormalizedEmbeddings, + min_score: float, + max_hits: int, +) -> SearchMetrics: + hit_count = 0 + reciprocal_ranks: list[float] = [] + + for case, query_embedding in zip(query_cases, query_embeddings): + scored_results = vector_base.fuzzy_lookup_embedding( + query_embedding, + max_hits=max_hits, + min_score=min_score, + ) + rank = 0 + for result_index, scored_result in enumerate(scored_results, start=1): + if scored_result.item in case.expected_matches: + rank = result_index + break + if rank > 0: + hit_count += 1 + reciprocal_ranks.append(1.0 / rank) else: - model = create_embedding_model(model_spec) - except Exception as e: - logger.error(f"Failed to create embedding model: {e}") - logger.info("Are your environment variables (e.g. OPENAI_API_KEY) set?") - return - settings = TextEmbeddingIndexSettings(model) - vbase = VectorBase(settings) - - logger.info("Computing embeddings for messages (this may take some time...)") - # Batch the embeddings - batch_size = 50 - for i in range(0, len(message_texts), batch_size): - batch = message_texts[i : i + batch_size] - await vbase.add_keys(batch) - print(f" ... 
embedded {min(i + batch_size, len(message_texts))}/{len(message_texts)}") - - # ── Compute query embeddings ── - logger.info("Computing embeddings for search queries...") - search_query_texts = [q[0] for q in search_queries] - search_query_embeddings = await model.get_embeddings(search_query_texts) - - answer_query_embeddings = None - if answer_queries: - logger.info("Computing embeddings for answer queries...") - answer_query_texts = [q[0] for q in answer_queries] - answer_query_embeddings = await model.get_embeddings(answer_query_texts) - - # ────────────────────────────────────────────────────────────────────── - # Section 1: Grid Search using Search Results (messageMatches) - # ────────────────────────────────────────────────────────────────────── - - # Grid search config - min_scores_to_test = [0.70, 0.75, 0.80, 0.85, 0.90, 0.95] - max_hits_to_test = [5, 10, 15, 20] - - logger.info(f"Starting grid search over model: {model.model_name}") - print() + reciprocal_ranks.append(0.0) + + return SearchMetrics( + hit_rate=(hit_count / len(query_cases)) * 100, + mean_reciprocal_rank=mean(reciprocal_ranks), + ) + + +def select_best_row(rows: list[BenchmarkRow]) -> BenchmarkRow: + return max( + rows, + key=lambda row: ( + row.metrics.mean_reciprocal_rank, + row.metrics.hit_rate, + -row.min_score, + -row.max_hits, + ), + ) + + +def print_rows(rows: list[BenchmarkRow]) -> None: print("=" * 72) - print(" SEARCH RESULTS BENCHMARK (messageMatches ground truth)") + print("SEARCH BENCHMARK (Episode 53 messageMatches ground truth)") print("=" * 72) print(f"{'Min Score':<12} | {'Max Hits':<10} | {'Hit Rate (%)':<15} | {'MRR':<10}") print("-" * 65) - - best_mrr = -1.0 - best_config = None - - for ms in min_scores_to_test: - for mh in max_hits_to_test: - hits = 0 - reciprocal_ranks = [] - - for (query_text, expected_indices), q_emb in zip(search_queries, search_query_embeddings): - scored_results = vbase.fuzzy_lookup_embedding(q_emb, max_hits=mh, min_score=ms) - retrieved_indices = 
[sr.item for sr in scored_results] - - # Check if any of the expected items are in the retrieved answers - rank = -1 - for r_idx, retrieved in enumerate(retrieved_indices): - if retrieved in expected_indices: - rank = r_idx + 1 - break - - if rank > 0: - hits += 1 - reciprocal_ranks.append(1.0 / rank) - else: - reciprocal_ranks.append(0.0) - - hit_rate = (hits / len(search_queries)) * 100 - mrr = mean(reciprocal_ranks) - - print(f"{ms:<12.2f} | {mh:<10d} | {hit_rate:<15.2f} | {mrr:<10.4f}") - - if mrr > best_mrr: - best_mrr = mrr - best_config = (ms, mh) - + for row in rows: + print( + f"{row.min_score:<12.2f} | {row.max_hits:<10d} | " + f"{row.metrics.hit_rate:<15.2f} | " + f"{row.metrics.mean_reciprocal_rank:<10.4f}" + ) print("-" * 65) - if best_config: - logger.info(f"Search benchmark optimal: min_score={best_config[0]}, " - f"max_hits={best_config[1]} (MRR={best_mrr:.4f})") - else: - logger.info("Could not determine optimal parameters (no hits).") - - # ────────────────────────────────────────────────────────────────────── - # Section 2: Answer Results Benchmark (Adrian Tchaikovsky Q&A pairs) - # ────────────────────────────────────────────────────────────────────── - - if answer_queries and answer_query_embeddings is not None: - print() - print("=" * 72) - print(" ANSWER RESULTS BENCHMARK (Adrian Tchaikovsky Q&A ground truth)") - print("=" * 72) - print() - - # For each answer query, check if retrieved messages contain key terms - # from the expected answer. This is a content-based relevance check. - # - # We split answers with hasNoAnswer=True vs False to evaluate separately. 
- - answerable = [(q, a, emb) for (q, a, h), emb - in zip(answer_queries, answer_query_embeddings) if not h] - unanswerable = [(q, a, emb) for (q, a, h), emb - in zip(answer_queries, answer_query_embeddings) if h] - - print(f"Answerable queries: {len(answerable)}") - print(f"Unanswerable queries (hasNoAnswer=True): {len(unanswerable)}") - print() - - # Extract key terms from expected answers for content matching - def extract_answer_keywords(answer_text: str) -> list[str]: - """Extract distinctive keywords/phrases from an answer for matching.""" - # Look for quoted items, proper nouns, and distinctive phrases - keywords = [] - # Extract quoted phrases - import re - quoted = re.findall(r"'([^']+)'", answer_text) - keywords.extend(quoted) - quoted2 = re.findall(r'"([^"]+)"', answer_text) - keywords.extend(quoted2) - - # Extract proper-noun-like terms (capitalized words that aren't sentence starters) - # and key named entities from the Adrian Tchaikovsky dataset - known_entities = [ - "Adrian Tchaikovsky", "Tchaikovsky", "Kevin Scott", "Christina Warren", - "Children of Time", "Children of Ruin", "Children of Memory", - "Shadows of the Apt", "Empire in Black and Gold", - "Final Architecture", "Lords of Uncreation", - "Dragonlance Chronicles", "Skynet", "Portids", "Corvids", - "University of Reading", "Magnus Carlsen", "Warhammer", - "Asimov", "Peter Watts", "William Gibson", "Iain Banks", - "Peter Hamilton", "Arthur C. 
Clarke", "Profiles of the Future", - "Dune", "Brave New World", "Iron Sunrise", "Wall-E", - "George RR Martin", "Alastair Reynolds", "Ovid", - "zoology", "psychology", "spiders", "arachnids", "insects", - ] - for entity in known_entities: - if entity.lower() in answer_text.lower(): - keywords.append(entity) - - return keywords - - # Run answer benchmark with the best config from search benchmark - if best_config: - eval_min_score, eval_max_hits = best_config - else: - eval_min_score, eval_max_hits = 0.80, 10 - - print(f"Using parameters: min_score={eval_min_score}, max_hits={eval_max_hits}") - print("-" * 72) - print(f"{'#':<4} | {'Question':<45} | {'Keywords Found':<14} | {'Msgs':<5}") - print("-" * 72) - answer_hits = 0 - answer_keyword_scores: list[float] = [] - for idx, (question, answer, q_emb) in enumerate(answerable, 1): - scored_results = vbase.fuzzy_lookup_embedding( - q_emb, max_hits=eval_max_hits, min_score=eval_min_score +async def run_benchmark( + model_spec: str | None, + min_scores: list[float], + max_hits_values: list[int], + batch_size: int, +) -> None: + load_dotenv() + + repo_root = Path(__file__).resolve().parent.parent + message_texts = load_message_texts(repo_root) + query_cases = load_search_queries(repo_root) + if not query_cases: + raise ValueError("No search queries with messageMatches found in the dataset") + model, vector_base = await build_vector_base(model_spec, message_texts, batch_size) + query_embeddings = await model.get_embeddings([case.query for case in query_cases]) + + rows: list[BenchmarkRow] = [] + for min_score in min_scores: + for max_hits in max_hits_values: + metrics = evaluate_search_queries( + vector_base, + query_cases, + query_embeddings, + min_score, + max_hits, ) - retrieved_indices = [sr.item for sr in scored_results] + rows.append(BenchmarkRow(min_score, max_hits, metrics)) - # Concatenate the text of all retrieved messages - retrieved_text = " ".join( - message_texts[i] for i in retrieved_indices if i < 
len(message_texts) - ) + print(f"Model: {model.model_name}") + print(f"Messages indexed: {len(message_texts)}") + print(f"Queries evaluated: {len(query_cases)}") + print() + print_rows(rows) - # Check how many answer keywords appear in retrieved text - keywords = extract_answer_keywords(answer) - if keywords: - found = sum( - 1 for kw in keywords - if kw.lower() in retrieved_text.lower() - ) - keyword_score = found / len(keywords) - else: - # No keywords extracted — just check if we retrieved anything - keyword_score = 1.0 if retrieved_indices else 0.0 - - if keyword_score > 0: - answer_hits += 1 - answer_keyword_scores.append(keyword_score) - - q_display = question[:42] + "..." if len(question) > 45 else question - kw_display = f"{int(keyword_score * 100):>3}%" - if keywords: - kw_display += f" ({sum(1 for kw in keywords if kw.lower() in retrieved_text.lower())}/{len(keywords)})" - print(f"{idx:<4} | {q_display:<45} | {kw_display:<14} | {len(retrieved_indices):<5}") - - print("-" * 72) - - if answerable: - answer_hit_rate = (answer_hits / len(answerable)) * 100 - avg_keyword_score = mean(answer_keyword_scores) * 100 - print(f"Answer Hit Rate: {answer_hit_rate:.1f}% " - f"({answer_hits}/{len(answerable)} queries found relevant content)") - print(f"Avg Keyword Coverage: {avg_keyword_score:.1f}%") - - # Evaluate unanswerable queries — ideally these should retrieve fewer/no results - if unanswerable: - print() - print("-" * 72) - print("Unanswerable queries (should ideally retrieve less relevant content):") - print("-" * 72) - false_positive_count = 0 - for question, answer, q_emb in unanswerable: - scored_results = vbase.fuzzy_lookup_embedding( - q_emb, max_hits=eval_max_hits, min_score=eval_min_score - ) - n_results = len(scored_results) - avg_score = mean(sr.score for sr in scored_results) if scored_results else 0.0 - q_display = question[:55] + "..." 
if len(question) > 58 else question - flag = "[!]" if n_results > 3 else "[ok]" - if n_results > 3: - false_positive_count += 1 - print(f" {flag} {q_display:<58} | {n_results:>3} results (avg={avg_score:.3f})") - print(f"\nFalse positives (>3 results): {false_positive_count}/{len(unanswerable)}") - - # ── Summary ── + best_row = select_best_row(rows) print() - print("=" * 72) - print(" SUMMARY") - print("=" * 72) - print(f"Model: {model.model_name}") - print(f"Messages indexed: {len(message_texts)}") - print(f"Search queries tested: {len(search_queries)}") - if best_config: - print(f"Best search params: min_score={best_config[0]}, max_hits={best_config[1]}") - print(f"Best search MRR: {best_mrr:.4f}") - if answer_queries: - print(f"Answer queries tested: {len(answerable)} answerable, {len(unanswerable)} unanswerable") - if answerable: - print(f"Answer hit rate: {answer_hit_rate:.1f}%") - print(f"Keyword coverage: {avg_keyword_score:.1f}%") - print("=" * 72) + print("Best-scoring benchmark row:") + print(f" min_score={best_row.min_score:.2f}") + print(f" max_hits={best_row.max_hits}") + print(f" hit_rate={best_row.metrics.hit_rate:.2f}%") + print(f" mrr={best_row.metrics.mean_reciprocal_rank:.4f}") def main() -> None: - parser = argparse.ArgumentParser(description="Benchmark embedding model parameters.") + parser = argparse.ArgumentParser( + description="Benchmark retrieval settings for an embedding model." + ) parser.add_argument( "--model", type=str, default=None, help="Provider and model name, e.g. 
'openai:text-embedding-3-small'", ) + parser.add_argument( + "--min-scores", + type=str, + default=None, + help="Comma-separated min_score values to test.", + ) + parser.add_argument( + "--max-hits", + type=str, + default=None, + help="Comma-separated max_hits values to test.", + ) + parser.add_argument( + "--batch-size", + type=int, + default=16, + help="Batch size used when building the index.", + ) args = parser.parse_args() - asyncio.run(run_benchmark(args.model)) + + asyncio.run( + run_benchmark( + model_spec=args.model, + min_scores=parse_float_list(args.min_scores), + max_hits_values=parse_int_list(args.max_hits), + batch_size=args.batch_size, + ) + ) if __name__ == "__main__": diff --git a/tools/repeat_embedding_benchmarks.py b/tools/repeat_embedding_benchmarks.py new file mode 100644 index 00000000..5ac74062 --- /dev/null +++ b/tools/repeat_embedding_benchmarks.py @@ -0,0 +1,320 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Run embedding benchmarks repeatedly and save raw/summary JSON results. + +This script runs `tools/benchmark_embeddings.py` logic multiple times for each +embedding model, stores every run as JSON, and writes aggregate summaries that +can be used to justify tuned defaults. 
+ +Usage: + uv run python tools/repeat_embedding_benchmarks.py + uv run python tools/repeat_embedding_benchmarks.py --runs 30 + uv run python tools/repeat_embedding_benchmarks.py --models openai:text-embedding-3-small,openai:text-embedding-3-large,openai:text-embedding-ada-002 +""" + +import argparse +import asyncio +from dataclasses import asdict, dataclass +from datetime import datetime, UTC +import json +from pathlib import Path +from statistics import mean + +from dotenv import load_dotenv + +from benchmark_embeddings import ( + BenchmarkRow, + build_vector_base, + DEFAULT_MAX_HITS, + DEFAULT_MIN_SCORES, + evaluate_search_queries, + load_message_texts, + load_search_queries, + parse_float_list, + parse_int_list, + select_best_row, +) + +DEFAULT_MODELS = [ + "openai:text-embedding-3-small", + "openai:text-embedding-3-large", + "openai:text-embedding-ada-002", +] +DEFAULT_OUTPUT_DIR = Path("benchmark_results") + + +@dataclass +class RunRow: + min_score: float + max_hits: int + hit_rate: float + mean_reciprocal_rank: float + + +@dataclass +class RunResult: + run_index: int + model_spec: str + resolved_model_name: str + message_count: int + query_count: int + rows: list[RunRow] + best_row: RunRow + + +def sanitize_model_name(model_spec: str) -> str: + return model_spec.replace(":", "__").replace("/", "_").replace("\\", "_") + + +def benchmark_row_to_run_row(row: BenchmarkRow) -> RunRow: + return RunRow( + min_score=row.min_score, + max_hits=row.max_hits, + hit_rate=row.metrics.hit_rate, + mean_reciprocal_rank=row.metrics.mean_reciprocal_rank, + ) + + +def summarize_runs(model_spec: str, runs: list[RunResult]) -> dict[str, object]: + summary_rows: dict[tuple[float, int], list[RunRow]] = {} + for run in runs: + for row in run.rows: + summary_rows.setdefault((row.min_score, row.max_hits), []).append(row) + + averaged_rows: list[dict[str, float | int]] = [] + for (min_score, max_hits), rows in sorted(summary_rows.items()): + averaged_rows.append( + { + "min_score": 
min_score, + "max_hits": max_hits, + "mean_hit_rate": mean(row.hit_rate for row in rows), + "mean_mrr": mean(row.mean_reciprocal_rank for row in rows), + } + ) + + best_rows = [run.best_row for run in runs] + best_min_score_counts: dict[str, int] = {} + best_max_hits_counts: dict[str, int] = {} + for row in best_rows: + best_min_score_counts[f"{row.min_score:.2f}"] = ( + best_min_score_counts.get(f"{row.min_score:.2f}", 0) + 1 + ) + best_max_hits_counts[str(row.max_hits)] = ( + best_max_hits_counts.get(str(row.max_hits), 0) + 1 + ) + + averaged_best_row = max( + averaged_rows, + key=lambda row: ( + float(row["mean_mrr"]), + float(row["mean_hit_rate"]), + -float(row["min_score"]), + -int(row["max_hits"]), + ), + ) + + return { + "model_spec": model_spec, + "resolved_model_name": runs[0].resolved_model_name, + "run_count": len(runs), + "message_count": runs[0].message_count, + "query_count": runs[0].query_count, + "candidate_rows": averaged_rows, + "recommended_row": averaged_best_row, + "best_min_score_counts": best_min_score_counts, + "best_max_hits_counts": best_max_hits_counts, + } + + +def write_json(path: Path, data: object) -> None: + path.write_text(json.dumps(data, indent=2), encoding="utf-8") + + +def write_markdown_summary(path: Path, summaries: list[dict[str, object]]) -> None: + lines = [ + "# Repeated Embedding Benchmark Summary", + "", + "| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR |", + "| --- | ---: | ---: | ---: | ---: | ---: |", + ] + for summary in summaries: + recommended_row = summary["recommended_row"] + assert isinstance(recommended_row, dict) + lines.append( + "| " + f"{summary['resolved_model_name']} | " + f"{summary['run_count']} | " + f"{recommended_row['min_score']:.2f} | " + f"{recommended_row['max_hits']} | " + f"{recommended_row['mean_hit_rate']:.2f} | " + f"{recommended_row['mean_mrr']:.4f} |" + ) + lines.append("") + path.write_text("\n".join(lines), encoding="utf-8") + + +async def 
run_single_model_benchmark( + model_spec: str, + runs: int, + min_scores: list[float], + max_hits_values: list[int], + batch_size: int, + output_dir: Path, +) -> dict[str, object]: + repo_root = Path(__file__).resolve().parent.parent + message_texts = load_message_texts(repo_root) + query_cases = load_search_queries(repo_root) + model_output_dir = output_dir / sanitize_model_name(model_spec) + model_output_dir.mkdir(parents=True, exist_ok=True) + + run_results: list[RunResult] = [] + for run_index in range(1, runs + 1): + model, vector_base = await build_vector_base( + model_spec, message_texts, batch_size + ) + query_embeddings = await model.get_embeddings( + [case.query for case in query_cases] + ) + benchmark_rows: list[BenchmarkRow] = [] + for min_score in min_scores: + for max_hits in max_hits_values: + metrics = evaluate_search_queries( + vector_base, + query_cases, + query_embeddings, + min_score, + max_hits, + ) + benchmark_rows.append(BenchmarkRow(min_score, max_hits, metrics)) + + best_row = select_best_row(benchmark_rows) + run_result = RunResult( + run_index=run_index, + model_spec=model_spec, + resolved_model_name=model.model_name, + message_count=len(message_texts), + query_count=len(query_cases), + rows=[benchmark_row_to_run_row(row) for row in benchmark_rows], + best_row=benchmark_row_to_run_row(best_row), + ) + run_results.append(run_result) + write_json(model_output_dir / f"run_{run_index:02d}.json", asdict(run_result)) + + summary = summarize_runs(model_spec, run_results) + write_json(model_output_dir / "summary.json", summary) + return summary + + +async def run_repeated_benchmarks( + models: list[str], + runs: int, + min_scores: list[float], + max_hits_values: list[int], + batch_size: int, + output_root: Path, +) -> Path: + timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") + output_dir = output_root / timestamp + output_dir.mkdir(parents=True, exist_ok=True) + + metadata = { + "created_at_utc": timestamp, + "runs_per_model": runs, + 
"models": models, + "min_scores": min_scores, + "max_hits_values": max_hits_values, + "batch_size": batch_size, + } + write_json(output_dir / "metadata.json", metadata) + + summaries: list[dict[str, object]] = [] + for model_spec in models: + print(f"Running {runs} benchmark iterations for {model_spec}...") + summary = await run_single_model_benchmark( + model_spec=model_spec, + runs=runs, + min_scores=min_scores, + max_hits_values=max_hits_values, + batch_size=batch_size, + output_dir=output_dir, + ) + summaries.append(summary) + + write_json(output_dir / "summary.json", summaries) + write_markdown_summary(output_dir / "summary.md", summaries) + return output_dir + + +def parse_models(raw: str | None) -> list[str]: + if raw is None: + return DEFAULT_MODELS + models = [item.strip() for item in raw.split(",") if item.strip()] + if not models: + raise ValueError("--models must contain at least one model") + return models + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run embedding benchmarks repeatedly and save JSON results." 
+ ) + parser.add_argument( + "--models", + type=str, + default=None, + help="Comma-separated model specs to benchmark.", + ) + parser.add_argument( + "--runs", + type=int, + default=30, + help="Number of repeated runs per model.", + ) + parser.add_argument( + "--min-scores", + type=str, + default=",".join(f"{score:.2f}" for score in DEFAULT_MIN_SCORES), + help="Comma-separated min_score values to test.", + ) + parser.add_argument( + "--max-hits", + type=str, + default=",".join(str(value) for value in DEFAULT_MAX_HITS), + help="Comma-separated max_hits values to test.", + ) + parser.add_argument( + "--batch-size", + type=int, + default=16, + help="Batch size used when building the index.", + ) + parser.add_argument( + "--output-dir", + type=str, + default=str(DEFAULT_OUTPUT_DIR), + help="Directory where benchmark results will be written.", + ) + args = parser.parse_args() + + if args.runs <= 0: + raise ValueError("--runs must be a positive integer") + if args.batch_size <= 0: + raise ValueError("--batch-size must be a positive integer") + + load_dotenv() + output_dir = asyncio.run( + run_repeated_benchmarks( + models=parse_models(args.models), + runs=args.runs, + min_scores=parse_float_list(args.min_scores), + max_hits_values=parse_int_list(args.max_hits), + batch_size=args.batch_size, + output_root=Path(args.output_dir), + ) + ) + print(f"Wrote benchmark results to {output_dir}") + + +if __name__ == "__main__": + main() From c1327cfb5b82f6a8b413364668f5127e3c06bcd6 Mon Sep 17 00:00:00 2001 From: shreejaykurhade Date: Sun, 12 Apr 2026 00:25:09 +0530 Subject: [PATCH 7/9] update --- .../20260411T163642Z/metadata.json | 26 - .../run_01.json | 255 ------ .../run_02.json | 255 ------ .../run_03.json | 255 ------ .../run_04.json | 255 ------ .../run_05.json | 255 ------ .../run_06.json | 255 ------ .../run_07.json | 255 ------ .../run_08.json | 255 ------ .../run_09.json | 255 ------ .../run_10.json | 255 ------ .../run_11.json | 255 ------ .../run_12.json | 255 ------ 
.../run_13.json | 255 ------ .../run_14.json | 255 ------ .../run_15.json | 255 ------ .../run_16.json | 255 ------ .../run_17.json | 255 ------ .../run_18.json | 255 ------ .../run_19.json | 255 ------ .../run_20.json | 255 ------ .../run_21.json | 255 ------ .../run_22.json | 255 ------ .../run_23.json | 255 ------ .../run_24.json | 255 ------ .../run_25.json | 255 ------ .../run_26.json | 255 ------ .../run_27.json | 255 ------ .../run_28.json | 255 ------ .../run_29.json | 255 ------ .../run_30.json | 255 ------ .../summary.json | 261 ------ .../20260411T163642Z/summary.json | 263 ------ benchmark_results/20260411T163642Z/summary.md | 5 - .../20260411T170901Z/metadata.json | 26 - .../run_01.json | 255 ------ .../run_02.json | 255 ------ .../run_03.json | 255 ------ .../run_04.json | 255 ------ .../run_05.json | 255 ------ .../run_06.json | 255 ------ .../run_07.json | 255 ------ .../run_08.json | 255 ------ .../run_09.json | 255 ------ .../run_10.json | 255 ------ .../run_11.json | 255 ------ .../run_12.json | 255 ------ .../run_13.json | 255 ------ .../run_14.json | 255 ------ .../run_15.json | 255 ------ .../run_16.json | 255 ------ .../run_17.json | 255 ------ .../run_18.json | 255 ------ .../run_19.json | 255 ------ .../run_20.json | 255 ------ .../run_21.json | 255 ------ .../run_22.json | 255 ------ .../run_23.json | 255 ------ .../run_24.json | 255 ------ .../run_25.json | 255 ------ .../run_26.json | 255 ------ .../run_27.json | 255 ------ .../run_28.json | 255 ------ .../run_29.json | 255 ------ .../run_30.json | 255 ------ .../summary.json | 261 ------ .../20260411T170901Z/summary.json | 263 ------ benchmark_results/20260411T170901Z/summary.md | 5 - .../20260411T171331Z/metadata.json | 26 - .../run_01.json | 255 ------ .../run_02.json | 255 ------ .../run_03.json | 255 ------ .../run_04.json | 255 ------ .../run_05.json | 255 ------ .../run_06.json | 255 ------ .../run_07.json | 255 ------ .../run_08.json | 255 ------ .../run_09.json | 255 ------ 
.../run_10.json | 255 ------ .../run_11.json | 255 ------ .../run_12.json | 255 ------ .../run_13.json | 255 ------ .../run_14.json | 255 ------ .../run_15.json | 255 ------ .../run_16.json | 255 ------ .../run_17.json | 255 ------ .../run_18.json | 255 ------ .../run_19.json | 255 ------ .../run_20.json | 255 ------ .../run_21.json | 255 ------ .../run_22.json | 255 ------ .../run_23.json | 255 ------ .../run_24.json | 255 ------ .../run_25.json | 255 ------ .../run_26.json | 255 ------ .../run_27.json | 255 ------ .../run_28.json | 255 ------ .../run_29.json | 255 ------ .../run_30.json | 255 ------ .../summary.json | 261 ------ .../20260411T171331Z/summary.json | 263 ------ benchmark_results/20260411T171331Z/summary.md | 5 - .../20260411T172116Z/metadata.json | 28 - .../run_01.json | 255 ------ .../run_02.json | 255 ------ .../run_03.json | 255 ------ .../run_04.json | 255 ------ .../run_05.json | 255 ------ .../run_06.json | 255 ------ .../run_07.json | 255 ------ .../run_08.json | 255 ------ .../run_09.json | 255 ------ .../run_10.json | 255 ------ .../run_11.json | 255 ------ .../run_12.json | 255 ------ .../run_13.json | 255 ------ .../run_14.json | 255 ------ .../run_15.json | 255 ------ .../run_16.json | 255 ------ .../run_17.json | 255 ------ .../run_18.json | 255 ------ .../run_19.json | 255 ------ .../run_20.json | 255 ------ .../run_21.json | 255 ------ .../run_22.json | 255 ------ .../run_23.json | 255 ------ .../run_24.json | 255 ------ .../run_25.json | 255 ------ .../run_26.json | 255 ------ .../run_27.json | 255 ------ .../run_28.json | 255 ------ .../run_29.json | 255 ------ .../run_30.json | 255 ------ .../summary.json | 261 ------ .../run_01.json | 255 ------ .../run_02.json | 255 ------ .../run_03.json | 255 ------ .../run_04.json | 255 ------ .../run_05.json | 255 ------ .../run_06.json | 255 ------ .../run_07.json | 255 ------ .../run_08.json | 255 ------ .../run_09.json | 255 ------ .../run_10.json | 255 ------ .../run_11.json | 255 ------ 
.../run_12.json | 255 ------ .../run_13.json | 255 ------ .../run_14.json | 255 ------ .../run_15.json | 255 ------ .../run_16.json | 255 ------ .../run_17.json | 255 ------ .../run_18.json | 255 ------ .../run_19.json | 255 ------ .../run_20.json | 255 ------ .../run_21.json | 255 ------ .../run_22.json | 255 ------ .../run_23.json | 255 ------ .../run_24.json | 255 ------ .../run_25.json | 255 ------ .../run_26.json | 255 ------ .../run_27.json | 255 ------ .../run_28.json | 255 ------ .../run_29.json | 255 ------ .../run_30.json | 255 ------ .../summary.json | 261 ------ .../run_01.json | 255 ------ .../run_02.json | 255 ------ .../run_03.json | 255 ------ .../run_04.json | 255 ------ .../run_05.json | 255 ------ .../run_06.json | 255 ------ .../run_07.json | 255 ------ .../run_08.json | 255 ------ .../run_09.json | 255 ------ .../run_10.json | 255 ------ .../run_11.json | 255 ------ .../run_12.json | 255 ------ .../run_13.json | 255 ------ .../run_14.json | 255 ------ .../run_15.json | 255 ------ .../run_16.json | 255 ------ .../run_17.json | 255 ------ .../run_18.json | 255 ------ .../run_19.json | 255 ------ .../run_20.json | 255 ------ .../run_21.json | 255 ------ .../run_22.json | 255 ------ .../run_23.json | 255 ------ .../run_24.json | 255 ------ .../run_25.json | 255 ------ .../run_26.json | 255 ------ .../run_27.json | 255 ------ .../run_28.json | 255 ------ .../run_29.json | 255 ------ .../run_30.json | 255 ------ .../summary.json | 261 ------ .../20260411T172116Z/summary.json | 785 ------------------ benchmark_results/20260411T172116Z/summary.md | 7 - 198 files changed, 49168 deletions(-) delete mode 100644 benchmark_results/20260411T163642Z/metadata.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json delete 
mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json delete mode 100644 
benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json delete mode 100644 benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json delete mode 100644 benchmark_results/20260411T163642Z/summary.json delete mode 100644 benchmark_results/20260411T163642Z/summary.md delete mode 100644 benchmark_results/20260411T170901Z/metadata.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json delete mode 100644 
benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json delete mode 100644 benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json delete mode 100644 
benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json delete mode 100644 benchmark_results/20260411T170901Z/summary.json delete mode 100644 benchmark_results/20260411T170901Z/summary.md delete mode 100644 benchmark_results/20260411T171331Z/metadata.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json delete mode 100644 
benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json delete mode 100644 benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json delete mode 100644 benchmark_results/20260411T171331Z/summary.json delete mode 100644 benchmark_results/20260411T171331Z/summary.md delete mode 100644 benchmark_results/20260411T172116Z/metadata.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json delete mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json delete mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json delete mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json delete mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json delete mode 100644 
benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json delete mode 100644 benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json delete mode 100644 benchmark_results/20260411T172116Z/summary.json delete mode 100644 benchmark_results/20260411T172116Z/summary.md diff --git a/benchmark_results/20260411T163642Z/metadata.json b/benchmark_results/20260411T163642Z/metadata.json deleted file mode 100644 index aeb8c76c..00000000 --- a/benchmark_results/20260411T163642Z/metadata.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "created_at_utc": "20260411T163642Z", - "runs_per_model": 30, - "models": [ - "openai:text-embedding-ada-002" - ], - "min_scores": [ - 0.25, - 0.3, - 0.35, - 0.4, - 0.5, - 0.6, - 0.7, - 0.75, - 0.8, - 0.85 - ], - "max_hits_values": [ - 5, - 10, - 15, - 20 - ], - "batch_size": 16 -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json deleted file mode 100644 index f42ed99c..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_01.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 1, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": 
"text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - 
"hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - 
"hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json deleted file mode 100644 index 91d93052..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_02.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 2, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - 
"mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json deleted file mode 100644 index c612d42d..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_03.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 3, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { 
- "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - 
"min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json deleted file mode 100644 index 9221c946..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_04.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 4, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json 
b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json deleted file mode 100644 index a91328fa..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_05.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 5, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - 
"mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - 
"mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json deleted file mode 100644 index 5a205dd3..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_06.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 6, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - 
{ - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - 
"min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json deleted file mode 100644 index 45e7a7df..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_07.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 7, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 
47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json deleted file mode 100644 index 6db6beff..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_08.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 8, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json deleted file mode 100644 index 8c45ea9b..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_09.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 9, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - 
"max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - 
"max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json deleted file mode 100644 index 
723a31ee..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_10.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 10, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 
80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json deleted file mode 100644 index c42d84a5..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_11.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 11, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 
0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json deleted file mode 100644 index 6f2c60b5..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_12.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 12, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - 
"max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - 
"max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json deleted file mode 100644 index 29c6deb1..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_13.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 13, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - 
"hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json deleted file mode 100644 index 3d4286d3..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_14.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 14, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, 
- { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json deleted file mode 100644 index dacac5e0..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_15.json +++ /dev/null @@ 
-1,255 +0,0 @@ -{ - "run_index": 15, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 
82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json deleted file mode 100644 index d22e1dc7..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_16.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 16, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 
0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 
0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 
0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json deleted file mode 100644 index 2c9c7a35..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_17.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 17, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - 
"max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - 
"max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json deleted file mode 100644 index d248c4b3..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_18.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 18, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - 
"mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - 
"mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json deleted file mode 100644 index 60a555e6..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_19.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 19, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 
- }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json deleted file mode 100644 index f313eb1f..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_20.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 20, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": 
"text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - 
"hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - 
"hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json deleted file mode 100644 index 88b2fc2a..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_21.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 21, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - 
"mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json deleted file mode 100644 index 8919b4eb..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_22.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 22, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - 
{ - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - 
"min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json deleted file mode 100644 index a313722a..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_23.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 23, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json 
b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json deleted file mode 100644 index fb919a66..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_24.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 24, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - 
"mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - 
"mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json deleted file mode 100644 index af8bb9bb..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_25.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 25, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - 
{ - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - 
"min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json deleted file mode 100644 index ec9fa263..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_26.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 26, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 
47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json deleted file mode 100644 index ffa32277..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_27.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 27, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json deleted file mode 100644 index a8548590..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_28.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 28, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - 
"max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - 
"max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json deleted file mode 100644 index 
dffe68de..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_29.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 29, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 
80.59701492537313, - "mean_reciprocal_rank": 0.6333747927031509 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6343698175787728 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6343698175787728 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json deleted file mode 100644 index df9701d3..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/run_30.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 30, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 
0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json b/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json deleted file mode 100644 index 948925bb..00000000 --- a/benchmark_results/20260411T163642Z/openai__text-embedding-ada-002/summary.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.3, - "max_hits": 
5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.6, - 
"max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 80.59701492537313, - "mean_mrr": 0.6336324330727315 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346274579483534 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346274579483534 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - 
"mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/summary.json b/benchmark_results/20260411T163642Z/summary.json deleted file mode 100644 index 6f12d3bc..00000000 --- a/benchmark_results/20260411T163642Z/summary.json +++ /dev/null @@ -1,263 +0,0 @@ -[ - { - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - 
{ - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7493040748637763 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - 
}, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 80.59701492537313, - "mean_mrr": 0.6336324330727315 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346274579483534 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346274579483534 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514472053651158 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } - } -] \ No newline at end of file diff --git a/benchmark_results/20260411T163642Z/summary.md b/benchmark_results/20260411T163642Z/summary.md deleted file mode 100644 index fc0d054c..00000000 --- a/benchmark_results/20260411T163642Z/summary.md +++ /dev/null @@ -1,5 +0,0 @@ -# Repeated Embedding Benchmark Summary - -| Model | Runs | 
Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR | -| --- | ---: | ---: | ---: | ---: | ---: | -| text-embedding-ada-002 | 30 | 0.25 | 15 | 98.51 | 0.7514 | diff --git a/benchmark_results/20260411T170901Z/metadata.json b/benchmark_results/20260411T170901Z/metadata.json deleted file mode 100644 index b90ccd56..00000000 --- a/benchmark_results/20260411T170901Z/metadata.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "created_at_utc": "20260411T170901Z", - "runs_per_model": 30, - "models": [ - "openai:text-embedding-3-small" - ], - "min_scores": [ - 0.25, - 0.3, - 0.35, - 0.4, - 0.5, - 0.6, - 0.7, - 0.75, - 0.8, - 0.85 - ], - "max_hits_values": [ - 5, - 10, - 15, - 20 - ], - "batch_size": 16 -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json deleted file mode 100644 index 480efa93..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_01.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 1, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 
0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - 
"max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json deleted file mode 100644 index 9b716fa9..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_02.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 2, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 
0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json deleted file mode 100644 index 95ba901c..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_03.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 3, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - 
"hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - 
"mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json deleted file mode 100644 index 9918f19e..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_04.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 4, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - 
"mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json deleted file mode 100644 index 7e10c385..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_05.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 5, - "model_spec": "openai:text-embedding-3-small", - 
"resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 
53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json deleted file mode 100644 index 6b8a6ac9..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_06.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 6, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, 
- "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, 
- { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json deleted file mode 100644 index 80fde7a0..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_07.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 7, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 
0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json deleted file mode 100644 index 3fb706a0..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_08.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 8, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - 
"hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - 
"mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json deleted file mode 100644 index 8d96e4e1..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_09.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 9, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - 
"mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json deleted file mode 100644 index 02605a05..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_10.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 10, - "model_spec": "openai:text-embedding-3-small", - 
"resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 
53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json deleted file mode 100644 index 32fc0de5..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_11.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 11, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, 
- "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, 
- { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json deleted file mode 100644 index 7fe61cc8..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_12.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 12, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 
0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json deleted file mode 100644 index 24d9b86c..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_13.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 13, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, 
- "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - 
"mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json deleted file mode 100644 index fc5f030a..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_14.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 14, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - 
"mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json deleted file mode 100644 index bb87e5c8..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_15.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 15, - "model_spec": "openai:text-embedding-3-small", 
- "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 
53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json deleted file mode 100644 index af78a67d..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_16.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 16, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, 
- "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, 
- { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json deleted file mode 100644 index 91775a17..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_17.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 17, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 
0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json deleted file mode 100644 index 4a260259..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_18.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 18, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, 
- "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - 
"mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json deleted file mode 100644 index be6cfab3..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_19.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 19, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - 
"mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json deleted file mode 100644 index 2d768925..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_20.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 20, - "model_spec": "openai:text-embedding-3-small", - 
"resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 
53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json deleted file mode 100644 index bab7604d..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_21.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 21, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 
76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 
0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff 
--git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json deleted file mode 100644 index 9ad59995..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_22.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 22, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 
0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json deleted file mode 100644 index 3fb6461c..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_23.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 23, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, 
- "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - 
"mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json deleted file mode 100644 index 1e6ae3a4..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_24.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 24, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - 
"mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json deleted file mode 100644 index 3cb6dcbc..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_25.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 25, - "model_spec": "openai:text-embedding-3-small", 
- "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 
53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json deleted file mode 100644 index 04c9d790..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_26.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 26, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 
76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 
0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff 
--git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json deleted file mode 100644 index 7c28c7ce..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_27.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 27, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 
0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json deleted file mode 100644 index 5080c761..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_28.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 28, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, 
- "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - 
"mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json deleted file mode 100644 index 17566b00..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_29.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 29, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - 
"mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json deleted file mode 100644 index 088ae095..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/run_30.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 30, - "model_spec": "openai:text-embedding-3-small", 
- "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 
53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json b/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json deleted file mode 100644 index 9935ddaa..00000000 --- a/benchmark_results/20260411T170901Z/openai__text-embedding-3-small/summary.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.67987818261633 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 73.13432835820896, - "mean_mrr": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 
0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6037587796312555 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 65.67164179104478, - "mean_mrr": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 68.65671641791045, - "mean_mrr": 0.5392065408252853 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - 
"min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.67987818261633 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "20": 30 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/summary.json b/benchmark_results/20260411T170901Z/summary.json deleted file mode 100644 index 7492220f..00000000 --- 
a/benchmark_results/20260411T170901Z/summary.json +++ /dev/null @@ -1,263 +0,0 @@ -[ - { - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.67987818261633 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 73.13432835820896, - "mean_mrr": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6037587796312555 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 65.67164179104478, - "mean_mrr": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 68.65671641791045, - "mean_mrr": 0.5392065408252853 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 
- }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - 
"mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.67987818261633 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "20": 30 - } - } -] \ No newline at end of file diff --git a/benchmark_results/20260411T170901Z/summary.md b/benchmark_results/20260411T170901Z/summary.md deleted file mode 100644 index e3fe3102..00000000 --- a/benchmark_results/20260411T170901Z/summary.md +++ /dev/null @@ -1,5 +0,0 @@ -# Repeated Embedding Benchmark Summary - -| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR | -| --- | ---: | ---: | ---: | ---: | ---: | -| text-embedding-3-small | 30 | 0.25 | 20 | 88.06 | 0.6799 | diff --git a/benchmark_results/20260411T171331Z/metadata.json b/benchmark_results/20260411T171331Z/metadata.json deleted file mode 100644 index c80d2a95..00000000 --- a/benchmark_results/20260411T171331Z/metadata.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "created_at_utc": "20260411T171331Z", - "runs_per_model": 30, - "models": [ - "openai:text-embedding-3-large" - ], - "min_scores": [ - 0.25, - 0.3, - 0.35, - 0.4, - 0.5, - 0.6, - 0.7, - 0.75, - 0.8, - 0.85 - ], - "max_hits_values": [ - 5, - 10, - 15, - 20 - ], - "batch_size": 16 -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json deleted file mode 100644 
index 3cb92bc5..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_01.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 1, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 
52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 
0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json deleted file mode 100644 index dce2186d..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_02.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 2, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 
67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - 
"hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 
0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json deleted file mode 100644 index 659629c6..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_03.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 3, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - 
"hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - 
"hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json deleted file mode 100644 index 646c1b2e..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_04.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 4, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - 
"hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 
15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - 
"hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json deleted file mode 100644 index 70a071a5..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_05.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 5, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 
10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json deleted file mode 100644 index 56ef83c1..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_06.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 6, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - 
"max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - 
"max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json deleted file mode 100644 index c195bb16..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_07.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 7, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 
0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - 
{ - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json deleted file mode 100644 index cd8598d2..00000000 --- 
a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_08.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 8, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - 
"mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json deleted file mode 100644 index 251bf9f5..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_09.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 9, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - 
"mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - 
} -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json deleted file mode 100644 index 4526d50b..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_10.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 10, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6266169154228856 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6266169154228856 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6292175486205337 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6292175486205337 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.559452736318408 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.559452736318408 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5620533695160561 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5620533695160561 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5186567164179104 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5186567164179104 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5199004975124378 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - 
"mean_reciprocal_rank": 0.5199004975124378 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4626865671641791 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4626865671641791 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4639303482587065 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4639303482587065 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19776119402985073 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19776119402985073 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19776119402985073 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19776119402985073 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6292175486205337 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json deleted file mode 100644 index e6570cd6..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_11.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 11, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 
77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - 
"hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json deleted file mode 100644 index 57252912..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_12.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 12, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - 
"hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 
0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json deleted file mode 100644 index efe0209f..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_13.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 13, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - 
"hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - 
"hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, 
- { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json deleted file mode 100644 index 7b4e9b26..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_14.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 14, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json deleted file mode 100644 index e27a93b4..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_15.json +++ 
/dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 15, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, 
- "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json deleted file mode 100644 index 12ee964a..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_16.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 16, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 
10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, 
- "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json deleted file mode 100644 index 99038ee8..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_17.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 17, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 
0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - 
"min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json deleted file mode 100644 index 94f69a50..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_18.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 18, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 
0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - 
"mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - 
}, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json deleted file mode 100644 index 2881b67b..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_19.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 19, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - 
"mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, 
- "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json deleted file mode 100644 index 0d223549..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_20.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 20, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - 
"mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, 
- "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json deleted file mode 100644 index 5b3d8df7..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_21.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 21, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - 
"mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, 
- "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json deleted file mode 100644 index f93cfa38..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_22.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - 
"run_index": 22, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, 
- "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json deleted file mode 100644 index d901ca6c..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_23.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 23, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, 
- "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - 
{ - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json 
b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json deleted file mode 100644 index 7ac39e82..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_24.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 24, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, 
- "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - 
"min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json deleted file mode 100644 index f7bdf62a..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_25.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 25, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, 
- "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 
22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - 
"best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json deleted file mode 100644 index 585ee336..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_26.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 26, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json deleted file mode 100644 index 88c17b30..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_27.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 27, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 
74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json deleted file mode 100644 index 98eb25c9..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_28.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 28, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - 
"hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json deleted file mode 100644 index 344d33d5..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_29.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 29, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": 
"text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, 
- "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json deleted file mode 100644 index 3fa07e44..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/run_30.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 30, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - 
"hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, 
- { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json b/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json deleted file mode 
100644 index e98c3e2b..00000000 --- a/benchmark_results/20260411T171331Z/openai__text-embedding-3-large/summary.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6242122719734661 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6242122719734661 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6268129051711141 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6268129051711141 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5570480928689884 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5570480928689884 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 0.5596487260666365 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 0.5596487260666365 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5162520729684908 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5162520729684908 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174958540630182 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174958540630182 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.46028192371475957 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.46028192371475957 - }, - { - 
"min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4615257048092869 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4615257048092869 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - 
"mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6268129051711141 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/summary.json b/benchmark_results/20260411T171331Z/summary.json deleted file mode 100644 index 79ad0d22..00000000 --- a/benchmark_results/20260411T171331Z/summary.json +++ /dev/null @@ -1,263 +0,0 @@ -[ - { - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6242122719734661 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6242122719734661 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6268129051711141 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6268129051711141 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5570480928689884 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5570480928689884 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 
0.5596487260666365 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 0.5596487260666365 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5162520729684908 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5162520729684908 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174958540630182 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174958540630182 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.46028192371475957 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.46028192371475957 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4615257048092869 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4615257048092869 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19535655058043117 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - 
"mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6268129051711141 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } - } -] \ No newline at end of file diff --git a/benchmark_results/20260411T171331Z/summary.md b/benchmark_results/20260411T171331Z/summary.md deleted file mode 100644 index e4ceadb6..00000000 --- a/benchmark_results/20260411T171331Z/summary.md +++ /dev/null @@ -1,5 +0,0 @@ -# Repeated Embedding Benchmark Summary - -| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR 
| -| --- | ---: | ---: | ---: | ---: | ---: | -| text-embedding-3-large | 30 | 0.25 | 15 | 77.61 | 0.6268 | diff --git a/benchmark_results/20260411T172116Z/metadata.json b/benchmark_results/20260411T172116Z/metadata.json deleted file mode 100644 index 1bd0005a..00000000 --- a/benchmark_results/20260411T172116Z/metadata.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "created_at_utc": "20260411T172116Z", - "runs_per_model": 30, - "models": [ - "openai:text-embedding-3-small", - "openai:text-embedding-3-large", - "openai:text-embedding-ada-002" - ], - "min_scores": [ - 0.25, - 0.3, - 0.35, - 0.4, - 0.5, - 0.6, - 0.7, - 0.75, - 0.8, - 0.85 - ], - "max_hits_values": [ - 5, - 10, - 15, - 20 - ], - "batch_size": 16 -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json deleted file mode 100644 index 3cb92bc5..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_01.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 1, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 
0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - 
"max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json 
b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json deleted file mode 100644 index dce2186d..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_02.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 2, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, 
- "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - 
"min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json deleted file mode 100644 index 659629c6..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_03.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 3, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, 
- "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 
22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - 
"best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json deleted file mode 100644 index 646c1b2e..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_04.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 4, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json deleted file mode 100644 index 70a071a5..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_05.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 5, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 
74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json deleted file mode 100644 index 56ef83c1..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_06.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 6, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - 
"hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json deleted file mode 100644 index c195bb16..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_07.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 7, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": 
"text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, 
- "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json deleted file mode 100644 index cd8598d2..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_08.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 8, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - 
"hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, 
- { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json deleted file mode 
100644 index 251bf9f5..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_09.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 9, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - 
"hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - 
"min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json deleted file mode 100644 index 152979ff..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_10.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 10, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - 
"hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - 
"mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json deleted file mode 100644 index e6570cd6..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_11.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 11, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - 
"max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json deleted file mode 100644 index 57252912..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_12.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 12, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - 
"max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 
0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - 
"max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json deleted file mode 100644 index efe0209f..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_13.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 13, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 
0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, 
- "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json deleted file mode 100644 index 7b4e9b26..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_14.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 14, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 
0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 
0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json deleted file mode 100644 index e27a93b4..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_15.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 15, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - 
"min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json deleted file mode 100644 index 12ee964a..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_16.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 16, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - 
"mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, 
- "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json deleted file mode 100644 index 99038ee8..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_17.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 17, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - 
"mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - 
} -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json deleted file mode 100644 index 94f69a50..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_18.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 18, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, 
- "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json deleted file mode 100644 index 2881b67b..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_19.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 19, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 
77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - 
"hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json deleted file mode 100644 index 0d223549..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_20.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 20, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - 
"hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 
0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json deleted file mode 100644 index 5b3d8df7..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_21.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 21, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - 
"hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - 
"hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, 
- { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json deleted file mode 100644 index f93cfa38..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_22.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 22, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json deleted file mode 100644 index d901ca6c..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_23.json +++ 
/dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 23, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, 
- "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - 
"max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json deleted file mode 100644 index 7ac39e82..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_24.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 24, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 
10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, 
- "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json deleted file mode 100644 index f7bdf62a..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_25.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 25, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 
0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - 
"min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json deleted file mode 100644 index 585ee336..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_26.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 26, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 
0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - 
"mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - 
}, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json deleted file mode 100644 index 88c17b30..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_27.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 27, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - 
"mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, 
- "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json deleted file mode 100644 index 98eb25c9..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_28.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 28, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - 
"mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, 
- "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json deleted file mode 100644 index 344d33d5..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_29.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 29, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - 
"mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, 
- "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json deleted file mode 100644 index 3fa07e44..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/run_30.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - 
"run_index": 30, - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 74.6268656716418, - "mean_reciprocal_rank": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 70.1492537313433, - "mean_reciprocal_rank": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 59.70149253731343, - "mean_reciprocal_rank": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 61.19402985074627, - "mean_reciprocal_rank": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 52.23880597014925, - "mean_reciprocal_rank": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, 
- "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 22.388059701492537, - "mean_reciprocal_rank": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6267299864314789 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json deleted file mode 100644 index f805217a..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-large/summary.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 
0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - 
"mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6267299864314789 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json deleted file mode 100644 index 306a8110..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_01.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 1, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - 
"mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json deleted file mode 100644 index 7f6ad4d1..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_02.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 2, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - 
{ - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 
2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], 
- "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json deleted file mode 100644 index 93e7551d..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_03.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 3, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json deleted file mode 100644 index 83612db9..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_04.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 4, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - 
}, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { 
- "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json deleted file mode 100644 index 7e10c385..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_05.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 5, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - 
"mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json deleted file mode 100644 index 5694312b..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_06.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 6, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - 
"mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json deleted file mode 100644 index 80fde7a0..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_07.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 7, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - 
{ - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 
2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], 
- "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json deleted file mode 100644 index c0bf202c..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_08.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 8, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json deleted file mode 100644 index 8f8cc815..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_09.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 9, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - 
}, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { 
- "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json deleted file mode 100644 index ffaa9eca..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_10.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 10, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - 
"mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json deleted file mode 100644 index 58a0e15f..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_11.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 11, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - 
"mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json deleted file mode 100644 index 7fe61cc8..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_12.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 12, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, 
- { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 
2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], 
- "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json deleted file mode 100644 index d60ca1d2..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_13.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 13, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json deleted file mode 100644 index fc5f030a..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_14.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 14, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - 
}, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { 
- "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json deleted file mode 100644 index bb87e5c8..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_15.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 15, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - 
"mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json deleted file mode 100644 index 31c2f1f1..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_16.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 16, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - 
"mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json deleted file mode 100644 index 91775a17..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_17.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 17, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, 
- { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 
2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], 
- "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json deleted file mode 100644 index a71856e9..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_18.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 18, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json deleted file mode 100644 index 1d16bf56..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_19.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 19, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - 
}, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { 
- "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json deleted file mode 100644 index 2d768925..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_20.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 20, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - 
"mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json deleted file mode 100644 index 0d6641d9..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_21.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 21, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - 
"mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json deleted file mode 100644 index 0cd7e68a..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_22.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 22, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - 
{ - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 
2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], 
- "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json deleted file mode 100644 index 8a428bd4..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_23.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 23, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json deleted file mode 100644 index 7b5dea71..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_24.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 24, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - 
}, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { 
- "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json deleted file mode 100644 index 3cb6dcbc..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_25.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 25, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - 
"mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json deleted file mode 100644 index 04c9d790..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_26.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 26, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - 
"mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 
0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json deleted file mode 100644 index 7c28c7ce..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_27.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 27, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, 
- { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 
2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], 
- "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json deleted file mode 100644 index 5080c761..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_28.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 28, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037935323383085 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 
67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5392412935323383 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.679912935323383 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json deleted file mode 100644 index 17566b00..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_29.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 29, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - 
}, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - 
"hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { 
- "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json deleted file mode 100644 index 088ae095..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/run_30.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 30, - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 86.56716417910447, - "mean_reciprocal_rank": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 73.13432835820896, - "mean_reciprocal_rank": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - 
"hit_rate": 77.61194029850746, - "mean_reciprocal_rank": 0.6037386596429617 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 65.67164179104478, - "mean_reciprocal_rank": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 67.16417910447761, - "mean_reciprocal_rank": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 68.65671641791045, - "mean_reciprocal_rank": 0.5391864208369915 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 53.73134328358209, - "mean_reciprocal_rank": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 17.91044776119403, - "mean_reciprocal_rank": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 2.9850746268656714, - "mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 2.9850746268656714, - 
"mean_reciprocal_rank": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 88.05970149253731, - "mean_reciprocal_rank": 0.6798580626280363 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json b/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json deleted file mode 100644 index caf1e862..00000000 --- 
a/benchmark_results/20260411T172116Z/openai__text-embedding-3-small/summary.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": "text-embedding-3-small", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.6798726953467954 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 73.13432835820896, - "mean_mrr": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6037532923617208 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 65.67164179104478, - "mean_mrr": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 68.65671641791045, - "mean_mrr": 0.5392010535557507 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 
53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - 
"min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.6798726953467954 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "20": 30 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json deleted file mode 100644 index f42ed99c..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_01.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 1, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 
0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json deleted file mode 100644 index 91d93052..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_02.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 2, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git 
a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json deleted file mode 100644 index c612d42d..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_03.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 3, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - 
}, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json deleted file mode 100644 index 9221c946..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_04.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 4, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": 
"text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - 
"hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - 
"hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json deleted file mode 100644 index a91328fa..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_05.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 5, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - 
"mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json deleted file mode 100644 index 5a205dd3..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_06.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 6, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { 
- "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - 
"min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json deleted file mode 100644 index 45e7a7df..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_07.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 7, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json 
b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json deleted file mode 100644 index 6db6beff..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_08.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 8, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - 
"mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - 
"mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json deleted file mode 100644 index 8c45ea9b..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_09.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 9, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - 
{ - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - 
"min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json deleted file mode 100644 index 723a31ee..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_10.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 10, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 
47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json deleted file mode 100644 index c42d84a5..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_11.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 11, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json deleted file mode 100644 index 6f2c60b5..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_12.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 12, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - 
"max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - 
"max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json deleted file mode 100644 index 
29c6deb1..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_13.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 13, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 
80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json deleted file mode 100644 index 3d4286d3..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_14.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 14, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 
0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 
0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json deleted file mode 100644 index dacac5e0..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_15.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 15, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - 
"max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - 
"max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - 
"max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json deleted file mode 100644 index d22e1dc7..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_16.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 16, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - 
"hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json deleted file mode 100644 index 2c9c7a35..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_17.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 17, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, 
- { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json deleted file mode 100644 index d248c4b3..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_18.json +++ /dev/null @@ 
-1,255 +0,0 @@ -{ - "run_index": 18, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 
98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 
82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json deleted file mode 100644 index 60a555e6..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_19.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 19, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 
0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 
0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 
0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json deleted file mode 100644 index aa9db928..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_20.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 20, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, 
- "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, 
- "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6333747927031509 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6343698175787728 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6343698175787728 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - 
"hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json deleted file mode 100644 index 88b2fc2a..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_21.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 21, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 
91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 
91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of 
file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json deleted file mode 100644 index 8919b4eb..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_22.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 22, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 
0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json deleted file mode 100644 index a313722a..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_23.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 23, - "model_spec": "openai:text-embedding-ada-002", - 
"resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 
0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - 
"max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json deleted file mode 100644 index fb919a66..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_24.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 24, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, 
- "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - 
"mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json deleted file mode 100644 index af8bb9bb..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_25.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 25, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - 
{ - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - 
"min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - 
"mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json deleted file mode 100644 index ec9fa263..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_26.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 26, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 
95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json 
b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json deleted file mode 100644 index ffa32277..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_27.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 27, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - 
"mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - 
"mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json deleted file mode 100644 index a8548590..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_28.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 28, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - 
{ - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - 
"min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - 
"min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json deleted file mode 100644 index dffe68de..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_29.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 29, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.25, - "max_hits": 20, - 
"hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.5, - "max_hits": 20, - 
"hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7490464344941957 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6333747927031509 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6343698175787728 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6343698175787728 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - 
"hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.7511895649955351 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json deleted file mode 100644 index df9701d3..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/run_30.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "run_index": 30, - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "message_count": 106, - "query_count": 67, - "rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.25, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.3, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.3, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.35, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.35, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.4, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.4, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.5, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.5, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.6, - "max_hits": 15, - "hit_rate": 98.50746268656717, - 
"mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.6, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 5, - "hit_rate": 91.04477611940298, - "mean_reciprocal_rank": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "hit_rate": 95.52238805970148, - "mean_reciprocal_rank": 0.7493129590144515 - }, - { - "min_score": 0.7, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.7, - "max_hits": 20, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - }, - { - "min_score": 0.75, - "max_hits": 5, - "hit_rate": 76.11940298507463, - "mean_reciprocal_rank": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "hit_rate": 80.59701492537313, - "mean_reciprocal_rank": 0.6336413172234068 - }, - { - "min_score": 0.75, - "max_hits": 15, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.75, - "max_hits": 20, - "hit_rate": 82.08955223880598, - "mean_reciprocal_rank": 0.6346363420990286 - }, - { - "min_score": 0.8, - "max_hits": 5, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "hit_rate": 47.76119402985074, - "mean_reciprocal_rank": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 
20, - "hit_rate": 0.0, - "mean_reciprocal_rank": 0.0 - } - ], - "best_row": { - "min_score": 0.25, - "max_hits": 15, - "hit_rate": 98.50746268656717, - "mean_reciprocal_rank": 0.751456089515791 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json b/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json deleted file mode 100644 index 1476d1cc..00000000 --- a/benchmark_results/20260411T172116Z/openai__text-embedding-ada-002/summary.json +++ /dev/null @@ -1,261 +0,0 @@ -{ - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.35, - "max_hits": 15, - 
"mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.7, - "max_hits": 
20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 80.59701492537313, - "mean_mrr": 0.6336235489220564 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346185737976783 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346185737976783 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } -} \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/summary.json b/benchmark_results/20260411T172116Z/summary.json deleted file mode 100644 index 264f31cd..00000000 --- a/benchmark_results/20260411T172116Z/summary.json +++ /dev/null @@ -1,785 +0,0 @@ -[ - { - "model_spec": "openai:text-embedding-3-small", - "resolved_model_name": 
"text-embedding-3-small", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.673134328358209 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 86.56716417910447, - "mean_mrr": 0.6789800995024876 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.6798726953467954 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 73.13432835820896, - "mean_mrr": 0.5985074626865672 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.6028606965174129 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6037532923617208 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 65.67164179104478, - "mean_mrr": 0.5358208955223881 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5383084577114428 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 68.65671641791045, - "mean_mrr": 0.5392010535557507 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.45 - }, - { - "min_score": 0.5, - "max_hits": 5, - 
"mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 17.91044776119403, - "mean_mrr": 0.1417910447761194 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 2.9850746268656714, - "mean_mrr": 0.029850746268656716 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 
0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 88.05970149253731, - "mean_mrr": 0.6798726953467954 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "20": 30 - } - }, - { - "model_spec": "openai:text-embedding-3-large", - "resolved_model_name": "text-embedding-3-large", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 74.6268656716418, - "mean_mrr": 0.6241293532338309 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6267299864314789 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6267299864314789 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 10, - "mean_hit_rate": 67.16417910447761, - "mean_mrr": 0.5569651741293532 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 0.5595658073270013 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 70.1492537313433, - "mean_mrr": 0.5595658073270013 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 59.70149253731343, - "mean_mrr": 0.5161691542288557 - }, - { - "min_score": 0.35, - "max_hits": 15, - 
"mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174129353233831 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 61.19402985074627, - "mean_mrr": 0.5174129353233831 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 52.23880597014925, - "mean_mrr": 0.4601990049751244 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4614427860696517 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 53.73134328358209, - "mean_mrr": 0.4614427860696517 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 22.388059701492537, - "mean_mrr": 0.19527363184079602 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 10, - 
"mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - "recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 77.61194029850746, - "mean_mrr": 0.6267299864314789 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } - }, - { - "model_spec": "openai:text-embedding-ada-002", - "resolved_model_name": "text-embedding-ada-002", - "run_count": 30, - "message_count": 106, - "query_count": 67, - "candidate_rows": [ - { - "min_score": 0.25, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.25, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.25, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.3, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.3, - "max_hits": 10, 
- "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.3, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.3, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.35, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.35, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.35, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.35, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.4, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.4, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.4, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.4, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.5, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.5, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.5, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.5, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.6, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.6, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.6, - 
"max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.6, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.7, - "max_hits": 5, - "mean_hit_rate": 91.04477611940298, - "mean_mrr": 0.7430348258706467 - }, - { - "min_score": 0.7, - "max_hits": 10, - "mean_hit_rate": 95.52238805970148, - "mean_mrr": 0.7492951907131011 - }, - { - "min_score": 0.7, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.7, - "max_hits": 20, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - { - "min_score": 0.75, - "max_hits": 5, - "mean_hit_rate": 76.11940298507463, - "mean_mrr": 0.627363184079602 - }, - { - "min_score": 0.75, - "max_hits": 10, - "mean_hit_rate": 80.59701492537313, - "mean_mrr": 0.6336235489220564 - }, - { - "min_score": 0.75, - "max_hits": 15, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346185737976783 - }, - { - "min_score": 0.75, - "max_hits": 20, - "mean_hit_rate": 82.08955223880598, - "mean_mrr": 0.6346185737976783 - }, - { - "min_score": 0.8, - "max_hits": 5, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 10, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 15, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.8, - "max_hits": 20, - "mean_hit_rate": 47.76119402985074, - "mean_mrr": 0.4017412935323383 - }, - { - "min_score": 0.85, - "max_hits": 5, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 10, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 15, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - }, - { - "min_score": 0.85, - "max_hits": 20, - "mean_hit_rate": 0.0, - "mean_mrr": 0.0 - } - ], - 
"recommended_row": { - "min_score": 0.25, - "max_hits": 15, - "mean_hit_rate": 98.50746268656717, - "mean_mrr": 0.7514383212144407 - }, - "best_min_score_counts": { - "0.25": 30 - }, - "best_max_hits_counts": { - "15": 30 - } - } -] \ No newline at end of file diff --git a/benchmark_results/20260411T172116Z/summary.md b/benchmark_results/20260411T172116Z/summary.md deleted file mode 100644 index b9254232..00000000 --- a/benchmark_results/20260411T172116Z/summary.md +++ /dev/null @@ -1,7 +0,0 @@ -# Repeated Embedding Benchmark Summary - -| Model | Runs | Recommended min_score | Recommended max_hits | Mean hit rate | Mean MRR | -| --- | ---: | ---: | ---: | ---: | ---: | -| text-embedding-3-small | 30 | 0.25 | 20 | 88.06 | 0.6799 | -| text-embedding-3-large | 30 | 0.25 | 15 | 77.61 | 0.6267 | -| text-embedding-ada-002 | 30 | 0.25 | 15 | 98.51 | 0.7514 | From d8710f3abf86923a25743989bd81d34584fba08a Mon Sep 17 00:00:00 2001 From: shreejaykurhade Date: Wed, 15 Apr 2026 21:17:07 +0530 Subject: [PATCH 8/9] Align embedding benchmark with true evals and document 0.7 message-text cutoff --- src/typeagent/aitools/vectorbase.py | 8 +- src/typeagent/knowpro/convsettings.py | 10 +- tests/test_benchmark_embeddings.py | 85 ++++++ tests/test_convsettings.py | 21 ++ tools/benchmark_embeddings.py | 413 ++++++++++++++++++++++++-- 5 files changed, 507 insertions(+), 30 deletions(-) create mode 100644 tests/test_benchmark_embeddings.py create mode 100644 tests/test_convsettings.py diff --git a/src/typeagent/aitools/vectorbase.py b/src/typeagent/aitools/vectorbase.py index 633de054..805ae770 100644 --- a/src/typeagent/aitools/vectorbase.py +++ b/src/typeagent/aitools/vectorbase.py @@ -19,9 +19,9 @@ # These values come from repeated runs of the Adrian Tchaikovsky Episode 53 # search benchmark in `tools/benchmark_embeddings.py`, with raw outputs stored # under `benchmark_results/`. 
-# They are intended as repository defaults for known models, not universal -# truths; callers can always override `min_score` explicitly for their own use -# cases or models. +# They reflect that narrow retrieval benchmark only. Separate end-to-end evals +# have performed better with a stricter 0.7 cutoff in the message-text query +# path, so these values are not an answer-quality recommendation. MODEL_DEFAULT_MIN_SCORES: dict[str, float] = { "text-embedding-3-large": 0.25, "text-embedding-3-small": 0.25, @@ -256,7 +256,7 @@ def deserialize(self, data: NormalizedEmbeddings | None) -> None: return if self._embedding_size == 0: if data.ndim < 2 or data.shape[0] == 0: - # Empty data can't determine size; just clear. + # Empty data — can't determine size; just clear. self.clear() return self._set_embedding_size(data.shape[1]) diff --git a/src/typeagent/knowpro/convsettings.py b/src/typeagent/knowpro/convsettings.py index 9dbf1214..f7e3aaa1 100644 --- a/src/typeagent/knowpro/convsettings.py +++ b/src/typeagent/knowpro/convsettings.py @@ -10,6 +10,9 @@ from ..aitools.vectorbase import TextEmbeddingIndexSettings from .interfaces import IKnowledgeExtractor, IStorageProvider +DEFAULT_RELATED_TERM_MIN_SCORE = 0.85 +DEFAULT_MESSAGE_TEXT_MIN_SCORE = 0.7 + @dataclass class MessageTextIndexSettings: @@ -45,13 +48,16 @@ def __init__( # All settings share the same model, so they share the embedding cache.
model = model or create_embedding_model() self.embedding_model = model - min_score = 0.85 + min_score = DEFAULT_RELATED_TERM_MIN_SCORE self.related_term_index_settings = RelatedTermIndexSettings( TextEmbeddingIndexSettings(model, min_score=min_score, max_matches=50) ) self.thread_settings = TextEmbeddingIndexSettings(model, min_score=min_score) self.message_text_index_settings = MessageTextIndexSettings( - TextEmbeddingIndexSettings(model, min_score=0.7) + # True end-to-end evals have performed better with 0.7 here than + # with the generic low-level VectorBase default from the narrow + # retrieval benchmark. + TextEmbeddingIndexSettings(model, min_score=DEFAULT_MESSAGE_TEXT_MIN_SCORE) ) self.semantic_ref_index_settings = SemanticRefIndexSettings( batch_size=4, # Effectively max concurrency diff --git a/tests/test_benchmark_embeddings.py b/tests/test_benchmark_embeddings.py new file mode 100644 index 00000000..94162928 --- /dev/null +++ b/tests/test_benchmark_embeddings.py @@ -0,0 +1,85 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +import importlib.util +from pathlib import Path + +import pytest + +from typeagent.aitools.model_adapters import create_test_embedding_model + +MODULE_PATH = ( + Path(__file__).resolve().parent.parent / "tools" / "benchmark_embeddings.py" +) +SPEC = importlib.util.spec_from_file_location("benchmark_embeddings", MODULE_PATH) +assert SPEC is not None +assert SPEC.loader is not None +benchmark_embeddings = importlib.util.module_from_spec(SPEC) +SPEC.loader.exec_module(benchmark_embeddings) + +AnswerBenchmarkRow = benchmark_embeddings.AnswerBenchmarkRow +AnswerMetrics = benchmark_embeddings.AnswerMetrics +parse_float_list = benchmark_embeddings.parse_float_list +parse_int_list = benchmark_embeddings.parse_int_list +score_answer_pair = benchmark_embeddings.score_answer_pair +select_best_answer_row = benchmark_embeddings.select_best_answer_row + + +def test_parse_float_list_default_and_custom() -> None: + assert parse_float_list(None) + assert parse_float_list("0.25, 0.7") == [0.25, 0.7] + + +def test_parse_int_list_validates_positive_values() -> None: + assert parse_int_list("5,10") == [5, 10] + + +@pytest.mark.asyncio +async def test_score_answer_pair_exact_match() -> None: + model = create_test_embedding_model() + score = await score_answer_pair(model, ("Python", True), ("Python", True)) + assert score == 1.0 + + +@pytest.mark.asyncio +async def test_score_answer_pair_expected_answer_missing() -> None: + model = create_test_embedding_model() + score = await score_answer_pair(model, ("Python", True), ("No answer", False)) + assert score == 0.0 + + +@pytest.mark.asyncio +async def test_score_answer_pair_expected_no_answer_match() -> None: + model = create_test_embedding_model() + score = await score_answer_pair( + model, + ("No relevant info", False), + ("Still none", False), + ) + assert score == 1.001 + + +def test_select_best_answer_row_prefers_true_eval_metrics() -> None: + weaker = AnswerBenchmarkRow( + min_score=0.25, + max_hits=20, + metrics=AnswerMetrics( + 
mean_score=0.82, + exact_or_near_rate=60.0, + zero_score_rate=12.0, + zero_score_count=6, + ), + ) + stronger = AnswerBenchmarkRow( + min_score=0.7, + max_hits=10, + metrics=AnswerMetrics( + mean_score=0.91, + exact_or_near_rate=75.0, + zero_score_rate=4.0, + zero_score_count=2, + ), + ) + + best = select_best_answer_row([weaker, stronger]) + assert best is stronger diff --git a/tests/test_convsettings.py b/tests/test_convsettings.py new file mode 100644 index 00000000..4b83505c --- /dev/null +++ b/tests/test_convsettings.py @@ -0,0 +1,21 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from typeagent.aitools.model_adapters import create_test_embedding_model +from typeagent.knowpro.convsettings import ( + ConversationSettings, + DEFAULT_MESSAGE_TEXT_MIN_SCORE, + DEFAULT_RELATED_TERM_MIN_SCORE, +) + + +def test_conversation_settings_use_stricter_message_text_cutoff() -> None: + settings = ConversationSettings(model=create_test_embedding_model()) + + assert settings.related_term_index_settings.embedding_index_settings.min_score == ( + DEFAULT_RELATED_TERM_MIN_SCORE + ) + assert settings.thread_settings.min_score == DEFAULT_RELATED_TERM_MIN_SCORE + assert settings.message_text_index_settings.embedding_index_settings.min_score == ( + DEFAULT_MESSAGE_TEXT_MIN_SCORE + ) diff --git a/tools/benchmark_embeddings.py b/tools/benchmark_embeddings.py index 4358ea31..66dcf4aa 100644 --- a/tools/benchmark_embeddings.py +++ b/tools/benchmark_embeddings.py @@ -1,41 +1,78 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Benchmark retrieval settings for known embedding models. +"""Benchmark embedding settings on retrieval-only or true end-to-end evals. -This script evaluates the Adrian Tchaikovsky Episode 53 search dataset in -`tests/testdata/` and reports retrieval quality for combinations of -`min_score` and `max_hits`. 
+This script evaluates combinations of `min_score` and `max_hits` for the +Episode 53 dataset in `tests/testdata/`. -The benchmark is intentionally narrow: -- It only measures retrieval against `messageMatches` ground truth. -- It is meant to help choose repository defaults for known models. -- In practice, `min_score` is the primary library default this informs. -- It does not prove universal "best" settings for every dataset. +Two benchmark modes are supported: +- `answer` (default): run the full slow eval path used by `make eval` +- `retrieval`: run the narrower `messageMatches` retrieval benchmark + +The answer mode is the one to use when choosing settings for better final +answers. The retrieval mode is still useful for quick diagnostics, but it does +not prove that a row is best for end-to-end answer quality. Usage: uv run python tools/benchmark_embeddings.py + uv run python tools/benchmark_embeddings.py --mode retrieval uv run python tools/benchmark_embeddings.py --model openai:text-embedding-3-small """ import argparse import asyncio -from dataclasses import dataclass +from dataclasses import dataclass, replace import json from pathlib import Path from statistics import mean +import time +from typing import Literal from dotenv import load_dotenv +import typechat + +from typeagent.aitools import model_adapters, utils from typeagent.aitools.embeddings import IEmbeddingModel, NormalizedEmbeddings from typeagent.aitools.model_adapters import create_embedding_model from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase +from typeagent.knowpro import ( + answer_response_schema, + answers, + search_query_schema, + searchlang, + secindex, +) +from typeagent.knowpro.convsettings import ConversationSettings +from typeagent.podcasts.podcast import Podcast +from typeagent.storage.memory.convthreads import ConversationThreads +from typeagent.storage.utils import create_storage_provider DEFAULT_MIN_SCORES = [0.25, 0.30, 0.35, 0.40, 0.50, 0.60, 
0.70, 0.75, 0.80, 0.85] DEFAULT_MAX_HITS = [5, 10, 15, 20] DATA_DIR = Path("tests") / "testdata" INDEX_DATA_PATH = DATA_DIR / "Episode_53_AdrianTchaikovsky_index_data.json" +INDEX_PREFIX_PATH = DATA_DIR / "Episode_53_AdrianTchaikovsky_index" SEARCH_RESULTS_PATH = DATA_DIR / "Episode_53_Search_results.json" +ANSWER_RESULTS_PATH = DATA_DIR / "Episode_53_Answer_results.json" +DEFAULT_SEARCH_OPTIONS = searchlang.LanguageSearchOptions( + compile_options=searchlang.LanguageQueryCompileOptions( + exact_scope=False, + verb_scope=True, + term_filter=None, + apply_scope=True, + ), + exact_match=False, + max_message_matches=25, +) +DEFAULT_ANSWER_OPTIONS = answers.AnswerContextOptions( + entities_top_k=50, + topics_top_k=50, + messages_top_k=None, + chunking=None, +) +type BenchmarkMode = Literal["answer", "retrieval"] @dataclass @@ -44,6 +81,13 @@ class SearchQueryCase: expected_matches: list[int] +@dataclass +class AnswerQueryCase: + question: str + expected_answer: str + expected_success: bool + + @dataclass class SearchMetrics: hit_rate: float @@ -51,12 +95,38 @@ class SearchMetrics: @dataclass -class BenchmarkRow: +class AnswerMetrics: + mean_score: float + exact_or_near_rate: float + zero_score_rate: float + zero_score_count: int + + +@dataclass +class RetrievalBenchmarkRow: min_score: float max_hits: int metrics: SearchMetrics +@dataclass +class AnswerBenchmarkRow: + min_score: float + max_hits: int + metrics: AnswerMetrics + + +@dataclass +class TrueEvalContext: + conversation: Podcast + embedding_model: IEmbeddingModel + query_translator: typechat.TypeChatJsonTranslator[search_query_schema.SearchQuery] + answer_translator: typechat.TypeChatJsonTranslator[ + answer_response_schema.AnswerResponse + ] + settings: ConversationSettings + + def parse_float_list(raw: str | None) -> list[float]: if raw is None: return DEFAULT_MIN_SCORES @@ -100,6 +170,27 @@ def load_search_queries(repo_root: Path) -> list[SearchQueryCase]: return cases +def load_answer_queries(repo_root: 
Path) -> list[AnswerQueryCase]: + answer_data = json.loads( + (repo_root / ANSWER_RESULTS_PATH).read_text(encoding="utf-8") + ) + cases: list[AnswerQueryCase] = [] + for item in answer_data: + question = item.get("question") + answer = item.get("answer") + has_no_answer = item.get("hasNoAnswer") + if question is None or answer is None or has_no_answer is None: + continue + cases.append( + AnswerQueryCase( + question=question, + expected_answer=answer, + expected_success=not has_no_answer, + ) + ) + return cases + + async def build_vector_base( model_spec: str | None, message_texts: list[str], @@ -154,7 +245,147 @@ def evaluate_search_queries( ) -def select_best_row(rows: list[BenchmarkRow]) -> BenchmarkRow: +async def create_true_eval_context( + repo_root: Path, + model_spec: str | None, +) -> TrueEvalContext: + embedding_model = create_embedding_model(model_spec) + settings = ConversationSettings(model=embedding_model) + settings.storage_provider = await create_storage_provider( + settings.message_text_index_settings, + settings.related_term_index_settings, + message_type=None, + ) + + raw_data = Podcast._read_conversation_data_from_file( + str(repo_root / INDEX_PREFIX_PATH) + ) + raw_data.pop("messageIndexData", None) + raw_data.pop("relatedTermsIndexData", None) + + conversation = await Podcast.create(settings) + await conversation.deserialize(raw_data) + await secindex.build_secondary_indexes(conversation, settings) + + threads = ( + conversation.secondary_indexes.threads + if conversation.secondary_indexes is not None + else None + ) + if isinstance(threads, ConversationThreads) and threads.threads: + await threads.build_index() + + chat_model = model_adapters.create_chat_model() + query_translator = utils.create_translator( + chat_model, search_query_schema.SearchQuery + ) + answer_translator = utils.create_translator( + chat_model, + answer_response_schema.AnswerResponse, + ) + + return TrueEvalContext( + conversation=conversation, + 
embedding_model=embedding_model, + query_translator=query_translator, + answer_translator=answer_translator, + settings=settings, + ) + + +def answer_response_to_eval_tuple( + response: answer_response_schema.AnswerResponse, +) -> tuple[str, bool]: + match response.type: + case "Answered": + return response.answer or "", True + case "NoAnswer": + return response.why_no_answer or "", False + case _: + raise ValueError(f"Unexpected answer type: {response.type}") + + +async def score_answer_pair( + embedding_model: IEmbeddingModel, + expected: tuple[str, bool], + actual: tuple[str, bool], +) -> float: + expected_text, expected_success = expected + actual_text, actual_success = actual + + if expected_success != actual_success: + return 0.000 if expected_success else 0.001 + if not actual_success: + return 1.001 + if expected_text == actual_text: + return 1.000 + if expected_text.lower() == actual_text.lower(): + return 0.999 + + embeddings = await embedding_model.get_embeddings([expected_text, actual_text]) + assert embeddings.shape[0] == 2, "Expected two embeddings" + return float(embeddings[0] @ embeddings[1]) + + +async def evaluate_answer_queries( + context: TrueEvalContext, + query_cases: list[AnswerQueryCase], + min_score: float, + max_hits: int, +) -> AnswerMetrics: + context.settings.message_text_index_settings.embedding_index_settings.min_score = ( + min_score + ) + search_options = replace(DEFAULT_SEARCH_OPTIONS, max_message_matches=max_hits) + + scores: list[float] = [] + total = len(query_cases) + started_at = time.perf_counter() + for index, case in enumerate(query_cases, start=1): + if index == 1 or index % 5 == 0 or index == total: + elapsed = time.perf_counter() - started_at + print( + f" Question {index}/{total} " + f"(elapsed {elapsed:.1f}s): {case.question}", + flush=True, + ) + result = await searchlang.search_conversation_with_language( + context.conversation, + context.query_translator, + case.question, + search_options, + ) + if 
isinstance(result, typechat.Failure): + actual = (f"Search failed: {result.message}", False) + else: + _, combined_answer = await answers.generate_answers( + context.answer_translator, + result.value, + context.conversation, + case.question, + options=DEFAULT_ANSWER_OPTIONS, + ) + actual = answer_response_to_eval_tuple(combined_answer) + + expected = (case.expected_answer, case.expected_success) + scores.append( + await score_answer_pair(context.embedding_model, expected, actual) + ) + + zero_score_count = sum(1 for score in scores if score <= 0.0) + exact_or_near_count = sum(1 for score in scores if score >= 0.97) + + return AnswerMetrics( + mean_score=mean(scores), + exact_or_near_rate=(exact_or_near_count / len(scores)) * 100, + zero_score_rate=(zero_score_count / len(scores)) * 100, + zero_score_count=zero_score_count, + ) + + +def select_best_retrieval_row( + rows: list[RetrievalBenchmarkRow], +) -> RetrievalBenchmarkRow: return max( rows, key=lambda row: ( @@ -166,9 +397,22 @@ def select_best_row(rows: list[BenchmarkRow]) -> BenchmarkRow: ) -def print_rows(rows: list[BenchmarkRow]) -> None: +def select_best_answer_row(rows: list[AnswerBenchmarkRow]) -> AnswerBenchmarkRow: + return max( + rows, + key=lambda row: ( + row.metrics.mean_score, + -row.metrics.zero_score_count, + row.metrics.exact_or_near_rate, + -row.min_score, + -row.max_hits, + ), + ) + + +def print_retrieval_rows(rows: list[RetrievalBenchmarkRow]) -> None: print("=" * 72) - print("SEARCH BENCHMARK (Episode 53 messageMatches ground truth)") + print("RETRIEVAL BENCHMARK (Episode 53 messageMatches ground truth)") print("=" * 72) print(f"{'Min Score':<12} | {'Max Hits':<10} | {'Hit Rate (%)':<15} | {'MRR':<10}") print("-" * 65) @@ -181,15 +425,33 @@ def print_rows(rows: list[BenchmarkRow]) -> None: print("-" * 65) -async def run_benchmark( +def print_answer_rows(rows: list[AnswerBenchmarkRow]) -> None: + print("=" * 94) + print("TRUE EVAL BENCHMARK (Episode 53 full answer pipeline)") + print("=" * 
94) + print( + f"{'Min Score':<12} | {'Max Hits':<10} | {'Mean Score':<12} | " + f"{'Exact/Near (%)':<15} | {'Zero Scores':<12} | {'Zero Rate (%)':<14}" + ) + print("-" * 94) + for row in rows: + print( + f"{row.min_score:<12.2f} | {row.max_hits:<10d} | " + f"{row.metrics.mean_score:<12.4f} | " + f"{row.metrics.exact_or_near_rate:<15.2f} | " + f"{row.metrics.zero_score_count:<12d} | " + f"{row.metrics.zero_score_rate:<14.2f}" + ) + print("-" * 94) + + +async def run_retrieval_benchmark( + repo_root: Path, model_spec: str | None, min_scores: list[float], max_hits_values: list[int], batch_size: int, ) -> None: - load_dotenv() - - repo_root = Path(__file__).resolve().parent.parent message_texts = load_message_texts(repo_root) query_cases = load_search_queries(repo_root) if not query_cases: @@ -197,7 +459,7 @@ async def run_benchmark( model, vector_base = await build_vector_base(model_spec, message_texts, batch_size) query_embeddings = await model.get_embeddings([case.query for case in query_cases]) - rows: list[BenchmarkRow] = [] + rows: list[RetrievalBenchmarkRow] = [] for min_score in min_scores: for max_hits in max_hits_values: metrics = evaluate_search_queries( @@ -207,27 +469,122 @@ async def run_benchmark( min_score, max_hits, ) - rows.append(BenchmarkRow(min_score, max_hits, metrics)) + rows.append(RetrievalBenchmarkRow(min_score, max_hits, metrics)) + print(f"Mode: retrieval") print(f"Model: {model.model_name}") print(f"Messages indexed: {len(message_texts)}") print(f"Queries evaluated: {len(query_cases)}") print() - print_rows(rows) + print_retrieval_rows(rows) - best_row = select_best_row(rows) + best_row = select_best_retrieval_row(rows) print() - print("Best-scoring benchmark row:") + print("Best-scoring retrieval row:") print(f" min_score={best_row.min_score:.2f}") print(f" max_hits={best_row.max_hits}") print(f" hit_rate={best_row.metrics.hit_rate:.2f}%") print(f" mrr={best_row.metrics.mean_reciprocal_rank:.4f}") +async def run_answer_benchmark( + 
repo_root: Path, + model_spec: str | None, + min_scores: list[float], + max_hits_values: list[int], + limit: int, +) -> None: + query_cases = load_answer_queries(repo_root) + if not query_cases: + raise ValueError("No answer eval cases found in the dataset") + if limit > 0: + query_cases = query_cases[:limit] + + context = await create_true_eval_context(repo_root, model_spec) + + rows: list[AnswerBenchmarkRow] = [] + for min_score in min_scores: + for max_hits in max_hits_values: + row_started_at = time.perf_counter() + print( + f"Evaluating min_score={min_score:.2f}, max_hits={max_hits}...", + flush=True, + ) + metrics = await evaluate_answer_queries( + context, + query_cases, + min_score, + max_hits, + ) + rows.append(AnswerBenchmarkRow(min_score, max_hits, metrics)) + row_elapsed = time.perf_counter() - row_started_at + print( + " Completed row: " + f"mean_score={metrics.mean_score:.4f}, " + f"zero_scores={metrics.zero_score_count}, " + f"exact_or_near_rate={metrics.exact_or_near_rate:.2f}% " + f"in {row_elapsed:.1f}s", + flush=True, + ) + + print() + print(f"Mode: answer") + print(f"Model: {context.embedding_model.model_name}") + print(f"Queries evaluated: {len(query_cases)}") + print() + print_answer_rows(rows) + + best_row = select_best_answer_row(rows) + print() + print("Best-scoring true-eval row:") + print(f" min_score={best_row.min_score:.2f}") + print(f" max_hits={best_row.max_hits}") + print(f" mean_score={best_row.metrics.mean_score:.4f}") + print(f" exact_or_near_rate={best_row.metrics.exact_or_near_rate:.2f}%") + print(f" zero_score_count={best_row.metrics.zero_score_count}") + print(f" zero_score_rate={best_row.metrics.zero_score_rate:.2f}%") + + +async def run_benchmark( + mode: BenchmarkMode, + model_spec: str | None, + min_scores: list[float], + max_hits_values: list[int], + batch_size: int, + limit: int, +) -> None: + load_dotenv() + repo_root = Path(__file__).resolve().parent.parent + + if mode == "retrieval": + await run_retrieval_benchmark( + 
repo_root, + model_spec, + min_scores, + max_hits_values, + batch_size, + ) + else: + await run_answer_benchmark( + repo_root, + model_spec, + min_scores, + max_hits_values, + limit, + ) + + def main() -> None: parser = argparse.ArgumentParser( description="Benchmark retrieval settings for an embedding model." ) + parser.add_argument( + "--mode", + type=str, + choices=["answer", "retrieval"], + default="answer", + help="Use 'answer' for the slow true eval path or 'retrieval' for the narrow messageMatches benchmark.", + ) parser.add_argument( "--model", type=str, @@ -250,16 +607,24 @@ def main() -> None: "--batch-size", type=int, default=16, - help="Batch size used when building the index.", + help="Batch size used when building the retrieval-only benchmark index.", + ) + parser.add_argument( + "--limit", + type=int, + default=0, + help="Number of true-eval questions to run (default: all). Ignored in retrieval mode.", ) args = parser.parse_args() asyncio.run( run_benchmark( + mode=args.mode, model_spec=args.model, min_scores=parse_float_list(args.min_scores), max_hits_values=parse_int_list(args.max_hits), batch_size=args.batch_size, + limit=args.limit, ) ) From 9a1315d18d66e42def41be9e57e764ff303cca97 Mon Sep 17 00:00:00 2001 From: shreejaykurhade Date: Thu, 16 Apr 2026 12:15:05 +0530 Subject: [PATCH 9/9] Update embedding benchmarks for true eval tuning and fix repeated benchmark imports --- tools/repeat_embedding_benchmarks.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/repeat_embedding_benchmarks.py b/tools/repeat_embedding_benchmarks.py index 5ac74062..48ca8d12 100644 --- a/tools/repeat_embedding_benchmarks.py +++ b/tools/repeat_embedding_benchmarks.py @@ -24,7 +24,6 @@ from dotenv import load_dotenv from benchmark_embeddings import ( - BenchmarkRow, build_vector_base, DEFAULT_MAX_HITS, DEFAULT_MIN_SCORES, @@ -33,7 +32,8 @@ load_search_queries, parse_float_list, parse_int_list, - select_best_row, + RetrievalBenchmarkRow, 
+ select_best_retrieval_row, ) DEFAULT_MODELS = [ @@ -67,7 +67,7 @@ def sanitize_model_name(model_spec: str) -> str: return model_spec.replace(":", "__").replace("/", "_").replace("\\", "_") -def benchmark_row_to_run_row(row: BenchmarkRow) -> RunRow: +def benchmark_row_to_run_row(row: RetrievalBenchmarkRow) -> RunRow: return RunRow( min_score=row.min_score, max_hits=row.max_hits, @@ -176,7 +176,7 @@ async def run_single_model_benchmark( query_embeddings = await model.get_embeddings( [case.query for case in query_cases] ) - benchmark_rows: list[BenchmarkRow] = [] + benchmark_rows: list[RetrievalBenchmarkRow] = [] for min_score in min_scores: for max_hits in max_hits_values: metrics = evaluate_search_queries( @@ -186,9 +186,11 @@ async def run_single_model_benchmark( min_score, max_hits, ) - benchmark_rows.append(BenchmarkRow(min_score, max_hits, metrics)) + benchmark_rows.append( + RetrievalBenchmarkRow(min_score, max_hits, metrics) + ) - best_row = select_best_row(benchmark_rows) + best_row = select_best_retrieval_row(benchmark_rows) run_result = RunResult( run_index=run_index, model_spec=model_spec,