27 changes: 24 additions & 3 deletions src/typeagent/aitools/vectorbase.py
@@ -13,6 +13,25 @@
)
from .model_adapters import create_embedding_model

DEFAULT_MIN_SCORE = 0.25

# Empirical defaults for built-in OpenAI embedding models.
# These values come from repeated runs of the Adrian Tchaikovsky Episode 53
# search benchmark in `tools/benchmark_embeddings.py`, with raw outputs stored
# under `benchmark_results/`.
# They reflect that narrow retrieval benchmark only. Separate end-to-end evals
# have performed better with a stricter 0.7 cutoff in the message-text query
# path, so these values are not an answer-quality recommendation.
MODEL_DEFAULT_MIN_SCORES: dict[str, float] = {
    "text-embedding-3-large": 0.25,
    "text-embedding-3-small": 0.25,
    "text-embedding-ada-002": 0.25,
}


def get_default_min_score(model_name: str) -> float:
    return MODEL_DEFAULT_MIN_SCORES.get(model_name, DEFAULT_MIN_SCORE)


@dataclass
class ScoredInt:
@@ -34,10 +53,12 @@ def __init__(
        max_matches: int | None = None,
        batch_size: int | None = None,
    ):
        self.min_score = min_score if min_score is not None else 0.85
        self.max_matches = max_matches if max_matches and max_matches >= 1 else None
        self.batch_size = batch_size if batch_size and batch_size >= 1 else 8
        self.embedding_model = embedding_model or create_embedding_model()
        model_name = getattr(self.embedding_model, "model_name", "")
        default_min_score = get_default_min_score(model_name)
        self.min_score = min_score if min_score is not None else default_min_score
        self.max_matches = max_matches  # None means no limit
        self.batch_size = batch_size if batch_size and batch_size >= 1 else 8


class VectorBase:
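A minimal sketch of how the new default resolution is expected to behave, assuming only what the diff above shows (`get_default_min_score` and `DEFAULT_MIN_SCORE` from `vectorbase.py`; the custom model name below is made up for illustration):

```python
from typeagent.aitools.vectorbase import DEFAULT_MIN_SCORE, get_default_min_score

# Built-in OpenAI embedding models resolve to the benchmarked 0.25 default.
assert get_default_min_score("text-embedding-3-small") == 0.25

# Any other model name (this one is hypothetical) falls back to the generic
# default; TextEmbeddingIndexSettings only applies the resolved value when the
# caller does not pass an explicit min_score.
assert get_default_min_score("my-custom-embedding-model") == DEFAULT_MIN_SCORE
```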
10 changes: 8 additions & 2 deletions src/typeagent/knowpro/convsettings.py
@@ -10,6 +10,9 @@
from ..aitools.vectorbase import TextEmbeddingIndexSettings
from .interfaces import IKnowledgeExtractor, IStorageProvider

DEFAULT_RELATED_TERM_MIN_SCORE = 0.85
DEFAULT_MESSAGE_TEXT_MIN_SCORE = 0.7


@dataclass
class MessageTextIndexSettings:
@@ -45,13 +48,16 @@ def __init__(
        # All settings share the same model, so they share the embedding cache.
        model = model or create_embedding_model()
        self.embedding_model = model
        min_score = 0.85
        min_score = DEFAULT_RELATED_TERM_MIN_SCORE
        self.related_term_index_settings = RelatedTermIndexSettings(
            TextEmbeddingIndexSettings(model, min_score=min_score, max_matches=50)
        )
        self.thread_settings = TextEmbeddingIndexSettings(model, min_score=min_score)
        self.message_text_index_settings = MessageTextIndexSettings(
            TextEmbeddingIndexSettings(model, min_score=0.7)
            # True end-to-end evals have performed better with 0.7 here than
            # with the generic low-level VectorBase default from the narrow
            # retrieval benchmark.
            TextEmbeddingIndexSettings(model, min_score=DEFAULT_MESSAGE_TEXT_MIN_SCORE)
        )
        self.semantic_ref_index_settings = SemanticRefIndexSettings(
            batch_size=4,  # Effectively max concurrency
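For context, a short sketch of how the two new constants surface through ConversationSettings (this mirrors the new test in tests/test_convsettings.py below and assumes nothing beyond the diffs in this PR):

```python
from typeagent.aitools.model_adapters import create_test_embedding_model
from typeagent.knowpro.convsettings import (
    ConversationSettings,
    DEFAULT_MESSAGE_TEXT_MIN_SCORE,
    DEFAULT_RELATED_TERM_MIN_SCORE,
)

settings = ConversationSettings(model=create_test_embedding_model())

# Related-term and thread indexes keep the stricter 0.85 cutoff, while the
# message text index is pinned to 0.7 rather than inheriting the per-model
# VectorBase default derived from the retrieval benchmark.
assert settings.thread_settings.min_score == DEFAULT_RELATED_TERM_MIN_SCORE
assert (
    settings.message_text_index_settings.embedding_index_settings.min_score
    == DEFAULT_MESSAGE_TEXT_MIN_SCORE
)
```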
85 changes: 85 additions & 0 deletions tests/test_benchmark_embeddings.py
@@ -0,0 +1,85 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import importlib.util
from pathlib import Path

import pytest

from typeagent.aitools.model_adapters import create_test_embedding_model

MODULE_PATH = (
    Path(__file__).resolve().parent.parent / "tools" / "benchmark_embeddings.py"
)
SPEC = importlib.util.spec_from_file_location("benchmark_embeddings", MODULE_PATH)
assert SPEC is not None
assert SPEC.loader is not None
benchmark_embeddings = importlib.util.module_from_spec(SPEC)
SPEC.loader.exec_module(benchmark_embeddings)

AnswerBenchmarkRow = benchmark_embeddings.AnswerBenchmarkRow
AnswerMetrics = benchmark_embeddings.AnswerMetrics
parse_float_list = benchmark_embeddings.parse_float_list
parse_int_list = benchmark_embeddings.parse_int_list
score_answer_pair = benchmark_embeddings.score_answer_pair
select_best_answer_row = benchmark_embeddings.select_best_answer_row


def test_parse_float_list_default_and_custom() -> None:
    assert parse_float_list(None)
    assert parse_float_list("0.25, 0.7") == [0.25, 0.7]


def test_parse_int_list_validates_positive_values() -> None:
    assert parse_int_list("5,10") == [5, 10]


@pytest.mark.asyncio
async def test_score_answer_pair_exact_match() -> None:
    model = create_test_embedding_model()
    score = await score_answer_pair(model, ("Python", True), ("Python", True))
    assert score == 1.0


@pytest.mark.asyncio
async def test_score_answer_pair_expected_answer_missing() -> None:
    model = create_test_embedding_model()
    score = await score_answer_pair(model, ("Python", True), ("No answer", False))
    assert score == 0.0


@pytest.mark.asyncio
async def test_score_answer_pair_expected_no_answer_match() -> None:
    model = create_test_embedding_model()
    score = await score_answer_pair(
        model,
        ("No relevant info", False),
        ("Still none", False),
    )
    assert score == 1.0


def test_select_best_answer_row_prefers_true_eval_metrics() -> None:
    weaker = AnswerBenchmarkRow(
        min_score=0.25,
        max_hits=20,
        metrics=AnswerMetrics(
            mean_score=0.82,
            exact_or_near_rate=60.0,
            zero_score_rate=12.0,
            zero_score_count=6,
        ),
    )
    stronger = AnswerBenchmarkRow(
        min_score=0.7,
        max_hits=10,
        metrics=AnswerMetrics(
            mean_score=0.91,
            exact_or_near_rate=75.0,
            zero_score_rate=4.0,
            zero_score_count=2,
        ),
    )

    best = select_best_answer_row([weaker, stronger])
    assert best is stronger
21 changes: 21 additions & 0 deletions tests/test_convsettings.py
@@ -0,0 +1,21 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from typeagent.aitools.model_adapters import create_test_embedding_model
from typeagent.knowpro.convsettings import (
    ConversationSettings,
    DEFAULT_MESSAGE_TEXT_MIN_SCORE,
    DEFAULT_RELATED_TERM_MIN_SCORE,
)


def test_conversation_settings_use_stricter_message_text_cutoff() -> None:
    settings = ConversationSettings(model=create_test_embedding_model())

    assert settings.related_term_index_settings.embedding_index_settings.min_score == (
        DEFAULT_RELATED_TERM_MIN_SCORE
    )
    assert settings.thread_settings.min_score == DEFAULT_RELATED_TERM_MIN_SCORE
    assert settings.message_text_index_settings.embedding_index_settings.min_score == (
        DEFAULT_MESSAGE_TEXT_MIN_SCORE
    )