Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions backend/app/adapters/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,11 @@
from backend.app.adapters.registry import (
SUPPORTED_ENVIRONMENTS as SUPPORTED_ENVIRONMENTS,
)
from backend.app.domain.study.normalizers import DEFAULT_NORMALIZER, normalize
from backend.app.domain.study.normalizers import (
DEFAULT_NORMALIZER,
normalize_pipeline,
steps_for_label,
)

logger = structlog.get_logger(__name__)

Expand Down Expand Up @@ -546,12 +550,14 @@ def render(

from backend.app.domain.query.render import render_template

# FR-3 pre-render hook (feat_query_normalization_tuning): pop the
# reserved query_normalizer off a LOCAL copy (never mutate the
# caller's dict) and apply it to query_text before it enters the
# Jinja context. Default "none" is a verbatim pass-through, so
# templates that never declare the key are unaffected. normalize()
# raises ValueError on an out-of-allowlist choice, which the existing
# Pre-render hook: pop the reserved query_normalizer off a LOCAL copy
# (never mutate the caller's dict) and apply it to query_text before it
# enters the Jinja context. Default "none" is a verbatim pass-through.
# The value is either a Phase-1 bundle string OR a typed-pipeline
# powerset label (feat_query_normalizer_typed_pipeline Story 1.4) — both
# resolve through steps_for_label -> normalize_pipeline, so a winning
# non-bundle label (e.g. "lowercase+strip_punctuation") applies correctly
# instead of raising. Bad tokens raise ValueError, which the existing
# trial-failure path subsumes.
local_params = dict(params)
choice = local_params.pop("query_normalizer", DEFAULT_NORMALIZER)
Expand All @@ -560,7 +566,7 @@ def render(
# so it fails through the existing render-failure path.
if not isinstance(choice, str):
raise ValueError(f"unknown normalizer: {choice!r}")
normalized_query_text = normalize(query_text, choice)
normalized_query_text = normalize_pipeline(query_text, steps_for_label(choice))

# query_normalizer is consumed here, so exclude it from the declared-vs-
# supplied check — it lives in declared_params but never in local_params.
Expand Down
19 changes: 12 additions & 7 deletions backend/app/adapters/solr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1113,18 +1113,23 @@ def render(
from jinja2 import UndefinedError

from backend.app.domain.query.render import render_template
from backend.app.domain.study.normalizers import DEFAULT_NORMALIZER, normalize
from backend.app.domain.study.normalizers import (
DEFAULT_NORMALIZER,
normalize_pipeline,
steps_for_label,
)

# FR-4 pre-render hook (identical algorithm to ElasticAdapter, FR-3):
# pop the reserved query_normalizer off a LOCAL copy and apply it to
# query_text before context construction. Runs BEFORE the LTR
# pre-flight + _pivot_to_solr_params steps below, which consume the
# rendered dict and are therefore downstream of normalization.
# Pre-render hook (identical algorithm to ElasticAdapter): pop the
# reserved query_normalizer off a LOCAL copy and apply it to query_text
# before context construction. Runs BEFORE the LTR pre-flight +
# _pivot_to_solr_params steps below. The value is a Phase-1 bundle string
# OR a typed-pipeline powerset label (feat_query_normalizer_typed_pipeline
# Story 1.4); both resolve through steps_for_label -> normalize_pipeline.
local_params = dict(params)
choice = local_params.pop("query_normalizer", DEFAULT_NORMALIZER)
if not isinstance(choice, str):
raise ValueError(f"unknown normalizer: {choice!r}")
normalized_query_text = normalize(query_text, choice)
normalized_query_text = normalize_pipeline(query_text, steps_for_label(choice))

# query_normalizer is consumed here; exclude it from the declared-vs-
# supplied check (declared but never present in local_params).
Expand Down
4 changes: 4 additions & 0 deletions backend/app/api/v1/studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
from backend.app.domain.study.normalizers import (
NormalizerChoiceInvalidError,
NormalizerParamShapeError,
NormalizerPipelineMisplacedError,
validate_normalizer_reservation,
)
from backend.app.domain.study.search_space import (
Expand Down Expand Up @@ -325,6 +326,9 @@ async def create_study(
# lookup, kept before the query_set/judgment_list resolution below.
try:
validate_normalizer_reservation(validated_space)
except NormalizerPipelineMisplacedError as exc:
# FR-8 (D-8): reuses the existing INVALID_SEARCH_SPACE error code — no new error code is added.
raise _err(400, "INVALID_SEARCH_SPACE", str(exc), False) from exc
except NormalizerChoiceInvalidError as exc:
raise _err(400, "NORMALIZER_CHOICE_INVALID", str(exc), False) from exc
except NormalizerParamShapeError as exc:
Expand Down
9 changes: 8 additions & 1 deletion backend/app/domain/study/baseline_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,12 @@

from backend.app.db import repo
from backend.app.db.models import Study
from backend.app.domain.study.normalizers import DEFAULT_NORMALIZER
from backend.app.domain.study.search_space import (
CategoricalParam,
FloatParam,
IntParam,
NormalizerPipelineParam,
SearchSpace,
)

Expand Down Expand Up @@ -184,15 +186,20 @@ def _resolve_from_template_defaults(study: Study) -> dict[str, Any] | None:
return result


def _midpoint(param: FloatParam | IntParam | CategoricalParam) -> Any:
def _midpoint(param: FloatParam | IntParam | CategoricalParam | NormalizerPipelineParam) -> Any:
"""Deterministic mid-of-range per parameter kind.

- ``FloatParam`` with ``log=False``: arithmetic mean ``(low + high) / 2``.
- ``FloatParam`` with ``log=True``: geometric mean ``sqrt(low * high)``.
- ``IntParam``: integer division ``(low + high) // 2``.
- ``CategoricalParam``: ``choices[(len(choices) - 1) // 2]`` (lower
midpoint for even-cardinality lists).
- ``NormalizerPipelineParam``: the ``"none"`` label (empty-pipeline /
un-normalized baseline) — always a member of the param's powerset
label space, and consistent with ``compute_default_params`` (FR-7).
"""
if isinstance(param, NormalizerPipelineParam):
return DEFAULT_NORMALIZER
if isinstance(param, FloatParam):
if param.log:
return math.sqrt(param.low * param.high)
Expand Down
Loading
Loading