Skip to content

Commit 0eeafe7

Browse files
committed
fix: anchor first template literal at position 0, not rightmost occurrence
The R->L scan used rfind to locate the literal preceding a variable. When that literal is the first atom of the template and its text also appears inside the variable's value, rfind lands on the occurrence inside the value rather than at position 0, leaving unconsumed characters at the start of the URI and returning None.

Example:

    UriTemplate.parse("prefix-{id}").match("prefix-prefix-123")
    # returned None; regex returns {'id': 'prefix-123'}

For templates without a greedy variable, the atom sequence IS the whole template, so atoms[0] is positionally fixed at URI position 0. _scan_suffix now takes an anchored flag: when set, the first-atom literal anchors at 0 rather than searching via rfind.

Also: adjacent captures now skip the stop-char scan entirely, since its result was discarded (start = pos). This drops the worst case from O(n*v) to O(n + v) for the pathological all-adjacent-vars case (497ms -> 2ms for 256 vars against a 64KB URI). The module docstring now states the overall complexity accurately: O(n·v) in general, replacing the previous claim of O(n) regardless of template structure.
1 parent ba784d3 commit 0eeafe7

File tree

2 files changed

+60
-10
lines changed

2 files changed

+60
-10
lines changed

src/mcp/shared/uri_template.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@
1313
------------------
1414
1515
Matching is not specified by RFC 6570 (§1.4 explicitly defers to regex
16-
languages). This implementation uses a linear-time two-ended scan that
17-
never backtracks, so match time is O(n) in URI length regardless of
18-
template structure.
16+
languages). This implementation uses a two-ended scan that never
17+
backtracks: match time is O(n·v) where n is URI length and v is the
18+
number of template variables. Realistic templates have v < 10, making
19+
this effectively linear; there is no input that produces
20+
superpolynomial time.
1921
2022
A template may contain **at most one multi-segment variable** —
2123
``{+var}``, ``{#var}``, or an explode-modified variable (``{/var*}``,
@@ -521,7 +523,11 @@ def _scan(self, uri: str) -> dict[str, str | list[str]] | None:
521523
# vars take the minimum needed (rfind for the preceding literal).
522524
# This matches regex greedy-first semantics for templates without
523525
# a greedy var, and minimises the suffix claim when one exists.
524-
suffix = _scan_suffix(self._suffix, uri, n)
526+
# When there is no greedy var the suffix IS the whole template,
527+
# so its first atom must anchor at position 0 rather than
528+
# searching via rfind.
529+
anchored = self._greedy is None
530+
suffix = _scan_suffix(self._suffix, uri, n, anchored=anchored)
525531
if suffix is None:
526532
return None
527533
suffix_result, suffix_start = suffix
@@ -903,13 +909,19 @@ def _partition_greedy(atoms: list[_Atom], template: str) -> tuple[list[_Atom], V
903909
return atoms[:greedy_idx], greedy.var, atoms[greedy_idx + 1 :]
904910

905911

906-
def _scan_suffix(atoms: Sequence[_Atom], uri: str, end: int) -> tuple[dict[str, str | list[str]], int] | None:
912+
def _scan_suffix(
913+
atoms: Sequence[_Atom], uri: str, end: int, *, anchored: bool
914+
) -> tuple[dict[str, str | list[str]], int] | None:
907915
"""Scan atoms right-to-left from ``end``, returning captures and start position.
908916
909917
Each bounded variable takes the minimum span that lets its
910918
preceding literal match (found via ``rfind``), which makes the
911919
*first* variable in template order greedy — identical to Python
912920
regex semantics for a sequence of greedy groups.
921+
922+
When ``anchored`` is true the atom sequence is the entire template
923+
(no greedy variable), so ``atoms[0]`` must match at URI position 0
924+
rather than at its rightmost occurrence.
913925
"""
914926
result: dict[str, str | list[str]] = {}
915927
pos = end
@@ -947,6 +959,14 @@ def _scan_suffix(atoms: Sequence[_Atom], uri: str, end: int) -> tuple[dict[str,
947959
i -= 1
948960
continue
949961

962+
if isinstance(prev, _Cap):
963+
# Adjacent capture with no literal anchor: this (later)
964+
# var takes nothing, the earlier var takes the span. Skip
965+
# the stop-char scan entirely since the result is unused.
966+
result[var.name] = ""
967+
i -= 1
968+
continue
969+
950970
# Earliest valid start: the var cannot extend left past any
951971
# stop-char, so scan backward to find that boundary.
952972
earliest = pos
@@ -955,17 +975,21 @@ def _scan_suffix(atoms: Sequence[_Atom], uri: str, end: int) -> tuple[dict[str,
955975

956976
if prev is None:
957977
start = earliest
958-
elif isinstance(prev, _Lit):
978+
elif anchored and i - 1 == 0:
979+
# First atom of the whole template: positionally fixed at
980+
# 0, not rightmost occurrence. rfind would land inside the
981+
# value when the literal repeats there (e.g. "prefix-{id}"
982+
# against "prefix-prefix-123").
983+
start = len(prev.text)
984+
if start < earliest or start > pos:
985+
return None
986+
else:
959987
# Rightmost occurrence of the preceding literal whose end
960988
# falls within the var's valid range.
961989
idx = uri.rfind(prev.text, 0, pos)
962990
if idx == -1 or idx + len(prev.text) < earliest:
963991
return None
964992
start = idx + len(prev.text)
965-
else:
966-
# Adjacent capture with no literal anchor: this (later)
967-
# var takes nothing, the earlier var takes the span.
968-
start = pos
969993

970994
result[var.name] = unquote(uri[start:pos])
971995
pos = start

tests/shared/test_uri_template.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,32 @@ def test_match_adjacent_vars_disambiguated_by_literal():
539539
assert t.match("foo-bar") == {"a": "foo", "b": "bar"}
540540

541541

542+
@pytest.mark.parametrize(
543+
("template", "variables"),
544+
[
545+
# Leading literal appears inside the value: must anchor at
546+
# position 0, not rfind to the rightmost occurrence.
547+
("prefix-{id}", {"id": "prefix-123"}),
548+
("u{s}", {"s": "xu"}),
549+
("_{x}", {"x": "_"}),
550+
("~{v}~", {"v": "~~~"}),
551+
# Multi-occurrence with two vars: rfind correctly picks the
552+
# rightmost literal BETWEEN vars, first literal anchors at 0.
553+
("L{a}L{b}", {"a": "xLy", "b": "z"}),
554+
# Leading literal with stop-char: earliest bound still applies.
555+
("api/{name}", {"name": "api"}),
556+
],
557+
)
558+
def test_match_leading_literal_appears_in_value(template: str, variables: dict[str, str]):
559+
# Regression: the R->L scan used rfind for the preceding literal,
560+
# which lands inside the value when the template's leading literal
561+
# is a substring of the expanded value. The first atom must anchor
562+
# at position 0, not search.
563+
t = UriTemplate.parse(template)
564+
uri = t.expand(variables)
565+
assert t.match(uri) == variables
566+
567+
542568
@pytest.mark.parametrize(
543569
("template", "uri", "expected"),
544570
[

0 commit comments

Comments
 (0)