From 5f5e72b87a4d6b3954aa511b902105a3fba99ad1 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:01:06 +0000 Subject: [PATCH 01/47] feat: add RFC 6570 URI template utility with bidirectional support Adds `mcp.shared.uri_template.UriTemplate`, a standalone utility for parsing, expanding, and matching RFC 6570 URI templates. Supports Levels 1-3 fully plus path-style explode (`{/var*}`, `{.var*}`, `{;var*}`). Matching enforces structural integrity: decoded values are validated against their operator's permitted character set. A simple `{var}` whose decoded value contains `/` is rejected, preventing `%2F` smuggling while still allowing `/` in `{+var}` where it is intentional. This is the operator-aware generalization of the post-decode check for encoded path separators. Also fixes the existing regex-escaping gap where template literals like `.` were treated as regex wildcards. The utility lives in `shared/` so it is usable from both client code (expand) and server code (match), including lowlevel server implementations that do not use MCPServer. --- src/mcp/shared/uri_template.py | 603 ++++++++++++++++++++++++++++++ tests/shared/test_uri_template.py | 401 ++++++++++++++++++++ 2 files changed, 1004 insertions(+) create mode 100644 src/mcp/shared/uri_template.py create mode 100644 tests/shared/test_uri_template.py diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py new file mode 100644 index 000000000..668db3e8f --- /dev/null +++ b/src/mcp/shared/uri_template.py @@ -0,0 +1,603 @@ +"""RFC 6570 URI Templates with bidirectional support. + +Provides both expansion (template + variables → URI) and matching +(URI → variables). RFC 6570 only specifies expansion; matching is the +inverse operation needed by MCP servers to route ``resources/read`` +requests to handlers. + +Supports Levels 1-3 fully, plus Level 4 explode modifier for path-like +operators (``{/var*}``, ``{.var*}``, ``{;var*}``). 
The Level 4 prefix +modifier (``{var:N}``) and query-explode (``{?var*}``) are not supported. +""" + +from __future__ import annotations + +import re +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, field +from typing import Literal, cast +from urllib.parse import quote, unquote + +__all__ = ["InvalidUriTemplate", "Operator", "UriTemplate", "Variable"] + +Operator = Literal["", "+", "#", ".", "/", ";", "?", "&"] + +_OPERATORS: frozenset[str] = frozenset({"+", "#", ".", "/", ";", "?", "&"}) + +# RFC 6570 §2.3: varname = varchar *(["."] varchar), varchar = ALPHA / DIGIT / "_" +# (Percent-encoded varchars are technically allowed but unseen in practice.) +_VARNAME_RE = re.compile(r"^[A-Za-z0-9_][A-Za-z0-9_.]*$") + +DEFAULT_MAX_TEMPLATE_LENGTH = 1_000_000 +DEFAULT_MAX_EXPRESSIONS = 10_000 + +# RFC 3986 reserved characters, kept unencoded by {+var} and {#var}. +_RESERVED = ":/?#[]@!$&'()*+,;=" + + +@dataclass(frozen=True) +class _OperatorSpec: + """Expansion behavior for a single operator (RFC 6570 §3.2, Table in §A).""" + + prefix: str + """Leading character emitted before the first variable.""" + separator: str + """Character between variables (and between exploded list items).""" + named: bool + """Emit ``name=value`` pairs (query/path-param style) rather than bare values.""" + allow_reserved: bool + """Keep reserved characters unencoded ({+var}, {#var}).""" + + +_OPERATOR_SPECS: dict[Operator, _OperatorSpec] = { + "": _OperatorSpec(prefix="", separator=",", named=False, allow_reserved=False), + "+": _OperatorSpec(prefix="", separator=",", named=False, allow_reserved=True), + "#": _OperatorSpec(prefix="#", separator=",", named=False, allow_reserved=True), + ".": _OperatorSpec(prefix=".", separator=".", named=False, allow_reserved=False), + "/": _OperatorSpec(prefix="/", separator="/", named=False, allow_reserved=False), + ";": _OperatorSpec(prefix=";", separator=";", named=True, allow_reserved=False), + "?": _OperatorSpec(prefix="?", 
separator="&", named=True, allow_reserved=False), + "&": _OperatorSpec(prefix="&", separator="&", named=True, allow_reserved=False), +} + +# Per-operator character class for regex matching. Each pattern matches +# the characters that can appear in an expanded value for that operator, +# stopping at the next structural delimiter. +_MATCH_PATTERN: dict[Operator, str] = { + "": r"[^/?#&,]+", # simple: everything structural is pct-encoded + "+": r"[^?#]+", # reserved: / allowed, stop at query/fragment + "#": r".+", # fragment: tail of URI + ".": r"[^./?#]+", # label: stop at next . + "/": r"[^/?#]+", # path segment: stop at next / + ";": r"[^;/?#]*", # path-param value (may be empty: ;name) + "?": r"[^&#]*", # query value (may be empty: ?name=) + "&": r"[^&#]*", # query-cont value +} + +# Characters that must not appear in a DECODED value for each operator. +# If %2F smuggles a / into a simple {var}, the decoded value violates +# the template author's declared structure and the match is rejected. +_STRUCTURAL_FORBIDDEN: dict[Operator, frozenset[str]] = { + "": frozenset("/?#&"), + "+": frozenset(), + "#": frozenset(), + ".": frozenset("./?#"), + "/": frozenset("/?#"), + ";": frozenset(";/?#"), + "?": frozenset("&#"), + "&": frozenset("&#"), +} + + +class InvalidUriTemplate(ValueError): + """Raised when a URI template string is malformed or unsupported. + + Attributes: + template: The template string that failed to parse. + position: Character offset where the error was detected, or None + if the error is not tied to a specific position. 
+ """ + + def __init__(self, message: str, *, template: str, position: int | None = None) -> None: + super().__init__(message) + self.template = template + self.position = position + + +@dataclass(frozen=True) +class Variable: + """A single variable within a URI template expression.""" + + name: str + operator: Operator + explode: bool = False + + +@dataclass(frozen=True) +class _Expression: + """A parsed ``{...}`` expression: one operator, one or more variables.""" + + operator: Operator + variables: tuple[Variable, ...] + + +_Part = str | _Expression + + +def _is_str_sequence(value: object) -> bool: + """Check if value is a non-string sequence whose items are all strings.""" + if isinstance(value, str) or not isinstance(value, Sequence): + return False + seq = cast(Sequence[object], value) + return all(isinstance(item, str) for item in seq) + + +def _encode(value: str, *, allow_reserved: bool) -> str: + """Percent-encode a value per RFC 6570 §3.2.1. + + Simple expansion encodes everything except unreserved characters. + Reserved expansion ({+var}, {#var}) additionally keeps RFC 3986 + reserved characters intact. + """ + safe = _RESERVED if allow_reserved else "" + return quote(value, safe=safe) + + +def _expand_expression(expr: _Expression, variables: Mapping[str, str | Sequence[str]]) -> str: + """Expand a single ``{...}`` expression into its URI fragment. + + Walks the expression's variables, encoding and joining defined ones + according to the operator's spec. Undefined variables are skipped + (RFC 6570 §2.3); if all are undefined, the expression contributes + nothing (no prefix is emitted). + """ + spec = _OPERATOR_SPECS[expr.operator] + rendered: list[str] = [] + + for var in expr.variables: + if var.name not in variables: + # Undefined: skip entirely, no placeholder. + continue + + value = variables[var.name] + + # Explicit type guard: reject non-str scalars with a clear message + # rather than a confusing "not iterable" from the sequence branch. 
+ if not isinstance(value, str) and not _is_str_sequence(value): + raise TypeError(f"Variable {var.name!r} must be str or a sequence of str, got {type(value).__name__}") + + if isinstance(value, str): + encoded = _encode(value, allow_reserved=spec.allow_reserved) + if spec.named: + # ; uses "name" for empty values, ?/& use "name=" (RFC §3.2.7-8) + if value == "" and expr.operator == ";": + rendered.append(var.name) + else: + rendered.append(f"{var.name}={encoded}") + else: + rendered.append(encoded) + else: + # Sequence value. + items = [_encode(v, allow_reserved=spec.allow_reserved) for v in value] + if not items: + continue + if var.explode: + # Each item gets the operator's separator; named ops repeat the key. + if spec.named: + rendered.append(spec.separator.join(f"{var.name}={v}" for v in items)) + else: + rendered.append(spec.separator.join(items)) + else: + # Non-explode: comma-join into a single value. + joined = ",".join(items) + rendered.append(f"{var.name}={joined}" if spec.named else joined) + + if not rendered: + return "" + return spec.prefix + spec.separator.join(rendered) + + +@dataclass(frozen=True) +class UriTemplate: + """A parsed RFC 6570 URI template. + + Construct via :meth:`parse`. Instances are immutable and hashable; + equality is based on the template string alone. + """ + + template: str + _parts: tuple[_Part, ...] = field(repr=False, compare=False) + _variables: tuple[Variable, ...] = field(repr=False, compare=False) + _pattern: re.Pattern[str] = field(repr=False, compare=False) + + @classmethod + def parse( + cls, + template: str, + *, + max_length: int = DEFAULT_MAX_TEMPLATE_LENGTH, + max_expressions: int = DEFAULT_MAX_EXPRESSIONS, + ) -> UriTemplate: + """Parse a URI template string. + + Args: + template: An RFC 6570 URI template. + max_length: Maximum permitted length of the template string. + Guards against resource exhaustion. + max_expressions: Maximum number of ``{...}`` expressions + permitted. 
Guards against pathological inputs that could + produce expensive regexes. + + Raises: + InvalidUriTemplate: If the template is malformed, exceeds the + size limits, or uses unsupported RFC 6570 features. + """ + if len(template) > max_length: + raise InvalidUriTemplate( + f"Template exceeds maximum length of {max_length}", + template=template, + ) + + parts, variables = _parse(template, max_expressions=max_expressions) + pattern = _build_pattern(parts) + return cls(template=template, _parts=parts, _variables=variables, _pattern=pattern) + + @property + def variables(self) -> tuple[Variable, ...]: + """All variables in the template, in order of appearance.""" + return self._variables + + @property + def variable_names(self) -> tuple[str, ...]: + """All variable names in the template, in order of appearance.""" + return tuple(v.name for v in self._variables) + + def expand(self, variables: Mapping[str, str | Sequence[str]]) -> str: + """Expand the template by substituting variable values. + + String values are percent-encoded according to their operator: + simple ``{var}`` encodes reserved characters; ``{+var}`` and + ``{#var}`` leave them intact. Sequence values are joined with + commas for non-explode variables, or with the operator's + separator for explode variables. + + Example:: + + >>> t = UriTemplate.parse("file://docs/{name}") + >>> t.expand({"name": "hello world.txt"}) + 'file://docs/hello%20world.txt' + + >>> t = UriTemplate.parse("file://docs/{+path}") + >>> t.expand({"path": "src/main.py"}) + 'file://docs/src/main.py' + + >>> t = UriTemplate.parse("/search{?q,lang}") + >>> t.expand({"q": "mcp", "lang": "en"}) + '/search?q=mcp&lang=en' + + >>> t = UriTemplate.parse("/files{/path*}") + >>> t.expand({"path": ["a", "b", "c"]}) + '/files/a/b/c' + + Args: + variables: Values for each template variable. Keys must be + strings; values must be ``str`` or a sequence of ``str``. + + Returns: + The expanded URI string. 
+ + Note: + Per RFC 6570, variables absent from the mapping are + **silently omitted**. This is the correct behavior for + optional query parameters (``{?page}`` with no page yields + no ``?page=``), but for required path segments it produces + a structurally incomplete URI. If you need all variables + present, validate before calling:: + + missing = set(t.variable_names) - variables.keys() + if missing: + raise ValueError(f"Missing: {missing}") + + Raises: + TypeError: If a value is neither ``str`` nor a sequence of + ``str``. Non-string scalars (``int``, ``None``) are not + coerced. + """ + out: list[str] = [] + for part in self._parts: + if isinstance(part, str): + out.append(part) + else: + out.append(_expand_expression(part, variables)) + return "".join(out) + + def match(self, uri: str) -> dict[str, str | list[str]] | None: + """Match a concrete URI against this template and extract variables. + + This is the inverse of :meth:`expand`. The URI is matched against + a regex derived from the template; captured values are + percent-decoded and validated for structural integrity. + + **Structural integrity**: decoded values must not contain + characters that are structurally significant for their operator. + A simple ``{name}`` whose value decodes to contain ``/`` is + rejected — if that was intended, the template author should use + ``{+name}``. This blocks the ``%2F``-smuggling vector where a + client encodes a path separator to bypass single-segment + semantics. 
+ + Example:: + + >>> t = UriTemplate.parse("file://docs/{name}") + >>> t.match("file://docs/readme.txt") + {'name': 'readme.txt'} + >>> t.match("file://docs/hello%20world.txt") + {'name': 'hello world.txt'} + >>> t.match("file://docs/..%2Fetc%2Fpasswd") is None # / in simple var + True + + >>> t = UriTemplate.parse("file://docs/{+path}") + >>> t.match("file://docs/src/main.py") + {'path': 'src/main.py'} + + >>> t = UriTemplate.parse("/files{/path*}") + >>> t.match("/files/a/b/c") + {'path': ['a', 'b', 'c']} + + Args: + uri: A concrete URI string. + + Returns: + A mapping from variable names to decoded values (``str`` for + scalar variables, ``list[str]`` for explode variables), or + ``None`` if the URI does not match the template or a decoded + value violates structural integrity. + """ + m = self._pattern.fullmatch(uri) + if m is None: + return None + + result: dict[str, str | list[str]] = {} + # One capture group per variable, emitted in template order. + for var, raw in zip(self._variables, m.groups()): + spec = _OPERATOR_SPECS[var.operator] + forbidden = _STRUCTURAL_FORBIDDEN[var.operator] + + if var.explode: + # Explode capture holds the whole run including separators, + # e.g. "/a/b/c". Split, decode each segment, check each. + if not raw: + result[var.name] = [] + continue + segments: list[str] = [] + for seg in raw.split(spec.separator): + if not seg: # leading separator produces an empty first item + continue + decoded = unquote(seg) + if any(c in decoded for c in forbidden): + return None + segments.append(decoded) + result[var.name] = segments + else: + decoded = unquote(raw) + # Structural integrity: reject if decoding revealed a + # delimiter the operator doesn't permit. 
+ if any(c in decoded for c in forbidden): + return None + result[var.name] = decoded + + return result + + def __str__(self) -> str: + return self.template + + +def _build_pattern(parts: tuple[_Part, ...]) -> re.Pattern[str]: + """Compile a regex that matches URIs produced by this template. + + Walks parts in order: literals are ``re.escape``'d, expressions + become capture groups. One group is emitted per variable, in the + same order as ``UriTemplate._variables``, so ``match.groups()`` can + be zipped directly. + + Raises: + re.error: Only if pattern assembly is buggy — should not happen + for templates that passed :func:`_parse`. + """ + chunks: list[str] = [] + for part in parts: + if isinstance(part, str): + chunks.append(re.escape(part)) + else: + chunks.append(_expression_pattern(part)) + return re.compile("".join(chunks)) + + +def _expression_pattern(expr: _Expression) -> str: + """Build the regex fragment for a single ``{...}`` expression. + + Emits the operator's prefix, then one capture group per variable + separated by the operator's separator. Named operators (``; ? &``) + include ``name=`` before the capture. + """ + spec = _OPERATOR_SPECS[expr.operator] + body = _MATCH_PATTERN[expr.operator] + sep = re.escape(spec.separator) + prefix = re.escape(spec.prefix) + + pieces: list[str] = [] + for i, var in enumerate(expr.variables): + # First var gets the prefix; subsequent vars get the separator. + lead = prefix if i == 0 else sep + + if var.explode: + # Capture the whole run of separator+value repetitions. + # Non-greedy so a trailing literal can terminate the run. + pieces.append(f"((?:{sep}{body})*?)") + elif spec.named: + # ;name=val or ?name=val — the = is optional for ; with empty value + eq = "=?" 
if expr.operator == ";" else "=" + pieces.append(f"{lead}{re.escape(var.name)}{eq}({body})") + else: + pieces.append(f"{lead}({body})") + + return "".join(pieces) + + +def _parse(template: str, *, max_expressions: int) -> tuple[tuple[_Part, ...], tuple[Variable, ...]]: + """Split a template into an ordered sequence of literals and expressions. + + Walks the string, alternating between collecting literal runs and + parsing ``{...}`` expressions. The resulting ``parts`` sequence + preserves positional interleaving so ``match()`` and ``expand()`` can + walk it in order. + + Raises: + InvalidUriTemplate: On unclosed braces, too many expressions, or + any error surfaced by :func:`_parse_expression` or + :func:`_check_adjacent_explodes`. + """ + parts: list[_Part] = [] + variables: list[Variable] = [] + expression_count = 0 + i = 0 + n = len(template) + + while i < n: + # Find the next expression opener from the current cursor. + brace = template.find("{", i) + + if brace == -1: + # No more expressions; everything left is a trailing literal. + parts.append(template[i:]) + break + + if brace > i: + # Literal text between cursor and the brace. + parts.append(template[i:brace]) + + end = template.find("}", brace) + if end == -1: + raise InvalidUriTemplate( + f"Unclosed expression at position {brace}", + template=template, + position=brace, + ) + + expression_count += 1 + if expression_count > max_expressions: + raise InvalidUriTemplate( + f"Template exceeds maximum of {max_expressions} expressions", + template=template, + ) + + # Delegate body (between braces, exclusive) to the expression parser. + expr = _parse_expression(template, template[brace + 1 : end], brace) + parts.append(expr) + variables.extend(expr.variables) + + # Advance past the closing brace. 
+ i = end + 1 + + _check_adjacent_explodes(template, parts) + return tuple(parts), tuple(variables) + + +def _parse_expression(template: str, body: str, pos: int) -> _Expression: + """Parse the body of a single ``{...}`` expression. + + The body is everything between the braces. It consists of an optional + leading operator character followed by one or more comma-separated + variable specifiers. Each specifier is a name with an optional + trailing ``*`` (explode modifier). + + Args: + template: The full template string, for error reporting. + body: The expression body, braces excluded. + pos: Character offset of the opening brace, for error reporting. + + Raises: + InvalidUriTemplate: On empty body, invalid variable names, or + unsupported modifiers. + """ + if not body: + raise InvalidUriTemplate(f"Empty expression at position {pos}", template=template, position=pos) + + # Peel off the operator, if any. Membership check justifies the cast. + operator: Operator = "" + if body[0] in _OPERATORS: + operator = cast(Operator, body[0]) + body = body[1:] + if not body: + raise InvalidUriTemplate( + f"Expression has operator but no variables at position {pos}", + template=template, + position=pos, + ) + + # Remaining body is comma-separated variable specs: name[*] + variables: list[Variable] = [] + for spec in body.split(","): + if ":" in spec: + raise InvalidUriTemplate( + f"Prefix modifier {{var:N}} is not supported (in {spec!r} at position {pos})", + template=template, + position=pos, + ) + + explode = spec.endswith("*") + name = spec[:-1] if explode else spec + + if not _VARNAME_RE.match(name): + raise InvalidUriTemplate( + f"Invalid variable name {name!r} at position {pos}", + template=template, + position=pos, + ) + + # Explode only makes sense for operators that repeat a separator. + # Simple/reserved/fragment have no per-item separator; query-explode + # needs order-agnostic dict matching which we don't support yet. 
+ if explode and operator in ("", "+", "#", "?", "&"): + raise InvalidUriTemplate( + f"Explode modifier on {{{operator}{name}*}} is not supported for matching", + template=template, + position=pos, + ) + + variables.append(Variable(name=name, operator=operator, explode=explode)) + + return _Expression(operator=operator, variables=tuple(variables)) + + +def _check_adjacent_explodes(template: str, parts: list[_Part]) -> None: + """Reject templates with adjacent same-operator explode variables. + + Patterns like ``{/a*}{/b*}`` are ambiguous for matching: given + ``/x/y/z``, the split between ``a`` and ``b`` is undetermined. We + reject these at parse time rather than picking an arbitrary + resolution. A literal between them (``{/a*}/x{/b*}``) or a different + operator (``{/a*}{.b*}``) disambiguates. + + Raises: + InvalidUriTemplate: If two explode variables with the same + operator appear with no literal or non-explode variable + between them. + """ + prev_explode_op: Operator | None = None + for part in parts: + if isinstance(part, str): + # Literal text breaks any adjacency. + prev_explode_op = None + continue + for var in part.variables: + if var.explode: + if prev_explode_op == var.operator: + raise InvalidUriTemplate( + f"Adjacent explode expressions with operator {var.operator!r} are ambiguous and not supported", + template=template, + ) + prev_explode_op = var.operator + else: + # A non-explode variable also breaks adjacency. 
+ prev_explode_op = None diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py new file mode 100644 index 000000000..1238e4283 --- /dev/null +++ b/tests/shared/test_uri_template.py @@ -0,0 +1,401 @@ +"""Tests for RFC 6570 URI template parsing, expansion, and matching.""" + +import pytest + +from mcp.shared.uri_template import InvalidUriTemplate, UriTemplate, Variable + + +def test_parse_literal_only(): + tmpl = UriTemplate.parse("file://docs/readme.txt") + assert tmpl.variables == () + assert tmpl.variable_names == () + assert str(tmpl) == "file://docs/readme.txt" + + +def test_parse_simple_variable(): + tmpl = UriTemplate.parse("file://docs/{name}") + assert tmpl.variables == (Variable(name="name", operator=""),) + assert tmpl.variable_names == ("name",) + + +@pytest.mark.parametrize( + ("template", "operator"), + [ + ("{+path}", "+"), + ("{#frag}", "#"), + ("{.ext}", "."), + ("{/seg}", "/"), + ("{;param}", ";"), + ("{?q}", "?"), + ("{&next}", "&"), + ], +) +def test_parse_all_operators(template: str, operator: str): + tmpl = UriTemplate.parse(template) + (var,) = tmpl.variables + assert var.operator == operator + assert var.explode is False + + +def test_parse_multiple_variables_in_expression(): + tmpl = UriTemplate.parse("{?q,lang,page}") + assert tmpl.variable_names == ("q", "lang", "page") + assert all(v.operator == "?" 
for v in tmpl.variables) + + +def test_parse_multiple_expressions(): + tmpl = UriTemplate.parse("db://{table}/{id}{?format}") + assert tmpl.variable_names == ("table", "id", "format") + ops = [v.operator for v in tmpl.variables] + assert ops == ["", "", "?"] + + +def test_parse_explode_modifier(): + tmpl = UriTemplate.parse("/files{/path*}") + (var,) = tmpl.variables + assert var.name == "path" + assert var.operator == "/" + assert var.explode is True + + +@pytest.mark.parametrize("template", ["{.labels*}", "{;params*}"]) +def test_parse_explode_supported_operators(template: str): + tmpl = UriTemplate.parse(template) + assert tmpl.variables[0].explode is True + + +def test_parse_mixed_explode_and_plain(): + tmpl = UriTemplate.parse("{/path*}{?q}") + assert tmpl.variables == ( + Variable(name="path", operator="/", explode=True), + Variable(name="q", operator="?"), + ) + + +def test_parse_varname_with_dots_and_underscores(): + tmpl = UriTemplate.parse("{foo_bar.baz}") + assert tmpl.variable_names == ("foo_bar.baz",) + + +def test_parse_rejects_unclosed_expression(): + with pytest.raises(InvalidUriTemplate, match="Unclosed expression") as exc: + UriTemplate.parse("file://{name") + assert exc.value.position == 7 + assert exc.value.template == "file://{name" + + +def test_parse_rejects_empty_expression(): + with pytest.raises(InvalidUriTemplate, match="Empty expression"): + UriTemplate.parse("file://{}") + + +def test_parse_rejects_operator_without_variable(): + with pytest.raises(InvalidUriTemplate, match="operator but no variables"): + UriTemplate.parse("{+}") + + +@pytest.mark.parametrize("name", ["-bad", "bad-name", "bad name", "bad/name"]) +def test_parse_rejects_invalid_varname(name: str): + with pytest.raises(InvalidUriTemplate, match="Invalid variable name"): + UriTemplate.parse(f"{{{name}}}") + + +def test_parse_rejects_empty_spec_in_list(): + with pytest.raises(InvalidUriTemplate, match="Invalid variable name"): + UriTemplate.parse("{a,,b}") + + +def 
test_parse_rejects_prefix_modifier(): + with pytest.raises(InvalidUriTemplate, match="Prefix modifier"): + UriTemplate.parse("{var:3}") + + +@pytest.mark.parametrize("template", ["{var*}", "{+var*}", "{#var*}", "{?var*}", "{&var*}"]) +def test_parse_rejects_unsupported_explode(template: str): + with pytest.raises(InvalidUriTemplate, match="Explode modifier"): + UriTemplate.parse(template) + + +def test_parse_rejects_adjacent_explodes_same_operator(): + with pytest.raises(InvalidUriTemplate, match="Adjacent explode"): + UriTemplate.parse("{/a*}{/b*}") + + +def test_invalid_uri_template_is_value_error(): + with pytest.raises(ValueError): + UriTemplate.parse("{}") + + +@pytest.mark.parametrize( + "template", + [ + "{{name}}", # nested open: body becomes "{name" + "{a{b}c}", # brace inside expression + "{{]{}}{}", # garbage soup + "{a,{b}", # brace in comma list + ], +) +def test_parse_rejects_nested_braces(template: str): + # Nested/stray { inside an expression lands in the varname and + # fails the varname regex rather than needing special handling. + with pytest.raises(InvalidUriTemplate, match="Invalid variable name"): + UriTemplate.parse(template) + + +@pytest.mark.parametrize( + ("template", "position"), + [ + ("{", 0), + ("{{", 0), + ("file://{name", 7), + ("{a}{", 3), + ("}{", 1), # stray } is literal, then unclosed { + ], +) +def test_parse_rejects_unclosed_brace(template: str, position: int): + with pytest.raises(InvalidUriTemplate, match="Unclosed") as exc: + UriTemplate.parse(template) + assert exc.value.position == position + + +@pytest.mark.parametrize( + "template", + ["}}", "}", "a}b", "{a}}{b}"], +) +def test_parse_treats_stray_close_brace_as_literal(template: str): + # RFC 6570 is lenient about } outside expressions; most implementations + # (including the TypeScript SDK) treat it as a literal rather than erroring. 
+ tmpl = UriTemplate.parse(template) + assert str(tmpl) == template + + +def test_parse_stray_close_brace_between_expressions(): + tmpl = UriTemplate.parse("{a}}{b}") + assert tmpl.variable_names == ("a", "b") + + +def test_parse_allows_adjacent_explodes_different_operator(): + tmpl = UriTemplate.parse("{/a*}{.b*}") + assert len(tmpl.variables) == 2 + + +def test_parse_allows_explode_separated_by_literal(): + tmpl = UriTemplate.parse("{/a*}/x{/b*}") + assert len(tmpl.variables) == 2 + + +def test_parse_rejects_oversized_template(): + with pytest.raises(InvalidUriTemplate, match="maximum length"): + UriTemplate.parse("x" * 101, max_length=100) + + +def test_parse_rejects_too_many_expressions(): + with pytest.raises(InvalidUriTemplate, match="maximum of"): + UriTemplate.parse("{a}" * 11, max_expressions=10) + + +def test_parse_custom_limits_allow_larger(): + tmpl = UriTemplate.parse("{a}" * 20, max_expressions=20) + assert len(tmpl.variables) == 20 + + +def test_equality_based_on_template_string(): + a = UriTemplate.parse("file://{name}") + b = UriTemplate.parse("file://{name}") + c = UriTemplate.parse("file://{other}") + assert a == b + assert a != c + assert hash(a) == hash(b) + + +def test_frozen(): + tmpl = UriTemplate.parse("{x}") + with pytest.raises(Exception): # noqa: B017 — FrozenInstanceError + tmpl.template = "changed" # type: ignore[misc] + + +@pytest.mark.parametrize( + ("template", "variables", "expected"), + [ + # Level 1: simple, encodes reserved chars + ("{var}", {"var": "value"}, "value"), + ("{var}", {"var": "hello world"}, "hello%20world"), + ("{var}", {"var": "a/b"}, "a%2Fb"), + ("file://docs/{name}", {"name": "readme.txt"}, "file://docs/readme.txt"), + # Level 2: reserved expansion keeps / ? # etc. 
+ ("{+var}", {"var": "a/b/c"}, "a/b/c"), + ("{+var}", {"var": "a?b#c"}, "a?b#c"), + ("file://docs/{+path}", {"path": "src/main.py"}, "file://docs/src/main.py"), + # Level 2: fragment + ("{#var}", {"var": "section"}, "#section"), + ("{#var}", {"var": "a/b"}, "#a/b"), + # Level 3: label + ("file{.ext}", {"ext": "txt"}, "file.txt"), + # Level 3: path segment + ("{/seg}", {"seg": "docs"}, "/docs"), + # Level 3: path-style param + ("{;id}", {"id": "42"}, ";id=42"), + ("{;id}", {"id": ""}, ";id"), + # Level 3: query + ("{?q}", {"q": "search"}, "?q=search"), + ("{?q}", {"q": ""}, "?q="), + ("/search{?q,lang}", {"q": "mcp", "lang": "en"}, "/search?q=mcp&lang=en"), + # Level 3: query continuation + ("?a=1{&b}", {"b": "2"}, "?a=1&b=2"), + # Multi-var in one expression + ("{x,y}", {"x": "1", "y": "2"}, "1,2"), + ("{+x,y}", {"x": "a/b", "y": "c/d"}, "a/b,c/d"), + # Sequence values, non-explode (comma-join) + ("{/list}", {"list": ["a", "b", "c"]}, "/a,b,c"), + ("{?list}", {"list": ["a", "b"]}, "?list=a,b"), + # Explode: each item gets separator + ("{/path*}", {"path": ["a", "b", "c"]}, "/a/b/c"), + ("{.labels*}", {"labels": ["x", "y"]}, ".x.y"), + ("{;keys*}", {"keys": ["a", "b"]}, ";keys=a;keys=b"), + # Undefined variables omitted + ("{?q,page}", {"q": "x"}, "?q=x"), + ("{a}{b}", {"a": "x"}, "x"), + ("{?page}", {}, ""), + # Empty sequence omitted + ("{/path*}", {"path": []}, ""), + # Literal-only template + ("file://static", {}, "file://static"), + ], +) +def test_expand(template: str, variables: dict[str, str | list[str]], expected: str): + assert UriTemplate.parse(template).expand(variables) == expected + + +def test_expand_encodes_special_chars_in_simple(): + t = UriTemplate.parse("{v}") + assert t.expand({"v": "a&b=c"}) == "a%26b%3Dc" + + +def test_expand_preserves_special_chars_in_reserved(): + t = UriTemplate.parse("{+v}") + assert t.expand({"v": "a&b=c"}) == "a&b=c" + + +@pytest.mark.parametrize( + "value", + [42, None, 3.14, {"a": "b"}, ["ok", 42], b"bytes"], +) +def 
test_expand_rejects_invalid_value_types(value: object): + t = UriTemplate.parse("{v}") + with pytest.raises(TypeError, match="must be str or a sequence of str"): + t.expand({"v": value}) # type: ignore[dict-item] + + +@pytest.mark.parametrize( + ("template", "uri", "expected"), + [ + # Level 1: simple + ("{var}", "hello", {"var": "hello"}), + ("file://docs/{name}", "file://docs/readme.txt", {"name": "readme.txt"}), + ("{a}/{b}", "foo/bar", {"a": "foo", "b": "bar"}), + # Level 2: reserved allows / + ("file://docs/{+path}", "file://docs/src/main.py", {"path": "src/main.py"}), + ("{+var}", "a/b/c", {"var": "a/b/c"}), + # Level 2: fragment + ("page{#section}", "page#intro", {"section": "intro"}), + # Level 3: label + ("file{.ext}", "file.txt", {"ext": "txt"}), + # Level 3: path segment + ("api{/version}", "api/v1", {"version": "v1"}), + # Level 3: path-style param + ("item{;id}", "item;id=42", {"id": "42"}), + ("item{;id}", "item;id", {"id": ""}), + # Level 3: query + ("search{?q}", "search?q=hello", {"q": "hello"}), + ("search{?q}", "search?q=", {"q": ""}), + ("search{?q,lang}", "search?q=mcp&lang=en", {"q": "mcp", "lang": "en"}), + # Level 3: query continuation + ("?a=1{&b}", "?a=1&b=2", {"b": "2"}), + # Explode: path segments as list + ("/files{/path*}", "/files/a/b/c", {"path": ["a", "b", "c"]}), + ("/files{/path*}", "/files", {"path": []}), + ("/files{/path*}/edit", "/files/a/b/edit", {"path": ["a", "b"]}), + # Explode: labels + ("host{.labels*}", "host.example.com", {"labels": ["example", "com"]}), + ], +) +def test_match(template: str, uri: str, expected: dict[str, str | list[str]]): + assert UriTemplate.parse(template).match(uri) == expected + + +@pytest.mark.parametrize( + ("template", "uri"), + [ + ("file://docs/{name}", "file://other/readme.txt"), + ("{a}/{b}", "foo"), + ("file{.ext}", "file"), + ("search{?q}", "search"), + ("static", "different"), + ], +) +def test_match_no_match(template: str, uri: str): + assert UriTemplate.parse(template).match(uri) is 
None + + +def test_match_decodes_percent_encoding(): + t = UriTemplate.parse("file://docs/{name}") + assert t.match("file://docs/hello%20world.txt") == {"name": "hello world.txt"} + + +def test_match_escapes_template_literals(): + # Regression: previous impl didn't escape . in literals, making it + # a regex wildcard. "fileXtxt" should NOT match "file.txt/{id}". + t = UriTemplate.parse("file.txt/{id}") + assert t.match("file.txt/42") == {"id": "42"} + assert t.match("fileXtxt/42") is None + + +@pytest.mark.parametrize( + ("template", "uri"), + [ + # %2F in simple var — encoded-slash path traversal + ("file://docs/{name}", "file://docs/..%2F..%2Fetc%2Fpasswd"), + ("file://docs/{name}", "file://docs/..%2f..%2fetc%2fpasswd"), + # %3F (?) in simple var + ("{var}", "a%3Fb"), + # %2E (.) in label var — would break label structure + ("file{.ext}", "file.a%2Eb"), + # %2F in path-segment var + ("api{/v}", "api/a%2Fb"), + # %26 (&) in query var — would break query structure + ("search{?q}", "search?q=a%26b"), + ], +) +def test_match_structural_integrity_rejects_smuggled_delimiters(template: str, uri: str): + assert UriTemplate.parse(template).match(uri) is None + + +def test_match_structural_integrity_allows_slash_in_reserved(): + # {+var} explicitly permits / — structural check must not block it + t = UriTemplate.parse("{+path}") + assert t.match("a%2Fb") == {"path": "a/b"} + assert t.match("a/b") == {"path": "a/b"} + + +def test_match_structural_integrity_per_explode_segment(): + t = UriTemplate.parse("/files{/path*}") + # Each segment checked independently + assert t.match("/files/a%2Fb/c") is None + + +@pytest.mark.parametrize( + ("template", "variables"), + [ + ("{var}", {"var": "hello"}), + ("file://docs/{name}", {"name": "readme.txt"}), + ("file://docs/{+path}", {"path": "src/main.py"}), + ("search{?q,lang}", {"q": "mcp", "lang": "en"}), + ("file{.ext}", {"ext": "txt"}), + ("/files{/path*}", {"path": ["a", "b", "c"]}), + ("{var}", {"var": "hello world"}), + ], +) +def 
test_roundtrip_expand_then_match(template: str, variables: dict[str, str | list[str]]): + t = UriTemplate.parse(template) + uri = t.expand(variables) + assert t.match(uri) == variables From e5ecf50e642d6ed9a88f2947fa0e38ad0b91cb7e Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:16:42 +0000 Subject: [PATCH 02/47] feat: add filesystem path safety primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `mcp.shared.path_security` with three standalone utilities for defending against path-traversal attacks when URI template parameters flow into filesystem operations: - `contains_path_traversal()` — base-free component-level check for `..` escapes, handles both `/` and `\` separators - `is_absolute_path()` — detects POSIX, Windows drive, and UNC absolute paths (which silently discard the base in `Path` joins) - `safe_join()` — resolve-and-verify within a sandbox root; catches `..`, absolute injection, and symlink escapes These are pure functions usable from both MCPServer and lowlevel server implementations. `PathEscapeError(ValueError)` is raised by `safe_join` on violation. --- src/mcp/shared/path_security.py | 150 +++++++++++++++++++++++++++++ tests/shared/test_path_security.py | 142 +++++++++++++++++++++++++++ 2 files changed, 292 insertions(+) create mode 100644 src/mcp/shared/path_security.py create mode 100644 tests/shared/test_path_security.py diff --git a/src/mcp/shared/path_security.py b/src/mcp/shared/path_security.py new file mode 100644 index 000000000..23817b949 --- /dev/null +++ b/src/mcp/shared/path_security.py @@ -0,0 +1,150 @@ +"""Filesystem path safety primitives for resource handlers. + +These functions help MCP servers defend against path-traversal attacks +when extracted URI template parameters are used in filesystem +operations. 
They are standalone utilities usable from both the +high-level :class:`~mcp.server.mcpserver.MCPServer` and lowlevel server +implementations. + +The canonical safe pattern:: + + from mcp.shared.path_security import safe_join + + @mcp.resource("file://docs/{+path}") + def read_doc(path: str) -> str: + return safe_join("/data/docs", path).read_text() +""" + +from pathlib import Path + +__all__ = ["PathEscapeError", "contains_path_traversal", "is_absolute_path", "safe_join"] + + +class PathEscapeError(ValueError): + """Raised by :func:`safe_join` when the resolved path escapes the base.""" + + +def contains_path_traversal(value: str) -> bool: + r"""Check whether a value, treated as a relative path, escapes its origin. + + This is a **base-free** check: it does not know the sandbox root, so + it detects only whether ``..`` components would move above the + starting point. Use :func:`safe_join` when you know the root — it + additionally catches symlink escapes and absolute-path injection. + + The check is component-based: ``..`` is dangerous only as a + standalone path segment, not as a substring. Both ``/`` and ``\`` + are treated as separators. + + Example:: + + >>> contains_path_traversal("a/b/c") + False + >>> contains_path_traversal("../etc") + True + >>> contains_path_traversal("a/../../b") + True + >>> contains_path_traversal("a/../b") + False + >>> contains_path_traversal("1.0..2.0") + False + >>> contains_path_traversal("..") + True + + Args: + value: A string that may be used as a filesystem path. + + Returns: + ``True`` if the path would escape its starting directory. + """ + depth = 0 + for part in value.replace("\\", "/").split("/"): + if part == "..": + depth -= 1 + if depth < 0: + return True + elif part and part != ".": + depth += 1 + return False + + +def is_absolute_path(value: str) -> bool: + r"""Check whether a value is an absolute filesystem path. 
+ + Absolute paths are dangerous when joined onto a base: in Python, + ``Path("/data") / "/etc/passwd"`` yields ``/etc/passwd`` — the + absolute right-hand side silently discards the base. + + Detects POSIX absolute (``/foo``), Windows drive (``C:\foo``), + and Windows UNC/absolute (``\\server\share``, ``\foo``). + + Example:: + + >>> is_absolute_path("relative/path") + False + >>> is_absolute_path("/etc/passwd") + True + >>> is_absolute_path("C:\\Windows") + True + >>> is_absolute_path("") + False + + Args: + value: A string that may be used as a filesystem path. + + Returns: + ``True`` if the path is absolute on any common platform. + """ + if not value: + return False + if value[0] in ("/", "\\"): + return True + # Windows drive letter: C:, C:\, C:/ + if len(value) >= 2 and value[1] == ":" and value[0].isalpha(): + return True + return False + + +def safe_join(base: str | Path, *parts: str) -> Path: + """Join path components onto a base, rejecting escapes. + + Resolves the joined path and verifies it remains within ``base``. + This is the **gold-standard** check: it catches ``..`` traversal, + absolute-path injection, and symlink escapes that the base-free + checks cannot. + + Example:: + + >>> safe_join("/data/docs", "readme.txt") + PosixPath('/data/docs/readme.txt') + >>> safe_join("/data/docs", "../../../etc/passwd") + Traceback (most recent call last): + ... + PathEscapeError: ... + + Args: + base: The sandbox root. May be relative; it will be resolved. + parts: Path components to join. Each is checked for absolute + form before joining. + + Returns: + The resolved path, guaranteed to be within ``base``. + + Raises: + PathEscapeError: If any part is absolute, or if the resolved + path is not contained within the resolved base. + """ + base_resolved = Path(base).resolve() + + # Reject absolute parts up front: Path's / operator would silently + # discard everything to the left of an absolute component. 
+ for part in parts: + if is_absolute_path(part): + raise PathEscapeError(f"Path component {part!r} is absolute; refusing to join onto {base_resolved}") + + target = base_resolved.joinpath(*parts).resolve() + + if not target.is_relative_to(base_resolved): + raise PathEscapeError(f"Path {target} escapes base {base_resolved}") + + return target diff --git a/tests/shared/test_path_security.py b/tests/shared/test_path_security.py new file mode 100644 index 000000000..fc3bd9091 --- /dev/null +++ b/tests/shared/test_path_security.py @@ -0,0 +1,142 @@ +"""Tests for filesystem path safety primitives.""" + +from pathlib import Path + +import pytest + +from mcp.shared.path_security import ( + PathEscapeError, + contains_path_traversal, + is_absolute_path, + safe_join, +) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + # Safe: no traversal + ("a/b/c", False), + ("readme.txt", False), + ("", False), + (".", False), + ("./a/b", False), + # Safe: .. balanced by prior descent + ("a/../b", False), + ("a/b/../c", False), + ("a/b/../../c", False), + # Unsafe: net escape + ("..", True), + ("../etc", True), + ("../../etc/passwd", True), + ("a/../../b", True), + ("./../../etc", True), + # .. 
as substring, not component — safe + ("1.0..2.0", False), + ("foo..bar", False), + ("..foo", False), + ("foo..", False), + # Backslash separator + ("..\\etc", True), + ("a\\..\\..\\b", True), + ("a\\b\\c", False), + # Mixed separators + ("a/..\\..\\b", True), + ], +) +def test_contains_path_traversal(value: str, expected: bool): + assert contains_path_traversal(value) is expected + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + # Relative + ("relative/path", False), + ("file.txt", False), + ("", False), + (".", False), + ("..", False), + # POSIX absolute + ("/", True), + ("/etc/passwd", True), + ("/a", True), + # Windows drive + ("C:", True), + ("C:\\Windows", True), + ("c:/foo", True), + ("Z:\\", True), + # Windows UNC / backslash-absolute + ("\\\\server\\share", True), + ("\\foo", True), + # Not a drive: digit before colon + ("1:foo", False), + # Colon not in position 1 + ("ab:c", False), + ], +) +def test_is_absolute_path(value: str, expected: bool): + assert is_absolute_path(value) is expected + + +def test_safe_join_simple(tmp_path: Path): + result = safe_join(tmp_path, "docs", "readme.txt") + assert result == tmp_path / "docs" / "readme.txt" + + +def test_safe_join_resolves_relative_base(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.chdir(tmp_path) + result = safe_join(".", "file.txt") + assert result == tmp_path / "file.txt" + + +def test_safe_join_rejects_dotdot_escape(tmp_path: Path): + with pytest.raises(PathEscapeError, match="escapes base"): + safe_join(tmp_path, "../../../etc/passwd") + + +def test_safe_join_rejects_balanced_then_escape(tmp_path: Path): + with pytest.raises(PathEscapeError, match="escapes base"): + safe_join(tmp_path, "a/../../etc") + + +def test_safe_join_allows_balanced_dotdot(tmp_path: Path): + result = safe_join(tmp_path, "a/../b") + assert result == tmp_path / "b" + + +def test_safe_join_rejects_absolute_part(tmp_path: Path): + with pytest.raises(PathEscapeError, match="is absolute"): + 
safe_join(tmp_path, "/etc/passwd") + + +def test_safe_join_rejects_absolute_in_later_part(tmp_path: Path): + with pytest.raises(PathEscapeError, match="is absolute"): + safe_join(tmp_path, "docs", "/etc/passwd") + + +def test_safe_join_rejects_windows_drive(tmp_path: Path): + with pytest.raises(PathEscapeError, match="is absolute"): + safe_join(tmp_path, "C:\\Windows\\System32") + + +def test_safe_join_rejects_symlink_escape(tmp_path: Path): + outside = tmp_path / "outside" + outside.mkdir() + sandbox = tmp_path / "sandbox" + sandbox.mkdir() + (sandbox / "escape").symlink_to(outside) + + with pytest.raises(PathEscapeError, match="escapes base"): + safe_join(sandbox, "escape", "secret.txt") + + +def test_safe_join_base_equals_target(tmp_path: Path): + # Joining nothing (or ".") should return the base itself + assert safe_join(tmp_path) == tmp_path + assert safe_join(tmp_path, ".") == tmp_path + + +def test_path_escape_error_is_value_error(): + with pytest.raises(ValueError): + safe_join("/tmp", "/etc") From 0018eea38feebd6b73e6da502e5121bb13aa0d7a Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:40:14 +0000 Subject: [PATCH 03/47] feat: integrate UriTemplate into MCPServer resource templates Refactors the internal `ResourceTemplate` to use the RFC 6570 `UriTemplate` engine for matching, and adds a configurable `ResourceSecurity` policy for path-safety checks on extracted parameters. `ResourceTemplate.matches()` now: - Delegates to `UriTemplate.match()` for full RFC 6570 Level 1-3 support (plus path-style explode). `{+path}` can match multi-segment paths. - Enforces structural integrity: `%2F` smuggled into a simple `{var}` is rejected. - Applies `ResourceSecurity` policy: path traversal (`..` components) and absolute paths rejected by default, with per-parameter exemption available. 
The `@mcp.resource()` decorator now parses the template once at decoration time via `UriTemplate.parse()`, replacing the regex-based param extraction that couldn't handle operators like `{+path}`. Malformed templates surface immediately with a clear `InvalidUriTemplate` including position info. Also fixes the pre-existing bug where template literals were not regex-escaped (a `.` in the template acted as a wildcard). --- .../mcpserver/resources/resource_manager.py | 4 +- .../server/mcpserver/resources/templates.py | 105 +++++++++++++++--- src/mcp/server/mcpserver/server.py | 20 +++- .../resources/test_resource_template.py | 71 ++++++++++++ tests/server/mcpserver/test_server.py | 18 +++ 5 files changed, 198 insertions(+), 20 deletions(-) diff --git a/src/mcp/server/mcpserver/resources/resource_manager.py b/src/mcp/server/mcpserver/resources/resource_manager.py index 6bf17376d..b1aab50f5 100644 --- a/src/mcp/server/mcpserver/resources/resource_manager.py +++ b/src/mcp/server/mcpserver/resources/resource_manager.py @@ -8,7 +8,7 @@ from pydantic import AnyUrl from mcp.server.mcpserver.resources.base import Resource -from mcp.server.mcpserver.resources.templates import ResourceTemplate +from mcp.server.mcpserver.resources.templates import DEFAULT_RESOURCE_SECURITY, ResourceSecurity, ResourceTemplate from mcp.server.mcpserver.utilities.logging import get_logger from mcp.types import Annotations, Icon @@ -64,6 +64,7 @@ def add_template( icons: list[Icon] | None = None, annotations: Annotations | None = None, meta: dict[str, Any] | None = None, + security: ResourceSecurity = DEFAULT_RESOURCE_SECURITY, ) -> ResourceTemplate: """Add a template from a function.""" template = ResourceTemplate.from_function( @@ -76,6 +77,7 @@ def add_template( icons=icons, annotations=annotations, meta=meta, + security=security, ) self._templates[template.uri_template] = template return template diff --git a/src/mcp/server/mcpserver/resources/templates.py 
b/src/mcp/server/mcpserver/resources/templates.py index 2d612657c..d7d9214c4 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -3,16 +3,17 @@ from __future__ import annotations import inspect -import re -from collections.abc import Callable +from collections.abc import Callable, Mapping +from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any -from urllib.parse import unquote from pydantic import BaseModel, Field, validate_call from mcp.server.mcpserver.resources.types import FunctionResource, Resource from mcp.server.mcpserver.utilities.context_injection import find_context_parameter, inject_context from mcp.server.mcpserver.utilities.func_metadata import func_metadata +from mcp.shared.path_security import contains_path_traversal, is_absolute_path +from mcp.shared.uri_template import UriTemplate from mcp.types import Annotations, Icon if TYPE_CHECKING: @@ -20,6 +21,64 @@ from mcp.server.mcpserver.context import Context +@dataclass(frozen=True) +class ResourceSecurity: + """Security policy applied to extracted resource template parameters. + + These checks run **after** :meth:`~mcp.shared.uri_template.UriTemplate.match` + has already enforced structural integrity (e.g., rejected ``%2F`` in + simple ``{var}``). They catch semantic attacks that structural checks + cannot: ``..`` traversal and absolute-path injection work even with + perfectly-formed URI components. + + Example:: + + # Opt out for a parameter that legitimately contains .. + @mcp.resource( + "git://diff/{+range}", + security=ResourceSecurity(exempt_params=frozenset({"range"})), + ) + def git_diff(range: str) -> str: ... 
+ """ + + reject_path_traversal: bool = True + """Reject values containing ``..`` as a path component.""" + + reject_absolute_paths: bool = True + """Reject values that look like absolute filesystem paths.""" + + exempt_params: frozenset[str] = field(default_factory=frozenset[str]) + """Parameter names to skip all checks for.""" + + def validate(self, params: Mapping[str, str | list[str]]) -> bool: + """Check all parameter values against the configured policy. + + Args: + params: Extracted template parameters. List values (from + explode variables) are checked element-wise. + + Returns: + ``True`` if all values pass; ``False`` on first violation. + """ + for name, value in params.items(): + if name in self.exempt_params: + continue + values = value if isinstance(value, list) else [value] + for v in values: + if self.reject_path_traversal and contains_path_traversal(v): + return False + if self.reject_absolute_paths and is_absolute_path(v): + return False + return True + + +DEFAULT_RESOURCE_SECURITY = ResourceSecurity() +"""Secure-by-default policy: traversal and absolute paths rejected.""" + +UNSAFE_RESOURCE_SECURITY = ResourceSecurity(reject_path_traversal=False, reject_absolute_paths=False) +"""No path checks. 
Use only when parameters are never used as filesystem paths.""" + + class ResourceTemplate(BaseModel): """A template for dynamically creating resources.""" @@ -34,6 +93,8 @@ class ResourceTemplate(BaseModel): fn: Callable[..., Any] = Field(exclude=True) parameters: dict[str, Any] = Field(description="JSON schema for function parameters") context_kwarg: str | None = Field(None, description="Name of the kwarg that should receive context") + parsed_template: UriTemplate = Field(exclude=True, description="Parsed RFC 6570 template") + security: ResourceSecurity = Field(exclude=True, description="Path-safety policy for extracted parameters") @classmethod def from_function( @@ -48,12 +109,20 @@ def from_function( annotations: Annotations | None = None, meta: dict[str, Any] | None = None, context_kwarg: str | None = None, + security: ResourceSecurity = DEFAULT_RESOURCE_SECURITY, ) -> ResourceTemplate: - """Create a template from a function.""" + """Create a template from a function. + + Raises: + InvalidUriTemplate: If ``uri_template`` is malformed or uses + unsupported RFC 6570 features. + """ func_name = name or fn.__name__ if func_name == "": raise ValueError("You must provide a name for lambda functions") # pragma: no cover + parsed = UriTemplate.parse(uri_template) + # Find context parameter if it exists if context_kwarg is None: # pragma: no branch context_kwarg = find_context_parameter(fn) @@ -80,20 +149,28 @@ def from_function( fn=fn, parameters=parameters, context_kwarg=context_kwarg, + parsed_template=parsed, + security=security, ) - def matches(self, uri: str) -> dict[str, Any] | None: - """Check if URI matches template and extract parameters. + def matches(self, uri: str) -> dict[str, str | list[str]] | None: + """Check if a URI matches this template and extract parameters. 
+ + Delegates to :meth:`UriTemplate.match` for RFC 6570 matching + with structural integrity (``%2F`` smuggling rejected for simple + vars), then applies this template's :class:`ResourceSecurity` + policy (path traversal, absolute paths). - Extracted parameters are URL-decoded to handle percent-encoded characters. + Returns: + Extracted parameters on success, or ``None`` if the URI + doesn't match or a parameter fails security validation. """ - # Convert template to regex pattern - pattern = self.uri_template.replace("{", "(?P<").replace("}", ">[^/]+)") - match = re.match(f"^{pattern}$", uri) - if match: - # URL-decode all extracted parameter values - return {key: unquote(value) for key, value in match.groupdict().items()} - return None + params = self.parsed_template.match(uri) + if params is None: + return None + if not self.security.validate(params): + return None + return params async def create_resource( self, diff --git a/src/mcp/server/mcpserver/server.py b/src/mcp/server/mcpserver/server.py index 2a7a58117..c40c643c5 100644 --- a/src/mcp/server/mcpserver/server.py +++ b/src/mcp/server/mcpserver/server.py @@ -5,7 +5,6 @@ import base64 import inspect import json -import re from collections.abc import AsyncIterator, Awaitable, Callable, Iterable, Sequence from contextlib import AbstractAsyncContextManager, asynccontextmanager from typing import Any, Generic, Literal, TypeVar, overload @@ -43,6 +42,7 @@ from mcp.server.streamable_http_manager import StreamableHTTPSessionManager from mcp.server.transport_security import TransportSecuritySettings from mcp.shared.exceptions import MCPError +from mcp.shared.uri_template import UriTemplate from mcp.types import ( Annotations, BlobResourceContents, @@ -668,6 +668,13 @@ async def get_weather(city: str) -> str: data = await fetch_weather(city) return f"Weather for {city}: {data}" ``` + + Raises: + InvalidUriTemplate: If ``uri`` is not a valid RFC 6570 template. 
+ ValueError: If URI template parameters don't match the + function's parameters. + TypeError: If the decorator is applied without being called + (``@resource`` instead of ``@resource("uri")``). """ # Check if user passed function directly instead of calling decorator if callable(uri): @@ -676,18 +683,21 @@ async def get_weather(city: str) -> str: "Did you forget to call it? Use @resource('uri') instead of @resource" ) + # Parse once, early — surfaces malformed-template errors at + # decoration time with a clear position, and gives us correct + # variable names for all RFC 6570 operators. + parsed = UriTemplate.parse(uri) + uri_params = set(parsed.variable_names) + def decorator(fn: _CallableT) -> _CallableT: # Check if this should be a template sig = inspect.signature(fn) - has_uri_params = "{" in uri and "}" in uri has_func_params = bool(sig.parameters) - if has_uri_params or has_func_params: + if uri_params or has_func_params: # Check for Context parameter to exclude from validation context_param = find_context_parameter(fn) - # Validate that URI params match function params (excluding context) - uri_params = set(re.findall(r"{(\w+)}", uri)) # We need to remove the context_param from the resource function if # there is any. 
func_params = {p for p in sig.parameters.keys() if p != context_param} diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 640cfe803..57011e017 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -6,9 +6,80 @@ from mcp.server.mcpserver import Context, MCPServer from mcp.server.mcpserver.resources import FunctionResource, ResourceTemplate +from mcp.server.mcpserver.resources.templates import ( + DEFAULT_RESOURCE_SECURITY, + UNSAFE_RESOURCE_SECURITY, + ResourceSecurity, +) from mcp.types import Annotations +def _make(uri_template: str, security: ResourceSecurity = DEFAULT_RESOURCE_SECURITY) -> ResourceTemplate: + def handler(**kwargs: Any) -> str: + return "ok" + + return ResourceTemplate.from_function(fn=handler, uri_template=uri_template, security=security) + + +def test_matches_rfc6570_reserved_expansion(): + # {+path} allows / — the feature the old regex implementation couldn't support + t = _make("file://docs/{+path}") + assert t.matches("file://docs/src/main.py") == {"path": "src/main.py"} + + +def test_matches_rejects_encoded_slash_in_simple_var(): + # Path traversal via encoded slash: %2F smuggled into a simple {var} + t = _make("file://docs/{name}") + assert t.matches("file://docs/..%2F..%2Fetc%2Fpasswd") is None + + +def test_matches_rejects_path_traversal_by_default(): + t = _make("file://docs/{name}") + assert t.matches("file://docs/..") is None + + +def test_matches_rejects_path_traversal_in_reserved_var(): + # Even {+path} gets the traversal check — it's semantic, not structural + t = _make("file://docs/{+path}") + assert t.matches("file://docs/../../etc/passwd") is None + + +def test_matches_rejects_absolute_path(): + t = _make("file://docs/{+path}") + assert t.matches("file://docs//etc/passwd") is None + + +def test_matches_allows_dotdot_as_substring(): + # .. 
is only dangerous as a path component + t = _make("git://refs/{range}") + assert t.matches("git://refs/v1.0..v2.0") == {"range": "v1.0..v2.0"} + + +def test_matches_exempt_params_skip_security(): + policy = ResourceSecurity(exempt_params=frozenset({"range"})) + t = _make("git://diff/{+range}", security=policy) + assert t.matches("git://diff/../foo") == {"range": "../foo"} + + +def test_matches_unsafe_policy_disables_checks(): + t = _make("file://docs/{name}", security=UNSAFE_RESOURCE_SECURITY) + assert t.matches("file://docs/..") == {"name": ".."} + + +def test_matches_explode_checks_each_segment(): + t = _make("api{/parts*}") + assert t.matches("api/a/b/c") == {"parts": ["a", "b", "c"]} + # Any segment with traversal rejects the whole match + assert t.matches("api/a/../c") is None + + +def test_matches_escapes_template_literals(): + # Regression: old impl treated . as regex wildcard + t = _make("data://v1.0/{id}") + assert t.matches("data://v1.0/42") == {"id": "42"} + assert t.matches("data://v1X0/42") is None + + class TestResourceTemplate: """Test ResourceTemplate functionality.""" diff --git a/tests/server/mcpserver/test_server.py b/tests/server/mcpserver/test_server.py index 3ef06d038..b117e157c 100644 --- a/tests/server/mcpserver/test_server.py +++ b/tests/server/mcpserver/test_server.py @@ -19,6 +19,7 @@ from mcp.server.mcpserver.utilities.types import Audio, Image from mcp.server.transport_security import TransportSecuritySettings from mcp.shared.exceptions import MCPError +from mcp.shared.uri_template import InvalidUriTemplate from mcp.types import ( AudioContent, BlobResourceContents, @@ -141,6 +142,23 @@ async def test_add_resource_decorator_incorrect_usage(self): def get_data(x: str) -> str: # pragma: no cover return f"Data: {x}" + async def test_resource_decorator_rfc6570_reserved_expansion(self): + # Regression: old regex-based param extraction couldn't see `path` + # in `{+path}` and failed with a confusing mismatch error. 
+ mcp = MCPServer() + + @mcp.resource("file://docs/{+path}") + def read_doc(path: str) -> str: + raise NotImplementedError + + templates = await mcp.list_resource_templates() + assert [t.uri_template for t in templates] == ["file://docs/{+path}"] + + async def test_resource_decorator_rejects_malformed_template(self): + mcp = MCPServer() + with pytest.raises(InvalidUriTemplate, match="Unclosed expression"): + mcp.resource("file://{name") + class TestDnsRebindingProtection: """Tests for automatic DNS rebinding protection on localhost. From 5cbbc70b183ee74174bec367661f964334dd7eca Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:54:32 +0000 Subject: [PATCH 04/47] feat: wire ResourceSecurity into MCPServer configuration Adds `resource_security` to `MCPServer.__init__` and a per-resource `security` override to the `@resource()` decorator. Templates inherit the server-wide policy unless overridden. Exports `ResourceSecurity` and `DEFAULT_RESOURCE_SECURITY` from `mcp.server.mcpserver` for user configuration. Usage: # Server-wide relaxation mcp = MCPServer(resource_security=ResourceSecurity(reject_path_traversal=False)) # Per-resource exemption for non-path parameters @mcp.resource( "git://diff/{+range}", security=ResourceSecurity(exempt_params=frozenset({"range"})), ) def git_diff(range: str) -> str: ... 
--- src/mcp/server/mcpserver/__init__.py | 11 ++++- .../server/mcpserver/resources/__init__.py | 8 +++- .../server/mcpserver/resources/templates.py | 3 -- src/mcp/server/mcpserver/server.py | 15 ++++++- .../resources/test_resource_template.py | 6 +-- tests/server/mcpserver/test_server.py | 43 ++++++++++++++++++- 6 files changed, 76 insertions(+), 10 deletions(-) diff --git a/src/mcp/server/mcpserver/__init__.py b/src/mcp/server/mcpserver/__init__.py index 0857e38bd..35c98a00c 100644 --- a/src/mcp/server/mcpserver/__init__.py +++ b/src/mcp/server/mcpserver/__init__.py @@ -3,7 +3,16 @@ from mcp.types import Icon from .context import Context +from .resources import DEFAULT_RESOURCE_SECURITY, ResourceSecurity from .server import MCPServer from .utilities.types import Audio, Image -__all__ = ["MCPServer", "Context", "Image", "Audio", "Icon"] +__all__ = [ + "MCPServer", + "Context", + "Image", + "Audio", + "Icon", + "ResourceSecurity", + "DEFAULT_RESOURCE_SECURITY", +] diff --git a/src/mcp/server/mcpserver/resources/__init__.py b/src/mcp/server/mcpserver/resources/__init__.py index b5805fb34..330edc324 100644 --- a/src/mcp/server/mcpserver/resources/__init__.py +++ b/src/mcp/server/mcpserver/resources/__init__.py @@ -1,6 +1,10 @@ from .base import Resource from .resource_manager import ResourceManager -from .templates import ResourceTemplate +from .templates import ( + DEFAULT_RESOURCE_SECURITY, + ResourceSecurity, + ResourceTemplate, +) from .types import ( BinaryResource, DirectoryResource, @@ -20,4 +24,6 @@ "DirectoryResource", "ResourceTemplate", "ResourceManager", + "ResourceSecurity", + "DEFAULT_RESOURCE_SECURITY", ] diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index d7d9214c4..aeef13701 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -75,9 +75,6 @@ def validate(self, params: Mapping[str, str | list[str]]) -> bool: 
DEFAULT_RESOURCE_SECURITY = ResourceSecurity() """Secure-by-default policy: traversal and absolute paths rejected.""" -UNSAFE_RESOURCE_SECURITY = ResourceSecurity(reject_path_traversal=False, reject_absolute_paths=False) -"""No path checks. Use only when parameters are never used as filesystem paths.""" - class ResourceTemplate(BaseModel): """A template for dynamically creating resources.""" diff --git a/src/mcp/server/mcpserver/server.py b/src/mcp/server/mcpserver/server.py index c40c643c5..cdf8e7eba 100644 --- a/src/mcp/server/mcpserver/server.py +++ b/src/mcp/server/mcpserver/server.py @@ -32,7 +32,13 @@ from mcp.server.mcpserver.context import Context from mcp.server.mcpserver.exceptions import ResourceError from mcp.server.mcpserver.prompts import Prompt, PromptManager -from mcp.server.mcpserver.resources import FunctionResource, Resource, ResourceManager +from mcp.server.mcpserver.resources import ( + DEFAULT_RESOURCE_SECURITY, + FunctionResource, + Resource, + ResourceManager, + ResourceSecurity, +) from mcp.server.mcpserver.tools import Tool, ToolManager from mcp.server.mcpserver.utilities.context_injection import find_context_parameter from mcp.server.mcpserver.utilities.logging import configure_logging, get_logger @@ -144,7 +150,9 @@ def __init__( warn_on_duplicate_prompts: bool = True, lifespan: Callable[[MCPServer[LifespanResultT]], AbstractAsyncContextManager[LifespanResultT]] | None = None, auth: AuthSettings | None = None, + resource_security: ResourceSecurity = DEFAULT_RESOURCE_SECURITY, ): + self._resource_security = resource_security self.settings = Settings( debug=debug, log_level=log_level, @@ -626,6 +634,7 @@ def resource( icons: list[Icon] | None = None, annotations: Annotations | None = None, meta: dict[str, Any] | None = None, + security: ResourceSecurity | None = None, ) -> Callable[[_CallableT], _CallableT]: """Decorator to register a function as a resource. 
@@ -647,6 +656,9 @@ def resource( icons: Optional list of icons for the resource annotations: Optional annotations for the resource meta: Optional metadata dictionary for the resource + security: Path-safety policy for extracted template parameters. + Defaults to the server's ``resource_security`` setting. + Only applies to template resources. Example: ```python @@ -717,6 +729,7 @@ def decorator(fn: _CallableT) -> _CallableT: mime_type=mime_type, icons=icons, annotations=annotations, + security=security if security is not None else self._resource_security, meta=meta, ) else: diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 57011e017..22638b576 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -8,7 +8,6 @@ from mcp.server.mcpserver.resources import FunctionResource, ResourceTemplate from mcp.server.mcpserver.resources.templates import ( DEFAULT_RESOURCE_SECURITY, - UNSAFE_RESOURCE_SECURITY, ResourceSecurity, ) from mcp.types import Annotations @@ -61,8 +60,9 @@ def test_matches_exempt_params_skip_security(): assert t.matches("git://diff/../foo") == {"range": "../foo"} -def test_matches_unsafe_policy_disables_checks(): - t = _make("file://docs/{name}", security=UNSAFE_RESOURCE_SECURITY) +def test_matches_disabled_policy_allows_traversal(): + policy = ResourceSecurity(reject_path_traversal=False, reject_absolute_paths=False) + t = _make("file://docs/{name}", security=policy) assert t.matches("file://docs/..") == {"name": ".."} diff --git a/tests/server/mcpserver/test_server.py b/tests/server/mcpserver/test_server.py index b117e157c..c14ce56dd 100644 --- a/tests/server/mcpserver/test_server.py +++ b/tests/server/mcpserver/test_server.py @@ -12,7 +12,7 @@ from mcp.client import Client from mcp.server.context import ServerRequestContext from mcp.server.experimental.request_context import 
Experimental -from mcp.server.mcpserver import Context, MCPServer +from mcp.server.mcpserver import Context, MCPServer, ResourceSecurity from mcp.server.mcpserver.exceptions import ToolError from mcp.server.mcpserver.prompts.base import Message, UserMessage from mcp.server.mcpserver.resources import FileResource, FunctionResource @@ -159,6 +159,47 @@ async def test_resource_decorator_rejects_malformed_template(self): with pytest.raises(InvalidUriTemplate, match="Unclosed expression"): mcp.resource("file://{name") + async def test_resource_security_default_rejects_traversal(self): + mcp = MCPServer() + + @mcp.resource("data://items/{name}") + def get_item(name: str) -> str: + return f"item:{name}" + + async with Client(mcp) as client: + # ".." as a path component is rejected by default policy + with pytest.raises(MCPError, match="Unknown resource"): + await client.read_resource("data://items/..") + + async def test_resource_security_per_resource_override(self): + mcp = MCPServer() + + @mcp.resource( + "git://diff/{+range}", + security=ResourceSecurity(exempt_params=frozenset({"range"})), + ) + def git_diff(range: str) -> str: + return f"diff:{range}" + + async with Client(mcp) as client: + # "../foo" would be rejected by default, but "range" is exempt + result = await client.read_resource("git://diff/../foo") + assert isinstance(result.contents[0], TextResourceContents) + assert result.contents[0].text == "diff:../foo" + + async def test_resource_security_server_wide_override(self): + mcp = MCPServer(resource_security=ResourceSecurity(reject_path_traversal=False)) + + @mcp.resource("data://items/{name}") + def get_item(name: str) -> str: + return f"item:{name}" + + async with Client(mcp) as client: + # Server-wide policy disabled traversal check; ".." now allowed + result = await client.read_resource("data://items/..") + assert isinstance(result.contents[0], TextResourceContents) + assert result.contents[0].text == "item:.." 
+ class TestDnsRebindingProtection: """Tests for automatic DNS rebinding protection on localhost. From 928698b3eb3bc8e4c0abd2d8eb6efd0d67762eef Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:58:09 +0000 Subject: [PATCH 05/47] docs: add migration guide entry for resource template changes Documents the RFC 6570 support, security hardening defaults, and opt-out configuration for the resource template rewrite. Grouped with the existing resource URI section. --- docs/migration.md | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/docs/migration.md b/docs/migration.md index 3b47f9aad..c0fda063a 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -545,6 +545,69 @@ await client.read_resource("test://resource") await client.read_resource(str(my_any_url)) ``` +### Resource templates: RFC 6570 support and security hardening + +Resource template matching has been rewritten to support RFC 6570 URI +templates (Levels 1-3 plus path-style explode) and to apply path-safety +checks to extracted parameters by default. + +**New capabilities:** + +- `{+path}` (reserved expansion) now works — it matches multi-segment + paths like `src/main.py`. Previously only simple `{var}` was supported. 
+- All Level 3 operators: `{.ext}`, `{/seg}`, `{;param}`, `{?query}`, `{&cont}` +- Path-style explode: `{/path*}` extracts a `list[str]` of segments +- Template literals are now regex-escaped (a `.` in your template no + longer matches any character — this was a bug) + +**Security hardening (may require opt-out):** + +By default, extracted parameter values are now rejected if they: + +- Contain `..` as a path component (e.g., `..`, `../etc`, `a/../../b`) +- Look like an absolute filesystem path (e.g., `/etc/passwd`, `C:\Windows`) +- Decode to contain structural delimiters that their operator forbids + (e.g., `%2F` smuggled into a simple `{name}`) + +If your template parameters legitimately contain `..` (e.g., git commit +ranges like `HEAD~3..HEAD`) or absolute paths, exempt them: + +```python +from mcp.server.mcpserver import MCPServer, ResourceSecurity + +mcp = MCPServer() + +@mcp.resource( + "git://diff/{+range}", + security=ResourceSecurity(exempt_params=frozenset({"range"})), +) +def git_diff(range: str) -> str: + ... +``` + +Or relax the policy server-wide: + +```python +mcp = MCPServer( + resource_security=ResourceSecurity(reject_path_traversal=False), +) +``` + +**Filesystem handlers:** even with `{+path}` allowing slashes, you must +still guard against traversal in your handler. Use `safe_join`: + +```python +from mcp.shared.path_security import safe_join + +@mcp.resource("file://docs/{+path}") +def read_doc(path: str) -> str: + return safe_join("/data/docs", path).read_text() +``` + +**Malformed templates now fail at decoration time** with +`InvalidUriTemplate` (a `ValueError` subclass carrying the error +position), rather than silently misbehaving at match time. + ### Lowlevel `Server`: constructor parameters are now keyword-only All parameters after `name` are now keyword-only. 
If you were passing `version` or other parameters positionally, use keyword arguments instead: From 00a1336ee66e15cd1cb8bc149f5b79900f5cf06d Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 17:27:51 +0000 Subject: [PATCH 06/47] refactor: accept plain set for ResourceSecurity.exempt_params Changes the type from frozenset[str] to collections.abc.Set[str] so users can write exempt_params={"range"} instead of exempt_params=frozenset({"range"}). The default factory stays frozenset for immutability. --- docs/migration.md | 2 +- src/mcp/server/mcpserver/resources/templates.py | 6 +++--- tests/server/mcpserver/resources/test_resource_template.py | 2 +- tests/server/mcpserver/test_server.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index c0fda063a..9ce3f2539 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -579,7 +579,7 @@ mcp = MCPServer() @mcp.resource( "git://diff/{+range}", - security=ResourceSecurity(exempt_params=frozenset({"range"})), + security=ResourceSecurity(exempt_params={"range"}), ) def git_diff(range: str) -> str: ... diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index aeef13701..5bb434f99 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -3,7 +3,7 @@ from __future__ import annotations import inspect -from collections.abc import Callable, Mapping +from collections.abc import Callable, Mapping, Set from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any @@ -36,7 +36,7 @@ class ResourceSecurity: # Opt out for a parameter that legitimately contains .. @mcp.resource( "git://diff/{+range}", - security=ResourceSecurity(exempt_params=frozenset({"range"})), + security=ResourceSecurity(exempt_params={"range"}), ) def git_diff(range: str) -> str: ... 
""" @@ -47,7 +47,7 @@ def git_diff(range: str) -> str: ... reject_absolute_paths: bool = True """Reject values that look like absolute filesystem paths.""" - exempt_params: frozenset[str] = field(default_factory=frozenset[str]) + exempt_params: Set[str] = field(default_factory=frozenset[str]) """Parameter names to skip all checks for.""" def validate(self, params: Mapping[str, str | list[str]]) -> bool: diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 22638b576..c6cdabcee 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -55,7 +55,7 @@ def test_matches_allows_dotdot_as_substring(): def test_matches_exempt_params_skip_security(): - policy = ResourceSecurity(exempt_params=frozenset({"range"})) + policy = ResourceSecurity(exempt_params={"range"}) t = _make("git://diff/{+range}", security=policy) assert t.matches("git://diff/../foo") == {"range": "../foo"} diff --git a/tests/server/mcpserver/test_server.py b/tests/server/mcpserver/test_server.py index c14ce56dd..077b30799 100644 --- a/tests/server/mcpserver/test_server.py +++ b/tests/server/mcpserver/test_server.py @@ -176,7 +176,7 @@ async def test_resource_security_per_resource_override(self): @mcp.resource( "git://diff/{+range}", - security=ResourceSecurity(exempt_params=frozenset({"range"})), + security=ResourceSecurity(exempt_params={"range"}), ) def git_diff(range: str) -> str: return f"diff:{range}" From a5afb9892b078d91285200183cd92ae2d643271c Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 17:38:47 +0000 Subject: [PATCH 07/47] docs: add resources guide covering templates, security, and low-level usage Adds docs/server/resources.md as the first page under the planned docs/server/ directory. 
Covers static resources, RFC 6570 template patterns, the built-in security checks and how to relax them, the safe_join pattern for filesystem handlers, and equivalent patterns for low-level Server implementations. Creates the docs/server/ nav section in mkdocs.yml. --- docs/server/resources.md | 364 +++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 2 + 2 files changed, 366 insertions(+) create mode 100644 docs/server/resources.md diff --git a/docs/server/resources.md b/docs/server/resources.md new file mode 100644 index 000000000..8d46b2ee2 --- /dev/null +++ b/docs/server/resources.md @@ -0,0 +1,364 @@ +# Resources + +Resources give clients read-only access to your data. Think of them as +the files, records, and reference material an LLM might need as context: +a config file, a database schema, the contents of a document, yesterday's +log output. + +Resources are different from tools. A tool is something the model +*calls* to make something happen: send an email, run a query, write a +file. A resource is something the application *reads* to understand the +world. Reading a resource should not change state or kick off expensive +work. If it does either, you probably want a tool. + +## A static resource + +The simplest case is a fixed URI that returns the same kind of content +every time. + +```python +from mcp.server.mcpserver import MCPServer + +mcp = MCPServer("docs-server") + + +@mcp.resource("config://features") +def feature_flags() -> str: + return '{"beta_search": true, "new_editor": false}' +``` + +When a client reads `config://features`, your function runs and the +return value is sent back. Return `str` for text, `bytes` for binary +data, or anything JSON-serializable. + +The URI scheme (`config://` here) is up to you. The protocol reserves +`file://` and `https://` for their usual meanings, but custom schemes +like `config://`, `db://`, or `notes://` are encouraged. They make the +URI self-describing. 
+ +## Resource templates + +Most interesting data is parameterized. You don't want to register a +separate resource for every user, every file, every database row. +Instead, register a template with placeholders: + +```python +@mcp.resource("tickets://{ticket_id}") +def get_ticket(ticket_id: str) -> dict: + ticket = helpdesk.find(ticket_id) + return {"id": ticket_id, "subject": ticket.subject, "status": ticket.status} +``` + +The `{ticket_id}` in the URI maps to the `ticket_id` parameter in your +function. A client reading `tickets://TKT-1042` calls +`get_ticket("TKT-1042")`. Reading `tickets://TKT-2001` calls +`get_ticket("TKT-2001")`. One template, unlimited resources. + +### Parameter types + +Extracted values arrive as strings, but you can declare a more specific +type and the SDK will convert: + +```python +@mcp.resource("orders://{order_id}") +def get_order(order_id: int) -> dict: + # "12345" from the URI becomes the int 12345 + return db.orders.get(order_id) +``` + +### Multi-segment paths + +A plain `{name}` matches a single URI segment. It stops at the first +slash. To match across slashes, use `{+name}`: + +```python +@mcp.resource("files://{+path}") +def read_file(path: str) -> str: + # Matches files://readme.txt + # Also matches files://guides/quickstart/intro.md + ... +``` + +This is the pattern you want for filesystem paths, nested object keys, +or anything hierarchical. + +### Query parameters + +Optional configuration goes in query parameters. Use `{?name}` or list +several with `{?a,b,c}`: + +```python +@mcp.resource("logs://{service}{?since,level}") +def tail_logs(service: str, since: str = "1h", level: str = "info") -> str: + return log_store.query(service, since=since, min_level=level) +``` + +Reading `logs://api` uses the defaults. Reading +`logs://api?since=15m&level=error` narrows it down. The path identifies +*which* resource; the query tunes *how* you read it. 
+ +### Path segments as a list + +If you want each path segment as a separate list item rather than one +string with slashes, use `{/name*}`: + +```python +@mcp.resource("tree://nodes{/path*}") +def walk_tree(path: list[str]) -> dict: + # tree://nodes/a/b/c gives path = ["a", "b", "c"] + node = root + for segment in path: + node = node.children[segment] + return node.to_dict() +``` + +### Template reference + +The template syntax follows [RFC 6570](https://datatracker.ietf.org/doc/html/rfc6570). +Here's what the SDK supports: + +| Pattern | Example input | You get | +|--------------|-----------------------|-------------------------| +| `{name}` | `alice` | `"alice"` | +| `{name}` | `docs/intro.md` | *no match* (stops at `/`) | +| `{+path}` | `docs/intro.md` | `"docs/intro.md"` | +| `{.ext}` | `.json` | `"json"` | +| `{/segment}` | `/v2` | `"v2"` | +| `{?key}` | `?key=value` | `"value"` | +| `{?a,b}` | `?a=1&b=2` | `"1"`, `"2"` | +| `{/path*}` | `/a/b/c` | `["a", "b", "c"]` | + +## Security + +Template parameters come from the client. If they flow into filesystem +or database operations, a hostile client can try path traversal +(`../../etc/passwd`) or injection attacks. + +### What the SDK checks by default + +Before your handler runs, the SDK rejects any parameter that: + +- contains `..` as a path component +- looks like an absolute path (`/etc/passwd`, `C:\Windows`) +- smuggles a delimiter through URL encoding (for example, `%2F` in a + plain `{name}` where `/` isn't allowed) + +A request that trips these checks is treated as a non-match: the SDK +raises `ResourceError("Unknown resource: {uri}")`, which the client +receives as a JSON-RPC error. Your handler never sees the bad input. + +### Filesystem handlers: use safe_join + +The built-in checks stop obvious attacks but can't know your sandbox +boundary. 
For filesystem access, use `safe_join` to resolve the path +and verify it stays inside your base directory: + +```python +from mcp.shared.path_security import safe_join + +DOCS_ROOT = "/srv/app/docs" + + +@mcp.resource("files://{+path}") +def read_file(path: str) -> str: + full_path = safe_join(DOCS_ROOT, path) + return full_path.read_text() +``` + +`safe_join` catches symlink escapes, `..` sequences, and absolute-path +tricks that a simple string check would miss. If the resolved path +escapes the base, it raises `PathEscapeError`, which surfaces to the +client as a `ResourceError`. + +### When the defaults get in the way + +Sometimes `..` in a parameter is legitimate. A git commit range like +`HEAD~3..HEAD` contains `..` but it's not a path. Exempt that parameter: + +```python +from mcp.server.mcpserver import ResourceSecurity + + +@mcp.resource( + "git://diff/{+range}", + security=ResourceSecurity(exempt_params={"range"}), +) +def git_diff(range: str) -> str: + return run_git("diff", range) +``` + +Or relax the policy for the whole server: + +```python +mcp = MCPServer( + resource_security=ResourceSecurity(reject_path_traversal=False), +) +``` + +The configurable checks: + +| Setting | Default | What it does | +|-------------------------|---------|-------------------------------------| +| `reject_path_traversal` | `True` | Rejects `..` as a path component | +| `reject_absolute_paths` | `True` | Rejects `/foo`, `C:\foo`, UNC paths | +| `exempt_params` | empty | Parameter names to skip checks for | + +## Errors + +If your handler can't fulfil the request, raise an exception. 
The SDK +turns it into an error response: + +```python +@mcp.resource("articles://{article_id}") +def get_article(article_id: str) -> str: + article = db.articles.find(article_id) + if article is None: + raise ValueError(f"No article with id {article_id}") + return article.content +``` + +## Resources on the low-level server + +If you're building on the low-level `Server`, you register handlers for +the `resources/list` and `resources/read` protocol methods directly. +There's no decorator; you return the protocol types yourself. + +### Static resources + +For fixed URIs, keep a registry and dispatch on exact match: + +```python +from mcp.server.lowlevel import Server +from mcp.types import ( + ListResourcesResult, + ReadResourceRequestParams, + ReadResourceResult, + Resource, + TextResourceContents, +) + +RESOURCES = { + "config://features": lambda: '{"beta_search": true}', + "status://health": lambda: check_health(), +} + + +async def on_list_resources(ctx, params) -> ListResourcesResult: + return ListResourcesResult( + resources=[Resource(name=uri, uri=uri) for uri in RESOURCES] + ) + + +async def on_read_resource(ctx, params: ReadResourceRequestParams) -> ReadResourceResult: + if (producer := RESOURCES.get(params.uri)) is not None: + return ReadResourceResult( + contents=[TextResourceContents(uri=params.uri, text=producer())] + ) + raise ValueError(f"Unknown resource: {params.uri}") + + +server = Server( + "my-server", + on_list_resources=on_list_resources, + on_read_resource=on_read_resource, +) +``` + +The list handler tells clients what's available; the read handler +serves the content. Check your registry first, fall through to +templates (below) if you have any, then raise for anything else. + +### Templates + +The template engine `MCPServer` uses lives in `mcp.shared.uri_template` +and works on its own. You get the same parsing, matching, and +structural checks; you wire up the routing and policy yourself. 
+ +#### Matching requests + +Parse your templates once, then match incoming URIs against them in +your read handler: + +```python +from mcp.server.lowlevel import Server +from mcp.shared.uri_template import UriTemplate +from mcp.types import ReadResourceRequestParams, ReadResourceResult, TextResourceContents + +TEMPLATES = { + "files": UriTemplate.parse("files://{+path}"), + "row": UriTemplate.parse("db://{table}/{id}"), +} + + +async def on_read_resource(ctx, params: ReadResourceRequestParams) -> ReadResourceResult: + if (vars := TEMPLATES["files"].match(params.uri)) is not None: + content = read_file_safely(vars["path"]) + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=content)]) + + if (vars := TEMPLATES["row"].match(params.uri)) is not None: + row = db.get(vars["table"], int(vars["id"])) + return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=row.to_json())]) + + raise ValueError(f"Unknown resource: {params.uri}") + + +server = Server("my-server", on_read_resource=on_read_resource) +``` + +`UriTemplate.match()` returns the extracted variables or `None`. URL +decoding and the structural checks (rejecting `%2F` in simple `{name}` +and so on) happen inside `match()`, the same as in `MCPServer`. + +Values come out as strings. Convert them yourself: `int(vars["id"])`, +`Path(vars["path"])`, whatever your handler needs. + +#### Applying security checks + +The path traversal and absolute-path checks that `MCPServer` runs by +default are in `mcp.shared.path_security`. 
Call them before using an +extracted value: + +```python +from mcp.shared.path_security import contains_path_traversal, is_absolute_path, safe_join + +DOCS_ROOT = "/srv/app/docs" + + +def read_file_safely(path: str) -> str: + if contains_path_traversal(path) or is_absolute_path(path): + raise ValueError("rejected") + return safe_join(DOCS_ROOT, path).read_text() +``` + +If a parameter isn't a filesystem path (say, a git ref or a search +query), skip the checks for that value. You control the policy per +handler rather than through a config object. + +#### Listing templates + +Clients discover templates through `resources/templates/list`. Return +the protocol `ResourceTemplate` type, using the same template strings +you parsed above: + +```python +from mcp.types import ListResourceTemplatesResult, ResourceTemplate + + +async def on_list_resource_templates(ctx, params) -> ListResourceTemplatesResult: + return ListResourceTemplatesResult( + resource_templates=[ + ResourceTemplate(name="files", uri_template=str(TEMPLATES["files"])), + ResourceTemplate(name="row", uri_template=str(TEMPLATES["row"])), + ] + ) + + +server = Server( + "my-server", + on_read_resource=on_read_resource, + on_list_resource_templates=on_list_resource_templates, +) +``` + +`str(template)` gives back the original template string, so your list +handler and your matching logic can share one source of truth. 
diff --git a/mkdocs.yml b/mkdocs.yml index 3a555785a..7568ba28a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -16,6 +16,8 @@ nav: - Migration Guide: migration.md - Documentation: - Concepts: concepts.md + - Server: + - Resources: server/resources.md - Low-Level Server: low-level-server.md - Authorization: authorization.md - Testing: testing.md From 257504262ceb5b4af4c3275ea81ef954a4630c10 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 17:54:28 +0000 Subject: [PATCH 08/47] feat: reject duplicate variable names in URI templates RFC 6570 requires repeated variables to expand to the same value. Enforcing this at match time would require backreferences with potentially exponential cost. We reject at parse time instead, following the recommendation in modelcontextprotocol#697. Previously a template like {x}/{x} would parse and silently return only the last captured value on match. --- src/mcp/shared/uri_template.py | 22 ++++++++++++++++++++++ tests/shared/test_uri_template.py | 12 +++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 668db3e8f..ffae04329 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -500,6 +500,7 @@ def _parse(template: str, *, max_expressions: int) -> tuple[tuple[_Part, ...], t i = end + 1 _check_adjacent_explodes(template, parts) + _check_duplicate_variables(template, variables) return tuple(parts), tuple(variables) @@ -570,6 +571,27 @@ def _parse_expression(template: str, body: str, pos: int) -> _Expression: return _Expression(operator=operator, variables=tuple(variables)) +def _check_duplicate_variables(template: str, variables: list[Variable]) -> None: + """Reject templates that use the same variable name more than once. 
+ + RFC 6570 requires repeated variables to expand to the same value, + which would require backreference matching with potentially + exponential cost. Rather than silently returning only the last + captured value, we reject at parse time. + + Raises: + InvalidUriTemplate: If any variable name appears more than once. + """ + seen: set[str] = set() + for var in variables: + if var.name in seen: + raise InvalidUriTemplate( + f"Variable {var.name!r} appears more than once; repeated variables are not supported", + template=template, + ) + seen.add(var.name) + + def _check_adjacent_explodes(template: str, parts: list[_Part]) -> None: """Reject templates with adjacent same-operator explode variables. diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 1238e4283..c2ce1f872 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -121,6 +121,15 @@ def test_parse_rejects_adjacent_explodes_same_operator(): UriTemplate.parse("{/a*}{/b*}") +@pytest.mark.parametrize( + "template", + ["{x}/{x}", "{x,x}", "{a}{b}{a}", "{+x}/foo/{x}"], +) +def test_parse_rejects_duplicate_variable_names(template: str): + with pytest.raises(InvalidUriTemplate, match="appears more than once"): + UriTemplate.parse(template) + + def test_invalid_uri_template_is_value_error(): with pytest.raises(ValueError): UriTemplate.parse("{}") @@ -195,7 +204,8 @@ def test_parse_rejects_too_many_expressions(): def test_parse_custom_limits_allow_larger(): - tmpl = UriTemplate.parse("{a}" * 20, max_expressions=20) + template = "".join(f"{{v{i}}}" for i in range(20)) + tmpl = UriTemplate.parse(template, max_expressions=20) assert len(tmpl.variables) == 20 From a463ed9f76dc25430e3b735aff9dcd51dc09f02b Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:24:44 +0000 Subject: [PATCH 09/47] test: add adversarial security test cases for layered defense Adds coverage for encoding-based attack 
vectors across both security layers: Layer 1 (structural integrity in UriTemplate.match): - Double-encoding %252F decoded once, accepted as literal %2F - Multi-param template with one poisoned value rejects whole match - Value decoding to only the forbidden delimiter rejected Layer 2 (ResourceSecurity traversal check): - %5C backslash passes structural, caught by traversal normalization - %2E%2E encoded dots pass structural, caught by traversal check - Mixed encoded+literal slash fails at regex before decoding --- .../resources/test_resource_template.py | 23 +++++++++++++++++++ tests/shared/test_uri_template.py | 22 ++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index c6cdabcee..9c84f35f8 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -73,6 +73,29 @@ def test_matches_explode_checks_each_segment(): assert t.matches("api/a/../c") is None +def test_matches_encoded_backslash_caught_by_traversal_layer(): + # %5C decodes to '\\'. Backslash is not a URI delimiter, so it passes + # structural integrity (layer 1). The traversal check (layer 2) + # normalizes '\\' to '/' and catches the '..' components. + t = _make("file://docs/{name}") + assert t.matches("file://docs/..%5C..%5Csecret") is None + + +def test_matches_encoded_dots_caught_by_traversal_layer(): + # %2E%2E decodes to '..'. Contains no structural delimiter, so passes + # layer 1. Layer 2's traversal check catches the '..' component. + t = _make("file://docs/{name}") + assert t.matches("file://docs/%2E%2E") is None + + +def test_matches_mixed_encoded_and_literal_slash(): + # One encoded slash + one literal: literal '/' prevents the regex + # match at layer 0 (simple var stops at '/'), so this never reaches + # decoding. Different failure mode than pure-encoded traversal. 
+ t = _make("file://docs/{name}") + assert t.matches("file://docs/..%2F../etc") is None + + def test_matches_escapes_template_literals(): # Regression: old impl treated . as regex wildcard t = _make("data://v1.0/{id}") diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index c2ce1f872..3cb238d1c 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -387,6 +387,28 @@ def test_match_structural_integrity_allows_slash_in_reserved(): assert t.match("a/b") == {"path": "a/b"} +def test_match_double_encoding_decoded_once(): + # %252F is %2F encoded again. Single decode gives "%2F" (a literal + # percent sign, a '2', and an 'F'), which contains no '/' and should + # be accepted. Guards against over-decoding. + t = UriTemplate.parse("file://docs/{name}") + assert t.match("file://docs/..%252Fetc") == {"name": "..%2Fetc"} + + +def test_match_multi_param_one_poisoned_rejects_whole(): + # One bad param in a multi-param template rejects the entire match + t = UriTemplate.parse("file://{org}/{repo}") + assert t.match("file://acme/..%2Fsecret") is None + # But the same template with clean params matches fine + assert t.match("file://acme/project") == {"org": "acme", "repo": "project"} + + +def test_match_bare_encoded_delimiter_rejected(): + # A value that decodes to only the forbidden delimiter + t = UriTemplate.parse("file://docs/{name}") + assert t.match("file://docs/%2F") is None + + def test_match_structural_integrity_per_explode_segment(): t = UriTemplate.parse("/files{/path*}") # Each segment checked independently From b278925f27ebbf89319f5803d366bda1ec18642e Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:26:12 +0000 Subject: [PATCH 10/47] feat: add UriTemplate.is_template() static method Cheap heuristic for distinguishing concrete URIs from templates without full parsing. Returns True if the string contains at least one {...} pair. 
Does not validate; a True result does not guarantee parse() will succeed. Matches the TypeScript SDK's UriTemplate.isTemplate() utility. --- src/mcp/shared/uri_template.py | 22 ++++++++++++++++++++++ tests/shared/test_uri_template.py | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index ffae04329..1af337524 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -210,6 +210,28 @@ class UriTemplate: _variables: tuple[Variable, ...] = field(repr=False, compare=False) _pattern: re.Pattern[str] = field(repr=False, compare=False) + @staticmethod + def is_template(value: str) -> bool: + """Check whether a string contains URI template expressions. + + A cheap heuristic for distinguishing concrete URIs from templates + without the cost of full parsing. Returns ``True`` if the string + contains at least one ``{...}`` pair. + + Example:: + + >>> UriTemplate.is_template("file://docs/{name}") + True + >>> UriTemplate.is_template("file://docs/readme.txt") + False + + Note: + This does not validate the template. A ``True`` result does + not guarantee :meth:`parse` will succeed. 
+ """ + open_i = value.find("{") + return open_i != -1 and value.find("}", open_i) != -1 + @classmethod def parse( cls, diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 3cb238d1c..72093b580 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -12,6 +12,24 @@ def test_parse_literal_only(): assert str(tmpl) == "file://docs/readme.txt" +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("file://docs/{name}", True), + ("file://docs/readme.txt", False), + ("", False), + ("{a}", True), + ("{", False), + ("}", False), + ("}{", False), + ("prefix{+path}/suffix", True), + ("{invalid syntax but still a template}", True), + ], +) +def test_is_template(value: str, expected: bool): + assert UriTemplate.is_template(value) is expected + + def test_parse_simple_variable(): tmpl = UriTemplate.parse("file://docs/{name}") assert tmpl.variables == (Variable(name="name", operator=""),) From 3b8aaddf925428a3ea88a6923128982e6fcee50b Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:30:22 +0000 Subject: [PATCH 11/47] feat: add URI length guard to UriTemplate.match() Adds a max_uri_length keyword argument (default 64 KiB) that returns None for oversized inputs before regex evaluation. Guards against resource exhaustion from pathologically long URIs, particularly on stdio transport where there is no inherent message size limit. Consistent with the existing max_length/max_expressions limits on parse(); the default is exported as DEFAULT_MAX_URI_LENGTH. 
--- src/mcp/shared/uri_template.py | 13 ++++++++++--- tests/shared/test_uri_template.py | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 1af337524..2da52a61a 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -30,6 +30,7 @@ DEFAULT_MAX_TEMPLATE_LENGTH = 1_000_000 DEFAULT_MAX_EXPRESSIONS = 10_000 +DEFAULT_MAX_URI_LENGTH = 65_536 # RFC 3986 reserved characters, kept unencoded by {+var} and {#var}. _RESERVED = ":/?#[]@!$&'()*+,;=" @@ -333,7 +334,7 @@ def expand(self, variables: Mapping[str, str | Sequence[str]]) -> str: out.append(_expand_expression(part, variables)) return "".join(out) - def match(self, uri: str) -> dict[str, str | list[str]] | None: + def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> dict[str, str | list[str]] | None: """Match a concrete URI against this template and extract variables. This is the inverse of :meth:`expand`. The URI is matched against @@ -368,13 +369,19 @@ def match(self, uri: str) -> dict[str, str | list[str]] | None: Args: uri: A concrete URI string. + max_uri_length: Maximum permitted length of the input URI. + Oversized inputs return ``None`` without regex evaluation, + guarding against resource exhaustion. Returns: A mapping from variable names to decoded values (``str`` for scalar variables, ``list[str]`` for explode variables), or - ``None`` if the URI does not match the template or a decoded - value violates structural integrity. + ``None`` if the URI does not match the template, a decoded + value violates structural integrity, or the URI exceeds + ``max_uri_length``. 
""" + if len(uri) > max_uri_length: + return None m = self._pattern.fullmatch(uri) if m is None: return None diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 72093b580..94c9d9c42 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -427,6 +427,26 @@ def test_match_bare_encoded_delimiter_rejected(): assert t.match("file://docs/%2F") is None +def test_match_rejects_oversized_uri(): + t = UriTemplate.parse("{var}") + assert t.match("x" * 100, max_uri_length=50) is None + + +def test_match_accepts_uri_within_custom_limit(): + t = UriTemplate.parse("{var}") + assert t.match("x" * 100, max_uri_length=200) == {"var": "x" * 100} + + +def test_match_default_uri_length_limit(): + from mcp.shared.uri_template import DEFAULT_MAX_URI_LENGTH + + t = UriTemplate.parse("{+var}") + # Just at the limit: should match + assert t.match("x" * DEFAULT_MAX_URI_LENGTH) is not None + # One over: should reject + assert t.match("x" * (DEFAULT_MAX_URI_LENGTH + 1)) is None + + def test_match_structural_integrity_per_explode_segment(): t = UriTemplate.parse("/files{/path*}") # Each segment checked independently From 3a786f34b4309dd87c981c643b559e5cda4070ba Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:35:29 +0000 Subject: [PATCH 12/47] test: add edge-case coverage for literal handling and anchoring Ports three test scenarios from the TypeScript SDK: - Repeated-slash literals (///{a}////{b}////) preserved exactly and rejected when slash count differs - Trailing extra path component rejected (/users/{id} vs /users/123/extra); guards against a refactor from fullmatch to match or search - Adjacent variables with prefix-overlapping names ({var}{vara}); documents the greedy capture split and confirms positional groups map to the correct dict keys --- tests/shared/test_uri_template.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git 
a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 94c9d9c42..a78ab538a 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -345,6 +345,8 @@ def test_expand_rejects_invalid_value_types(value: object): ("/files{/path*}/edit", "/files/a/b/edit", {"path": ["a", "b"]}), # Explode: labels ("host{.labels*}", "host.example.com", {"labels": ["example", "com"]}), + # Repeated-slash literals preserved exactly + ("///{a}////{b}////", "///x////y////", {"a": "x", "b": "y"}), ], ) def test_match(template: str, uri: str, expected: dict[str, str | list[str]]): @@ -359,12 +361,29 @@ def test_match(template: str, uri: str, expected: dict[str, str | list[str]]): ("file{.ext}", "file"), ("search{?q}", "search"), ("static", "different"), + # Anchoring: trailing extra component must not match. Guards + # against a refactor from fullmatch() to match() or search(). + ("/users/{id}", "/users/123/extra"), + ("/users/{id}/posts/{pid}", "/users/1/posts/2/extra"), + # Repeated-slash literal with wrong slash count + ("///{a}////{b}////", "//x////y////"), ], ) def test_match_no_match(template: str, uri: str): assert UriTemplate.parse(template).match(uri) is None +def test_match_adjacent_vars_with_prefix_names(): + # Two adjacent simple vars where one name is a prefix of the other. + # We use positional capture groups, so names only affect the result + # dict keys, not the regex. Standard greedy matching: the first var + # takes as much as it can while still letting the second satisfy +. 
+ t = UriTemplate.parse("{var}{vara}") + assert t.match("ab") == {"var": "a", "vara": "b"} + assert t.match("abc") == {"var": "ab", "vara": "c"} + assert t.match("abcd") == {"var": "abc", "vara": "d"} + + def test_match_decodes_percent_encoding(): t = UriTemplate.parse("file://docs/{name}") assert t.match("file://docs/hello%20world.txt") == {"name": "hello world.txt"} From c4f7db0746e1249ca375ee904d1808c8c0a60304 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:43:39 +0000 Subject: [PATCH 13/47] feat: reject null bytes in safe_join path components Null bytes pass through Path construction but fail at the syscall boundary with a cryptic 'embedded null byte' error. Rejecting in safe_join gives callers a clear PathEscapeError instead, and guards against null-byte injection when the path is used for anything other than immediate file I/O (logging, subprocess args, config). --- src/mcp/shared/path_security.py | 18 ++++++++++++------ tests/shared/test_path_security.py | 10 ++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/mcp/shared/path_security.py b/src/mcp/shared/path_security.py index 23817b949..8d75a4193 100644 --- a/src/mcp/shared/path_security.py +++ b/src/mcp/shared/path_security.py @@ -124,21 +124,27 @@ def safe_join(base: str | Path, *parts: str) -> Path: Args: base: The sandbox root. May be relative; it will be resolved. - parts: Path components to join. Each is checked for absolute - form before joining. + parts: Path components to join. Each is checked for null bytes + and absolute form before joining. Returns: The resolved path, guaranteed to be within ``base``. Raises: - PathEscapeError: If any part is absolute, or if the resolved - path is not contained within the resolved base. + PathEscapeError: If any part contains a null byte, any part is + absolute, or the resolved path is not contained within the + resolved base. 
""" base_resolved = Path(base).resolve() - # Reject absolute parts up front: Path's / operator would silently - # discard everything to the left of an absolute component. for part in parts: + # Null bytes pass through Path construction but fail at the + # syscall boundary with a cryptic error. Reject here so callers + # get a clear PathEscapeError instead. + if "\0" in part: + raise PathEscapeError(f"Path component contains a null byte; refusing to join onto {base_resolved}") + # Absolute parts would silently discard everything to the left + # in Path's / operator. if is_absolute_path(part): raise PathEscapeError(f"Path component {part!r} is absolute; refusing to join onto {base_resolved}") diff --git a/tests/shared/test_path_security.py b/tests/shared/test_path_security.py index fc3bd9091..75a1562d1 100644 --- a/tests/shared/test_path_security.py +++ b/tests/shared/test_path_security.py @@ -120,6 +120,16 @@ def test_safe_join_rejects_windows_drive(tmp_path: Path): safe_join(tmp_path, "C:\\Windows\\System32") +def test_safe_join_rejects_null_byte(tmp_path: Path): + with pytest.raises(PathEscapeError, match="null byte"): + safe_join(tmp_path, "file\0.txt") + + +def test_safe_join_rejects_null_byte_in_later_part(tmp_path: Path): + with pytest.raises(PathEscapeError, match="null byte"): + safe_join(tmp_path, "docs", "file\0.txt") + + def test_safe_join_rejects_symlink_escape(tmp_path: Path): outside = tmp_path / "outside" outside.mkdir() From 674783ff615e50c385522c920c8c62feecb5bbb2 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 19:46:53 +0000 Subject: [PATCH 14/47] fix: decide template vs static purely on URI variables The @resource() decorator now classifies resources based solely on whether the URI contains template variables, not on whether the handler has parameters. Previously, a handler taking only a Context parameter on a non-template URI would register as a zero-variable template. 
The template matched with an empty dict, which the walrus check in resource_manager treated as falsy, making the resource permanently unreachable. This has never worked. Now such a handler errors at decoration time with a clear message noting that Context injection for static resources is planned but not yet supported. Handlers with non-Context parameters on non-template URIs also get a clearer error than the old 'Mismatch' message. Also changes the resource_manager walrus check to compare against None explicitly, as defense-in-depth against any future case where matches() legitimately returns an empty dict. --- .../mcpserver/resources/resource_manager.py | 2 +- src/mcp/server/mcpserver/server.py | 29 ++++++++++++------- tests/server/mcpserver/test_server.py | 25 +++++++++++++++- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/mcp/server/mcpserver/resources/resource_manager.py b/src/mcp/server/mcpserver/resources/resource_manager.py index b1aab50f5..1e7e656cb 100644 --- a/src/mcp/server/mcpserver/resources/resource_manager.py +++ b/src/mcp/server/mcpserver/resources/resource_manager.py @@ -93,7 +93,7 @@ async def get_resource(self, uri: AnyUrl | str, context: Context[LifespanContext # Then check templates for template in self._templates.values(): - if params := template.matches(uri_str): + if (params := template.matches(uri_str)) is not None: try: return await template.create_resource(uri_str, params, context=context) except Exception as e: # pragma: no cover diff --git a/src/mcp/server/mcpserver/server.py b/src/mcp/server/mcpserver/server.py index cdf8e7eba..31018fbc9 100644 --- a/src/mcp/server/mcpserver/server.py +++ b/src/mcp/server/mcpserver/server.py @@ -702,18 +702,13 @@ async def get_weather(city: str) -> str: uri_params = set(parsed.variable_names) def decorator(fn: _CallableT) -> _CallableT: - # Check if this should be a template sig = inspect.signature(fn) - has_func_params = bool(sig.parameters) - - if uri_params or has_func_params: 
- # Check for Context parameter to exclude from validation - context_param = find_context_parameter(fn) - - # We need to remove the context_param from the resource function if - # there is any. - func_params = {p for p in sig.parameters.keys() if p != context_param} + context_param = find_context_parameter(fn) + func_params = {p for p in sig.parameters.keys() if p != context_param} + # Template/static is decided purely by the URI: variables + # present means template, none means static. + if uri_params: if uri_params != func_params: raise ValueError( f"Mismatch between URI parameters {uri_params} and function parameters {func_params}" @@ -733,6 +728,20 @@ def decorator(fn: _CallableT) -> _CallableT: meta=meta, ) else: + if func_params: + raise ValueError( + f"Resource {uri!r} has no URI template variables, but the " + f"handler declares parameters {func_params}. Add matching " + f"{{...}} variables to the URI or remove the parameters." + ) + if context_param is not None: + raise ValueError( + f"Resource {uri!r} has no URI template variables, but the " + f"handler declares a Context parameter. Context injection " + f"for static resources is not yet supported but is planned. " + f"For now, add a template variable to the URI or remove the " + f"Context parameter." 
+ ) # Register as regular resource resource = FunctionResource.from_function( fn=fn, diff --git a/tests/server/mcpserver/test_server.py b/tests/server/mcpserver/test_server.py index 077b30799..050ce7043 100644 --- a/tests/server/mcpserver/test_server.py +++ b/tests/server/mcpserver/test_server.py @@ -851,7 +851,7 @@ async def test_resource_with_params(self): parameters don't match""" mcp = MCPServer() - with pytest.raises(ValueError, match="Mismatch between URI parameters"): + with pytest.raises(ValueError, match="has no URI template variables"): @mcp.resource("resource://data") def get_data_fn(param: str) -> str: # pragma: no cover @@ -1192,6 +1192,29 @@ def resource_with_context(name: str, ctx: Context) -> str: # Should have either request_id or indication that context was injected assert "Resource test - context injected" == content.text + async def test_static_resource_with_context_param_errors(self): + """A non-template URI with a Context-only handler should error + at decoration time with a clear message, not silently register + an unreachable resource.""" + mcp = MCPServer() + + with pytest.raises(ValueError, match="Context injection for static resources is not yet supported"): + + @mcp.resource("weather://current") + def current_weather(ctx: Context) -> str: + raise NotImplementedError + + async def test_static_resource_with_extra_params_errors(self): + """A non-template URI with non-Context params should error at + decoration time.""" + mcp = MCPServer() + + with pytest.raises(ValueError, match="has no URI template variables"): + + @mcp.resource("data://fixed") + def get_data(name: str) -> str: + raise NotImplementedError + async def test_resource_without_context(self): """Test that resources without context work normally.""" mcp = MCPServer() From 1987340163868029f2cb1d32c75073b31dfdebe0 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 20:20:51 +0000 Subject: [PATCH 15/47] fix: correct ; 
operator matching and expansion per RFC 6570 Three fixes to the path-style parameter operator: Non-explode {;id}: the regex used =? (optional equals), which let {;id} match ;identity=john by consuming 'id' as a prefix. Added a lookahead asserting the name ends at = or a delimiter. Explode {;keys*} matching: captured segments included the name= prefix (returning ['keys=a', 'keys=b'] instead of ['a', 'b']) and did not validate the parameter name (so ;admin=true matched). Now strips the prefix and rejects wrong names in post-processing. Explode {;keys*} expansion: emitted name= for empty items. RFC 3.2.7's ifemp rule says ; omits the = for empty values, so ['a', '', 'b'] now expands to ;keys=a;keys;keys=b. All three are covered by new round-trip tests including the empty-item edge case. --- src/mcp/shared/uri_template.py | 33 ++++++++++++++++++++++++++----- tests/shared/test_uri_template.py | 16 +++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 2da52a61a..8f2123658 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -185,7 +185,12 @@ def _expand_expression(expr: _Expression, variables: Mapping[str, str | Sequence if var.explode: # Each item gets the operator's separator; named ops repeat the key. if spec.named: - rendered.append(spec.separator.join(f"{var.name}={v}" for v in items)) + # RFC §3.2.7 ifemp: ; omits the = for empty values. + rendered.append( + spec.separator.join( + var.name if (v == "" and expr.operator == ";") else f"{var.name}={v}" for v in items + ) + ) else: rendered.append(spec.separator.join(items)) else: @@ -394,14 +399,26 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di if var.explode: # Explode capture holds the whole run including separators, - # e.g. "/a/b/c". Split, decode each segment, check each. + # e.g. "/a/b/c" or ";keys=a;keys=b". Split, decode each + # segment, check each. 
if not raw: result[var.name] = [] continue segments: list[str] = [] + prefix = f"{var.name}=" for seg in raw.split(spec.separator): if not seg: # leading separator produces an empty first item continue + if spec.named: + # Named explode emits name=value per item (or bare + # name for ; with empty value). Validate the name + # and strip the prefix before decoding. + if seg.startswith(prefix): + seg = seg[len(prefix) :] + elif seg == var.name: + seg = "" + else: + return None decoded = unquote(seg) if any(c in decoded for c in forbidden): return None @@ -464,9 +481,15 @@ def _expression_pattern(expr: _Expression) -> str: # Non-greedy so a trailing literal can terminate the run. pieces.append(f"((?:{sep}{body})*?)") elif spec.named: - # ;name=val or ?name=val — the = is optional for ; with empty value - eq = "=?" if expr.operator == ";" else "=" - pieces.append(f"{lead}{re.escape(var.name)}{eq}({body})") + name = re.escape(var.name) + if expr.operator == ";": + # RFC ifemp: ; emits bare name for empty values, so = is + # optional. The lookahead asserts the name ends at = or a + # delimiter, preventing {;id} from matching ;identity. + pieces.append(f"{lead}{name}(?==|[;/?#]|$)=?({body})") + else: + # ? and & always emit name=, even for empty values. 
+ pieces.append(f"{lead}{name}=({body})") else: pieces.append(f"{lead}({body})") diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index a78ab538a..2d15bdac7 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -280,6 +280,8 @@ def test_frozen(): ("{/path*}", {"path": ["a", "b", "c"]}, "/a/b/c"), ("{.labels*}", {"labels": ["x", "y"]}, ".x.y"), ("{;keys*}", {"keys": ["a", "b"]}, ";keys=a;keys=b"), + # RFC §3.2.7 ifemp: ; omits = for empty explode items + ("{;keys*}", {"keys": ["a", "", "b"]}, ";keys=a;keys;keys=b"), # Undefined variables omitted ("{?q,page}", {"q": "x"}, "?q=x"), ("{a}{b}", {"a": "x"}, "x"), @@ -333,6 +335,10 @@ def test_expand_rejects_invalid_value_types(value: object): # Level 3: path-style param ("item{;id}", "item;id=42", {"id": "42"}), ("item{;id}", "item;id", {"id": ""}), + # Explode: ; emits name=value per item, match strips the prefix + ("item{;keys*}", "item;keys=a;keys=b", {"keys": ["a", "b"]}), + ("item{;keys*}", "item;keys=a;keys;keys=b", {"keys": ["a", "", "b"]}), + ("item{;keys*}", "item", {"keys": []}), # Level 3: query ("search{?q}", "search?q=hello", {"q": "hello"}), ("search{?q}", "search?q=", {"q": ""}), @@ -367,6 +373,12 @@ def test_match(template: str, uri: str, expected: dict[str, str | list[str]]): ("/users/{id}/posts/{pid}", "/users/1/posts/2/extra"), # Repeated-slash literal with wrong slash count ("///{a}////{b}////", "//x////y////"), + # ; name boundary: {;id} must not match a longer parameter name + ("item{;id}", "item;identity=john"), + ("item{;id}", "item;ident"), + # ; explode: wrong parameter name in any segment rejects the match + ("item{;keys*}", "item;admin=true"), + ("item{;keys*}", "item;keys=a;admin=true"), ], ) def test_match_no_match(template: str, uri: str): @@ -482,6 +494,10 @@ def test_match_structural_integrity_per_explode_segment(): ("file{.ext}", {"ext": "txt"}), ("/files{/path*}", {"path": ["a", "b", "c"]}), ("{var}", {"var": "hello 
world"}), + ("item{;id}", {"id": "42"}), + ("item{;id}", {"id": ""}), + ("item{;keys*}", {"keys": ["a", "b", "c"]}), + ("item{;keys*}", {"keys": ["a", "", "b"]}), ], ) def test_roundtrip_expand_then_match(template: str, variables: dict[str, str | list[str]]): From c1a17872860ba91ba4c3f68d1b570b920d705488 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 21:16:22 +0000 Subject: [PATCH 16/47] refactor: remove post-decode structural checks from UriTemplate.match UriTemplate.match() no longer rejects decoded values containing characters like /, ?, #, &. It now faithfully returns whatever expand() would have encoded, so match(expand(x)) == x holds for all inputs. The previous check broke round-trip for legitimate values (a&b expanded to a%26b but match rejected it) and was inconsistent with every other MCP SDK. The spec's own canonical example file:///{path} requires multi-segment values; Kotlin and C# already decode without rejection and document handler-side validation as the security contract. Path-safety validation remains in ResourceSecurity (configurable) and safe_join (the gold-standard check). The %2F path-traversal attack vector is still blocked: ..%2Fetc%2Fpasswd decodes to ../etc/passwd, which contains_path_traversal rejects. Tests confirm this end-to-end. This aligns us with Kotlin's documented model: decode once, pass to handler, handler validates. 
--- docs/migration.md | 5 +- docs/server/resources.md | 14 +++-- .../server/mcpserver/resources/templates.py | 16 +++--- src/mcp/shared/uri_template.py | 52 ++++-------------- .../resources/test_resource_template.py | 31 +++++++---- tests/shared/test_uri_template.py | 55 +++++++------------ 6 files changed, 69 insertions(+), 104 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index 9ce3f2539..40027c0b3 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -566,8 +566,9 @@ By default, extracted parameter values are now rejected if they: - Contain `..` as a path component (e.g., `..`, `../etc`, `a/../../b`) - Look like an absolute filesystem path (e.g., `/etc/passwd`, `C:\Windows`) -- Decode to contain structural delimiters that their operator forbids - (e.g., `%2F` smuggled into a simple `{name}`) + +These checks apply to the decoded value, so they catch traversal +regardless of encoding (`../etc`, `..%2Fetc`, `%2E%2E/etc` all caught). If your template parameters legitimately contain `..` (e.g., git commit ranges like `HEAD~3..HEAD`) or absolute paths, exempt them: diff --git a/docs/server/resources.md b/docs/server/resources.md index 8d46b2ee2..85d7e6a6b 100644 --- a/docs/server/resources.md +++ b/docs/server/resources.md @@ -140,8 +140,10 @@ Before your handler runs, the SDK rejects any parameter that: - contains `..` as a path component - looks like an absolute path (`/etc/passwd`, `C:\Windows`) -- smuggles a delimiter through URL encoding (for example, `%2F` in a - plain `{name}` where `/` isn't allowed) + +These checks apply to the decoded value, so they catch traversal +regardless of how it was encoded in the URI (`../etc`, `..%2Fetc`, +`%2E%2E/etc`, `..%5Cetc` all get caught). A request that trips these checks is treated as a non-match: the SDK raises `ResourceError("Unknown resource: {uri}")`, which the client @@ -271,8 +273,8 @@ templates (below) if you have any, then raise for anything else. 
### Templates The template engine `MCPServer` uses lives in `mcp.shared.uri_template` -and works on its own. You get the same parsing, matching, and -structural checks; you wire up the routing and policy yourself. +and works on its own. You get the same parsing and matching; you wire +up the routing and security policy yourself. #### Matching requests @@ -306,8 +308,8 @@ server = Server("my-server", on_read_resource=on_read_resource) ``` `UriTemplate.match()` returns the extracted variables or `None`. URL -decoding and the structural checks (rejecting `%2F` in simple `{name}` -and so on) happen inside `match()`, the same as in `MCPServer`. +decoding happens inside `match()`; the decoded values are returned +as-is without path-safety validation. Values come out as strings. Convert them yourself: `int(vars["id"])`, `Path(vars["path"])`, whatever your handler needs. diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index 5bb434f99..c5b5b6f6f 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -25,11 +25,10 @@ class ResourceSecurity: """Security policy applied to extracted resource template parameters. - These checks run **after** :meth:`~mcp.shared.uri_template.UriTemplate.match` - has already enforced structural integrity (e.g., rejected ``%2F`` in - simple ``{var}``). They catch semantic attacks that structural checks - cannot: ``..`` traversal and absolute-path injection work even with - perfectly-formed URI components. + These checks run after :meth:`~mcp.shared.uri_template.UriTemplate.match` + has extracted and decoded parameter values. They catch path-traversal + and absolute-path injection regardless of how the value was encoded in + the URI (literal, ``%2F``, ``%5C``, ``%2E%2E``). 
Example:: @@ -153,10 +152,9 @@ def from_function( def matches(self, uri: str) -> dict[str, str | list[str]] | None: """Check if a URI matches this template and extract parameters. - Delegates to :meth:`UriTemplate.match` for RFC 6570 matching - with structural integrity (``%2F`` smuggling rejected for simple - vars), then applies this template's :class:`ResourceSecurity` - policy (path traversal, absolute paths). + Delegates to :meth:`UriTemplate.match` for RFC 6570 extraction, + then applies this template's :class:`ResourceSecurity` policy + (path traversal, absolute paths). Returns: Extracted parameters on success, or ``None`` if the URI diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 8f2123658..46394152e 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -75,20 +75,6 @@ class _OperatorSpec: "&": r"[^&#]*", # query-cont value } -# Characters that must not appear in a DECODED value for each operator. -# If %2F smuggles a / into a simple {var}, the decoded value violates -# the template author's declared structure and the match is rejected. -_STRUCTURAL_FORBIDDEN: dict[Operator, frozenset[str]] = { - "": frozenset("/?#&"), - "+": frozenset(), - "#": frozenset(), - ".": frozenset("./?#"), - "/": frozenset("/?#"), - ";": frozenset(";/?#"), - "?": frozenset("&#"), - "&": frozenset("&#"), -} - class InvalidUriTemplate(ValueError): """Raised when a URI template string is malformed or unsupported. @@ -343,16 +329,15 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di """Match a concrete URI against this template and extract variables. This is the inverse of :meth:`expand`. The URI is matched against - a regex derived from the template; captured values are - percent-decoded and validated for structural integrity. + a regex derived from the template and captured values are + percent-decoded. For any value ``v``, ``match(expand({k: v}))`` + returns ``{k: v}``. 
- **Structural integrity**: decoded values must not contain - characters that are structurally significant for their operator. - A simple ``{name}`` whose value decodes to contain ``/`` is - rejected — if that was intended, the template author should use - ``{+name}``. This blocks the ``%2F``-smuggling vector where a - client encodes a path separator to bypass single-segment - semantics. + Matching is structural at the URI level only: a simple ``{name}`` + will not match across a literal ``/`` in the URI (the regex stops + there), but a percent-encoded ``%2F`` that decodes to ``/`` is + accepted as part of the value. Path-safety validation belongs at + a higher layer; see :mod:`mcp.shared.path_security`. Example:: @@ -361,8 +346,6 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di {'name': 'readme.txt'} >>> t.match("file://docs/hello%20world.txt") {'name': 'hello world.txt'} - >>> t.match("file://docs/..%2Fetc%2Fpasswd") is None # / in simple var - True >>> t = UriTemplate.parse("file://docs/{+path}") >>> t.match("file://docs/src/main.py") @@ -381,8 +364,7 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di Returns: A mapping from variable names to decoded values (``str`` for scalar variables, ``list[str]`` for explode variables), or - ``None`` if the URI does not match the template, a decoded - value violates structural integrity, or the URI exceeds + ``None`` if the URI does not match the template or exceeds ``max_uri_length``. """ if len(uri) > max_uri_length: @@ -395,12 +377,10 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di # One capture group per variable, emitted in template order. for var, raw in zip(self._variables, m.groups()): spec = _OPERATOR_SPECS[var.operator] - forbidden = _STRUCTURAL_FORBIDDEN[var.operator] if var.explode: # Explode capture holds the whole run including separators, - # e.g. "/a/b/c" or ";keys=a;keys=b". 
Split, decode each - # segment, check each. + # e.g. "/a/b/c" or ";keys=a;keys=b". Split and decode each. if not raw: result[var.name] = [] continue @@ -419,18 +399,10 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di seg = "" else: return None - decoded = unquote(seg) - if any(c in decoded for c in forbidden): - return None - segments.append(decoded) + segments.append(unquote(seg)) result[var.name] = segments else: - decoded = unquote(raw) - # Structural integrity: reject if decoding revealed a - # delimiter the operator doesn't permit. - if any(c in decoded for c in forbidden): - return None - result[var.name] = decoded + result[var.name] = unquote(raw) return result diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 9c84f35f8..e02a8c471 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -26,8 +26,9 @@ def test_matches_rfc6570_reserved_expansion(): assert t.matches("file://docs/src/main.py") == {"path": "src/main.py"} -def test_matches_rejects_encoded_slash_in_simple_var(): - # Path traversal via encoded slash: %2F smuggled into a simple {var} +def test_matches_rejects_encoded_slash_traversal(): + # %2F decodes to / in UriTemplate.match(), giving "../../etc/passwd". + # ResourceSecurity's traversal check then rejects the '..' components. t = _make("file://docs/{name}") assert t.matches("file://docs/..%2F..%2Fetc%2Fpasswd") is None @@ -73,29 +74,35 @@ def test_matches_explode_checks_each_segment(): assert t.matches("api/a/../c") is None -def test_matches_encoded_backslash_caught_by_traversal_layer(): - # %5C decodes to '\\'. Backslash is not a URI delimiter, so it passes - # structural integrity (layer 1). The traversal check (layer 2) - # normalizes '\\' to '/' and catches the '..' components. 
+def test_matches_encoded_backslash_caught_by_traversal_check(): + # %5C decodes to '\\'. The traversal check normalizes '\\' to '/' + # and catches the '..' components. t = _make("file://docs/{name}") assert t.matches("file://docs/..%5C..%5Csecret") is None -def test_matches_encoded_dots_caught_by_traversal_layer(): - # %2E%2E decodes to '..'. Contains no structural delimiter, so passes - # layer 1. Layer 2's traversal check catches the '..' component. +def test_matches_encoded_dots_caught_by_traversal_check(): + # %2E%2E decodes to '..' which the traversal check rejects. t = _make("file://docs/{name}") assert t.matches("file://docs/%2E%2E") is None def test_matches_mixed_encoded_and_literal_slash(): - # One encoded slash + one literal: literal '/' prevents the regex - # match at layer 0 (simple var stops at '/'), so this never reaches - # decoding. Different failure mode than pure-encoded traversal. + # The literal '/' stops the simple-var regex, so the URI doesn't + # match the template at all. t = _make("file://docs/{name}") assert t.matches("file://docs/..%2F../etc") is None +def test_matches_encoded_slash_without_traversal_allowed(): + # %2F decoding to '/' is fine when there's no traversal involved. + # UriTemplate accepts it; ResourceSecurity only blocks '..' and + # absolute paths. Handlers that need single-segment should use + # safe_join or validate explicitly. + t = _make("file://docs/{name}") + assert t.matches("file://docs/sub%2Ffile.txt") == {"name": "sub/file.txt"} + + def test_matches_escapes_template_literals(): # Regression: old impl treated . 
as regex wildcard t = _make("data://v1.0/{id}") diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 2d15bdac7..59076edac 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -410,27 +410,26 @@ def test_match_escapes_template_literals(): @pytest.mark.parametrize( - ("template", "uri"), + ("template", "uri", "expected"), [ - # %2F in simple var — encoded-slash path traversal - ("file://docs/{name}", "file://docs/..%2F..%2Fetc%2Fpasswd"), - ("file://docs/{name}", "file://docs/..%2f..%2fetc%2fpasswd"), - # %3F (?) in simple var - ("{var}", "a%3Fb"), - # %2E (.) in label var — would break label structure - ("file{.ext}", "file.a%2Eb"), - # %2F in path-segment var - ("api{/v}", "api/a%2Fb"), - # %26 (&) in query var — would break query structure - ("search{?q}", "search?q=a%26b"), + # Percent-encoded delimiters round-trip through match/expand. + # Path-safety validation belongs to ResourceSecurity, not here. + ("file://docs/{name}", "file://docs/a%2Fb", {"name": "a/b"}), + ("{var}", "a%3Fb", {"var": "a?b"}), + ("{var}", "a%23b", {"var": "a#b"}), + ("{var}", "a%26b", {"var": "a&b"}), + ("file{.ext}", "file.a%2Eb", {"ext": "a.b"}), + ("api{/v}", "api/a%2Fb", {"v": "a/b"}), + ("search{?q}", "search?q=a%26b", {"q": "a&b"}), + ("{;filter}", ";filter=a%3Bb", {"filter": "a;b"}), ], ) -def test_match_structural_integrity_rejects_smuggled_delimiters(template: str, uri: str): - assert UriTemplate.parse(template).match(uri) is None +def test_match_encoded_delimiters_roundtrip(template: str, uri: str, expected: dict[str, str]): + assert UriTemplate.parse(template).match(uri) == expected -def test_match_structural_integrity_allows_slash_in_reserved(): - # {+var} explicitly permits / — structural check must not block it +def test_match_reserved_expansion_handles_slash(): + # {+var} allows literal / (not just encoded) t = UriTemplate.parse("{+path}") assert t.match("a%2Fb") == {"path": "a/b"} assert 
t.match("a/b") == {"path": "a/b"} @@ -438,26 +437,11 @@ def test_match_structural_integrity_allows_slash_in_reserved(): def test_match_double_encoding_decoded_once(): # %252F is %2F encoded again. Single decode gives "%2F" (a literal - # percent sign, a '2', and an 'F'), which contains no '/' and should - # be accepted. Guards against over-decoding. + # percent sign, a '2', and an 'F'). Guards against over-decoding. t = UriTemplate.parse("file://docs/{name}") assert t.match("file://docs/..%252Fetc") == {"name": "..%2Fetc"} -def test_match_multi_param_one_poisoned_rejects_whole(): - # One bad param in a multi-param template rejects the entire match - t = UriTemplate.parse("file://{org}/{repo}") - assert t.match("file://acme/..%2Fsecret") is None - # But the same template with clean params matches fine - assert t.match("file://acme/project") == {"org": "acme", "repo": "project"} - - -def test_match_bare_encoded_delimiter_rejected(): - # A value that decodes to only the forbidden delimiter - t = UriTemplate.parse("file://docs/{name}") - assert t.match("file://docs/%2F") is None - - def test_match_rejects_oversized_uri(): t = UriTemplate.parse("{var}") assert t.match("x" * 100, max_uri_length=50) is None @@ -478,10 +462,11 @@ def test_match_default_uri_length_limit(): assert t.match("x" * (DEFAULT_MAX_URI_LENGTH + 1)) is None -def test_match_structural_integrity_per_explode_segment(): +def test_match_explode_encoded_separator_in_segment(): + # An encoded separator inside a segment decodes as part of the value, + # not as a split point. The split happens at literal separators only. 
t = UriTemplate.parse("/files{/path*}") - # Each segment checked independently - assert t.match("/files/a%2Fb/c") is None + assert t.match("/files/a%2Fb/c") == {"path": ["a/b", "c"]} @pytest.mark.parametrize( From 93e742b624b058fcfb8275ecd07821336f41cd40 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 21:29:48 +0000 Subject: [PATCH 17/47] feat: lenient query param matching for {?var} and {&var} UriTemplate.match() now handles trailing {?...}/{&...} expressions via urllib.parse.parse_qs instead of positional regex. Query parameters are matched order-agnostic, partial params are accepted, and unrecognized params are ignored. Parameters absent from the URI stay absent from the result so downstream function defaults apply. This restores the round-trip invariant for query expansion: RFC 6570 skips undefined vars during expand(), so {?q,lang} with only q set produces ?q=foo. Previously match() rejected that output; now it returns {'q': 'foo'}. Templates with a literal ? in the path portion (?fixed=1{&page}) fall back to strict regex matching since the URI split won't align with the template's expression boundary. The docs example at docs/server/resources.md (logs://{service}{?since,level} with Python defaults) now works as documented. 
--- src/mcp/shared/uri_template.py | 172 ++++++++++++++++++++------ tests/server/mcpserver/test_server.py | 30 +++++ tests/shared/test_uri_template.py | 21 +++- 3 files changed, 184 insertions(+), 39 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 46394152e..2d094b24e 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -16,7 +16,7 @@ from collections.abc import Mapping, Sequence from dataclasses import dataclass, field from typing import Literal, cast -from urllib.parse import quote, unquote +from urllib.parse import parse_qs, quote, unquote __all__ = ["InvalidUriTemplate", "Operator", "UriTemplate", "Variable"] @@ -201,6 +201,8 @@ class UriTemplate: _parts: tuple[_Part, ...] = field(repr=False, compare=False) _variables: tuple[Variable, ...] = field(repr=False, compare=False) _pattern: re.Pattern[str] = field(repr=False, compare=False) + _path_variables: tuple[Variable, ...] = field(repr=False, compare=False) + _query_variables: tuple[Variable, ...] = field(repr=False, compare=False) @staticmethod def is_template(value: str) -> bool: @@ -253,8 +255,22 @@ def parse( ) parts, variables = _parse(template, max_expressions=max_expressions) - pattern = _build_pattern(parts) - return cls(template=template, _parts=parts, _variables=variables, _pattern=pattern) + + # Trailing {?...}/{&...} expressions are matched leniently via + # parse_qs instead of regex: order-agnostic, partial, ignores + # extras. The path portion keeps regex matching. 
+ path_parts, query_vars = _split_query_tail(parts) + path_vars = variables[: len(variables) - len(query_vars)] + pattern = _build_pattern(path_parts) + + return cls( + template=template, + _parts=parts, + _variables=variables, + _pattern=pattern, + _path_variables=path_vars, + _query_variables=query_vars, + ) @property def variables(self) -> tuple[Variable, ...]: @@ -355,6 +371,19 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di >>> t.match("/files/a/b/c") {'path': ['a', 'b', 'c']} + **Query parameters** (``{?q,lang}`` at the end of a template) + are matched leniently: order-agnostic, partial, and unrecognized + params are ignored. Absent params are omitted from the result so + downstream function defaults can apply:: + + >>> t = UriTemplate.parse("logs://{service}{?since,level}") + >>> t.match("logs://api") + {'service': 'api'} + >>> t.match("logs://api?level=error") + {'service': 'api', 'level': 'error'} + >>> t.match("logs://api?level=error&since=5m&utm=x") + {'service': 'api', 'since': '5m', 'level': 'error'} + Args: uri: A concrete URI string. max_uri_length: Maximum permitted length of the input URI. @@ -369,45 +398,116 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di """ if len(uri) > max_uri_length: return None + + if self._query_variables: + # Two-phase: regex matches the path, parse_qs handles the + # query. Query params may be partial, reordered, or include + # extras; absent params stay absent so downstream defaults + # can apply. 
+ path, _, query = uri.partition("?") + m = self._pattern.fullmatch(path) + if m is None: + return None + result = _extract_path(m, self._path_variables) + if result is None: + return None + if query: + parsed = parse_qs(query, keep_blank_values=True) + for var in self._query_variables: + if var.name in parsed: + result[var.name] = parsed[var.name][0] + return result + m = self._pattern.fullmatch(uri) if m is None: return None + return _extract_path(m, self._variables) - result: dict[str, str | list[str]] = {} - # One capture group per variable, emitted in template order. - for var, raw in zip(self._variables, m.groups()): - spec = _OPERATOR_SPECS[var.operator] + def __str__(self) -> str: + return self.template - if var.explode: - # Explode capture holds the whole run including separators, - # e.g. "/a/b/c" or ";keys=a;keys=b". Split and decode each. - if not raw: - result[var.name] = [] + +def _extract_path(m: re.Match[str], variables: tuple[Variable, ...]) -> dict[str, str | list[str]] | None: + """Decode regex capture groups into a variable-name mapping. + + Handles scalar and explode variables. Named explode (``;``) strips + and validates the ``name=`` prefix per item, returning ``None`` on + mismatch. + """ + result: dict[str, str | list[str]] = {} + # One capture group per variable, emitted in template order. + for var, raw in zip(variables, m.groups()): + spec = _OPERATOR_SPECS[var.operator] + + if var.explode: + # Explode capture holds the whole run including separators, + # e.g. "/a/b/c" or ";keys=a;keys=b". Split and decode each. 
+ if not raw: + result[var.name] = [] + continue + segments: list[str] = [] + prefix = f"{var.name}=" + for seg in raw.split(spec.separator): + if not seg: # leading separator produces an empty first item continue - segments: list[str] = [] - prefix = f"{var.name}=" - for seg in raw.split(spec.separator): - if not seg: # leading separator produces an empty first item - continue - if spec.named: - # Named explode emits name=value per item (or bare - # name for ; with empty value). Validate the name - # and strip the prefix before decoding. - if seg.startswith(prefix): - seg = seg[len(prefix) :] - elif seg == var.name: - seg = "" - else: - return None - segments.append(unquote(seg)) - result[var.name] = segments - else: - result[var.name] = unquote(raw) + if spec.named: + # Named explode emits name=value per item (or bare + # name for ; with empty value). Validate the name + # and strip the prefix before decoding. + if seg.startswith(prefix): + seg = seg[len(prefix) :] + elif seg == var.name: + seg = "" + else: + return None + segments.append(unquote(seg)) + result[var.name] = segments + else: + result[var.name] = unquote(raw) - return result + return result - def __str__(self) -> str: - return self.template + +def _split_query_tail( + parts: tuple[_Part, ...], +) -> tuple[tuple[_Part, ...], tuple[Variable, ...]]: + """Separate trailing ``?``/``&`` expressions from the path portion. + + Lenient query matching (order-agnostic, partial, ignores extras) + applies when a template ends with one or more consecutive ``?``/``&`` + expressions and the preceding path portion contains no literal + ``?``. If the path has a literal ``?`` (e.g., ``?fixed=1{&page}``), + the URI's ``?`` split won't align with the template's expression + boundary, so strict regex matching is used instead. + + Returns: + A pair ``(path_parts, query_vars)``. If lenient matching does + not apply, ``query_vars`` is empty and ``path_parts`` is the + full input. 
+ """ + split = len(parts) + for i in range(len(parts) - 1, -1, -1): + part = parts[i] + if isinstance(part, _Expression) and part.operator in ("?", "&"): + split = i + else: + break + + if split == len(parts): + return parts, () + + # If the path portion contains a literal ?, the URI's ? won't align + # with our template split. Fall back to strict regex. + for part in parts[:split]: + if isinstance(part, str) and "?" in part: + return parts, () + + query_vars: list[Variable] = [] + for part in parts[split:]: + assert isinstance(part, _Expression) + query_vars.extend(part.variables) + + return parts[:split], tuple(query_vars) def _build_pattern(parts: tuple[_Part, ...]) -> re.Pattern[str]: @@ -415,8 +515,8 @@ def _build_pattern(parts: tuple[_Part, ...]) -> re.Pattern[str]: Walks parts in order: literals are ``re.escape``'d, expressions become capture groups. One group is emitted per variable, in the - same order as ``UriTemplate._variables``, so ``match.groups()`` can - be zipped directly. + same order as the variables appearing in ``parts``, so + ``match.groups()`` can be zipped directly. Raises: re.error: Only if pattern assembly is buggy — should not happen diff --git a/tests/server/mcpserver/test_server.py b/tests/server/mcpserver/test_server.py index 050ce7043..6a271c12f 100644 --- a/tests/server/mcpserver/test_server.py +++ b/tests/server/mcpserver/test_server.py @@ -159,6 +159,36 @@ async def test_resource_decorator_rejects_malformed_template(self): with pytest.raises(InvalidUriTemplate, match="Unclosed expression"): mcp.resource("file://{name") + async def test_resource_optional_query_params_use_function_defaults(self): + """Omitted {?...} query params should fall through to the + handler's Python defaults. 
Partial and reordered params work.""" + mcp = MCPServer() + + @mcp.resource("logs://{service}{?since,level}") + def tail_logs(service: str, since: str = "1h", level: str = "info") -> str: + return f"{service}|{since}|{level}" + + async with Client(mcp) as client: + # No query → all defaults + r = await client.read_resource("logs://api") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|1h|info" + + # Partial query → one default + r = await client.read_resource("logs://api?since=15m") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|15m|info" + + # Reordered, both present + r = await client.read_resource("logs://api?level=error&since=5m") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|5m|error" + + # Extra param ignored + r = await client.read_resource("logs://api?since=2h&utm=x") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|2h|info" + async def test_resource_security_default_rejects_traversal(self): mcp = MCPServer() diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 59076edac..698d401df 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -339,11 +339,22 @@ def test_expand_rejects_invalid_value_types(value: object): ("item{;keys*}", "item;keys=a;keys=b", {"keys": ["a", "b"]}), ("item{;keys*}", "item;keys=a;keys;keys=b", {"keys": ["a", "", "b"]}), ("item{;keys*}", "item", {"keys": []}), - # Level 3: query + # Level 3: query. Lenient matching: partial, reordered, and + # extra params are all accepted. Absent params stay absent. 
("search{?q}", "search?q=hello", {"q": "hello"}), ("search{?q}", "search?q=", {"q": ""}), + ("search{?q}", "search", {}), ("search{?q,lang}", "search?q=mcp&lang=en", {"q": "mcp", "lang": "en"}), - # Level 3: query continuation + ("search{?q,lang}", "search?lang=en&q=mcp", {"q": "mcp", "lang": "en"}), + ("search{?q,lang}", "search?q=mcp", {"q": "mcp"}), + ("search{?q,lang}", "search", {}), + ("search{?q}", "search?q=mcp&utm=x&ref=y", {"q": "mcp"}), + # URL-encoded query values are decoded + ("search{?q}", "search?q=hello%20world", {"q": "hello world"}), + # Multiple ?/& expressions collected together + ("api{?v}{&page,limit}", "api?limit=10&v=2", {"v": "2", "limit": "10"}), + # Level 3: query continuation with literal ? falls back to + # strict regex (template-order, all-present required) ("?a=1{&b}", "?a=1&b=2", {"b": "2"}), # Explode: path segments as list ("/files{/path*}", "/files/a/b/c", {"path": ["a", "b", "c"]}), @@ -365,7 +376,6 @@ def test_match(template: str, uri: str, expected: dict[str, str | list[str]]): ("file://docs/{name}", "file://other/readme.txt"), ("{a}/{b}", "foo"), ("file{.ext}", "file"), - ("search{?q}", "search"), ("static", "different"), # Anchoring: trailing extra component must not match. Guards # against a refactor from fullmatch() to match() or search(). @@ -483,6 +493,11 @@ def test_match_explode_encoded_separator_in_segment(): ("item{;id}", {"id": ""}), ("item{;keys*}", {"keys": ["a", "b", "c"]}), ("item{;keys*}", {"keys": ["a", "", "b"]}), + # Partial query expansion round-trips: expand omits undefined + # vars, match leaves them absent from the result. 
+ ("logs://{service}{?since,level}", {"service": "api"}), + ("logs://{service}{?since,level}", {"service": "api", "since": "1h"}), + ("logs://{service}{?since,level}", {"service": "api", "since": "1h", "level": "error"}), ], ) def test_roundtrip_expand_then_match(template: str, variables: dict[str, str | list[str]]): From 99c9cb06424be73b49f600af2a2ea1b5fa14fc68 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 21:38:07 +0000 Subject: [PATCH 18/47] fix: tighten parse-time validation and document matching limits Four small corrections: Varname grammar: the RFC grammar requires dots only between varchar groups, so {foo..bar} and {foo.} are now rejected. Previously the regex allowed any dot placement after the first char. Adjacent explodes: previously only same-operator adjacent explodes ({/a*}{/b*}) were rejected. Different operators ({/a*}{.b*}) are equally ambiguous because the first operator's character class typically includes the second's separator, so the first explode greedily consumes both. All adjacent explodes are now rejected; a literal or non-explode variable between them still disambiguates. Documented the inherent ambiguity of multi-var reserved expressions ({+x,y} with commas in values) and the intentional tradeoff that {+var} match stops at ? and # so {+path}{?q} can separate correctly. --- src/mcp/shared/uri_template.py | 53 ++++++++++++++++++++++--------- tests/shared/test_uri_template.py | 40 ++++++++++++++++++----- 2 files changed, 70 insertions(+), 23 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 2d094b24e..14b3210cf 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -8,6 +8,26 @@ Supports Levels 1-3 fully, plus Level 4 explode modifier for path-like operators (``{/var*}``, ``{.var*}``, ``{;var*}``). The Level 4 prefix modifier (``{var:N}``) and query-explode (``{?var*}``) are not supported. 
+ +Known matching limitations +-------------------------- + +Matching is not specified by RFC 6570. A few templates can expand to +URIs that ``match()`` cannot unambiguously reverse: + +* Multi-variable reserved expressions like ``{+x,y}`` use a comma as + separator but also permit commas *inside* values (commas are in the + reserved set). ``match("a,b,c")`` cannot know which comma is the + separator. The matcher takes the last comma as the split point; if + your values contain commas, prefer separate expressions (``{+x}/{+y}``) + or a different operator. + +* Reserved expansion ``{+var}`` leaves ``?`` and ``#`` unencoded, but + the match pattern stops at those characters so that templates like + ``{+path}{?q}`` can correctly separate path from query. A value + containing a literal ``?`` or ``#`` expands fine but will not + round-trip through ``match()``. Use simple ``{var}`` (which encodes + them) if round-trip matters for such values. """ from __future__ import annotations @@ -25,8 +45,9 @@ _OPERATORS: frozenset[str] = frozenset({"+", "#", ".", "/", ";", "?", "&"}) # RFC 6570 §2.3: varname = varchar *(["."] varchar), varchar = ALPHA / DIGIT / "_" +# Dots appear only between varchar groups — not consecutive, not trailing. # (Percent-encoded varchars are technically allowed but unseen in practice.) -_VARNAME_RE = re.compile(r"^[A-Za-z0-9_][A-Za-z0-9_.]*$") +_VARNAME_RE = re.compile(r"^[A-Za-z0-9_]+(?:\.[A-Za-z0-9_]+)*$") DEFAULT_MAX_TEMPLATE_LENGTH = 1_000_000 DEFAULT_MAX_EXPRESSIONS = 10_000 @@ -717,33 +738,35 @@ def _check_duplicate_variables(template: str, variables: list[Variable]) -> None def _check_adjacent_explodes(template: str, parts: list[_Part]) -> None: - """Reject templates with adjacent same-operator explode variables. + """Reject templates with adjacent explode variables. Patterns like ``{/a*}{/b*}`` are ambiguous for matching: given - ``/x/y/z``, the split between ``a`` and ``b`` is undetermined. 
We - reject these at parse time rather than picking an arbitrary - resolution. A literal between them (``{/a*}/x{/b*}``) or a different - operator (``{/a*}{.b*}``) disambiguates. + ``/x/y/z``, the split between ``a`` and ``b`` is undetermined. + Different operators (``{/a*}{.b*}``) do not help in general because + the first operator's character class often includes the second's + separator, so the first explode greedily consumes both. We reject + all adjacent explodes at parse time rather than picking an arbitrary + resolution. A literal between them (``{/a*}/x{/b*}``) still + disambiguates. Raises: - InvalidUriTemplate: If two explode variables with the same - operator appear with no literal or non-explode variable - between them. + InvalidUriTemplate: If two explode variables appear with no + literal or non-explode variable between them. """ - prev_explode_op: Operator | None = None + prev_explode = False for part in parts: if isinstance(part, str): # Literal text breaks any adjacency. - prev_explode_op = None + prev_explode = False continue for var in part.variables: if var.explode: - if prev_explode_op == var.operator: + if prev_explode: raise InvalidUriTemplate( - f"Adjacent explode expressions with operator {var.operator!r} are ambiguous and not supported", + "Adjacent explode expressions are ambiguous for matching and not supported", template=template, ) - prev_explode_op = var.operator + prev_explode = True else: # A non-explode variable also breaks adjacency. 
- prev_explode_op = None + prev_explode = False diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 698d401df..12c9058cb 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -112,12 +112,28 @@ def test_parse_rejects_operator_without_variable(): UriTemplate.parse("{+}") -@pytest.mark.parametrize("name", ["-bad", "bad-name", "bad name", "bad/name"]) +@pytest.mark.parametrize( + "name", + [ + "-bad", + "bad-name", + "bad name", + "bad/name", + # RFC §2.3: dots only between varchars, not consecutive or trailing + "foo..bar", + "foo.", + ], +) def test_parse_rejects_invalid_varname(name: str): with pytest.raises(InvalidUriTemplate, match="Invalid variable name"): UriTemplate.parse(f"{{{name}}}") +def test_parse_accepts_dotted_varname(): + t = UriTemplate.parse("{a.b.c}") + assert t.variable_names == ("a.b.c",) + + def test_parse_rejects_empty_spec_in_list(): with pytest.raises(InvalidUriTemplate, match="Invalid variable name"): UriTemplate.parse("{a,,b}") @@ -134,9 +150,17 @@ def test_parse_rejects_unsupported_explode(template: str): UriTemplate.parse(template) -def test_parse_rejects_adjacent_explodes_same_operator(): +@pytest.mark.parametrize( + "template", + [ + "{/a*}{/b*}", # same operator + "{/a*}{.b*}", # different operators: / char class includes ., still ambiguous + "{.a*}{;b*}", + ], +) +def test_parse_rejects_adjacent_explodes(template: str): with pytest.raises(InvalidUriTemplate, match="Adjacent explode"): - UriTemplate.parse("{/a*}{/b*}") + UriTemplate.parse(template) @pytest.mark.parametrize( @@ -201,16 +225,16 @@ def test_parse_stray_close_brace_between_expressions(): assert tmpl.variable_names == ("a", "b") -def test_parse_allows_adjacent_explodes_different_operator(): - tmpl = UriTemplate.parse("{/a*}{.b*}") - assert len(tmpl.variables) == 2 - - def test_parse_allows_explode_separated_by_literal(): tmpl = UriTemplate.parse("{/a*}/x{/b*}") assert len(tmpl.variables) == 2 +def 
test_parse_allows_explode_separated_by_non_explode_var(): + tmpl = UriTemplate.parse("{/a*}{b}{.c*}") + assert len(tmpl.variables) == 3 + + def test_parse_rejects_oversized_template(): with pytest.raises(InvalidUriTemplate, match="maximum length"): UriTemplate.parse("x" * 101, max_length=100) From 80c79343e13a40617e46e81d55ff986d75baecf3 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 21:47:43 +0000 Subject: [PATCH 19/47] fix: preserve pct-triplets in reserved expansion; allow empty match captures Two RFC-conformance fixes: Reserved expansion ({+var}, {#var}) now passes through existing %XX pct-triplets unchanged per RFC 6570 section 3.2.3, while still encoding bare %. Previously quote() double-encoded path%2Fto into path%252Fto. Simple expansion is unchanged (still encodes % unconditionally). Match patterns now use * instead of + quantifiers so defined-but-empty values round-trip. RFC says empty variables still emit the operator prefix: {#section} with section='' expands to '#', but the previous .+ pattern could not match the empty capture after it. All eight operators now consistently accept empty values. The quantifier change affects adjacent-unrestricted-var resolution: {a}{b} matching 'xy' now gives {a: 'xy', b: ''} (greedy first-wins) instead of the previous {a: 'x', b: 'y'} (artifact of + backtracking). Adjacent vars without a separating literal are inherently ambiguous either way; a literal between them ({a}-{b}) still disambiguates. 
--- src/mcp/shared/uri_template.py | 35 +++++++++++++++++++++++-------- tests/shared/test_uri_template.py | 31 ++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 14b3210cf..8878b9ab8 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -86,11 +86,11 @@ class _OperatorSpec: # the characters that can appear in an expanded value for that operator, # stopping at the next structural delimiter. _MATCH_PATTERN: dict[Operator, str] = { - "": r"[^/?#&,]+", # simple: everything structural is pct-encoded - "+": r"[^?#]+", # reserved: / allowed, stop at query/fragment - "#": r".+", # fragment: tail of URI - ".": r"[^./?#]+", # label: stop at next . - "/": r"[^/?#]+", # path segment: stop at next / + "": r"[^/?#&,]*", # simple: everything structural is pct-encoded + "+": r"[^?#]*", # reserved: / allowed, stop at query/fragment + "#": r".*", # fragment: tail of URI + ".": r"[^./?#]*", # label: stop at next . + "/": r"[^/?#]*", # path segment: stop at next / ";": r"[^;/?#]*", # path-param value (may be empty: ;name) "?": r"[^&#]*", # query value (may be empty: ?name=) "&": r"[^&#]*", # query-cont value @@ -140,15 +140,32 @@ def _is_str_sequence(value: object) -> bool: return all(isinstance(item, str) for item in seq) +_PCT_TRIPLET_RE = re.compile(r"%[0-9A-Fa-f]{2}") + + def _encode(value: str, *, allow_reserved: bool) -> str: """Percent-encode a value per RFC 6570 §3.2.1. Simple expansion encodes everything except unreserved characters. - Reserved expansion ({+var}, {#var}) additionally keeps RFC 3986 - reserved characters intact. + Reserved expansion (``{+var}``, ``{#var}``) additionally keeps + RFC 3986 reserved characters intact and passes through existing + ``%XX`` pct-triplets unchanged (RFC 6570 §3.2.3). A bare ``%`` not + followed by two hex digits is still encoded to ``%25``. 
""" - safe = _RESERVED if allow_reserved else "" - return quote(value, safe=safe) + if not allow_reserved: + return quote(value, safe="") + + # Reserved expansion: walk the string, pass through triplets as-is, + # quote the gaps between them. A bare % with no triplet lands in a + # gap and gets encoded normally. + out: list[str] = [] + last = 0 + for m in _PCT_TRIPLET_RE.finditer(value): + out.append(quote(value[last : m.start()], safe=_RESERVED)) + out.append(m.group()) + last = m.end() + out.append(quote(value[last:], safe=_RESERVED)) + return "".join(out) def _expand_expression(expr: _Expression, variables: Mapping[str, str | Sequence[str]]) -> str: diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 12c9058cb..ea6d3efc0 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -277,6 +277,16 @@ def test_frozen(): # Level 2: reserved expansion keeps / ? # etc. ("{+var}", {"var": "a/b/c"}, "a/b/c"), ("{+var}", {"var": "a?b#c"}, "a?b#c"), + # RFC §3.2.3: reserved expansion passes through existing + # pct-triplets unchanged; bare % is still encoded. + ("{+var}", {"var": "path%2Fto"}, "path%2Fto"), + ("{+var}", {"var": "50%"}, "50%25"), + ("{+var}", {"var": "50%2"}, "50%252"), + ("{+var}", {"var": "a%2Fb%20c"}, "a%2Fb%20c"), + ("{#var}", {"var": "a%2Fb"}, "#a%2Fb"), + # Simple expansion still encodes % unconditionally (triplet + # preservation is reserved-only). + ("{var}", {"var": "path%2Fto"}, "path%252Fto"), ("file://docs/{+path}", {"path": "src/main.py"}, "file://docs/src/main.py"), # Level 2: fragment ("{#var}", {"var": "section"}, "#section"), @@ -422,12 +432,17 @@ def test_match_no_match(template: str, uri: str): def test_match_adjacent_vars_with_prefix_names(): # Two adjacent simple vars where one name is a prefix of the other. # We use positional capture groups, so names only affect the result - # dict keys, not the regex. 
Standard greedy matching: the first var - # takes as much as it can while still letting the second satisfy +. + # dict keys, not the regex. Adjacent unrestricted vars are inherently + # ambiguous; greedy * resolution means the first takes everything. t = UriTemplate.parse("{var}{vara}") - assert t.match("ab") == {"var": "a", "vara": "b"} - assert t.match("abc") == {"var": "ab", "vara": "c"} - assert t.match("abcd") == {"var": "abc", "vara": "d"} + assert t.match("ab") == {"var": "ab", "vara": ""} + assert t.match("abcd") == {"var": "abcd", "vara": ""} + + +def test_match_adjacent_vars_disambiguated_by_literal(): + # A literal between vars resolves the ambiguity. + t = UriTemplate.parse("{a}-{b}") + assert t.match("foo-bar") == {"a": "foo", "b": "bar"} def test_match_decodes_percent_encoding(): @@ -515,6 +530,12 @@ def test_match_explode_encoded_separator_in_segment(): ("{var}", {"var": "hello world"}), ("item{;id}", {"id": "42"}), ("item{;id}", {"id": ""}), + # Defined-but-empty values still emit the operator prefix; match + # must accept the empty capture after it. + ("page{#section}", {"section": ""}), + ("file{.ext}", {"ext": ""}), + ("api{/v}", {"v": ""}), + ("x{name}y", {"name": ""}), ("item{;keys*}", {"keys": ["a", "b", "c"]}), ("item{;keys*}", {"keys": ["a", "", "b"]}), # Partial query expansion round-trips: expand omits undefined From 278e5e747eb1314598ea655e0eea27c766fce811 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 21:59:04 +0000 Subject: [PATCH 20/47] refactor: use lists instead of tuples for variable-length sequences Replaces tuple[X, ...] with list[X] throughout UriTemplate internals and public API. The tuples were defensive immutability nobody needed: the dataclass fields are compare=False so they do not participate in hash/eq, and the public properties now return fresh copies so callers cannot mutate internal state. 
Helper function parameters take Sequence[X] where they only iterate; returns are concrete list[X]. The only remaining tuples are the fixed-arity (pair) return types on _parse and _split_query_tail, which is the correct use of tuple. --- src/mcp/shared/uri_template.py | 40 +++++++++++++++---------------- tests/shared/test_uri_template.py | 22 ++++++++--------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 8878b9ab8..779b225dd 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -121,12 +121,12 @@ class Variable: explode: bool = False -@dataclass(frozen=True) +@dataclass class _Expression: """A parsed ``{...}`` expression: one operator, one or more variables.""" operator: Operator - variables: tuple[Variable, ...] + variables: list[Variable] _Part = str | _Expression @@ -236,11 +236,11 @@ class UriTemplate: """ template: str - _parts: tuple[_Part, ...] = field(repr=False, compare=False) - _variables: tuple[Variable, ...] = field(repr=False, compare=False) + _parts: list[_Part] = field(repr=False, compare=False) + _variables: list[Variable] = field(repr=False, compare=False) _pattern: re.Pattern[str] = field(repr=False, compare=False) - _path_variables: tuple[Variable, ...] = field(repr=False, compare=False) - _query_variables: tuple[Variable, ...] 
= field(repr=False, compare=False) + _path_variables: list[Variable] = field(repr=False, compare=False) + _query_variables: list[Variable] = field(repr=False, compare=False) @staticmethod def is_template(value: str) -> bool: @@ -311,14 +311,14 @@ def parse( ) @property - def variables(self) -> tuple[Variable, ...]: + def variables(self) -> list[Variable]: """All variables in the template, in order of appearance.""" - return self._variables + return list(self._variables) @property - def variable_names(self) -> tuple[str, ...]: + def variable_names(self) -> list[str]: """All variable names in the template, in order of appearance.""" - return tuple(v.name for v in self._variables) + return [v.name for v in self._variables] def expand(self, variables: Mapping[str, str | Sequence[str]]) -> str: """Expand the template by substituting variable values. @@ -465,7 +465,7 @@ def __str__(self) -> str: return self.template -def _extract_path(m: re.Match[str], variables: tuple[Variable, ...]) -> dict[str, str | list[str]] | None: +def _extract_path(m: re.Match[str], variables: Sequence[Variable]) -> dict[str, str | list[str]] | None: """Decode regex capture groups into a variable-name mapping. Handles scalar and explode variables. Named explode (``;``) strips @@ -506,9 +506,7 @@ def _extract_path(m: re.Match[str], variables: tuple[Variable, ...]) -> dict[str return result -def _split_query_tail( - parts: tuple[_Part, ...], -) -> tuple[tuple[_Part, ...], tuple[Variable, ...]]: +def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: """Separate trailing ``?``/``&`` expressions from the path portion. Lenient query matching (order-agnostic, partial, ignores extras) @@ -532,23 +530,23 @@ def _split_query_tail( break if split == len(parts): - return parts, () + return parts, [] # If the path portion contains a literal ?, the URI's ? won't align # with our template split. Fall back to strict regex. for part in parts[:split]: if isinstance(part, str) and "?" 
in part: - return parts, () + return parts, [] query_vars: list[Variable] = [] for part in parts[split:]: assert isinstance(part, _Expression) query_vars.extend(part.variables) - return parts[:split], tuple(query_vars) + return parts[:split], query_vars -def _build_pattern(parts: tuple[_Part, ...]) -> re.Pattern[str]: +def _build_pattern(parts: Sequence[_Part]) -> re.Pattern[str]: """Compile a regex that matches URIs produced by this template. Walks parts in order: literals are ``re.escape``'d, expressions @@ -606,7 +604,7 @@ def _expression_pattern(expr: _Expression) -> str: return "".join(pieces) -def _parse(template: str, *, max_expressions: int) -> tuple[tuple[_Part, ...], tuple[Variable, ...]]: +def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Variable]]: """Split a template into an ordered sequence of literals and expressions. Walks the string, alternating between collecting literal runs and @@ -663,7 +661,7 @@ def _parse(template: str, *, max_expressions: int) -> tuple[tuple[_Part, ...], t _check_adjacent_explodes(template, parts) _check_duplicate_variables(template, variables) - return tuple(parts), tuple(variables) + return parts, variables def _parse_expression(template: str, body: str, pos: int) -> _Expression: @@ -730,7 +728,7 @@ def _parse_expression(template: str, body: str, pos: int) -> _Expression: variables.append(Variable(name=name, operator=operator, explode=explode)) - return _Expression(operator=operator, variables=tuple(variables)) + return _Expression(operator=operator, variables=variables) def _check_duplicate_variables(template: str, variables: list[Variable]) -> None: diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index ea6d3efc0..401cd41f2 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -7,8 +7,8 @@ def test_parse_literal_only(): tmpl = UriTemplate.parse("file://docs/readme.txt") - assert tmpl.variables == () - assert 
tmpl.variable_names == () + assert tmpl.variables == [] + assert tmpl.variable_names == [] assert str(tmpl) == "file://docs/readme.txt" @@ -32,8 +32,8 @@ def test_is_template(value: str, expected: bool): def test_parse_simple_variable(): tmpl = UriTemplate.parse("file://docs/{name}") - assert tmpl.variables == (Variable(name="name", operator=""),) - assert tmpl.variable_names == ("name",) + assert tmpl.variables == [Variable(name="name", operator="")] + assert tmpl.variable_names == ["name"] @pytest.mark.parametrize( @@ -57,13 +57,13 @@ def test_parse_all_operators(template: str, operator: str): def test_parse_multiple_variables_in_expression(): tmpl = UriTemplate.parse("{?q,lang,page}") - assert tmpl.variable_names == ("q", "lang", "page") + assert tmpl.variable_names == ["q", "lang", "page"] assert all(v.operator == "?" for v in tmpl.variables) def test_parse_multiple_expressions(): tmpl = UriTemplate.parse("db://{table}/{id}{?format}") - assert tmpl.variable_names == ("table", "id", "format") + assert tmpl.variable_names == ["table", "id", "format"] ops = [v.operator for v in tmpl.variables] assert ops == ["", "", "?"] @@ -84,15 +84,15 @@ def test_parse_explode_supported_operators(template: str): def test_parse_mixed_explode_and_plain(): tmpl = UriTemplate.parse("{/path*}{?q}") - assert tmpl.variables == ( + assert tmpl.variables == [ Variable(name="path", operator="/", explode=True), Variable(name="q", operator="?"), - ) + ] def test_parse_varname_with_dots_and_underscores(): tmpl = UriTemplate.parse("{foo_bar.baz}") - assert tmpl.variable_names == ("foo_bar.baz",) + assert tmpl.variable_names == ["foo_bar.baz"] def test_parse_rejects_unclosed_expression(): @@ -131,7 +131,7 @@ def test_parse_rejects_invalid_varname(name: str): def test_parse_accepts_dotted_varname(): t = UriTemplate.parse("{a.b.c}") - assert t.variable_names == ("a.b.c",) + assert t.variable_names == ["a.b.c"] def test_parse_rejects_empty_spec_in_list(): @@ -222,7 +222,7 @@ def 
test_parse_treats_stray_close_brace_as_literal(template: str): def test_parse_stray_close_brace_between_expressions(): tmpl = UriTemplate.parse("{a}}{b}") - assert tmpl.variable_names == ("a", "b") + assert tmpl.variable_names == ["a", "b"] def test_parse_allows_explode_separated_by_literal(): From 9473442435db12a9d8c42b09da2526dc04539642 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 22:06:52 +0000 Subject: [PATCH 21/47] docs: trim migration guide to breaking changes only The resource template migration section was documenting new features alongside behavior changes. Trimmed to the four actual breakages: path-safety checks now applied by default, template literals regex- escaped, lenient query matching, and parse-time validation. New capabilities and best-practice guidance moved to the Resources doc via a link at the end. --- docs/migration.md | 75 +++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index 40027c0b3..be4d8f767 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -545,69 +545,48 @@ await client.read_resource("test://resource") await client.read_resource(str(my_any_url)) ``` -### Resource templates: RFC 6570 support and security hardening +### Resource templates: matching behavior changes -Resource template matching has been rewritten to support RFC 6570 URI -templates (Levels 1-3 plus path-style explode) and to apply path-safety -checks to extracted parameters by default. +Resource template matching has been rewritten with RFC 6570 support. +Four behaviors have changed: -**New capabilities:** - -- `{+path}` (reserved expansion) now works — it matches multi-segment - paths like `src/main.py`. Previously only simple `{var}` was supported. 
-- All Level 3 operators: `{.ext}`, `{/seg}`, `{;param}`, `{?query}`, `{&cont}` -- Path-style explode: `{/path*}` extracts a `list[str]` of segments -- Template literals are now regex-escaped (a `.` in your template no - longer matches any character — this was a bug) - -**Security hardening (may require opt-out):** - -By default, extracted parameter values are now rejected if they: - -- Contain `..` as a path component (e.g., `..`, `../etc`, `a/../../b`) -- Look like an absolute filesystem path (e.g., `/etc/passwd`, `C:\Windows`) - -These checks apply to the decoded value, so they catch traversal -regardless of encoding (`../etc`, `..%2Fetc`, `%2E%2E/etc` all caught). - -If your template parameters legitimately contain `..` (e.g., git commit -ranges like `HEAD~3..HEAD`) or absolute paths, exempt them: +**Path-safety checks applied by default.** Extracted parameter values +containing `..` as a path component or looking like an absolute path +(`/etc/passwd`, `C:\Windows`) now cause the template to not match. +This is checked on the decoded value, so `..%2Fetc` and `%2E%2E` are +caught too. If a parameter legitimately contains these (a git commit +range, a fully-qualified identifier), exempt it: ```python -from mcp.server.mcpserver import MCPServer, ResourceSecurity - -mcp = MCPServer() +from mcp.server.mcpserver import ResourceSecurity @mcp.resource( "git://diff/{+range}", security=ResourceSecurity(exempt_params={"range"}), ) -def git_diff(range: str) -> str: - ... +def git_diff(range: str) -> str: ... ``` -Or relax the policy server-wide: +Note that `..` is only flagged as a standalone path component, so a +value like `v1.0..v2.0` is unaffected. -```python -mcp = MCPServer( - resource_security=ResourceSecurity(reject_path_traversal=False), -) -``` +**Template literals are regex-escaped.** Previously a `.` in your +template matched any character; now it matches only a literal dot. +`data://v1.0/{id}` no longer matches `data://v1X0/42`. 
-**Filesystem handlers:** even with `{+path}` allowing slashes, you must -still guard against traversal in your handler. Use `safe_join`: +**Query parameters match leniently.** A template like +`search://{q}{?limit}` now matches `search://foo` (with `limit` absent +from the extracted params so your function default applies). Previously +this returned no match. If you relied on all query parameters being +required, add explicit checks in your handler. -```python -from mcp.shared.path_security import safe_join - -@mcp.resource("file://docs/{+path}") -def read_doc(path: str) -> str: - return safe_join("/data/docs", path).read_text() -``` +**Malformed templates fail at decoration time.** Unclosed braces, +duplicate variable names, and unsupported syntax now raise +`InvalidUriTemplate` when the decorator runs, rather than silently +misbehaving at match time. -**Malformed templates now fail at decoration time** with -`InvalidUriTemplate` (a `ValueError` subclass carrying the error -position), rather than silently misbehaving at match time. +See [Resources](server/resources.md) for the full template syntax, +security configuration, and filesystem safety utilities. ### Lowlevel `Server`: constructor parameters are now keyword-only From 60d12e10ee9eb01cfbf352e4dd4290f93627dc39 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 22:21:32 +0000 Subject: [PATCH 22/47] docs: clarify query leniency and fix exempt_params example Adds a sentence on lenient query matching (order-agnostic, extras ignored, defaults apply) after the logs example. Adds the component-based clarification for the .. check so users know values like HEAD~3..HEAD and v1.0..v2.0 are unaffected. Fixes the exempt_params motivating example in both resources.md and migration.md. The previous git://diff/{+range} example used HEAD~3..HEAD, which the component-based check already passes without exemption. 
Replaced with inspect://file/{+target} receiving absolute paths, which genuinely requires the opt-out. --- docs/migration.md | 16 ++++++++-------- docs/server/resources.md | 22 ++++++++++++++++------ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index be4d8f767..5fd3518a0 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -554,22 +554,22 @@ Four behaviors have changed: containing `..` as a path component or looking like an absolute path (`/etc/passwd`, `C:\Windows`) now cause the template to not match. This is checked on the decoded value, so `..%2Fetc` and `%2E%2E` are -caught too. If a parameter legitimately contains these (a git commit -range, a fully-qualified identifier), exempt it: +caught too. Note that `..` is only flagged as a standalone path +component, so values like `v1.0..v2.0` or `HEAD~3..HEAD` are unaffected. + +If a parameter legitimately needs to receive absolute paths or +traversal sequences, exempt it: ```python from mcp.server.mcpserver import ResourceSecurity @mcp.resource( - "git://diff/{+range}", - security=ResourceSecurity(exempt_params={"range"}), + "inspect://file/{+target}", + security=ResourceSecurity(exempt_params={"target"}), ) -def git_diff(range: str) -> str: ... +def inspect_file(target: str) -> str: ... ``` -Note that `..` is only flagged as a standalone path component, so a -value like `v1.0..v2.0` is unaffected. - **Template literals are regex-escaped.** Previously a `.` in your template matched any character; now it matches only a literal dot. `data://v1.0/{id}` no longer matches `data://v1X0/42`. diff --git a/docs/server/resources.md b/docs/server/resources.md index 85d7e6a6b..a3cfe51cb 100644 --- a/docs/server/resources.md +++ b/docs/server/resources.md @@ -97,6 +97,9 @@ Reading `logs://api` uses the defaults. Reading `logs://api?since=15m&level=error` narrows it down. The path identifies *which* resource; the query tunes *how* you read it. 
+Query params are matched leniently: order doesn't matter, extras are +ignored, and omitted params fall through to your function defaults. + ### Path segments as a list If you want each path segment as a separate list item rather than one @@ -141,6 +144,10 @@ Before your handler runs, the SDK rejects any parameter that: - contains `..` as a path component - looks like an absolute path (`/etc/passwd`, `C:\Windows`) +The `..` check is component-based, not a substring scan. Values like +`v1.0..v2.0` or `HEAD~3..HEAD` pass because `..` is not a standalone +path segment there. + These checks apply to the decoded value, so they catch traversal regardless of how it was encoded in the URI (`../etc`, `..%2Fetc`, `%2E%2E/etc`, `..%5Cetc` all get caught). @@ -174,19 +181,22 @@ client as a `ResourceError`. ### When the defaults get in the way -Sometimes `..` in a parameter is legitimate. A git commit range like -`HEAD~3..HEAD` contains `..` but it's not a path. Exempt that parameter: +Sometimes the checks block legitimate values. An external-tool wrapper +might intentionally receive an absolute path, or a parameter might be a +relative reference like `../sibling` that your handler interprets +safely without touching the filesystem. 
Exempt that parameter: ```python from mcp.server.mcpserver import ResourceSecurity @mcp.resource( - "git://diff/{+range}", - security=ResourceSecurity(exempt_params={"range"}), + "inspect://file/{+target}", + security=ResourceSecurity(exempt_params={"target"}), ) -def git_diff(range: str) -> str: - return run_git("diff", range) +def inspect_file(target: str) -> str: + # target might be "/usr/bin/python3"; this handler is trusted + return describe_binary(target) ``` Or relax the policy for the whole server: From 2f7fd615ec55585bd0f7c074586e8add394ae6b4 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 22:38:04 +0000 Subject: [PATCH 23/47] =?UTF-8?q?fix:=20reject=20template=20patterns=20cau?= =?UTF-8?q?sing=20O(n=C2=B2)=20regex=20backtracking?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The [^?#]* match pattern for {+var} and {#var} overlaps with every other operator's character class. When a trailing literal fails to match, the regex engine backtracks through O(n) split points with O(n) rescanning each, yielding quadratic time. A 64KB payload (the default max_uri_length) against {+prefix}{/path*}/END consumed ~25s CPU per request. Two conditions trigger the quadratic case, now both rejected at parse time: 1. {+var} immediately adjacent to any expression ({+a}{b}, {+a}{/b*}) 2. Two {+var}/{#var} anywhere in the template, even with a literal between them ({+a}/x/{+b}) since [^?#]* matches the literal too What remains permitted: - {+path} at end of template (the flagship use case) - {+path}.txt or {+path}/edit (literal suffix, linear backtracking) - {+path}{?v,page} (query expressions stripped before pattern build) - {+a}/sep/{b} (literal + bounded expression, disambiguated) The _check_adjacent_explodes function is generalized to _check_ambiguous_adjacency covering both explode adjacency and the new reserved-expansion constraints. 
--- src/mcp/shared/uri_template.py | 100 +++++++++++++++++++++--------- tests/shared/test_uri_template.py | 52 +++++++++++++++- 2 files changed, 121 insertions(+), 31 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 779b225dd..a2f9b2a54 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -15,12 +15,15 @@ Matching is not specified by RFC 6570. A few templates can expand to URIs that ``match()`` cannot unambiguously reverse: -* Multi-variable reserved expressions like ``{+x,y}`` use a comma as - separator but also permit commas *inside* values (commas are in the - reserved set). ``match("a,b,c")`` cannot know which comma is the - separator. The matcher takes the last comma as the split point; if - your values contain commas, prefer separate expressions (``{+x}/{+y}``) - or a different operator. +* Reserved/fragment expressions (``{+var}``, ``{#var}``) are restricted + to positions that avoid quadratic-time backtracking: at most one per + template, and not immediately followed by another expression. The + ``[^?#]*`` pattern overlaps with every other operator's character + class; a failing match against ``{+a}{b}`` or ``{+a}/x/{+b}`` + backtracks O(n²). Use a literal separator before a bounded + expression (``{+a}/sep/{b}``) or put the reserved expression last + (``file://docs/{+path}``). Trailing ``{?...}``/``{&...}`` query + expressions are always fine since they're matched via ``parse_qs``. * Reserved expansion ``{+var}`` leaves ``?`` and ``#`` unencoded, but the match pattern stops at those characters so that templates like @@ -615,7 +618,7 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va Raises: InvalidUriTemplate: On unclosed braces, too many expressions, or any error surfaced by :func:`_parse_expression` or - :func:`_check_adjacent_explodes`. + :func:`_check_ambiguous_adjacency`.
""" parts: list[_Part] = [] variables: list[Variable] = [] @@ -659,7 +662,7 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va # Advance past the closing brace. i = end + 1 - _check_adjacent_explodes(template, parts) + _check_ambiguous_adjacency(template, parts) _check_duplicate_variables(template, variables) return parts, variables @@ -752,36 +755,73 @@ def _check_duplicate_variables(template: str, variables: list[Variable]) -> None seen.add(var.name) -def _check_adjacent_explodes(template: str, parts: list[_Part]) -> None: - """Reject templates with adjacent explode variables. +def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: + """Reject templates where adjacent expressions would cause ambiguous or quadratic matching. - Patterns like ``{/a*}{/b*}`` are ambiguous for matching: given - ``/x/y/z``, the split between ``a`` and ``b`` is undetermined. - Different operators (``{/a*}{.b*}``) do not help in general because - the first operator's character class often includes the second's - separator, so the first explode greedily consumes both. We reject - all adjacent explodes at parse time rather than picking an arbitrary - resolution. A literal between them (``{/a*}/x{/b*}``) still - disambiguates. + Two patterns are rejected: + + 1. Adjacent explode variables (``{/a*}{/b*}``): the split between + ``a`` and ``b`` in ``/x/y/z`` is undetermined. Different + operators don't help since character classes overlap. + + 2. Reserved/fragment expansion in a position that causes quadratic + backtracking. The ``[^?#]*`` pattern for ``+`` and ``#`` + overlaps with every other operator's character class, so when a + trailing match fails the engine backtracks through O(n) split + points. 
Two conditions trigger this: + + - ``{+var}`` immediately adjacent to any expression + (``{+a}{b}``, ``{+a}{/b*}``) + - Two ``{+var}``/``{#var}`` anywhere in the path, even with a + literal between them (``{+a}/x/{+b}``) — the literal does not + disambiguate since ``[^?#]*`` matches it too + + A 64KB payload against either can consume tens of seconds of CPU. + + Trailing ``{?...}``/``{&...}`` expressions are handled via + ``parse_qs`` outside the path regex, so they do not count against + any check. Raises: - InvalidUriTemplate: If two explode variables appear with no - literal or non-explode variable between them. + InvalidUriTemplate: If any pattern is detected. """ prev_explode = False + prev_reserved = False + seen_reserved = False for part in parts: if isinstance(part, str): - # Literal text breaks any adjacency. + # A literal breaks immediate adjacency but does not reset + # the seen-reserved count: [^?#]* matches most literals. prev_explode = False + prev_reserved = False continue for var in part.variables: - if var.explode: - if prev_explode: - raise InvalidUriTemplate( - "Adjacent explode expressions are ambiguous for matching and not supported", - template=template, - ) - prev_explode = True - else: - # A non-explode variable also breaks adjacency. + # ?/& are stripped before pattern building and never reach + # the path regex. 
+ if var.operator in ("?", "&"): prev_explode = False + prev_reserved = False + continue + + if prev_reserved: + raise InvalidUriTemplate( + "{+var} or {#var} immediately followed by another expression " + "causes quadratic-time matching; separate them with a literal", + template=template, + ) + if var.operator in ("+", "#") and seen_reserved: + raise InvalidUriTemplate( + "Multiple {+var} or {#var} expressions in one template cause " + "quadratic-time matching even with literals between them", + template=template, + ) + if var.explode and prev_explode: + raise InvalidUriTemplate( + "Adjacent explode expressions are ambiguous for matching and not supported", + template=template, + ) + + prev_explode = var.explode + prev_reserved = var.operator in ("+", "#") + if prev_reserved: + seen_reserved = True diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 401cd41f2..c4365bb5c 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -163,6 +163,55 @@ def test_parse_rejects_adjacent_explodes(template: str): UriTemplate.parse(template) +@pytest.mark.parametrize( + "template", + [ + # {+var} immediately adjacent to any expression + "{+a}{b}", + "{+a}{/b}", + "{+a}{/b*}", + "{+a}{.b}", + "{+a}{;b}", + "{#a}{b}", + "{+a,b}", # multi-var in one expression: same adjacency + "prefix/{+path}{.ext}", # literal before doesn't help + # Two {+var}/{#var} anywhere, even with literals between + "{+a}/x/{+b}", + "{+a},{+b}", + "{#a}/x/{+b}", + "{+a}.foo.{#b}", + ], +) +def test_parse_rejects_reserved_quadratic_patterns(template: str): + # These patterns cause O(n²) regex backtracking when a trailing + # literal fails to match. Rejecting at parse time eliminates the + # ReDoS vector at the source. 
+ with pytest.raises(InvalidUriTemplate, match="quadratic"): + UriTemplate.parse(template) + + +@pytest.mark.parametrize( + "template", + [ + "file://docs/{+path}", # + at end of template + "file://{+path}.txt", # + followed by literal only + "file://{+path}/edit", # + followed by literal only + "api/{+path}{?v,page}", # + followed by query (stripped before regex) + "api/{+path}{&next}", # + followed by query-continuation + "page{#section}", # # at end + "{a}{+b}", # + preceded by expression is fine; only following matters + "{+a}/sep/{b}", # literal + bounded expression after: linear + "{+a},{b}", # same: literal disambiguates when second is bounded + ], +) +def test_parse_allows_reserved_in_safe_positions(template: str): + # These do not exhibit quadratic backtracking: end-of-template, + # literal + bounded expression, or trailing query expression + # (handled by parse_qs outside the path regex). + t = UriTemplate.parse(template) + assert t is not None + + @pytest.mark.parametrize( "template", ["{x}/{x}", "{x,x}", "{a}{b}{a}", "{+x}/foo/{x}"], @@ -306,7 +355,8 @@ def test_frozen(): ("?a=1{&b}", {"b": "2"}, "?a=1&b=2"), # Multi-var in one expression ("{x,y}", {"x": "1", "y": "2"}, "1,2"), - ("{+x,y}", {"x": "a/b", "y": "c/d"}, "a/b,c/d"), + # {+x,y} is rejected at parse time (quadratic backtracking + + # inherent ambiguity). Use {+x}/{y} with a literal separator. # Sequence values, non-explode (comma-join) ("{/list}", {"list": ["a", "b", "c"]}, "/a,b,c"), ("{?list}", {"list": ["a", "b"]}, "?list=a,b"), From aed579c8a3315c154c60731c6c8dd109fb5a952f Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 22:41:25 +0000 Subject: [PATCH 24/47] docs: address reviewer feedback on migration guide and resources doc migration.md: added note that static URIs with Context-only handlers now error at decoration time.
The pattern was previously silently unreachable (the resource registered but could never be read); now it surfaces early. Duplicate-variable-names rejection was already covered in the malformed-templates paragraph. resources.md: clarified that the .. check is depth-based (rejects values that would escape the starting directory, so a/../b passes). Changed template reference table intro from 'what the SDK supports' to 'the most common patterns' since the table intentionally omits the rarely-used fragment and path-param operators. test_uri_template.py: corrected the stray-} test comment. RFC 6570 section 2.1 strictly excludes } from literals; we accept it for TypeScript SDK parity, not because the RFC is lenient. --- docs/migration.md | 8 ++++++++ docs/server/resources.md | 6 +++--- tests/shared/test_uri_template.py | 6 ++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index 5fd3518a0..8bc030804 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -585,6 +585,14 @@ duplicate variable names, and unsupported syntax now raise `InvalidUriTemplate` when the decorator runs, rather than silently misbehaving at match time. +**Static URIs with Context-only handlers now error.** A non-template +URI paired with a handler that takes only a `Context` parameter +previously registered but was silently unreachable (the resource +could never be read). This now raises `ValueError` at decoration time. +Context injection for static resources is planned; until then, use a +template with at least one variable or access context through other +means. + See [Resources](server/resources.md) for the full template syntax, security configuration, and filesystem safety utilities. 
diff --git a/docs/server/resources.md b/docs/server/resources.md index a3cfe51cb..e774c6ded 100644 --- a/docs/server/resources.md +++ b/docs/server/resources.md @@ -118,7 +118,7 @@ def walk_tree(path: list[str]) -> dict: ### Template reference The template syntax follows [RFC 6570](https://datatracker.ietf.org/doc/html/rfc6570). -Here's what the SDK supports: +The most common patterns: | Pattern | Example input | You get | |--------------|-----------------------|-------------------------| @@ -141,7 +141,7 @@ or database operations, a hostile client can try path traversal Before your handler runs, the SDK rejects any parameter that: -- contains `..` as a path component +- would escape its starting directory via `..` components - looks like an absolute path (`/etc/passwd`, `C:\Windows`) The `..` check is component-based, not a substring scan. Values like @@ -211,7 +211,7 @@ The configurable checks: | Setting | Default | What it does | |-------------------------|---------|-------------------------------------| -| `reject_path_traversal` | `True` | Rejects `..` as a path component | +| `reject_path_traversal` | `True` | Rejects `..` sequences that escape the starting directory | | `reject_absolute_paths` | `True` | Rejects `/foo`, `C:\foo`, UNC paths | | `exempt_params` | empty | Parameter names to skip checks for | diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index c4365bb5c..6c542c517 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -263,8 +263,10 @@ def test_parse_rejects_unclosed_brace(template: str, position: int): ["}}", "}", "a}b", "{a}}{b}"], ) def test_parse_treats_stray_close_brace_as_literal(template: str): - # RFC 6570 is lenient about } outside expressions; most implementations - # (including the TypeScript SDK) treat it as a literal rather than erroring. + # RFC 6570 §2.1 strictly excludes } from literals, but we accept it + # for TypeScript SDK parity. 
A stray } almost always indicates a + # typo; rejecting would be more helpful but would also break + # cross-SDK behavior. tmpl = UriTemplate.parse(template) assert str(tmpl) == template From 7891fd9d63bc5b7aba816a5f1e3437d8fe4501a6 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 22:55:10 +0000 Subject: [PATCH 25/47] test: close coverage gaps in lenient-query branch and test handlers Adds two no-match cases for the lenient-query code path in UriTemplate.match(): path regex failing when query vars are present, and ; explode name mismatch in the path portion before a {?...} expression. Adds a passing case to test_resource_security_default_rejects_traversal so the handler body executes (the test previously only sent rejected URIs, leaving the handler uncovered). Replaces the _make helper's unreachable return with raise NotImplementedError since those tests only exercise matches(). --- tests/server/mcpserver/resources/test_resource_template.py | 2 +- tests/server/mcpserver/test_server.py | 5 +++++ tests/shared/test_uri_template.py | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index e02a8c471..97808dc37 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -15,7 +15,7 @@ def _make(uri_template: str, security: ResourceSecurity = DEFAULT_RESOURCE_SECURITY) -> ResourceTemplate: def handler(**kwargs: Any) -> str: - return "ok" + raise NotImplementedError # these tests only exercise matches() return ResourceTemplate.from_function(fn=handler, uri_template=uri_template, security=security) diff --git a/tests/server/mcpserver/test_server.py b/tests/server/mcpserver/test_server.py index 6a271c12f..85d6dd819 100644 --- a/tests/server/mcpserver/test_server.py +++ 
b/tests/server/mcpserver/test_server.py @@ -197,6 +197,11 @@ def get_item(name: str) -> str: return f"item:{name}" async with Client(mcp) as client: + # Safe value passes through to the handler + r = await client.read_resource("data://items/widget") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "item:widget" + # ".." as a path component is rejected by default policy with pytest.raises(MCPError, match="Unknown resource"): await client.read_resource("data://items/..") diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 6c542c517..4fb47147f 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -475,6 +475,10 @@ def test_match(template: str, uri: str, expected: dict[str, str | list[str]]): # ; explode: wrong parameter name in any segment rejects the match ("item{;keys*}", "item;admin=true"), ("item{;keys*}", "item;keys=a;admin=true"), + # Lenient-query branch: path portion fails to match + ("api/{name}{?q}", "wrong/path?q=x"), + # Lenient-query branch: ; explode name mismatch in path portion + ("item{;keys*}{?q}", "item;wrong=x?q=1"), ], ) def test_match_no_match(template: str, uri: str): From 1500ca3a452a47a2cfda43cce60fe329b188b525 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 22:59:38 +0000 Subject: [PATCH 26/47] fix: correct lenient query matching for +, fragments, and standalone {&var} Three fixes to the two-phase query matching path: Replaced parse_qs with a manual &/= split using unquote(). parse_qs follows application/x-www-form-urlencoded semantics where + decodes to space, but RFC 6570 and RFC 3986 treat + as a literal sub-delim. A client sending ?q=C++ previously got 'C '; the path-portion decoder (unquote) already handled this correctly, so the two code paths disagreed. Fragment is now stripped before splitting on ?. 
A URI like logs://api?level=error#section1 previously returned level='error#section1' via the lenient path while the strict-regex path correctly stopped at #. _split_query_tail now requires the trailing tail to start with a {?...} expression. A standalone {&page} expands with an & prefix (no ?), so partition('?') found no split and the path regex failed. Such templates now fall through to strict regex which handles them correctly. Also extends the path-portion check to bail on {?...} expressions left in the path, not just literal ?. --- src/mcp/shared/uri_template.py | 56 +++++++++++++++++++++++++------ tests/shared/test_uri_template.py | 9 +++++ 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index a2f9b2a54..e10ba90cf 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -39,7 +39,7 @@ from collections.abc import Mapping, Sequence from dataclasses import dataclass, field from typing import Literal, cast -from urllib.parse import parse_qs, quote, unquote +from urllib.parse import quote, unquote __all__ = ["InvalidUriTemplate", "Operator", "UriTemplate", "Variable"] @@ -441,11 +441,13 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di return None if self._query_variables: - # Two-phase: regex matches the path, parse_qs handles the - # query. Query params may be partial, reordered, or include - # extras; absent params stay absent so downstream defaults - # can apply. - path, _, query = uri.partition("?") + # Two-phase: regex matches the path, the query is split and + # decoded manually. Query params may be partial, reordered, + # or include extras; absent params stay absent so downstream + # defaults can apply. Fragment is stripped first since the + # template's {?...} tail never describes a fragment. 
+ before_fragment, _, _ = uri.partition("#") + path, _, query = before_fragment.partition("?") m = self._pattern.fullmatch(path) if m is None: return None @@ -453,10 +455,10 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di if result is None: return None if query: - parsed = parse_qs(query, keep_blank_values=True) + parsed = _parse_query(query) for var in self._query_variables: if var.name in parsed: - result[var.name] = parsed[var.name][0] + result[var.name] = parsed[var.name] return result m = self._pattern.fullmatch(uri) @@ -468,6 +470,26 @@ def __str__(self) -> str: return self.template +def _parse_query(query: str) -> dict[str, str]: + """Parse a query string into a name→value mapping. + + Unlike ``urllib.parse.parse_qs``, this follows RFC 3986 semantics: + ``+`` is a literal sub-delim, not a space. Form-urlencoding treats + ``+`` as space for HTML form submissions, but RFC 6570 and MCP + resource URIs follow RFC 3986 where only ``%20`` encodes a space. + + Duplicate keys keep the first value. Pairs without ``=`` are + treated as empty-valued. + """ + result: dict[str, str] = {} + for pair in query.split("&"): + name, _, value = pair.partition("=") + name = unquote(name) + if name and name not in result: + result[name] = unquote(value) + return result + + def _extract_path(m: re.Match[str], variables: Sequence[Variable]) -> dict[str, str | list[str]] | None: """Decode regex capture groups into a variable-name mapping. @@ -535,10 +557,22 @@ def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: if split == len(parts): return parts, [] - # If the path portion contains a literal ?, the URI's ? won't align - # with our template split. Fall back to strict regex. + # The tail must start with a {?...} expression so that expand() + # emits a ? the URI can split on. A standalone {&page} expands + # with an & prefix, which partition("?") won't find. 
+ first = parts[split] + assert isinstance(first, _Expression) + if first.operator != "?": + return parts, [] + + # If the path portion contains a literal ? or a {?...} expression, + # the URI's ? split won't align with our template boundary. Fall + # back to strict regex. for part in parts[:split]: - if isinstance(part, str) and "?" in part: + if isinstance(part, str): + if "?" in part: + return parts, [] + elif part.operator == "?": return parts, [] query_vars: list[Variable] = [] diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 4fb47147f..f98c6cbbe 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -437,8 +437,17 @@ def test_expand_rejects_invalid_value_types(value: object): ("search{?q}", "search?q=mcp&utm=x&ref=y", {"q": "mcp"}), # URL-encoded query values are decoded ("search{?q}", "search?q=hello%20world", {"q": "hello world"}), + # + is a literal sub-delim per RFC 3986, not a space (form-encoding) + ("search{?q}", "search?q=C++", {"q": "C++"}), + ("search{?q}", "search?q=1.0+build.5", {"q": "1.0+build.5"}), + # Fragment is stripped before query parsing + ("logs://{service}{?level}", "logs://api?level=error#section1", {"service": "api", "level": "error"}), + ("search{?q}", "search#frag", {}), # Multiple ?/& expressions collected together ("api{?v}{&page,limit}", "api?limit=10&v=2", {"v": "2", "limit": "10"}), + # Standalone {&var} falls through to strict regex (expands with + # & prefix, no ? for lenient matching to split on) + ("api{&page}", "api&page=2", {"page": "2"}), # Level 3: query continuation with literal ? 
falls back to # strict regex (template-order, all-present required) ("?a=1{&b}", "?a=1&b=2", {"b": "2"}), From 4a45f59b7aea5137dea7a461898e98b77e2a3852 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 23:00:24 +0000 Subject: [PATCH 27/47] docs: fix stale docstrings and export DEFAULT_MAX_* constants match() docstring: qualified the round-trip claim with the RFC section 1.4 caveat that values containing their operator's separator unencoded do not round-trip (e.g. {.ext} with 'tar.gz'). resource() decorator docstring: removed the 'or the function has parameters' clause which commit 674783f made stale; template/static is now decided purely by URI variables. Added DEFAULT_MAX_TEMPLATE_LENGTH, DEFAULT_MAX_EXPRESSIONS, and DEFAULT_MAX_URI_LENGTH to __all__ to match the stated intent that these are part of the public API. Moved DEFAULT_MAX_URI_LENGTH import in test file from function body to top-level per repo convention. --- src/mcp/server/mcpserver/server.py | 5 +++-- src/mcp/shared/uri_template.py | 18 +++++++++++++++--- tests/shared/test_uri_template.py | 4 +--- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/mcp/server/mcpserver/server.py b/src/mcp/server/mcpserver/server.py index 31018fbc9..4a5462fe9 100644 --- a/src/mcp/server/mcpserver/server.py +++ b/src/mcp/server/mcpserver/server.py @@ -644,8 +644,9 @@ def resource( - bytes for binary content - other types will be converted to JSON - If the URI contains parameters (e.g. "resource://{param}") or the function - has parameters, it will be registered as a template resource. + If the URI contains parameters (e.g. "resource://{param}"), it is + registered as a template resource. Otherwise it is registered as a + static resource; function parameters on a static URI raise an error. Args: uri: URI for the resource (e.g. 
"resource://my-resource" or "resource://{param}") diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index e10ba90cf..6217137bb 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -41,7 +41,15 @@ from typing import Literal, cast from urllib.parse import quote, unquote -__all__ = ["InvalidUriTemplate", "Operator", "UriTemplate", "Variable"] +__all__ = [ + "DEFAULT_MAX_EXPRESSIONS", + "DEFAULT_MAX_TEMPLATE_LENGTH", + "DEFAULT_MAX_URI_LENGTH", + "InvalidUriTemplate", + "Operator", + "UriTemplate", + "Variable", +] Operator = Literal["", "+", "#", ".", "/", ";", "?", "&"] @@ -387,8 +395,12 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di This is the inverse of :meth:`expand`. The URI is matched against a regex derived from the template and captured values are - percent-decoded. For any value ``v``, ``match(expand({k: v}))`` - returns ``{k: v}``. + percent-decoded. The round-trip ``match(expand({k: v})) == {k: v}`` + holds when ``v`` does not contain its operator's separator + unencoded: ``{.ext}`` with ``ext="tar.gz"`` expands to + ``.tar.gz`` but matches back as ``ext="tar"`` since the ``.`` + pattern stops at the first dot. RFC 6570 §1.4 notes this is an + inherent reversal limitation. 
Matching is structural at the URI level only: a simple ``{name}`` will not match across a literal ``/`` in the URI (the regex stops diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index f98c6cbbe..a12cc601c 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -2,7 +2,7 @@ import pytest -from mcp.shared.uri_template import InvalidUriTemplate, UriTemplate, Variable +from mcp.shared.uri_template import DEFAULT_MAX_URI_LENGTH, InvalidUriTemplate, UriTemplate, Variable def test_parse_literal_only(): @@ -567,8 +567,6 @@ def test_match_accepts_uri_within_custom_limit(): def test_match_default_uri_length_limit(): - from mcp.shared.uri_template import DEFAULT_MAX_URI_LENGTH - t = UriTemplate.parse("{+var}") # Just at the limit: should match assert t.match("x" * DEFAULT_MAX_URI_LENGTH) is not None From 2bedd9d79212b1b069cae54df55e936b991a3d04 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 23:07:06 +0000 Subject: [PATCH 28/47] test: move new resource tests to module level per repo convention The eight resource-template tests added in this PR were placed inside the legacy TestServer and TestContextInjection classes to match surrounding code, but the repo convention is standalone module-level functions. Moved to the bottom of the file alongside the existing standalone tests. 
--- tests/server/mcpserver/test_server.py | 240 +++++++++++++------------- 1 file changed, 124 insertions(+), 116 deletions(-) diff --git a/tests/server/mcpserver/test_server.py b/tests/server/mcpserver/test_server.py index 85d6dd819..183c32c1c 100644 --- a/tests/server/mcpserver/test_server.py +++ b/tests/server/mcpserver/test_server.py @@ -142,99 +142,6 @@ async def test_add_resource_decorator_incorrect_usage(self): def get_data(x: str) -> str: # pragma: no cover return f"Data: {x}" - async def test_resource_decorator_rfc6570_reserved_expansion(self): - # Regression: old regex-based param extraction couldn't see `path` - # in `{+path}` and failed with a confusing mismatch error. - mcp = MCPServer() - - @mcp.resource("file://docs/{+path}") - def read_doc(path: str) -> str: - raise NotImplementedError - - templates = await mcp.list_resource_templates() - assert [t.uri_template for t in templates] == ["file://docs/{+path}"] - - async def test_resource_decorator_rejects_malformed_template(self): - mcp = MCPServer() - with pytest.raises(InvalidUriTemplate, match="Unclosed expression"): - mcp.resource("file://{name") - - async def test_resource_optional_query_params_use_function_defaults(self): - """Omitted {?...} query params should fall through to the - handler's Python defaults. 
Partial and reordered params work.""" - mcp = MCPServer() - - @mcp.resource("logs://{service}{?since,level}") - def tail_logs(service: str, since: str = "1h", level: str = "info") -> str: - return f"{service}|{since}|{level}" - - async with Client(mcp) as client: - # No query → all defaults - r = await client.read_resource("logs://api") - assert isinstance(r.contents[0], TextResourceContents) - assert r.contents[0].text == "api|1h|info" - - # Partial query → one default - r = await client.read_resource("logs://api?since=15m") - assert isinstance(r.contents[0], TextResourceContents) - assert r.contents[0].text == "api|15m|info" - - # Reordered, both present - r = await client.read_resource("logs://api?level=error&since=5m") - assert isinstance(r.contents[0], TextResourceContents) - assert r.contents[0].text == "api|5m|error" - - # Extra param ignored - r = await client.read_resource("logs://api?since=2h&utm=x") - assert isinstance(r.contents[0], TextResourceContents) - assert r.contents[0].text == "api|2h|info" - - async def test_resource_security_default_rejects_traversal(self): - mcp = MCPServer() - - @mcp.resource("data://items/{name}") - def get_item(name: str) -> str: - return f"item:{name}" - - async with Client(mcp) as client: - # Safe value passes through to the handler - r = await client.read_resource("data://items/widget") - assert isinstance(r.contents[0], TextResourceContents) - assert r.contents[0].text == "item:widget" - - # ".." 
as a path component is rejected by default policy - with pytest.raises(MCPError, match="Unknown resource"): - await client.read_resource("data://items/..") - - async def test_resource_security_per_resource_override(self): - mcp = MCPServer() - - @mcp.resource( - "git://diff/{+range}", - security=ResourceSecurity(exempt_params={"range"}), - ) - def git_diff(range: str) -> str: - return f"diff:{range}" - - async with Client(mcp) as client: - # "../foo" would be rejected by default, but "range" is exempt - result = await client.read_resource("git://diff/../foo") - assert isinstance(result.contents[0], TextResourceContents) - assert result.contents[0].text == "diff:../foo" - - async def test_resource_security_server_wide_override(self): - mcp = MCPServer(resource_security=ResourceSecurity(reject_path_traversal=False)) - - @mcp.resource("data://items/{name}") - def get_item(name: str) -> str: - return f"item:{name}" - - async with Client(mcp) as client: - # Server-wide policy disabled traversal check; ".." now allowed - result = await client.read_resource("data://items/..") - assert isinstance(result.contents[0], TextResourceContents) - assert result.contents[0].text == "item:.." - class TestDnsRebindingProtection: """Tests for automatic DNS rebinding protection on localhost. 
@@ -1227,29 +1134,6 @@ def resource_with_context(name: str, ctx: Context) -> str: # Should have either request_id or indication that context was injected assert "Resource test - context injected" == content.text - async def test_static_resource_with_context_param_errors(self): - """A non-template URI with a Context-only handler should error - at decoration time with a clear message, not silently register - an unreachable resource.""" - mcp = MCPServer() - - with pytest.raises(ValueError, match="Context injection for static resources is not yet supported"): - - @mcp.resource("weather://current") - def current_weather(ctx: Context) -> str: - raise NotImplementedError - - async def test_static_resource_with_extra_params_errors(self): - """A non-template URI with non-Context params should error at - decoration time.""" - mcp = MCPServer() - - with pytest.raises(ValueError, match="has no URI template variables"): - - @mcp.resource("data://fixed") - def get_data(name: str) -> str: - raise NotImplementedError - async def test_resource_without_context(self): """Test that resources without context work normally.""" mcp = MCPServer() @@ -1536,6 +1420,130 @@ def prompt_fn(name: str) -> str: ... # pragma: no branch await client.get_prompt("prompt_fn") +async def test_resource_decorator_rfc6570_reserved_expansion(): + # Regression: old regex-based param extraction couldn't see `path` + # in `{+path}` and failed with a confusing mismatch error. 
+ mcp = MCPServer() + + @mcp.resource("file://docs/{+path}") + def read_doc(path: str) -> str: + raise NotImplementedError + + templates = await mcp.list_resource_templates() + assert [t.uri_template for t in templates] == ["file://docs/{+path}"] + + +async def test_resource_decorator_rejects_malformed_template(): + mcp = MCPServer() + with pytest.raises(InvalidUriTemplate, match="Unclosed expression"): + mcp.resource("file://{name") + + +async def test_resource_optional_query_params_use_function_defaults(): + """Omitted {?...} query params should fall through to the + handler's Python defaults. Partial and reordered params work.""" + mcp = MCPServer() + + @mcp.resource("logs://{service}{?since,level}") + def tail_logs(service: str, since: str = "1h", level: str = "info") -> str: + return f"{service}|{since}|{level}" + + async with Client(mcp) as client: + # No query → all defaults + r = await client.read_resource("logs://api") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|1h|info" + + # Partial query → one default + r = await client.read_resource("logs://api?since=15m") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|15m|info" + + # Reordered, both present + r = await client.read_resource("logs://api?level=error&since=5m") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|5m|error" + + # Extra param ignored + r = await client.read_resource("logs://api?since=2h&utm=x") + assert isinstance(r.contents[0], TextResourceContents) + assert r.contents[0].text == "api|2h|info" + + +async def test_resource_security_default_rejects_traversal(): + mcp = MCPServer() + + @mcp.resource("data://items/{name}") + def get_item(name: str) -> str: + return f"item:{name}" + + async with Client(mcp) as client: + # Safe value passes through to the handler + r = await client.read_resource("data://items/widget") + assert isinstance(r.contents[0], 
TextResourceContents) + assert r.contents[0].text == "item:widget" + + # ".." as a path component is rejected by default policy + with pytest.raises(MCPError, match="Unknown resource"): + await client.read_resource("data://items/..") + + +async def test_resource_security_per_resource_override(): + mcp = MCPServer() + + @mcp.resource( + "git://diff/{+range}", + security=ResourceSecurity(exempt_params={"range"}), + ) + def git_diff(range: str) -> str: + return f"diff:{range}" + + async with Client(mcp) as client: + # "../foo" would be rejected by default, but "range" is exempt + result = await client.read_resource("git://diff/../foo") + assert isinstance(result.contents[0], TextResourceContents) + assert result.contents[0].text == "diff:../foo" + + +async def test_resource_security_server_wide_override(): + mcp = MCPServer(resource_security=ResourceSecurity(reject_path_traversal=False)) + + @mcp.resource("data://items/{name}") + def get_item(name: str) -> str: + return f"item:{name}" + + async with Client(mcp) as client: + # Server-wide policy disabled traversal check; ".." now allowed + result = await client.read_resource("data://items/..") + assert isinstance(result.contents[0], TextResourceContents) + assert result.contents[0].text == "item:.." 
+ + +async def test_static_resource_with_context_param_errors(): + """A non-template URI with a Context-only handler should error + at decoration time with a clear message, not silently register + an unreachable resource.""" + mcp = MCPServer() + + with pytest.raises(ValueError, match="Context injection for static resources is not yet supported"): + + @mcp.resource("weather://current") + def current_weather(ctx: Context) -> str: + raise NotImplementedError + + +async def test_static_resource_with_extra_params_errors(): + """A non-template URI with non-Context params should error at + decoration time.""" + mcp = MCPServer() + + with pytest.raises(ValueError, match="has no URI template variables"): + + @mcp.resource("data://fixed") + def get_data(name: str) -> str: + raise NotImplementedError + + async def test_completion_decorator() -> None: """Test that the completion decorator registers a working handler.""" mcp = MCPServer() From dcfd67ac6694c093bea6f108cee40cf463084db1 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 23:09:48 +0000 Subject: [PATCH 29/47] test: close coverage gaps in _parse_query and _split_query_tail fallbacks Adds cases for the fallback paths introduced in the lenient-query fixes: literal ? in path portion, {?...} expression in path portion, empty & segments in query string, and duplicate query keys. --- tests/shared/test_uri_template.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index a12cc601c..73cf31c38 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -448,6 +448,14 @@ def test_expand_rejects_invalid_value_types(value: object): # Standalone {&var} falls through to strict regex (expands with # & prefix, no ? for lenient matching to split on) ("api{&page}", "api&page=2", {"page": "2"}), + # Literal ? 
in path portion falls through to strict regex + ("api?x{?page}", "api?x?page=2", {"page": "2"}), + # {?...} expression in path portion also falls through + ("api{?q}x{?page}", "api?q=1x?page=2", {"q": "1", "page": "2"}), + # Empty & segments in query are skipped + ("search{?q}", "search?&q=hello&", {"q": "hello"}), + # Duplicate query keys keep first value + ("search{?q}", "search?q=first&q=second", {"q": "first"}), # Level 3: query continuation with literal ? falls back to # strict regex (template-order, all-present required) ("?a=1{&b}", "?a=1&b=2", {"b": "2"}), From a8f488e2f5883fdbc3291b0aa19861196fa682f9 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 23:29:40 +0000 Subject: [PATCH 30/47] fix: preserve empty list items in explode matching _extract_path was dropping all empty segments when splitting an explode capture, but only the first empty item comes from the leading operator prefix. Subsequent empties are legitimate values: {/path*} with ['a', '', 'c'] expands to /a//c and must match back to the same list. Split by separator, strip only items[0] if empty, then iterate. The ; operator is unaffected since empty values use the bare-name form which is a non-empty segment. --- src/mcp/shared/uri_template.py | 11 ++++++++--- tests/shared/test_uri_template.py | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 6217137bb..5bd922213 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -522,9 +522,14 @@ def _extract_path(m: re.Match[str], variables: Sequence[Variable]) -> dict[str, continue segments: list[str] = [] prefix = f"{var.name}=" - for seg in raw.split(spec.separator): - if not seg: # leading separator produces an empty first item - continue + # Splitting on the separator yields an empty first item from + # the leading prefix. 
Strip only that one; subsequent empty + # items are legitimate empty values ({/path*} with ["a","","c"] + # expands to /a//c and must round-trip). + items = raw.split(spec.separator) + if items and not items[0]: + items = items[1:] + for seg in items: if spec.named: # Named explode emits name=value per item (or bare # name for ; with empty value). Validate the name diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 73cf31c38..c7f02f5bb 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -512,6 +512,19 @@ def test_match_adjacent_vars_with_prefix_names(): assert t.match("abcd") == {"var": "abcd", "vara": ""} +def test_match_explode_preserves_empty_list_items(): + # Splitting the explode capture on its separator yields a leading + # empty item from the operator prefix; only that one is stripped. + # Subsequent empties are legitimate values from the input list. + t = UriTemplate.parse("{/path*}") + assert t.match("/a//c") == {"path": ["a", "", "c"]} + assert t.match("//a") == {"path": ["", "a"]} + assert t.match("/a/") == {"path": ["a", ""]} + + t = UriTemplate.parse("host{.labels*}") + assert t.match("host.a..c") == {"labels": ["a", "", "c"]} + + def test_match_adjacent_vars_disambiguated_by_literal(): # A literal between vars resolves the ambiguity. t = UriTemplate.parse("{a}-{b}") @@ -609,6 +622,10 @@ def test_match_explode_encoded_separator_in_segment(): ("x{name}y", {"name": ""}), ("item{;keys*}", {"keys": ["a", "b", "c"]}), ("item{;keys*}", {"keys": ["a", "", "b"]}), + # Empty strings in explode lists round-trip for unnamed operators + ("{/path*}", {"path": ["a", "", "c"]}), + ("{/path*}", {"path": ["", "a"]}), + ("host{.labels*}", {"labels": ["a", "", "c"]}), # Partial query expansion round-trips: expand omits undefined # vars, match leaves them absent from the result. 
("logs://{service}{?since,level}", {"service": "api"}), From 7c34c12e38718711ea04f65b812dc22387c58b91 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 23:32:28 +0000 Subject: [PATCH 31/47] refactor: simplify explode split since regex guarantees leading separator The explode capture pattern ((?:SEP body)*?) means non-empty captures always start with the separator, so split()[0] is always empty. The defensive if-check was a dead branch; slice unconditionally instead. --- src/mcp/shared/uri_template.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 5bd922213..629720c78 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -522,14 +522,11 @@ def _extract_path(m: re.Match[str], variables: Sequence[Variable]) -> dict[str, continue segments: list[str] = [] prefix = f"{var.name}=" - # Splitting on the separator yields an empty first item from - # the leading prefix. Strip only that one; subsequent empty - # items are legitimate empty values ({/path*} with ["a","","c"] - # expands to /a//c and must round-trip). - items = raw.split(spec.separator) - if items and not items[0]: - items = items[1:] - for seg in items: + # The explode regex ((?:SEP body)*?) guarantees non-empty + # captures start with the separator, so split()[0] is always + # "". Slice it off; subsequent empties are legitimate values + # ({/path*} with ["a","","c"] expands to /a//c). + for seg in raw.split(spec.separator)[1:]: if spec.named: # Named explode emits name=value per item (or bare # name for ; with empty value). 
Validate the name From ed84090006cc8b22322f6861bcec0d62584e2b27 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Thu, 26 Mar 2026 23:40:38 +0000 Subject: [PATCH 32/47] fix: fall back to strict regex when path contains {#...} or literal # _split_query_tail enabled lenient matching for page{#section}{?q}, but lenient matching's partition('#') stripped the fragment before the path regex (which expects #section) could see it, causing fullmatch to always fail. Extended the path-portion fallback check to also bail on {#...} expressions and literal # characters, mirroring the existing ? check. Such templates are semantically unusual (query-after-fragment is not valid URI structure) but now round-trip correctly via strict regex. --- src/mcp/shared/uri_template.py | 11 ++++++----- tests/shared/test_uri_template.py | 4 ++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 629720c78..e71357b0b 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -579,14 +579,15 @@ def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: if first.operator != "?": return parts, [] - # If the path portion contains a literal ? or a {?...} expression, - # the URI's ? split won't align with our template boundary. Fall - # back to strict regex. + # If the path portion contains a literal ?/# or a {?...}/{#...} + # expression, lenient matching's partition("#") then partition("?") + # would strip content the path regex expects to see. Fall back to + # strict regex. for part in parts[:split]: if isinstance(part, str): - if "?" in part: + if "?" 
in part or "#" in part: return parts, [] - elif part.operator == "?": + elif part.operator in ("?", "#"): return parts, [] query_vars: list[Variable] = [] diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index c7f02f5bb..5d1a3a1b3 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -452,6 +452,10 @@ def test_expand_rejects_invalid_value_types(value: object): ("api?x{?page}", "api?x?page=2", {"page": "2"}), # {?...} expression in path portion also falls through ("api{?q}x{?page}", "api?q=1x?page=2", {"q": "1", "page": "2"}), + # {#...} or literal # in path portion falls through: lenient + # matching would strip the fragment before the path regex sees it + ("page{#section}{?q}", "page#intro?q=x", {"section": "intro", "q": "x"}), + ("page#lit{?q}", "page#lit?q=x", {"q": "x"}), # Empty & segments in query are skipped ("search{?q}", "search?&q=hello&", {"q": "hello"}), # Duplicate query keys keep first value From dd505ea8b782568c8337cdd3316f4315eab27f26 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:07:33 +0000 Subject: [PATCH 33/47] docs: add missing type annotations to resources.md examples Bare dict return types are now parameterized (dict[str, str] or dict[str, Any] as appropriate). Low-level handler examples now include ServerRequestContext[Any] and PaginatedRequestParams types for the ctx and params parameters, with the corresponding imports added to each code block. 
--- docs/server/resources.md | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/server/resources.md b/docs/server/resources.md index e774c6ded..4432a1162 100644 --- a/docs/server/resources.md +++ b/docs/server/resources.md @@ -44,7 +44,7 @@ Instead, register a template with placeholders: ```python @mcp.resource("tickets://{ticket_id}") -def get_ticket(ticket_id: str) -> dict: +def get_ticket(ticket_id: str) -> dict[str, str]: ticket = helpdesk.find(ticket_id) return {"id": ticket_id, "subject": ticket.subject, "status": ticket.status} ``` @@ -61,7 +61,7 @@ type and the SDK will convert: ```python @mcp.resource("orders://{order_id}") -def get_order(order_id: int) -> dict: +def get_order(order_id: int) -> dict[str, Any]: # "12345" from the URI becomes the int 12345 return db.orders.get(order_id) ``` @@ -107,7 +107,7 @@ string with slashes, use `{/name*}`: ```python @mcp.resource("tree://nodes{/path*}") -def walk_tree(path: list[str]) -> dict: +def walk_tree(path: list[str]) -> dict[str, Any]: # tree://nodes/a/b/c gives path = ["a", "b", "c"] node = root for segment in path: @@ -243,11 +243,13 @@ For fixed URIs, keep a registry and dispatch on exact match: from mcp.server.lowlevel import Server from mcp.types import ( ListResourcesResult, + PaginatedRequestParams, ReadResourceRequestParams, ReadResourceResult, Resource, TextResourceContents, ) +from mcp.server.context import ServerRequestContext RESOURCES = { "config://features": lambda: '{"beta_search": true}', @@ -255,13 +257,17 @@ RESOURCES = { } -async def on_list_resources(ctx, params) -> ListResourcesResult: +async def on_list_resources( + ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None +) -> ListResourcesResult: return ListResourcesResult( resources=[Resource(name=uri, uri=uri) for uri in RESOURCES] ) -async def on_read_resource(ctx, params: ReadResourceRequestParams) -> ReadResourceResult: +async def on_read_resource( + ctx: 
ServerRequestContext[Any], params: ReadResourceRequestParams +) -> ReadResourceResult: if (producer := RESOURCES.get(params.uri)) is not None: return ReadResourceResult( contents=[TextResourceContents(uri=params.uri, text=producer())] @@ -292,6 +298,7 @@ Parse your templates once, then match incoming URIs against them in your read handler: ```python +from mcp.server.context import ServerRequestContext from mcp.server.lowlevel import Server from mcp.shared.uri_template import UriTemplate from mcp.types import ReadResourceRequestParams, ReadResourceResult, TextResourceContents @@ -302,7 +309,9 @@ TEMPLATES = { } -async def on_read_resource(ctx, params: ReadResourceRequestParams) -> ReadResourceResult: +async def on_read_resource( + ctx: ServerRequestContext[Any], params: ReadResourceRequestParams +) -> ReadResourceResult: if (vars := TEMPLATES["files"].match(params.uri)) is not None: content = read_file_safely(vars["path"]) return ReadResourceResult(contents=[TextResourceContents(uri=params.uri, text=content)]) @@ -353,10 +362,12 @@ the protocol `ResourceTemplate` type, using the same template strings you parsed above: ```python -from mcp.types import ListResourceTemplatesResult, ResourceTemplate +from mcp.types import ListResourceTemplatesResult, PaginatedRequestParams, ResourceTemplate -async def on_list_resource_templates(ctx, params) -> ListResourceTemplatesResult: +async def on_list_resource_templates( + ctx: ServerRequestContext[Any], params: PaginatedRequestParams | None +) -> ListResourceTemplatesResult: return ListResourceTemplatesResult( resource_templates=[ ResourceTemplate(name="files", uri_template=str(TEMPLATES["files"])), From c8712ff1eb2b41b88cddb17868476646cc5d7052 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:18:43 +0000 Subject: [PATCH 34/47] docs: improve resources.md with spec link and concrete-URI examples Added a link to the MCP resources specification after the intro. 
Rewrote the multi-segment paths section to lead with the problem: show a URI that fails with {name} before introducing {+name} as the fix. Code comments align inputs with outputs for at-a-glance parsing. Rewrote the query parameters section to lead with the two concrete URIs a user would want to support (base and with-query), then show how one template covers both. --- docs/server/resources.md | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/docs/server/resources.md b/docs/server/resources.md index 4432a1162..0ca6d2c39 100644 --- a/docs/server/resources.md +++ b/docs/server/resources.md @@ -11,6 +11,9 @@ file. A resource is something the application *reads* to understand the world. Reading a resource should not change state or kick off expensive work. If it does either, you probably want a tool. +For the protocol-level details (message formats, lifecycle, pagination), +see the [MCP resources specification](https://modelcontextprotocol.io/specification/latest/server/resources). + ## A static resource The simplest case is a fixed URI that returns the same kind of content @@ -68,24 +71,33 @@ def get_order(order_id: int) -> dict[str, Any]: ### Multi-segment paths -A plain `{name}` matches a single URI segment. It stops at the first -slash. To match across slashes, use `{+name}`: +A plain `{name}` stops at the first slash. If your template is +`files://{name}`, a client reading `files://readme.txt` matches fine, +but `files://guides/intro.md` does not: the slash after `guides` ends +the match, and `intro.md` is left over. + +To capture the whole path including slashes, use `{+name}`: ```python @mcp.resource("files://{+path}") def read_file(path: str) -> str: - # Matches files://readme.txt - # Also matches files://guides/quickstart/intro.md + # files://readme.txt gives path = "readme.txt" + # files://guides/intro.md gives path = "guides/intro.md" ... 
``` -This is the pattern you want for filesystem paths, nested object keys, -or anything hierarchical. +Reach for `{+name}` whenever the value is hierarchical: filesystem +paths, nested object keys, URL paths you're proxying. ### Query parameters -Optional configuration goes in query parameters. Use `{?name}` or list -several with `{?a,b,c}`: +Say you want clients to read `logs://api` for recent logs, but also +`logs://api?since=15m&level=error` when they need to narrow it down. +The `?since=15m&level=error` part is optional configuration, and you +don't want a separate template for every combination. + +Declare these as query parameters with `{?name}`, or list several at +once with `{?a,b,c}`: ```python @mcp.resource("logs://{service}{?since,level}") @@ -93,9 +105,8 @@ def tail_logs(service: str, since: str = "1h", level: str = "info") -> str: return log_store.query(service, since=since, min_level=level) ``` -Reading `logs://api` uses the defaults. Reading -`logs://api?since=15m&level=error` narrows it down. The path identifies -*which* resource; the query tunes *how* you read it. +The path identifies *which* resource; the query tunes *how* you read +it. Query params are matched leniently: order doesn't matter, extras are ignored, and omitted params fall through to your function defaults. From 19822fbbebcd9aca620fdecde1b6070e59ac4441 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:31:10 +0000 Subject: [PATCH 35/47] fix: reject {expr}{+var} adjacency to close ReDoS gap The adjacency check rejected {+a}{b} but not the symmetric {a}{+b}. Both produce overlapping greedy quantifiers; a 64KB crafted input against prefix{a}{+b}.json takes ~23s to reject. Added prev_path_expr tracking so {+var} immediately after any path expression is rejected. {expr}{#var} remains allowed since the # operator prepends a literal '#' that the preceding group's character class excludes, giving a natural boundary. 
Also adds the missing 'from typing import Any' to the three low-level server examples in docs/server/resources.md. --- docs/server/resources.md | 6 ++++++ src/mcp/shared/uri_template.py | 15 +++++++++++---- tests/shared/test_uri_template.py | 8 ++++++-- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/docs/server/resources.md b/docs/server/resources.md index 0ca6d2c39..105626892 100644 --- a/docs/server/resources.md +++ b/docs/server/resources.md @@ -251,6 +251,8 @@ There's no decorator; you return the protocol types yourself. For fixed URIs, keep a registry and dispatch on exact match: ```python +from typing import Any + from mcp.server.lowlevel import Server from mcp.types import ( ListResourcesResult, @@ -309,6 +311,8 @@ Parse your templates once, then match incoming URIs against them in your read handler: ```python +from typing import Any + from mcp.server.context import ServerRequestContext from mcp.server.lowlevel import Server from mcp.shared.uri_template import UriTemplate @@ -373,6 +377,8 @@ the protocol `ResourceTemplate` type, using the same template strings you parsed above: ```python +from typing import Any + from mcp.types import ListResourceTemplatesResult, PaginatedRequestParams, ResourceTemplate diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index e71357b0b..286024e12 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -819,8 +819,11 @@ def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: trailing match fails the engine backtracks through O(n) split points. Two conditions trigger this: - - ``{+var}`` immediately adjacent to any expression - (``{+a}{b}``, ``{+a}{/b*}``) + - ``{+var}`` immediately adjacent to any expression on either + side (``{+a}{b}``, ``{a}{+b}``, ``{/a}{+b}``). The ``#`` + operator is exempt from the preceded-by case since it + prepends a literal ``#`` that the preceding group cannot + match. 
- Two ``{+var}``/``{#var}`` anywhere in the path, even with a literal between them (``{+a}/x/{+b}``) — the literal does not disambiguate since ``[^?#]*`` matches it too @@ -836,6 +839,7 @@ def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: """ prev_explode = False prev_reserved = False + prev_path_expr = False seen_reserved = False for part in parts: if isinstance(part, str): @@ -843,6 +847,7 @@ def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: # the seen-reserved count: [^?#]* matches most literals. prev_explode = False prev_reserved = False + prev_path_expr = False continue for var in part.variables: # ?/& are stripped before pattern building and never reach @@ -850,11 +855,12 @@ def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: if var.operator in ("?", "&"): prev_explode = False prev_reserved = False + prev_path_expr = False continue - if prev_reserved: + if prev_reserved or (var.operator == "+" and prev_path_expr): raise InvalidUriTemplate( - "{+var} or {#var} immediately followed by another expression " + "{+var} or {#var} immediately adjacent to another expression " "causes quadratic-time matching; separate them with a literal", template=template, ) @@ -872,5 +878,6 @@ def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: prev_explode = var.explode prev_reserved = var.operator in ("+", "#") + prev_path_expr = True if prev_reserved: seen_reserved = True diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 5d1a3a1b3..8594d6678 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -166,7 +166,7 @@ def test_parse_rejects_adjacent_explodes(template: str): @pytest.mark.parametrize( "template", [ - # {+var} immediately adjacent to any expression + # {+var} immediately adjacent to any expression (either side) "{+a}{b}", "{+a}{/b}", "{+a}{/b*}", @@ -175,6 +175,10 @@ def 
test_parse_rejects_adjacent_explodes(template: str): "{#a}{b}", "{+a,b}", # multi-var in one expression: same adjacency "prefix/{+path}{.ext}", # literal before doesn't help + "{a}{+b}", # + preceded by expression: same overlap + "{.a}{+b}", + "{/a}{+b}", + "x{name}{+path}y", # Two {+var}/{#var} anywhere, even with literals between "{+a}/x/{+b}", "{+a},{+b}", @@ -199,7 +203,7 @@ def test_parse_rejects_reserved_quadratic_patterns(template: str): "api/{+path}{?v,page}", # + followed by query (stripped before regex) "api/{+path}{&next}", # + followed by query-continuation "page{#section}", # # at end - "{a}{+b}", # + preceded by expression is fine; only following matters + "{a}{#b}", # # prepends literal '#' that {a}'s class excludes "{+a}/sep/{b}", # literal + bounded expression after: linear "{+a},{b}", # same: literal disambiguates when second is bounded ], From ae122845d4c7494c1e85283eb9ee5b4f8172ced6 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:12:14 +0000 Subject: [PATCH 36/47] refactor: replace regex matcher with linear-time two-ended scan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The regex-based URI template matcher required an ever-growing set of parse-time adjacency checks to reject templates that would cause catastrophic backtracking. Python's re module is a backtracking engine, so any pair of greedy groups with overlapping character classes and a failing suffix produces O(n^k) match time. Enumerating every such combination proved intractable — each fix revealed another bypass. This replaces the regex matcher with a two-ended linear scan: - Templates are flattened into a sequence of literal and capture atoms, with operator prefixes/separators lowered to explicit literals. - A template may contain at most one multi-segment variable ({+var}, {#var}, or explode). This is the only structural restriction. 
- The suffix is scanned right-to-left: literals via endswith, bounded variables via rfind of the preceding literal. This matches regex greedy-first semantics exactly for templates without a greedy var. - If a greedy var exists, the prefix is scanned left-to-right with lazy anchor-finding, and the greedy var gets whatever remains between prefix_end and suffix_start. Every URI character is visited O(1) times per atom. There is no backtracking; a failed anchor search returns None immediately. Removes _check_ambiguous_adjacency (80 lines of state tracking), _build_pattern, _expression_pattern, and the _pattern field. Templates previously rejected for adjacency ({+a}{b}, {a}{+b}, prefix/{+path}{.ext}) are now accepted and match in linear time. The only rejected patterns are those with two or more multi-segment variables, which are inherently ambiguous regardless of algorithm. --- src/mcp/shared/uri_template.py | 566 +++++++++++++++++++----------- tests/shared/test_uri_template.py | 144 +++++--- 2 files changed, 446 insertions(+), 264 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 286024e12..67bff6a84 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -9,28 +9,31 @@ operators (``{/var*}``, ``{.var*}``, ``{;var*}``). The Level 4 prefix modifier (``{var:N}``) and query-explode (``{?var*}``) are not supported. -Known matching limitations --------------------------- - -Matching is not specified by RFC 6570. A few templates can expand to -URIs that ``match()`` cannot unambiguously reverse: - -* Reserved/fragment expressions (``{+var}``, ``{#var}``) are restricted - to positions that avoid quadratic-time backtracking: at most one per - template, and not immediately adjacent to another expression. The - ``[^?#]*`` pattern overlaps with every other operator's character - class; a failing match against ``{+a}{b}`` or ``{+a}/x/{+b}`` - backtracks O(n²). 
Use a literal separator before a bounded - expression (``{+a}/sep/{b}``) or put the reserved expression last - (``file://docs/{+path}``). Trailing ``{?...}``/``{&...}`` query - expressions are always fine since they're matched via ``parse_qs``. - -* Reserved expansion ``{+var}`` leaves ``?`` and ``#`` unencoded, but - the match pattern stops at those characters so that templates like - ``{+path}{?q}`` can correctly separate path from query. A value - containing a literal ``?`` or ``#`` expands fine but will not - round-trip through ``match()``. Use simple ``{var}`` (which encodes - them) if round-trip matters for such values. +Matching semantics +------------------ + +Matching is not specified by RFC 6570 (§1.4 explicitly defers to regex +languages). This implementation uses a linear-time two-ended scan that +never backtracks, so match time is O(n) in URI length regardless of +template structure. + +A template may contain **at most one multi-segment variable** — +``{+var}``, ``{#var}``, or an explode-modified variable (``{/var*}``, +``{.var*}``, ``{;var*}``). This variable greedily consumes whatever the +surrounding bounded variables and literals do not. Two such variables +in one template are inherently ambiguous (which one gets the extra +segment?) and are rejected at parse time. + +Bounded variables before the multi-segment variable match **lazily** +(first occurrence of the following literal); those after match +**greedily** (last occurrence of the preceding literal). Templates +without a multi-segment variable match greedily throughout, identical +to regex semantics. + +Reserved expansion ``{+var}`` leaves ``?`` and ``#`` unencoded, but +the scan stops at those characters so ``{+path}{?q}`` can separate path +from query. A value containing a literal ``?`` or ``#`` expands fine +but will not round-trip through ``match()``. 
""" from __future__ import annotations @@ -38,7 +41,7 @@ import re from collections.abc import Mapping, Sequence from dataclasses import dataclass, field -from typing import Literal, cast +from typing import Literal, TypeAlias, cast from urllib.parse import quote, unquote __all__ = [ @@ -93,18 +96,19 @@ class _OperatorSpec: "&": _OperatorSpec(prefix="&", separator="&", named=True, allow_reserved=False), } -# Per-operator character class for regex matching. Each pattern matches -# the characters that can appear in an expanded value for that operator, -# stopping at the next structural delimiter. -_MATCH_PATTERN: dict[Operator, str] = { - "": r"[^/?#&,]*", # simple: everything structural is pct-encoded - "+": r"[^?#]*", # reserved: / allowed, stop at query/fragment - "#": r".*", # fragment: tail of URI - ".": r"[^./?#]*", # label: stop at next . - "/": r"[^/?#]*", # path segment: stop at next / - ";": r"[^;/?#]*", # path-param value (may be empty: ;name) - "?": r"[^&#]*", # query value (may be empty: ?name=) - "&": r"[^&#]*", # query-cont value +# Per-operator stop characters for the linear scan. A bounded variable's +# value ends at the first occurrence of any character in its stop set, +# mirroring the character-class boundaries a regex would use but without +# the backtracking. +_STOP_CHARS: dict[Operator, str] = { + "": "/?#&,", # simple: everything structural is pct-encoded + "+": "?#", # reserved: / allowed, stop at query/fragment + "#": "", # fragment: tail of URI, nothing stops it + ".": "./?#", # label: stop at next . 
+ "/": "/?#", # path segment: stop at next / + ";": ";/?#", # path-param value (may be empty: ;name) + "?": "&#", # query value (may be empty: ?name=) + "&": "&#", # query-cont value } @@ -143,6 +147,39 @@ class _Expression: _Part = str | _Expression +@dataclass(frozen=True) +class _Lit: + """A literal run in the flattened match-atom sequence.""" + + text: str + + +@dataclass(frozen=True) +class _Cap: + """A single-variable capture in the flattened match-atom sequence. + + ``ifemp`` marks the ``;`` operator's optional-equals quirk: ``{;id}`` + expands to ``;id=value`` or bare ``;id`` when the value is empty, so + the scan must accept both forms. + """ + + var: Variable + ifemp: bool = False + + +_Atom: TypeAlias = "_Lit | _Cap" + + +def _is_greedy(var: Variable) -> bool: + """Return True if this variable can span multiple path segments. + + Reserved/fragment expansion and explode variables are the only + constructs whose match range is not bounded by a single structural + delimiter. A template may contain at most one such variable. 
+ """ + return var.explode or var.operator in ("+", "#") + + def _is_str_sequence(value: object) -> bool: """Check if value is a non-string sequence whose items are all strings.""" if isinstance(value, str) or not isinstance(value, Sequence): @@ -249,8 +286,9 @@ class UriTemplate: template: str _parts: list[_Part] = field(repr=False, compare=False) _variables: list[Variable] = field(repr=False, compare=False) - _pattern: re.Pattern[str] = field(repr=False, compare=False) - _path_variables: list[Variable] = field(repr=False, compare=False) + _prefix: list[_Atom] = field(repr=False, compare=False) + _greedy: Variable | None = field(repr=False, compare=False) + _suffix: list[_Atom] = field(repr=False, compare=False) _query_variables: list[Variable] = field(repr=False, compare=False) @staticmethod @@ -306,18 +344,19 @@ def parse( parts, variables = _parse(template, max_expressions=max_expressions) # Trailing {?...}/{&...} expressions are matched leniently via - # parse_qs instead of regex: order-agnostic, partial, ignores - # extras. The path portion keeps regex matching. + # parse_qs rather than the scan: order-agnostic, partial, ignores + # extras. The path portion uses the linear scan. path_parts, query_vars = _split_query_tail(parts) - path_vars = variables[: len(variables) - len(query_vars)] - pattern = _build_pattern(path_parts) + atoms = _flatten(path_parts) + prefix, greedy, suffix = _partition_greedy(atoms, template) return cls( template=template, _parts=parts, _variables=variables, - _pattern=pattern, - _path_variables=path_vars, + _prefix=prefix, + _greedy=greedy, + _suffix=suffix, _query_variables=query_vars, ) @@ -453,17 +492,14 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di return None if self._query_variables: - # Two-phase: regex matches the path, the query is split and + # Two-phase: scan matches the path, the query is split and # decoded manually. 
Query params may be partial, reordered, # or include extras; absent params stay absent so downstream # defaults can apply. Fragment is stripped first since the # template's {?...} tail never describes a fragment. before_fragment, _, _ = uri.partition("#") path, _, query = before_fragment.partition("?") - m = self._pattern.fullmatch(path) - if m is None: - return None - result = _extract_path(m, self._path_variables) + result = self._scan(path) if result is None: return None if query: @@ -473,10 +509,43 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di result[var.name] = parsed[var.name] return result - m = self._pattern.fullmatch(uri) - if m is None: + return self._scan(uri) + + def _scan(self, uri: str) -> dict[str, str | list[str]] | None: + """Run the two-ended linear scan against the path portion of a URI.""" + n = len(uri) + + # Suffix right-to-left: literals anchor via endswith, bounded + # vars take the minimum needed (rfind for the preceding literal). + # This matches regex greedy-first semantics for templates without + # a greedy var, and minimises the suffix claim when one exists. + suffix = _scan_suffix(self._suffix, uri, n) + if suffix is None: return None - return _extract_path(m, self._variables) + suffix_result, suffix_start = suffix + + if self._greedy is None: + # No greedy var: suffix scan consumed the whole template. + # It must have consumed the whole URI too. + return suffix_result if suffix_start == 0 else None + + # Prefix left-to-right: each bounded var takes the minimum + # needed (find for the following literal), leaving as much as + # possible for the greedy var in the middle. 
+ prefix = _scan_prefix(self._prefix, uri, 0, suffix_start) + if prefix is None: + return None + prefix_result, prefix_end = prefix + + if prefix_end > suffix_start: + return None + + middle = uri[prefix_end:suffix_start] + greedy_value = _extract_greedy(self._greedy, middle) + if greedy_value is None: + return None + + return {**prefix_result, self._greedy.name: greedy_value, **suffix_result} def __str__(self) -> str: return self.template @@ -502,47 +571,54 @@ def _parse_query(query: str) -> dict[str, str]: return result -def _extract_path(m: re.Match[str], variables: Sequence[Variable]) -> dict[str, str | list[str]] | None: - """Decode regex capture groups into a variable-name mapping. +def _extract_greedy(var: Variable, raw: str) -> str | list[str] | None: + """Decode the greedy variable's isolated middle span. - Handles scalar and explode variables. Named explode (``;``) strips - and validates the ``name=`` prefix per item, returning ``None`` on - mismatch. + For scalar greedy (``{+var}``, ``{#var}``) this is a stop-char + validation and a single ``unquote``. For explode variables the span + is a run of separator-delimited segments (``/a/b/c`` or + ``;keys=a;keys=b``) that is split, validated, and decoded per item. """ - result: dict[str, str | list[str]] = {} - # One capture group per variable, emitted in template order. - for var, raw in zip(variables, m.groups()): - spec = _OPERATOR_SPECS[var.operator] - - if var.explode: - # Explode capture holds the whole run including separators, - # e.g. "/a/b/c" or ";keys=a;keys=b". Split and decode each. - if not raw: - result[var.name] = [] - continue - segments: list[str] = [] - prefix = f"{var.name}=" - # The explode regex ((?:SEP body)*?) guarantees non-empty - # captures start with the separator, so split()[0] is always - # "". Slice it off; subsequent empties are legitimate values - # ({/path*} with ["a","","c"] expands to /a//c). 
- for seg in raw.split(spec.separator)[1:]: - if spec.named: - # Named explode emits name=value per item (or bare - # name for ; with empty value). Validate the name - # and strip the prefix before decoding. - if seg.startswith(prefix): - seg = seg[len(prefix) :] - elif seg == var.name: - seg = "" - else: - return None - segments.append(unquote(seg)) - result[var.name] = segments - else: - result[var.name] = unquote(raw) + spec = _OPERATOR_SPECS[var.operator] + stops = _STOP_CHARS[var.operator] - return result + if not var.explode: + if any(c in stops for c in raw): + return None + return unquote(raw) + + sep = spec.separator + if not raw: + return [] + # A non-empty explode span must begin with the separator: {/a*} + # expands to "/x/y", never "x/y". The scan does not consume the + # separator itself, so it must be the first character here. + if raw[0] != sep: + return None + # Segments must not contain the operator's non-separator stop + # characters (e.g. {/path*} segments may contain neither ? nor #). + body_stops = set(stops) - {sep} + if any(c in body_stops for c in raw): + return None + + segments: list[str] = [] + prefix = f"{var.name}=" + # split()[0] is always "" because raw starts with the separator; + # subsequent empties are legitimate values ({/path*} with + # ["a","","c"] expands to /a//c). + for seg in raw.split(sep)[1:]: + if spec.named: + # Named explode emits name=value per item (or bare name + # under ; with empty value). Validate the name and strip + # the prefix before decoding. 
+ if seg.startswith(prefix): + seg = seg[len(prefix) :] + elif seg == var.name: + seg = "" + else: + return None + segments.append(unquote(seg)) + return segments def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: @@ -598,64 +674,6 @@ def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: return parts[:split], query_vars -def _build_pattern(parts: Sequence[_Part]) -> re.Pattern[str]: - """Compile a regex that matches URIs produced by this template. - - Walks parts in order: literals are ``re.escape``'d, expressions - become capture groups. One group is emitted per variable, in the - same order as the variables appearing in ``parts``, so - ``match.groups()`` can be zipped directly. - - Raises: - re.error: Only if pattern assembly is buggy — should not happen - for templates that passed :func:`_parse`. - """ - chunks: list[str] = [] - for part in parts: - if isinstance(part, str): - chunks.append(re.escape(part)) - else: - chunks.append(_expression_pattern(part)) - return re.compile("".join(chunks)) - - -def _expression_pattern(expr: _Expression) -> str: - """Build the regex fragment for a single ``{...}`` expression. - - Emits the operator's prefix, then one capture group per variable - separated by the operator's separator. Named operators (``; ? &``) - include ``name=`` before the capture. - """ - spec = _OPERATOR_SPECS[expr.operator] - body = _MATCH_PATTERN[expr.operator] - sep = re.escape(spec.separator) - prefix = re.escape(spec.prefix) - - pieces: list[str] = [] - for i, var in enumerate(expr.variables): - # First var gets the prefix; subsequent vars get the separator. - lead = prefix if i == 0 else sep - - if var.explode: - # Capture the whole run of separator+value repetitions. - # Non-greedy so a trailing literal can terminate the run. 
- pieces.append(f"((?:{sep}{body})*?)") - elif spec.named: - name = re.escape(var.name) - if expr.operator == ";": - # RFC ifemp: ; emits bare name for empty values, so = is - # optional. The lookahead asserts the name ends at = or a - # delimiter, preventing {;id} from matching ;identity. - pieces.append(f"{lead}{name}(?==|[;/?#]|$)=?({body})") - else: - # ? and & always emit name=, even for empty values. - pieces.append(f"{lead}{name}=({body})") - else: - pieces.append(f"{lead}({body})") - - return "".join(pieces) - - def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Variable]]: """Split a template into an ordered sequence of literals and expressions. @@ -666,8 +684,7 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va Raises: InvalidUriTemplate: On unclosed braces, too many expressions, or - any error surfaced by :func:`_parse_expression` or - :func:`_check_ambiguous_adjacency`. + any error surfaced by :func:`_parse_expression`. """ parts: list[_Part] = [] variables: list[Variable] = [] @@ -711,7 +728,6 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va # Advance past the closing brace. i = end + 1 - _check_ambiguous_adjacency(template, parts) _check_duplicate_variables(template, variables) return parts, variables @@ -804,80 +820,202 @@ def _check_duplicate_variables(template: str, variables: list[Variable]) -> None seen.add(var.name) -def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: - """Reject templates where adjacent expressions would cause ambiguous or quadratic matching. +def _flatten(parts: list[_Part]) -> list[_Atom]: + """Lower expressions into a flat sequence of literals and single-variable captures. - Two patterns are rejected: + Operator prefixes and separators become explicit ``_Lit`` atoms so + the scan only ever sees two atom kinds. 
Adjacent literals are + coalesced so that anchor-finding (``find``/``rfind``) operates on + the longest possible literal, reducing false matches. - 1. Adjacent explode variables (``{/a*}{/b*}``): the split between - ``a`` and ``b`` in ``/x/y/z`` is undetermined. Different - operators don't help since character classes overlap. + Explode variables emit no lead literal: the explode capture + includes its own separator-prefixed repetitions (``{/a*}`` → + ``/x/y/z``, not ``/`` then ``x/y/z``). + """ + atoms: list[_Atom] = [] - 2. Reserved/fragment expansion in a position that causes quadratic - backtracking. The ``[^?#]*`` pattern for ``+`` and ``#`` - overlaps with every other operator's character class, so when a - trailing match fails the engine backtracks through O(n) split - points. Two conditions trigger this: + def push_lit(text: str) -> None: + if not text: + return + if atoms and isinstance(atoms[-1], _Lit): + atoms[-1] = _Lit(atoms[-1].text + text) + else: + atoms.append(_Lit(text)) + + for part in parts: + if isinstance(part, str): + push_lit(part) + continue + spec = _OPERATOR_SPECS[part.operator] + for i, var in enumerate(part.variables): + lead = spec.prefix if i == 0 else spec.separator + if var.explode: + atoms.append(_Cap(var)) + elif spec.named: + # ; uses ifemp (bare name when empty); ? and & always + # emit name= so the equals is part of the literal. + if part.operator == ";": + push_lit(f"{lead}{var.name}") + atoms.append(_Cap(var, ifemp=True)) + else: + push_lit(f"{lead}{var.name}=") + atoms.append(_Cap(var)) + else: + push_lit(lead) + atoms.append(_Cap(var)) + return atoms - - ``{+var}`` immediately adjacent to any expression on either - side (``{+a}{b}``, ``{a}{+b}``, ``{/a}{+b}``). The ``#`` - operator is exempt from the preceded-by case since it - prepends a literal ``#`` that the preceding group cannot - match. 
- - Two ``{+var}``/``{#var}`` anywhere in the path, even with a - literal between them (``{+a}/x/{+b}``) — the literal does not - disambiguate since ``[^?#]*`` matches it too - A 64KB payload against either can consume tens of seconds of CPU. +def _partition_greedy(atoms: list[_Atom], template: str) -> tuple[list[_Atom], Variable | None, list[_Atom]]: + """Split atoms at the single greedy variable, if any. - Trailing ``{?...}``/``{&...}`` expressions are handled via - ``parse_qs`` outside the path regex, so they do not count against - any check. + Returns ``(prefix, greedy_var, suffix)``. If there is no greedy + variable the entire atom list is returned as the suffix so that + the right-to-left scan (which matches regex-greedy semantics) + handles it. Raises: - InvalidUriTemplate: If any pattern is detected. + InvalidUriTemplate: If more than one greedy variable is + present. Two multi-segment variables in one template are + inherently ambiguous — there is no principled way to decide + which one absorbs an extra segment. """ - prev_explode = False - prev_reserved = False - prev_path_expr = False - seen_reserved = False - for part in parts: - if isinstance(part, str): - # A literal breaks immediate adjacency but does not reset - # the seen-reserved count: [^?#]* matches most literals. - prev_explode = False - prev_reserved = False - prev_path_expr = False - continue - for var in part.variables: - # ?/& are stripped before pattern building and never reach - # the path regex. 
- if var.operator in ("?", "&"): - prev_explode = False - prev_reserved = False - prev_path_expr = False - continue - - if prev_reserved or (var.operator == "+" and prev_path_expr): - raise InvalidUriTemplate( - "{+var} or {#var} immediately adjacent to another expression " - "causes quadratic-time matching; separate them with a literal", - template=template, - ) - if var.operator in ("+", "#") and seen_reserved: - raise InvalidUriTemplate( - "Multiple {+var} or {#var} expressions in one template cause " - "quadratic-time matching even with literals between them", - template=template, - ) - if var.explode and prev_explode: + greedy_idx: int | None = None + for i, atom in enumerate(atoms): + if isinstance(atom, _Cap) and _is_greedy(atom.var): + if greedy_idx is not None: raise InvalidUriTemplate( - "Adjacent explode expressions are ambiguous for matching and not supported", + "Template contains more than one multi-segment variable " + "({+var}, {#var}, or explode modifier); matching would be ambiguous", template=template, ) + greedy_idx = i + if greedy_idx is None: + return [], None, atoms + greedy = atoms[greedy_idx] + assert isinstance(greedy, _Cap) + return atoms[:greedy_idx], greedy.var, atoms[greedy_idx + 1 :] + + +def _scan_suffix(atoms: list[_Atom], uri: str, end: int) -> tuple[dict[str, str | list[str]], int] | None: + """Scan atoms right-to-left from ``end``, returning captures and start position. + + Each bounded variable takes the minimum span that lets its + preceding literal match (found via ``rfind``), which makes the + *first* variable in template order greedy — identical to Python + regex semantics for a sequence of greedy groups. 
+ """ + result: dict[str, str | list[str]] = {} + pos = end + i = len(atoms) - 1 + while i >= 0: + atom = atoms[i] + if isinstance(atom, _Lit): + n = len(atom.text) + if pos < n or uri[pos - n : pos] != atom.text: + return None + pos -= n + i -= 1 + continue + + var = atom.var + stops = _STOP_CHARS[var.operator] + prev = atoms[i - 1] if i > 0 else None + + if atom.ifemp: + # ;name or ;name=value. The preceding _Lit is ";name". + # Try empty first: if the lit ends at pos the value is + # absent (RFC ifemp). Otherwise require =value. + assert isinstance(prev, _Lit) + if uri.endswith(prev.text, 0, pos): + result[var.name] = "" + i -= 1 + continue + start = pos + while start > 0 and uri[start - 1] not in stops and uri[start - 1] != "=": + start -= 1 + if start == 0 or uri[start - 1] != "=": + return None + result[var.name] = unquote(uri[start:pos]) + pos = start - 1 + i -= 1 + continue + + # Earliest valid start: the var cannot extend left past any + # stop-char, so scan backward to find that boundary. + earliest = pos + while earliest > 0 and uri[earliest - 1] not in stops: + earliest -= 1 + + if prev is None: + start = earliest + elif isinstance(prev, _Lit): + # Rightmost occurrence of the preceding literal whose end + # falls within the var's valid range. + idx = uri.rfind(prev.text, 0, pos) + if idx == -1 or idx + len(prev.text) < earliest: + return None + start = idx + len(prev.text) + else: + # Adjacent capture with no literal anchor: this (later) + # var takes nothing, the earlier var takes the span. + start = pos + + result[var.name] = unquote(uri[start:pos]) + pos = start + i -= 1 + return result, pos + + +def _scan_prefix(atoms: list[_Atom], uri: str, start: int, limit: int) -> tuple[dict[str, str | list[str]], int] | None: + """Scan atoms left-to-right from ``start``, not exceeding ``limit``. 
+ + Each bounded variable takes the minimum span that lets its + following literal match (found via ``find``), leaving the + greedy variable as much of the URI as possible. + """ + result: dict[str, str | list[str]] = {} + pos = start + n = len(atoms) + for i in range(n): + atom = atoms[i] + if isinstance(atom, _Lit): + end = pos + len(atom.text) + if end > limit or uri[pos:end] != atom.text: + return None + pos = end + continue + + var = atom.var + stops = _STOP_CHARS[var.operator] + nxt = atoms[i + 1] if i + 1 < n else None + + if atom.ifemp: + # Optional = after ;name. A non-= non-delimiter here means + # the name continued (e.g. ;keys vs ;key) — reject. + if pos < limit and uri[pos] == "=": + pos += 1 + elif pos < limit and uri[pos] not in stops: + return None + + # Latest valid end: the var stops at the first stop-char or + # the scan limit, whichever comes first. + latest = pos + while latest < limit and uri[latest] not in stops: + latest += 1 + + if nxt is None: + end = latest + elif isinstance(nxt, _Lit): + # First occurrence of the following literal starting + # within the var's valid range. 
+ idx = uri.find(nxt.text, pos, latest + len(nxt.text)) + if idx == -1 or idx > latest: + return None + end = idx + else: + end = latest - prev_explode = var.explode - prev_reserved = var.operator in ("+", "#") - prev_path_expr = True - if prev_reserved: - seen_reserved = True + result[var.name] = unquote(uri[pos:end]) + pos = end + return result, pos diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 8594d6678..060a0284a 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -153,44 +153,29 @@ def test_parse_rejects_unsupported_explode(template: str): @pytest.mark.parametrize( "template", [ - "{/a*}{/b*}", # same operator - "{/a*}{.b*}", # different operators: / char class includes ., still ambiguous + # Two explode variables — any combination + "{/a*}{/b*}", + "{/a*}{.b*}", "{.a*}{;b*}", - ], -) -def test_parse_rejects_adjacent_explodes(template: str): - with pytest.raises(InvalidUriTemplate, match="Adjacent explode"): - UriTemplate.parse(template) - - -@pytest.mark.parametrize( - "template", - [ - # {+var} immediately adjacent to any expression (either side) - "{+a}{b}", - "{+a}{/b}", + "{/a*}/x{/b*}", # literal between doesn't help: still two greedy + "{/a*}{b}{.c*}", # non-explode between doesn't help either + # {+var}/{#var} combined with explode "{+a}{/b*}", - "{+a}{.b}", - "{+a}{;b}", - "{#a}{b}", - "{+a,b}", # multi-var in one expression: same adjacency - "prefix/{+path}{.ext}", # literal before doesn't help - "{a}{+b}", # + preceded by expression: same overlap - "{.a}{+b}", - "{/a}{+b}", - "x{name}{+path}y", - # Two {+var}/{#var} anywhere, even with literals between + # Multi-var + expression: each var is greedy + "{+a,b}", + # Two {+var}/{#var} anywhere "{+a}/x/{+b}", "{+a},{+b}", "{#a}/x/{+b}", "{+a}.foo.{#b}", ], ) -def test_parse_rejects_reserved_quadratic_patterns(template: str): - # These patterns cause O(n²) regex backtracking when a trailing - # literal fails to match. 
Rejecting at parse time eliminates the - # ReDoS vector at the source. - with pytest.raises(InvalidUriTemplate, match="quadratic"): +def test_parse_rejects_multiple_multi_segment_variables(template: str): + # Two multi-segment variables make matching inherently ambiguous: + # there is no principled way to decide which one absorbs an extra + # segment. The linear scan can only partition the URI around a + # single greedy slot. + with pytest.raises(InvalidUriTemplate, match="more than one multi-segment"): UriTemplate.parse(template) @@ -200,18 +185,29 @@ def test_parse_rejects_reserved_quadratic_patterns(template: str): "file://docs/{+path}", # + at end of template "file://{+path}.txt", # + followed by literal only "file://{+path}/edit", # + followed by literal only - "api/{+path}{?v,page}", # + followed by query (stripped before regex) + "api/{+path}{?v,page}", # + followed by query (handled by parse_qs) "api/{+path}{&next}", # + followed by query-continuation "page{#section}", # # at end - "{a}{#b}", # # prepends literal '#' that {a}'s class excludes - "{+a}/sep/{b}", # literal + bounded expression after: linear - "{+a},{b}", # same: literal disambiguates when second is bounded + "{a}{#b}", # # prepends literal '#' that {a}'s stop-set includes + "{+a}/sep/{b}", # + with bounded vars after + "{+a},{b}", + # Previously rejected for adjacency; now safe under linear scan + "{+a}{b}", # suffix var scans back to its stop-char + "{+a}{/b}", + "{+a}{.b}", + "{+a}{;b}", + "{#a}{b}", + "prefix/{+path}{.ext}", + "{a}{+b}", # prefix var scans forward to its stop-char + "{.a}{+b}", + "{/a}{+b}", + "x{name}{+path}y", ], ) -def test_parse_allows_reserved_in_safe_positions(template: str): - # These do not exhibit quadratic backtracking: end-of-template, - # literal + bounded expression, or trailing query expression - # (handled by parse_qs outside the path regex). 
+def test_parse_allows_single_multi_segment_variable(template: str): + # One multi-segment variable is fine: the linear scan isolates it + # between the prefix and suffix boundaries, and the scan never + # backtracks so match time stays O(n) regardless of URI content. t = UriTemplate.parse(template) assert t is not None @@ -280,16 +276,6 @@ def test_parse_stray_close_brace_between_expressions(): assert tmpl.variable_names == ["a", "b"] -def test_parse_allows_explode_separated_by_literal(): - tmpl = UriTemplate.parse("{/a*}/x{/b*}") - assert len(tmpl.variables) == 2 - - -def test_parse_allows_explode_separated_by_non_explode_var(): - tmpl = UriTemplate.parse("{/a*}{b}{.c*}") - assert len(tmpl.variables) == 3 - - def test_parse_rejects_oversized_template(): with pytest.raises(InvalidUriTemplate, match="maximum length"): UriTemplate.parse("x" * 101, max_length=100) @@ -361,8 +347,8 @@ def test_frozen(): ("?a=1{&b}", {"b": "2"}, "?a=1&b=2"), # Multi-var in one expression ("{x,y}", {"x": "1", "y": "2"}, "1,2"), - # {+x,y} is rejected at parse time (quadratic backtracking + - # inherent ambiguity). Use {+x}/{+y} with a literal separator. + # {+x,y} is rejected at parse time: each var in a + expression + # is multi-segment, and a template may only have one. # Sequence values, non-explode (comma-join) ("{/list}", {"list": ["a", "b", "c"]}, "/a,b,c"), ("{?list}", {"list": ["a", "b"]}, "?list=a,b"), @@ -539,6 +525,64 @@ def test_match_adjacent_vars_disambiguated_by_literal(): assert t.match("foo-bar") == {"a": "foo", "b": "bar"} +@pytest.mark.parametrize( + ("template", "uri", "expected"), + [ + # {+var} followed by a bounded var: suffix scan reads back to + # the bounded var's stop-char, greedy var gets the rest. 
+ ("{+path}{/name}", "a/b/c/readme", {"path": "a/b/c", "name": "readme"}), + ("{+path}{.ext}", "src/main.py", {"path": "src/main", "ext": "py"}), + ("prefix/{+path}{.ext}", "prefix/a/b.txt", {"path": "a/b", "ext": "txt"}), + # {+var} preceded by a bounded var: prefix scan reads forward + # to the bounded var's stop-char. + ("{/name}{+rest}", "/foo/bar/baz", {"name": "foo", "rest": "/bar/baz"}), + # Bounded vars before the greedy var match lazily (first anchor) + ("{owner}@{+path}", "alice@src/main", {"owner": "alice", "path": "src/main"}), + # Bounded vars after the greedy var match greedily (last anchor) + ("{+path}@{name}", "src@main@v1", {"path": "src@main", "name": "v1"}), + # {#frag} with a trailing bounded var + ("{#section}{/page}", "#intro/1", {"section": "intro", "page": "1"}), + ], +) +def test_match_greedy_with_adjacent_bounded_vars(template: str, uri: str, expected: dict[str, str]): + # These templates were previously rejected at parse time to avoid + # regex backtracking. The linear scan handles them in O(n). + assert UriTemplate.parse(template).match(uri) == expected + + +@pytest.mark.parametrize( + ("template", "uri"), + [ + # Adjacent bounded vars with a failing suffix: scan commits to + # one split and fails immediately, no retry. + ("{a}{b}X", "z" * 200), + ("{a}{b}{c}X", "z" * 200), + # Mid-template {?...} with greedy var and failing suffix. + ("{?a}{+b}x", "?a=" + "y" * 200), + # Chained anchors that all appear in input but suffix fails. + ("{a}L{b}L{c}L{d}M", "L" * 200), + ], +) +def test_match_no_backtracking_on_pathological_input(template: str, uri: str): + # These patterns caused O(n²) or worse backtracking under the regex + # matcher. The linear scan returns None without retrying splits. + # (Correctness check only; we benchmark separately to avoid flaky + # timing assertions in CI.) 
+ assert UriTemplate.parse(template).match(uri) is None + + +def test_match_large_uri_against_greedy_template(): + # Large payload against a greedy template — the scan visits each + # character once for the suffix anchor and once for the greedy + # validation, so this is O(n) not O(n²). + t = UriTemplate.parse("{+path}/end") + body = "seg/" * 15000 + result = t.match(body + "end") + assert result == {"path": body[:-1]} + # And the failing case returns None without retrying splits. + assert t.match(body + "nope") is None + + def test_match_decodes_percent_encoding(): t = UriTemplate.parse("file://docs/{name}") assert t.match("file://docs/hello%20world.txt") == {"name": "hello world.txt"} From 7629f6234e3d75f925424a69dd6551584ac31af6 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:15:01 +0000 Subject: [PATCH 37/47] test: close coverage gaps in linear-scan error paths Adds tests for: - Prefix literal/anchor failures before a greedy var - Greedy scalar containing its own stop-char - Explode span not starting with separator or containing stop-chars - ifemp handling in the left-to-right prefix scan - Adjacent bounded caps in prefix (first-takes-to-stop-char) Also converts the prefix_end > suffix_start check to an assertion: _scan_prefix is bounded by suffix_start so the condition cannot hold. --- src/mcp/shared/uri_template.py | 5 ++-- tests/shared/test_uri_template.py | 43 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 67bff6a84..7320b4349 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -537,8 +537,9 @@ def _scan(self, uri: str) -> dict[str, str | list[str]] | None: return None prefix_result, prefix_end = prefix - if prefix_end > suffix_start: - return None + # _scan_prefix is bounded by suffix_start, so this holds by + # construction. 
Kept as an assertion to document the invariant. + assert prefix_end <= suffix_start middle = uri[prefix_end:suffix_start] greedy_value = _extract_greedy(self._greedy, middle) diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 060a0284a..f45d0800f 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -571,6 +571,49 @@ def test_match_no_backtracking_on_pathological_input(template: str, uri: str): assert UriTemplate.parse(template).match(uri) is None +@pytest.mark.parametrize( + ("template", "uri"), + [ + # Prefix literal mismatch before a greedy var + ("file://{+path}", "http://x"), + # Prefix anchor not found: {a} needs '@' before greedy but none exists + ("{a}@{+path}", "no-at-sign-here"), + # Prefix literal doesn't fit within suffix boundary + ("foo{+a}oob", "fooob"), + # Greedy scalar contains its own stop-char ({+var} stops at ?) + ("api://{+path}", "api://foo?bar"), + # Explode span doesn't start with its separator + ("X{/path*}", "Xnoslash"), + # Explode body contains a non-separator stop-char + ("X{/path*}", "X/a?b"), + ], +) +def test_match_greedy_rejection_paths(template: str, uri: str): + assert UriTemplate.parse(template).match(uri) is None + + +@pytest.mark.parametrize( + ("template", "uri", "expected"), + [ + # ifemp in prefix before a greedy var: =value form + ("api{;key}{+rest}", "api;key=abc/xyz", {"key": "abc", "rest": "/xyz"}), + # ifemp in prefix: bare form (empty value) + ("api{;key}{+rest}", "api;key/xyz", {"key": "", "rest": "/xyz"}), + # Adjacent bounded caps in prefix: first takes to stop-char + ("{a}{b}{+rest}", "foo/bar", {"a": "foo", "b": "", "rest": "/bar"}), + ], +) +def test_match_prefix_scan_edge_cases(template: str, uri: str, expected: dict[str, str]): + assert UriTemplate.parse(template).match(uri) == expected + + +def test_match_prefix_ifemp_rejects_name_continuation(): + # {;key} before a greedy var: ;keys has no = and the 's' continues + # the name, so this 
is not our parameter. + t = UriTemplate.parse("api{;key}{+rest}") + assert t.match("api;keys/xyz") is None + + def test_match_large_uri_against_greedy_template(): # Large payload against a greedy template — the scan visits each # character once for the suffix anchor and once for the greedy From 959574044506db522c390c2d4ac13313288b293c Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:39:19 +0000 Subject: [PATCH 38/47] fix: do not percent-decode query parameter names in match RFC 6570 expansion never percent-encodes variable names, so a legitimate match will always have the parameter name in literal form. Decoding names before the duplicate-key check let an attacker shadow a real parameter by prepending a percent-encoded duplicate: api://x?%74oken=evil&token=real -> {token: evil} With this change the encoded form is treated as an unrecognized parameter and ignored, so the literal form wins. --- src/mcp/shared/uri_template.py | 7 ++++++- tests/shared/test_uri_template.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index 7320b4349..f99e21a87 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -560,13 +560,18 @@ def _parse_query(query: str) -> dict[str, str]: ``+`` as space for HTML form submissions, but RFC 6570 and MCP resource URIs follow RFC 3986 where only ``%20`` encodes a space. + Parameter names are **not** percent-decoded. RFC 6570 expansion + never encodes variable names, so a legitimate match will always + have the name in literal form. Decoding names would let + ``%74oken=evil&token=real`` shadow the real ``token`` parameter + via first-wins. + Duplicate keys keep the first value. Pairs without ``=`` are treated as empty-valued. 
""" result: dict[str, str] = {} for pair in query.split("&"): name, _, value = pair.partition("=") - name = unquote(name) if name and name not in result: result[name] = unquote(value) return result diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index f45d0800f..551cdef77 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -450,6 +450,11 @@ def test_expand_rejects_invalid_value_types(value: object): ("search{?q}", "search?&q=hello&", {"q": "hello"}), # Duplicate query keys keep first value ("search{?q}", "search?q=first&q=second", {"q": "first"}), + # Percent-encoded parameter names are NOT decoded: RFC 6570 + # expansion never encodes names, so an encoded name cannot be + # a legitimate match. Prevents HTTP parameter pollution. + ("api://x{?token}", "api://x?%74oken=evil&token=real", {"token": "real"}), + ("api://x{?token}", "api://x?%74oken=evil", {}), # Level 3: query continuation with literal ? falls back to # strict regex (template-order, all-present required) ("?a=1{&b}", "?a=1&b=2", {"b": "2"}), From 8fb3d6f296c5517ae4e1e975c2b4eb6d66c050e5 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:39:53 +0000 Subject: [PATCH 39/47] fix: reject null bytes in ResourceSecurity.validate by default A %00 in a URI decodes to \x00, which defeats the traversal check's string comparison ("..\x00" != "..") and can cause truncation in handlers that pass values to C extensions or subprocess. safe_join already rejects null bytes; this closes the defense-in-depth gap so ResourceSecurity catches them before the handler runs. The check runs first so it also covers the traversal-bypass case. 
--- src/mcp/server/mcpserver/resources/templates.py | 7 +++++++ .../mcpserver/resources/test_resource_template.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index c5b5b6f6f..5955f9a32 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -46,6 +46,11 @@ def git_diff(range: str) -> str: ... reject_absolute_paths: bool = True """Reject values that look like absolute filesystem paths.""" + reject_null_bytes: bool = True + """Reject values containing NUL (``\\x00``). Null bytes defeat string + comparisons (``"..\\x00" != ".."``) and can cause truncation in C + extensions or subprocess calls.""" + exempt_params: Set[str] = field(default_factory=frozenset[str]) """Parameter names to skip all checks for.""" @@ -64,6 +69,8 @@ def validate(self, params: Mapping[str, str | list[str]]) -> bool: continue values = value if isinstance(value, list) else [value] for v in values: + if self.reject_null_bytes and "\0" in v: + return False if self.reject_path_traversal and contains_path_traversal(v): return False if self.reject_absolute_paths and is_absolute_path(v): diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 97808dc37..21883a662 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -67,6 +67,21 @@ def test_matches_disabled_policy_allows_traversal(): assert t.matches("file://docs/..") == {"name": ".."} +def test_matches_rejects_null_byte_by_default(): + # %00 decodes to \x00 which defeats string comparisons + # ("..\x00" != "..") and can truncate in C extensions. 
+ t = _make("file://docs/{name}") + assert t.matches("file://docs/key%00.txt") is None + # Null byte also defeats the traversal check's component comparison + assert t.matches("file://docs/..%00%2Fsecret") is None + + +def test_matches_null_byte_check_can_be_disabled(): + policy = ResourceSecurity(reject_null_bytes=False) + t = _make("file://docs/{name}", security=policy) + assert t.matches("file://docs/key%00.txt") == {"name": "key\x00.txt"} + + def test_matches_explode_checks_each_segment(): t = _make("api{/parts*}") assert t.matches("api/a/b/c") == {"parts": ["a", "b", "c"]} From 6e559915bfc8c0becf7ccbcbb166ffcabf6b180c Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:41:06 +0000 Subject: [PATCH 40/47] fix: raise ResourceSecurityError instead of falling through on rejection ResourceTemplate.matches() previously returned None for both "URI doesn't match this template" and "URI matches but fails security validation". ResourceManager.get_resource iterates templates and uses the first non-None result, so a strict template's security rejection would silently fall through to a later, possibly permissive, template. Registration order became security-critical without documentation. matches() now raises ResourceSecurityError on security failure, halting template iteration at the first rejection. The error carries the template string and the offending parameter name. ResourceSecurity.validate() now returns the name of the first failing parameter (or None if all pass) rather than a bool, so the error can identify which parameter was rejected. 
--- .../server/mcpserver/resources/__init__.py | 2 + .../server/mcpserver/resources/templates.py | 41 ++++++++++++---- .../resources/test_resource_template.py | 48 +++++++++++++++---- 3 files changed, 73 insertions(+), 18 deletions(-) diff --git a/src/mcp/server/mcpserver/resources/__init__.py b/src/mcp/server/mcpserver/resources/__init__.py index 330edc324..a6cdfa106 100644 --- a/src/mcp/server/mcpserver/resources/__init__.py +++ b/src/mcp/server/mcpserver/resources/__init__.py @@ -3,6 +3,7 @@ from .templates import ( DEFAULT_RESOURCE_SECURITY, ResourceSecurity, + ResourceSecurityError, ResourceTemplate, ) from .types import ( @@ -25,5 +26,6 @@ "ResourceTemplate", "ResourceManager", "ResourceSecurity", + "ResourceSecurityError", "DEFAULT_RESOURCE_SECURITY", ] diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index 5955f9a32..c0b67b384 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -54,7 +54,7 @@ def git_diff(range: str) -> str: ... exempt_params: Set[str] = field(default_factory=frozenset[str]) """Parameter names to skip all checks for.""" - def validate(self, params: Mapping[str, str | list[str]]) -> bool: + def validate(self, params: Mapping[str, str | list[str]]) -> str | None: """Check all parameter values against the configured policy. Args: @@ -62,7 +62,8 @@ def validate(self, params: Mapping[str, str | list[str]]) -> bool: explode variables) are checked element-wise. Returns: - ``True`` if all values pass; ``False`` on first violation. + The name of the first parameter that fails, or ``None`` if + all values pass. 
""" for name, value in params.items(): if name in self.exempt_params: @@ -70,18 +71,32 @@ def validate(self, params: Mapping[str, str | list[str]]) -> bool: values = value if isinstance(value, list) else [value] for v in values: if self.reject_null_bytes and "\0" in v: - return False + return name if self.reject_path_traversal and contains_path_traversal(v): - return False + return name if self.reject_absolute_paths and is_absolute_path(v): - return False - return True + return name + return None DEFAULT_RESOURCE_SECURITY = ResourceSecurity() """Secure-by-default policy: traversal and absolute paths rejected.""" +class ResourceSecurityError(ValueError): + """Raised when an extracted parameter fails :class:`ResourceSecurity` checks. + + Distinct from a simple ``None`` non-match so that template + iteration can stop at the first security rejection rather than + falling through to a later, possibly more permissive, template. + """ + + def __init__(self, template: str, param: str) -> None: + super().__init__(f"Parameter {param!r} of template {template!r} failed security validation") + self.template = template + self.param = param + + class ResourceTemplate(BaseModel): """A template for dynamically creating resources.""" @@ -165,13 +180,21 @@ def matches(self, uri: str) -> dict[str, str | list[str]] | None: Returns: Extracted parameters on success, or ``None`` if the URI - doesn't match or a parameter fails security validation. + doesn't match the template. + + Raises: + ResourceSecurityError: If the URI matches but an extracted + parameter fails security validation. Raising (rather + than returning ``None``) prevents the resource manager + from silently falling through to a later, possibly more + permissive, template. 
""" params = self.parsed_template.match(uri) if params is None: return None - if not self.security.validate(params): - return None + failed = self.security.validate(params) + if failed is not None: + raise ResourceSecurityError(self.uri_template, failed) return params async def create_resource( diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 21883a662..2ca85cca7 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -9,6 +9,7 @@ from mcp.server.mcpserver.resources.templates import ( DEFAULT_RESOURCE_SECURITY, ResourceSecurity, + ResourceSecurityError, ) from mcp.types import Annotations @@ -30,23 +31,27 @@ def test_matches_rejects_encoded_slash_traversal(): # %2F decodes to / in UriTemplate.match(), giving "../../etc/passwd". # ResourceSecurity's traversal check then rejects the '..' components. t = _make("file://docs/{name}") - assert t.matches("file://docs/..%2F..%2Fetc%2Fpasswd") is None + with pytest.raises(ResourceSecurityError, match="'name'"): + t.matches("file://docs/..%2F..%2Fetc%2Fpasswd") def test_matches_rejects_path_traversal_by_default(): t = _make("file://docs/{name}") - assert t.matches("file://docs/..") is None + with pytest.raises(ResourceSecurityError): + t.matches("file://docs/..") def test_matches_rejects_path_traversal_in_reserved_var(): # Even {+path} gets the traversal check — it's semantic, not structural t = _make("file://docs/{+path}") - assert t.matches("file://docs/../../etc/passwd") is None + with pytest.raises(ResourceSecurityError): + t.matches("file://docs/../../etc/passwd") def test_matches_rejects_absolute_path(): t = _make("file://docs/{+path}") - assert t.matches("file://docs//etc/passwd") is None + with pytest.raises(ResourceSecurityError): + t.matches("file://docs//etc/passwd") def test_matches_allows_dotdot_as_substring(): @@ -71,9 +76,11 @@ def 
test_matches_rejects_null_byte_by_default(): # %00 decodes to \x00 which defeats string comparisons # ("..\x00" != "..") and can truncate in C extensions. t = _make("file://docs/{name}") - assert t.matches("file://docs/key%00.txt") is None + with pytest.raises(ResourceSecurityError): + t.matches("file://docs/key%00.txt") # Null byte also defeats the traversal check's component comparison - assert t.matches("file://docs/..%00%2Fsecret") is None + with pytest.raises(ResourceSecurityError): + t.matches("file://docs/..%00%2Fsecret") def test_matches_null_byte_check_can_be_disabled(): @@ -82,24 +89,47 @@ def test_matches_null_byte_check_can_be_disabled(): assert t.matches("file://docs/key%00.txt") == {"name": "key\x00.txt"} +def test_security_rejection_does_not_fall_through_to_next_template(): + # A strict template's security rejection must halt iteration, not + # fall through to a later permissive template. Previously matches() + # returned None for both "no match" and "security failed", making + # registration order security-critical. + strict = _make("file://docs/{name}") + lax = _make( + "file://docs/{+path}", + security=ResourceSecurity(exempt_params={"path"}), + ) + uri = "file://docs/..%2Fsecrets" + # Strict matches structurally then fails security -> raises. + with pytest.raises(ResourceSecurityError) as exc: + strict.matches(uri) + assert exc.value.param == "name" + # If this raised, the resource manager never reaches the lax + # template. Verify the lax template WOULD have accepted it. + assert lax.matches(uri) == {"path": "../secrets"} + + def test_matches_explode_checks_each_segment(): t = _make("api{/parts*}") assert t.matches("api/a/b/c") == {"parts": ["a", "b", "c"]} # Any segment with traversal rejects the whole match - assert t.matches("api/a/../c") is None + with pytest.raises(ResourceSecurityError): + t.matches("api/a/../c") def test_matches_encoded_backslash_caught_by_traversal_check(): # %5C decodes to '\\'. 
The traversal check normalizes '\\' to '/' # and catches the '..' components. t = _make("file://docs/{name}") - assert t.matches("file://docs/..%5C..%5Csecret") is None + with pytest.raises(ResourceSecurityError): + t.matches("file://docs/..%5C..%5Csecret") def test_matches_encoded_dots_caught_by_traversal_check(): # %2E%2E decodes to '..' which the traversal check rejects. t = _make("file://docs/{name}") - assert t.matches("file://docs/%2E%2E") is None + with pytest.raises(ResourceSecurityError): + t.matches("file://docs/%2E%2E") def test_matches_mixed_encoded_and_literal_slash(): From ec63c200fcca990538c20869453f02df73860689 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:44:03 +0000 Subject: [PATCH 41/47] hardening: tighten limits, immutability, and drive-letter detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bundled low-severity hardening: - Lower DEFAULT_MAX_TEMPLATE_LENGTH from 1MB to 8KB. Real templates are under 200 bytes; the old limit allowed 0.75s parse times. - Replace max_expressions with max_variables (default 256). A single {v0,v1,...,vN} expression packed arbitrarily many variables under one expression count, bypassing the limit. - Store UriTemplate internals as tuples. The dataclass is frozen but list fields were mutable via t._parts.append(), violating the immutability contract. - Coerce ResourceSecurity.exempt_params to frozenset in __post_init__ so hash() works even when callers pass a regular set. - Check drive letters against ASCII only. str.isalpha() is Unicode-aware, so is_absolute_path("Ω:foo") falsely returned True. 
--- .../server/mcpserver/resources/templates.py | 5 ++ src/mcp/shared/path_security.py | 6 +- src/mcp/shared/uri_template.py | 60 ++++++++++--------- .../resources/test_resource_template.py | 8 +++ tests/shared/test_path_security.py | 3 + tests/shared/test_uri_template.py | 17 ++++-- 6 files changed, 64 insertions(+), 35 deletions(-) diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index c0b67b384..48bb69623 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -54,6 +54,11 @@ def git_diff(range: str) -> str: ... exempt_params: Set[str] = field(default_factory=frozenset[str]) """Parameter names to skip all checks for.""" + def __post_init__(self) -> None: + # Coerce to frozenset so the dataclass stays hashable even if + # callers pass a regular set. + object.__setattr__(self, "exempt_params", frozenset(self.exempt_params)) + def validate(self, params: Mapping[str, str | list[str]]) -> str | None: """Check all parameter values against the configured policy. diff --git a/src/mcp/shared/path_security.py b/src/mcp/shared/path_security.py index 8d75a4193..dfcd479be 100644 --- a/src/mcp/shared/path_security.py +++ b/src/mcp/shared/path_security.py @@ -15,6 +15,7 @@ def read_doc(path: str) -> str: return safe_join("/data/docs", path).read_text() """ +import string from pathlib import Path __all__ = ["PathEscapeError", "contains_path_traversal", "is_absolute_path", "safe_join"] @@ -99,8 +100,9 @@ def is_absolute_path(value: str) -> bool: return False if value[0] in ("/", "\\"): return True - # Windows drive letter: C:, C:\, C:/ - if len(value) >= 2 and value[1] == ":" and value[0].isalpha(): + # Windows drive letter: C:, C:\, C:/. ASCII-only so that values + # like "Ω:namespace" are not falsely rejected. 
+ if len(value) >= 2 and value[1] == ":" and value[0] in string.ascii_letters: return True return False diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index f99e21a87..eb9407067 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -45,8 +45,8 @@ from urllib.parse import quote, unquote __all__ = [ - "DEFAULT_MAX_EXPRESSIONS", "DEFAULT_MAX_TEMPLATE_LENGTH", + "DEFAULT_MAX_VARIABLES", "DEFAULT_MAX_URI_LENGTH", "InvalidUriTemplate", "Operator", @@ -63,8 +63,8 @@ # (Percent-encoded varchars are technically allowed but unseen in practice.) _VARNAME_RE = re.compile(r"^[A-Za-z0-9_]+(?:\.[A-Za-z0-9_]+)*$") -DEFAULT_MAX_TEMPLATE_LENGTH = 1_000_000 -DEFAULT_MAX_EXPRESSIONS = 10_000 +DEFAULT_MAX_TEMPLATE_LENGTH = 8_192 +DEFAULT_MAX_VARIABLES = 256 DEFAULT_MAX_URI_LENGTH = 65_536 # RFC 3986 reserved characters, kept unencoded by {+var} and {#var}. @@ -284,12 +284,12 @@ class UriTemplate: """ template: str - _parts: list[_Part] = field(repr=False, compare=False) - _variables: list[Variable] = field(repr=False, compare=False) - _prefix: list[_Atom] = field(repr=False, compare=False) + _parts: tuple[_Part, ...] = field(repr=False, compare=False) + _variables: tuple[Variable, ...] = field(repr=False, compare=False) + _prefix: tuple[_Atom, ...] = field(repr=False, compare=False) _greedy: Variable | None = field(repr=False, compare=False) - _suffix: list[_Atom] = field(repr=False, compare=False) - _query_variables: list[Variable] = field(repr=False, compare=False) + _suffix: tuple[_Atom, ...] = field(repr=False, compare=False) + _query_variables: tuple[Variable, ...] = field(repr=False, compare=False) @staticmethod def is_template(value: str) -> bool: @@ -319,7 +319,7 @@ def parse( template: str, *, max_length: int = DEFAULT_MAX_TEMPLATE_LENGTH, - max_expressions: int = DEFAULT_MAX_EXPRESSIONS, + max_variables: int = DEFAULT_MAX_VARIABLES, ) -> UriTemplate: """Parse a URI template string. 
@@ -327,9 +327,11 @@ def parse( template: An RFC 6570 URI template. max_length: Maximum permitted length of the template string. Guards against resource exhaustion. - max_expressions: Maximum number of ``{...}`` expressions - permitted. Guards against pathological inputs that could - produce expensive regexes. + max_variables: Maximum number of variables permitted across + all expressions. Counting variables rather than + ``{...}`` expressions closes the gap where a single + ``{v0,v1,...,vN}`` expression packs arbitrarily many + variables under one expression count. Raises: InvalidUriTemplate: If the template is malformed, exceeds the @@ -341,7 +343,7 @@ def parse( template=template, ) - parts, variables = _parse(template, max_expressions=max_expressions) + parts, variables = _parse(template, max_variables=max_variables) # Trailing {?...}/{&...} expressions are matched leniently via # parse_qs rather than the scan: order-agnostic, partial, ignores @@ -352,12 +354,12 @@ def parse( return cls( template=template, - _parts=parts, - _variables=variables, - _prefix=prefix, + _parts=tuple(parts), + _variables=tuple(variables), + _prefix=tuple(prefix), _greedy=greedy, - _suffix=suffix, - _query_variables=query_vars, + _suffix=tuple(suffix), + _query_variables=tuple(query_vars), ) @property @@ -680,7 +682,7 @@ def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: return parts[:split], query_vars -def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Variable]]: +def _parse(template: str, *, max_variables: int) -> tuple[list[_Part], list[Variable]]: """Split a template into an ordered sequence of literals and expressions. 
Walks the string, alternating between collecting literal runs and @@ -694,7 +696,6 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va """ parts: list[_Part] = [] variables: list[Variable] = [] - expression_count = 0 i = 0 n = len(template) @@ -719,18 +720,17 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va position=brace, ) - expression_count += 1 - if expression_count > max_expressions: - raise InvalidUriTemplate( - f"Template exceeds maximum of {max_expressions} expressions", - template=template, - ) - # Delegate body (between braces, exclusive) to the expression parser. expr = _parse_expression(template, template[brace + 1 : end], brace) parts.append(expr) variables.extend(expr.variables) + if len(variables) > max_variables: + raise InvalidUriTemplate( + f"Template exceeds maximum of {max_variables} variables", + template=template, + ) + # Advance past the closing brace. i = end + 1 @@ -903,7 +903,7 @@ def _partition_greedy(atoms: list[_Atom], template: str) -> tuple[list[_Atom], V return atoms[:greedy_idx], greedy.var, atoms[greedy_idx + 1 :] -def _scan_suffix(atoms: list[_Atom], uri: str, end: int) -> tuple[dict[str, str | list[str]], int] | None: +def _scan_suffix(atoms: Sequence[_Atom], uri: str, end: int) -> tuple[dict[str, str | list[str]], int] | None: """Scan atoms right-to-left from ``end``, returning captures and start position. Each bounded variable takes the minimum span that lets its @@ -973,7 +973,9 @@ def _scan_suffix(atoms: list[_Atom], uri: str, end: int) -> tuple[dict[str, str return result, pos -def _scan_prefix(atoms: list[_Atom], uri: str, start: int, limit: int) -> tuple[dict[str, str | list[str]], int] | None: +def _scan_prefix( + atoms: Sequence[_Atom], uri: str, start: int, limit: int +) -> tuple[dict[str, str | list[str]], int] | None: """Scan atoms left-to-right from ``start``, not exceeding ``limit``. 
Each bounded variable takes the minimum span that lets its diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 2ca85cca7..54206a977 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -89,6 +89,14 @@ def test_matches_null_byte_check_can_be_disabled(): assert t.matches("file://docs/key%00.txt") == {"name": "key\x00.txt"} +def test_resource_security_hashable_with_regular_set(): + # Frozen dataclass auto-generates __hash__ from all fields, so a + # mutable set would make the instance unhashable. __post_init__ + # coerces to frozenset. + policy = ResourceSecurity(exempt_params={"a", "b"}) + assert hash(policy) == hash(ResourceSecurity(exempt_params=frozenset({"a", "b"}))) + + def test_security_rejection_does_not_fall_through_to_next_template(): # A strict template's security rejection must halt iteration, not # fall through to a later permissive template. 
Previously matches() diff --git a/tests/shared/test_path_security.py b/tests/shared/test_path_security.py index 75a1562d1..b923cdb59 100644 --- a/tests/shared/test_path_security.py +++ b/tests/shared/test_path_security.py @@ -73,6 +73,9 @@ def test_contains_path_traversal(value: str, expected: bool): ("1:foo", False), # Colon not in position 1 ("ab:c", False), + # Non-ASCII letter is not a drive letter + ("Ω:namespace", False), + ("é:foo", False), ], ) def test_is_absolute_path(value: str, expected: bool): diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 551cdef77..1685c19d2 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -281,14 +281,23 @@ def test_parse_rejects_oversized_template(): UriTemplate.parse("x" * 101, max_length=100) -def test_parse_rejects_too_many_expressions(): - with pytest.raises(InvalidUriTemplate, match="maximum of"): - UriTemplate.parse("{a}" * 11, max_expressions=10) +def test_parse_rejects_too_many_variables(): + template = "".join(f"{{v{i}}}" for i in range(11)) + with pytest.raises(InvalidUriTemplate, match="maximum of 10 variables"): + UriTemplate.parse(template, max_variables=10) + + +def test_parse_counts_variables_not_expressions(): + # A single {v0,v1,...} expression packs many variables under one + # brace pair. Counting expressions would miss this. 
+ template = "{" + ",".join(f"v{i}" for i in range(11)) + "}" + with pytest.raises(InvalidUriTemplate, match="maximum of 10 variables"): + UriTemplate.parse(template, max_variables=10) def test_parse_custom_limits_allow_larger(): template = "".join(f"{{v{i}}}" for i in range(20)) - tmpl = UriTemplate.parse(template, max_expressions=20) + tmpl = UriTemplate.parse(template, max_variables=20) assert len(tmpl.variables) == 20 From ba784d3d08774a663c889ab88ba4b278fce59926 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:46:06 +0000 Subject: [PATCH 42/47] docs: note AnyUrl pre-normalisation in get_resource docstring Pydantic's AnyUrl resolves %2E%2E and traversal during validation, so str(AnyUrl("file:///a/%2E%2E/b")) yields "file:///b". The JSON-RPC protocol layer uses raw str and is unaffected, but internal callers wrapping in AnyUrl get silently different security semantics. The normalisation is mostly protective (resolved paths won't match templates with fixed prefixes), so this documents the inconsistency rather than narrowing the signature. --- .../server/mcpserver/resources/resource_manager.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mcp/server/mcpserver/resources/resource_manager.py b/src/mcp/server/mcpserver/resources/resource_manager.py index 1e7e656cb..5aaccebd3 100644 --- a/src/mcp/server/mcpserver/resources/resource_manager.py +++ b/src/mcp/server/mcpserver/resources/resource_manager.py @@ -83,7 +83,17 @@ def add_template( return template async def get_resource(self, uri: AnyUrl | str, context: Context[LifespanContextT, RequestT]) -> Resource: - """Get resource by URI, checking concrete resources first, then templates.""" + """Get resource by URI, checking concrete resources first, then templates. 
+ + Note: + Pydantic's ``AnyUrl`` normalises percent-encoding and + resolves ``..`` segments during validation, so a value + constructed as ``AnyUrl("file:///a/%2E%2E/b")`` arrives + here as ``file:///b``. The JSON-RPC protocol layer passes + raw ``str`` values and is unaffected, but internal callers + wrapping URIs in ``AnyUrl`` should be aware that security + checks see the already-normalised form. + """ uri_str = str(uri) logger.debug("Getting resource", extra={"uri": uri_str}) From 0eeafe793934e6e375847bab977efced0922dc2c Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 19:56:05 +0000 Subject: [PATCH 43/47] fix: anchor first template literal at position 0, not rightmost occurrence The R->L scan used rfind to locate the literal preceding a variable. When that literal is the first atom of the template and its text appears inside the variable's value, rfind lands on the occurrence inside the value rather than at position 0, leaving unconsumed characters and returning None. UriTemplate.parse("prefix-{id}").match("prefix-prefix-123") # returned None; regex returns {'id': 'prefix-123'} For templates without a greedy variable, the atom sequence IS the whole template, so atoms[0] is positionally fixed at URI position 0. _scan_suffix now takes an anchored flag: when set, the first-atom literal anchors at 0 rather than searching via rfind. Also: adjacent captures now skip the stop-char scan entirely since the result was discarded (start = pos). This drops the worst-case from O(n*v) to O(n + v) for the pathological all-adjacent-vars case (497ms -> 2ms for 256 vars against 64KB), and the module docstring now states the complexity accurately. 
--- src/mcp/shared/uri_template.py | 44 ++++++++++++++++++++++++------- tests/shared/test_uri_template.py | 26 ++++++++++++++++++ 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index eb9407067..a27f2f316 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -13,9 +13,11 @@ ------------------ Matching is not specified by RFC 6570 (§1.4 explicitly defers to regex -languages). This implementation uses a linear-time two-ended scan that -never backtracks, so match time is O(n) in URI length regardless of -template structure. +languages). This implementation uses a two-ended scan that never +backtracks: match time is O(n·v) where n is URI length and v is the +number of template variables. Realistic templates have v < 10, making +this effectively linear; there is no input that produces +superpolynomial time. A template may contain **at most one multi-segment variable** — ``{+var}``, ``{#var}``, or an explode-modified variable (``{/var*}``, @@ -521,7 +523,11 @@ def _scan(self, uri: str) -> dict[str, str | list[str]] | None: # vars take the minimum needed (rfind for the preceding literal). # This matches regex greedy-first semantics for templates without # a greedy var, and minimises the suffix claim when one exists. - suffix = _scan_suffix(self._suffix, uri, n) + # When there is no greedy var the suffix IS the whole template, + # so its first atom must anchor at position 0 rather than + # searching via rfind. 
+ anchored = self._greedy is None + suffix = _scan_suffix(self._suffix, uri, n, anchored=anchored) if suffix is None: return None suffix_result, suffix_start = suffix @@ -903,13 +909,19 @@ def _partition_greedy(atoms: list[_Atom], template: str) -> tuple[list[_Atom], V return atoms[:greedy_idx], greedy.var, atoms[greedy_idx + 1 :] -def _scan_suffix(atoms: Sequence[_Atom], uri: str, end: int) -> tuple[dict[str, str | list[str]], int] | None: +def _scan_suffix( + atoms: Sequence[_Atom], uri: str, end: int, *, anchored: bool +) -> tuple[dict[str, str | list[str]], int] | None: """Scan atoms right-to-left from ``end``, returning captures and start position. Each bounded variable takes the minimum span that lets its preceding literal match (found via ``rfind``), which makes the *first* variable in template order greedy — identical to Python regex semantics for a sequence of greedy groups. + + When ``anchored`` is true the atom sequence is the entire template + (no greedy variable), so ``atoms[0]`` must match at URI position 0 + rather than at its rightmost occurrence. """ result: dict[str, str | list[str]] = {} pos = end @@ -947,6 +959,14 @@ def _scan_suffix(atoms: Sequence[_Atom], uri: str, end: int) -> tuple[dict[str, i -= 1 continue + if isinstance(prev, _Cap): + # Adjacent capture with no literal anchor: this (later) + # var takes nothing, the earlier var takes the span. Skip + # the stop-char scan entirely since the result is unused. + result[var.name] = "" + i -= 1 + continue + # Earliest valid start: the var cannot extend left past any # stop-char, so scan backward to find that boundary. earliest = pos @@ -955,17 +975,21 @@ def _scan_suffix(atoms: Sequence[_Atom], uri: str, end: int) -> tuple[dict[str, if prev is None: start = earliest - elif isinstance(prev, _Lit): + elif anchored and i - 1 == 0: + # First atom of the whole template: positionally fixed at + # 0, not rightmost occurrence. rfind would land inside the + # value when the literal repeats there (e.g. 
"prefix-{id}" + # against "prefix-prefix-123"). + start = len(prev.text) + if start < earliest or start > pos: + return None + else: # Rightmost occurrence of the preceding literal whose end # falls within the var's valid range. idx = uri.rfind(prev.text, 0, pos) if idx == -1 or idx + len(prev.text) < earliest: return None start = idx + len(prev.text) - else: - # Adjacent capture with no literal anchor: this (later) - # var takes nothing, the earlier var takes the span. - start = pos result[var.name] = unquote(uri[start:pos]) pos = start diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index 1685c19d2..c42aa0092 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -539,6 +539,32 @@ def test_match_adjacent_vars_disambiguated_by_literal(): assert t.match("foo-bar") == {"a": "foo", "b": "bar"} +@pytest.mark.parametrize( + ("template", "variables"), + [ + # Leading literal appears inside the value: must anchor at + # position 0, not rfind to the rightmost occurrence. + ("prefix-{id}", {"id": "prefix-123"}), + ("u{s}", {"s": "xu"}), + ("_{x}", {"x": "_"}), + ("~{v}~", {"v": "~~~"}), + # Multi-occurrence with two vars: rfind correctly picks the + # rightmost literal BETWEEN vars, first literal anchors at 0. + ("L{a}L{b}", {"a": "xLy", "b": "z"}), + # Leading literal with stop-char: earliest bound still applies. + ("api/{name}", {"name": "api"}), + ], +) +def test_match_leading_literal_appears_in_value(template: str, variables: dict[str, str]): + # Regression: the R->L scan used rfind for the preceding literal, + # which lands inside the value when the template's leading literal + # is a substring of the expanded value. The first atom must anchor + # at position 0, not search. 
+ t = UriTemplate.parse(template) + uri = t.expand(variables) + assert t.match(uri) == variables + + @pytest.mark.parametrize( ("template", "uri", "expected"), [ From cd19eaae38fa4852d801197be816958986114744 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 20:02:47 +0000 Subject: [PATCH 44/47] docs: update migration guide and resources doc for matcher changes - migration.md: path-safety checks now raise ResourceSecurityError rather than silently falling through; null bytes are rejected by default; templates may have at most one multi-segment variable - resources.md: add reject_null_bytes to the settings table; note that ResourceSecurity is a heuristic and safe_join remains the containment boundary --- docs/migration.md | 24 +++++++++++++++++------- docs/server/resources.md | 4 ++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index 8bc030804..e0a114e00 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -548,14 +548,17 @@ await client.read_resource(str(my_any_url)) ### Resource templates: matching behavior changes Resource template matching has been rewritten with RFC 6570 support. -Four behaviors have changed: +Several behaviors have changed: **Path-safety checks applied by default.** Extracted parameter values -containing `..` as a path component or looking like an absolute path -(`/etc/passwd`, `C:\Windows`) now cause the template to not match. -This is checked on the decoded value, so `..%2Fetc` and `%2E%2E` are -caught too. Note that `..` is only flagged as a standalone path -component, so values like `v1.0..v2.0` or `HEAD~3..HEAD` are unaffected. +containing `..` as a path component, a null byte, or looking like an +absolute path (`/etc/passwd`, `C:\Windows`) now raise +`ResourceSecurityError` and halt template iteration — a strict +template's rejection no longer falls through to a later permissive +template. 
This is checked on the decoded value, so `..%2Fetc`, +`%2E%2E`, and `%00` are caught too. Note that `..` is only flagged as +a standalone path component, so values like `v1.0..v2.0` or +`HEAD~3..HEAD` are unaffected. If a parameter legitimately needs to receive absolute paths or traversal sequences, exempt it: @@ -570,10 +573,17 @@ from mcp.server.mcpserver import ResourceSecurity def inspect_file(target: str) -> str: ... ``` -**Template literals are regex-escaped.** Previously a `.` in your +**Template literals match exactly.** Previously a `.` in your template matched any character; now it matches only a literal dot. `data://v1.0/{id}` no longer matches `data://v1X0/42`. +**At most one multi-segment variable.** Templates may contain a single +`{+var}`, `{#var}`, or explode-modified variable (`{/var*}`, etc.). +Two such variables make matching inherently ambiguous and now raise +`InvalidUriTemplate` at decoration time. This is unlikely to affect +existing templates since the previous Level 1 matcher did not support +these operators at all. + **Query parameters match leniently.** A template like `search://{q}{?limit}` now matches `search://foo` (with `limit` absent from the extracted params so your function default applies). Previously diff --git a/docs/server/resources.md b/docs/server/resources.md index 105626892..9aff5d94c 100644 --- a/docs/server/resources.md +++ b/docs/server/resources.md @@ -224,8 +224,12 @@ The configurable checks: |-------------------------|---------|-------------------------------------| | `reject_path_traversal` | `True` | Rejects `..` sequences that escape the starting directory | | `reject_absolute_paths` | `True` | Rejects `/foo`, `C:\foo`, UNC paths | +| `reject_null_bytes` | `True` | Rejects values containing `\x00` | | `exempt_params` | empty | Parameter names to skip checks for | +These checks are a heuristic pre-filter; for filesystem access, +`safe_join` remains the containment boundary. 
+ ## Errors If your handler can't fulfil the request, raise an exception. The SDK From d3a0936da67ec0422d0f4d08db3da86ca36ef3f0 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 20:22:45 +0000 Subject: [PATCH 45/47] docs: trim migration guide to actual v1.x breaking changes The previous matcher was a naive replace('{', '(?P<').replace('}', '>[^/]+)') that threw re.error on any operator character. Removed items describing constraints on features that did not exist in v1.x: - 'At most one multi-segment variable': {+var}/{#var}/explode all threw re.error in v1.x, so nobody had a working template with one let alone two. Covered in resources.md. - 'Query parameters match leniently': {?q} also threw re.error. The lenient-query feature is new, not a behavior change. Also folded the structural-delimiter change into the literals item and softened 'malformed templates' to note it's an error-timing change (re.error at match time -> InvalidUriTemplate at decoration). --- docs/migration.md | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index e0a114e00..0b9c72c65 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -573,27 +573,17 @@ from mcp.server.mcpserver import ResourceSecurity def inspect_file(target: str) -> str: ... ``` -**Template literals match exactly.** Previously a `.` in your -template matched any character; now it matches only a literal dot. -`data://v1.0/{id}` no longer matches `data://v1X0/42`. - -**At most one multi-segment variable.** Templates may contain a single -`{+var}`, `{#var}`, or explode-modified variable (`{/var*}`, etc.). -Two such variables make matching inherently ambiguous and now raise -`InvalidUriTemplate` at decoration time. This is unlikely to affect -existing templates since the previous Level 1 matcher did not support -these operators at all. 
- -**Query parameters match leniently.** A template like -`search://{q}{?limit}` now matches `search://foo` (with `limit` absent -from the extracted params so your function default applies). Previously -this returned no match. If you relied on all query parameters being -required, add explicit checks in your handler. - -**Malformed templates fail at decoration time.** Unclosed braces, -duplicate variable names, and unsupported syntax now raise -`InvalidUriTemplate` when the decorator runs, rather than silently -misbehaving at match time. +**Template literals and structural delimiters match exactly.** The +previous matcher built a regex without escaping, so `.` matched any +character and simple `{var}` swallowed `?`, `#`, `&`, and `,`. Now +`data://v1.0/{id}` no longer matches `data://v1X0/42`, and +`api://{id}` no longer matches `api://foo?x=1` — use `api://{id}{?x}` +or `api://{+id}` if you need to capture a query tail. + +**Template syntax errors surface at decoration time.** Unclosed +braces, duplicate variable names, and unsupported syntax raise +`InvalidUriTemplate` when the decorator runs rather than `re.error` +on first match. **Static URIs with Context-only handlers now error.** A non-template URI paired with a handler that takes only a `Context` parameter From f9aa92bd08957d1d805a12daf0cd74a79e178a85 Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 20:42:06 +0000 Subject: [PATCH 46/47] revert: remove frozenset coercion in ResourceSecurity.__post_init__ Passing a mutable set to a frozen dataclass and expecting hash() to work is a caller error, not something the SDK needs to defend against. 
--- src/mcp/server/mcpserver/resources/templates.py | 5 ----- .../server/mcpserver/resources/test_resource_template.py | 8 -------- 2 files changed, 13 deletions(-) diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index 48bb69623..c0b67b384 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -54,11 +54,6 @@ def git_diff(range: str) -> str: ... exempt_params: Set[str] = field(default_factory=frozenset[str]) """Parameter names to skip all checks for.""" - def __post_init__(self) -> None: - # Coerce to frozenset so the dataclass stays hashable even if - # callers pass a regular set. - object.__setattr__(self, "exempt_params", frozenset(self.exempt_params)) - def validate(self, params: Mapping[str, str | list[str]]) -> str | None: """Check all parameter values against the configured policy. diff --git a/tests/server/mcpserver/resources/test_resource_template.py b/tests/server/mcpserver/resources/test_resource_template.py index 54206a977..2ca85cca7 100644 --- a/tests/server/mcpserver/resources/test_resource_template.py +++ b/tests/server/mcpserver/resources/test_resource_template.py @@ -89,14 +89,6 @@ def test_matches_null_byte_check_can_be_disabled(): assert t.matches("file://docs/key%00.txt") == {"name": "key\x00.txt"} -def test_resource_security_hashable_with_regular_set(): - # Frozen dataclass auto-generates __hash__ from all fields, so a - # mutable set would make the instance unhashable. __post_init__ - # coerces to frozenset. - policy = ResourceSecurity(exempt_params={"a", "b"}) - assert hash(policy) == hash(ResourceSecurity(exempt_params=frozenset({"a", "b"}))) - - def test_security_rejection_does_not_fall_through_to_next_template(): # A strict template's security rejection must halt iteration, not # fall through to a later permissive template. 
Previously matches() From 8b5ca8944a408cb1e0066ea5fa50460bc141b62a Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 27 Mar 2026 21:02:59 +0000 Subject: [PATCH 47/47] fix: ifemp round-trip + stale docstrings from linear-scan refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One logic fix and a sweep of stale references left over from the regex-to-scan rewrite. ifemp round-trip (_scan_prefix): the name-continuation guard rejected the empty-value case when the template's next literal started with a non-stop-char. api{;key}X{+rest} with key='' expands to api;keyX/tail but matched None because 'X' after ;key was treated as a name continuation. Now checks whether the next literal starts at the current position before rejecting. Doc/style cleanups: - match() docstring: 'regex derived from the template' -> 'linear scan' - _split_query_tail: 'strict regex' -> 'strict scan' - test comments: 5x 'regex' -> 'scan' - DEFAULT_RESOURCE_SECURITY: docstring now mentions null-byte rejection - migration.md: describe client-visible 'Unknown resource' error rather than the internal ResourceSecurityError type - _Atom type alias: remove unnecessary string quoting - UriTemplate fields: list[...] not tuple[..., ...] 
— arbitrary-sized tuples are not a defence worth having --- docs/migration.md | 14 +++--- .../server/mcpserver/resources/templates.py | 2 +- src/mcp/shared/uri_template.py | 43 ++++++++++--------- tests/shared/test_uri_template.py | 30 ++++++++++--- 4 files changed, 54 insertions(+), 35 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index 0b9c72c65..8740d08c4 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -552,13 +552,13 @@ Several behaviors have changed: **Path-safety checks applied by default.** Extracted parameter values containing `..` as a path component, a null byte, or looking like an -absolute path (`/etc/passwd`, `C:\Windows`) now raise -`ResourceSecurityError` and halt template iteration — a strict -template's rejection no longer falls through to a later permissive -template. This is checked on the decoded value, so `..%2Fetc`, -`%2E%2E`, and `%00` are caught too. Note that `..` is only flagged as -a standalone path component, so values like `v1.0..v2.0` or -`HEAD~3..HEAD` are unaffected. +absolute path (`/etc/passwd`, `C:\Windows`) now cause the read to +fail — the client receives an "Unknown resource" error and template +iteration stops, so a strict template's rejection does not fall +through to a later permissive template. This is checked on the +decoded value, so `..%2Fetc`, `%2E%2E`, and `%00` are caught too. +Note that `..` is only flagged as a standalone path component, so +values like `v1.0..v2.0` or `HEAD~3..HEAD` are unaffected. 
If a parameter legitimately needs to receive absolute paths or traversal sequences, exempt it: diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index c0b67b384..ce21ce8b0 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -80,7 +80,7 @@ def validate(self, params: Mapping[str, str | list[str]]) -> str | None: DEFAULT_RESOURCE_SECURITY = ResourceSecurity() -"""Secure-by-default policy: traversal and absolute paths rejected.""" +"""Secure-by-default policy: traversal, absolute paths, and null bytes rejected.""" class ResourceSecurityError(ValueError): diff --git a/src/mcp/shared/uri_template.py b/src/mcp/shared/uri_template.py index a27f2f316..7fff3aa1b 100644 --- a/src/mcp/shared/uri_template.py +++ b/src/mcp/shared/uri_template.py @@ -169,7 +169,7 @@ class _Cap: ifemp: bool = False -_Atom: TypeAlias = "_Lit | _Cap" +_Atom: TypeAlias = _Lit | _Cap def _is_greedy(var: Variable) -> bool: @@ -286,12 +286,12 @@ class UriTemplate: """ template: str - _parts: tuple[_Part, ...] = field(repr=False, compare=False) - _variables: tuple[Variable, ...] = field(repr=False, compare=False) - _prefix: tuple[_Atom, ...] = field(repr=False, compare=False) + _parts: list[_Part] = field(repr=False, compare=False) + _variables: list[Variable] = field(repr=False, compare=False) + _prefix: list[_Atom] = field(repr=False, compare=False) _greedy: Variable | None = field(repr=False, compare=False) - _suffix: tuple[_Atom, ...] = field(repr=False, compare=False) - _query_variables: tuple[Variable, ...] 
= field(repr=False, compare=False) + _suffix: list[_Atom] = field(repr=False, compare=False) + _query_variables: list[Variable] = field(repr=False, compare=False) @staticmethod def is_template(value: str) -> bool: @@ -356,12 +356,12 @@ def parse( return cls( template=template, - _parts=tuple(parts), - _variables=tuple(variables), - _prefix=tuple(prefix), + _parts=parts, + _variables=variables, + _prefix=prefix, _greedy=greedy, - _suffix=tuple(suffix), - _query_variables=tuple(query_vars), + _suffix=suffix, + _query_variables=query_vars, ) @property @@ -436,8 +436,8 @@ def expand(self, variables: Mapping[str, str | Sequence[str]]) -> str: def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> dict[str, str | list[str]] | None: """Match a concrete URI against this template and extract variables. - This is the inverse of :meth:`expand`. The URI is matched against - a regex derived from the template and captured values are + This is the inverse of :meth:`expand`. The URI is matched via a + linear scan of the template and captured values are percent-decoded. The round-trip ``match(expand({k: v})) == {k: v}`` holds when ``v`` does not contain its operator's separator unencoded: ``{.ext}`` with ``ext="tar.gz"`` expands to @@ -446,7 +446,7 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di inherent reversal limitation. Matching is structural at the URI level only: a simple ``{name}`` - will not match across a literal ``/`` in the URI (the regex stops + will not match across a literal ``/`` in the URI (the scan stops there), but a percent-encoded ``%2F`` that decodes to ``/`` is accepted as part of the value. Path-safety validation belongs at a higher layer; see :mod:`mcp.shared.path_security`. @@ -483,7 +483,7 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di Args: uri: A concrete URI string. max_uri_length: Maximum permitted length of the input URI. 
- Oversized inputs return ``None`` without regex evaluation, + Oversized inputs return ``None`` without scanning, guarding against resource exhaustion. Returns: @@ -643,7 +643,7 @@ def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: expressions and the preceding path portion contains no literal ``?``. If the path has a literal ``?`` (e.g., ``?fixed=1{&page}``), the URI's ``?`` split won't align with the template's expression - boundary, so strict regex matching is used instead. + boundary, so the strict scan is used instead. Returns: A pair ``(path_parts, query_vars)``. If lenient matching does @@ -671,8 +671,8 @@ def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]: # If the path portion contains a literal ?/# or a {?...}/{#...} # expression, lenient matching's partition("#") then partition("?") - # would strip content the path regex expects to see. Fall back to - # strict regex. + # would strip content the path scan expects to see. Fall back to + # the strict scan. for part in parts[:split]: if isinstance(part, str): if "?" in part or "#" in part: @@ -1024,11 +1024,14 @@ def _scan_prefix( if atom.ifemp: # Optional = after ;name. A non-= non-delimiter here means - # the name continued (e.g. ;keys vs ;key) — reject. + # the name continued (e.g. ;keys vs ;key) — reject, unless + # the template's next literal starts right here, in which + # case the value is legitimately empty. if pos < limit and uri[pos] == "=": pos += 1 elif pos < limit and uri[pos] not in stops: - return None + if not (isinstance(nxt, _Lit) and uri.startswith(nxt.text, pos)): + return None # Latest valid end: the var stops at the first stop-char or # the scan limit, whichever comes first. 
diff --git a/tests/shared/test_uri_template.py b/tests/shared/test_uri_template.py index c42aa0092..6b253732d 100644 --- a/tests/shared/test_uri_template.py +++ b/tests/shared/test_uri_template.py @@ -444,15 +444,15 @@ def test_expand_rejects_invalid_value_types(value: object): ("search{?q}", "search#frag", {}), # Multiple ?/& expressions collected together ("api{?v}{&page,limit}", "api?limit=10&v=2", {"v": "2", "limit": "10"}), - # Standalone {&var} falls through to strict regex (expands with - # & prefix, no ? for lenient matching to split on) + # Standalone {&var} falls through to the strict scan (expands + # with & prefix, no ? for lenient matching to split on) ("api{&page}", "api&page=2", {"page": "2"}), - # Literal ? in path portion falls through to strict regex + # Literal ? in path portion falls through to the strict scan ("api?x{?page}", "api?x?page=2", {"page": "2"}), # {?...} expression in path portion also falls through ("api{?q}x{?page}", "api?q=1x?page=2", {"q": "1", "page": "2"}), # {#...} or literal # in path portion falls through: lenient - # matching would strip the fragment before the path regex sees it + # matching would strip the fragment before the path scan sees it ("page{#section}{?q}", "page#intro?q=x", {"section": "intro", "q": "x"}), ("page#lit{?q}", "page#lit?q=x", {"q": "x"}), # Empty & segments in query are skipped @@ -465,7 +465,7 @@ def test_expand_rejects_invalid_value_types(value: object): ("api://x{?token}", "api://x?%74oken=evil&token=real", {"token": "real"}), ("api://x{?token}", "api://x?%74oken=evil", {}), # Level 3: query continuation with literal ? 
falls back to - # strict regex (template-order, all-present required) + # the strict scan (template-order, all-present required) ("?a=1{&b}", "?a=1&b=2", {"b": "2"}), # Explode: path segments as list ("/files{/path*}", "/files/a/b/c", {"path": ["a", "b", "c"]}), @@ -512,8 +512,8 @@ def test_match_no_match(template: str, uri: str): def test_match_adjacent_vars_with_prefix_names(): # Two adjacent simple vars where one name is a prefix of the other. - # We use positional capture groups, so names only affect the result - # dict keys, not the regex. Adjacent unrestricted vars are inherently + # Capture positions are ordinal, so names only affect the result + # dict keys, not the scan. Adjacent unrestricted vars are inherently # ambiguous; greedy * resolution means the first takes everything. t = UriTemplate.parse("{var}{vara}") assert t.match("ab") == {"var": "ab", "vara": ""} @@ -654,6 +654,22 @@ def test_match_prefix_ifemp_rejects_name_continuation(): assert t.match("api;keys/xyz") is None +def test_match_prefix_ifemp_empty_before_non_stop_literal(): + # Regression: _scan_prefix rejected the empty-value case when the + # following template literal starts with a non-stop-char. The + # name-continuation guard saw 'X' after ';key' and assumed the + # name continued, but 'X' is the template's next literal. + t = UriTemplate.parse("api{;key}X{+rest}") + # Non-empty round-trips fine: + assert t.match(t.expand({"key": "abc", "rest": "/tail"})) == {"key": "abc", "rest": "/tail"} + # Empty value (ifemp → bare ;key, then X) must also round-trip: + uri = t.expand({"key": "", "rest": "/tail"}) + assert uri == "api;keyX/tail" + assert t.match(uri) == {"key": "", "rest": "/tail"} + # But an actual name continuation still rejects: + assert t.match("api;keyZX/tail") is None + + def test_match_large_uri_against_greedy_template(): # Large payload against a greedy template — the scan visits each # character once for the suffix anchor and once for the greedy