From df00532a076b30b9471b46d6c67a5ad41cbddf51 Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Mon, 9 Mar 2026 09:07:46 -0600 Subject: [PATCH 1/5] hrw4u: Adds sandbox protection feature for the compiler A new sandbox mechanism allows administrators to restrict which hrw4u language features are available at compile time. Policy is defined in a YAML configuration file that can deny specific sections, functions, conditions, operators, and language constructs such as break and variables. Violations are reported as compilation errors with a configurable policy message, and per-input sandbox overrides are also supported for testing purposes. Co-author and ideas: Miles Libbey --- doc/admin-guide/configuration/hrw4u.en.rst | 181 +++++++++++++++++- tools/hrw4u/.gitignore | 1 + tools/hrw4u/Makefile | 9 +- tools/hrw4u/pyproject.toml | 2 + tools/hrw4u/schema/sandbox.schema.json | 125 ++++++++++++ tools/hrw4u/scripts/hrw4u | 20 +- tools/hrw4u/scripts/hrw4u-lsp | 23 ++- tools/hrw4u/src/common.py | 40 +++- tools/hrw4u/src/errors.py | 10 + tools/hrw4u/src/sandbox.py | 159 +++++++++++++++ tools/hrw4u/src/symbols.py | 13 +- tools/hrw4u/src/symbols_base.py | 4 +- tools/hrw4u/src/visitor.py | 35 +++- tools/hrw4u/src/visitor_base.py | 7 + .../hrw4u/tests/data/sandbox/allowed.ast.txt | 1 + .../tests/data/sandbox/allowed.input.txt | 3 + .../tests/data/sandbox/allowed.output.txt | 2 + .../data/sandbox/denied-function.ast.txt | 1 + .../data/sandbox/denied-function.error.txt | 2 + .../data/sandbox/denied-function.input.txt | 3 + .../sandbox/denied-language-break.ast.txt | 1 + .../sandbox/denied-language-break.error.txt | 2 + .../sandbox/denied-language-break.input.txt | 4 + .../data/sandbox/denied-language-elif.ast.txt | 1 + .../sandbox/denied-language-elif.error.txt | 1 + .../sandbox/denied-language-elif.input.txt | 7 + .../sandbox/denied-language-elif.sandbox.yaml | 6 + .../data/sandbox/denied-language-else.ast.txt | 1 + .../sandbox/denied-language-else.error.txt | 1 + .../sandbox/denied-language-else.input.txt | 7 + .../sandbox/denied-language-else.sandbox.yaml | 6 + .../data/sandbox/denied-language-in.ast.txt | 1 + .../data/sandbox/denied-language-in.error.txt | 1 + .../data/sandbox/denied-language-in.input.txt | 5 + .../sandbox/denied-language-in.sandbox.yaml | 6 + .../sandbox/denied-modifier-nocase.ast.txt | 1 + .../sandbox/denied-modifier-nocase.error.txt | 1 + .../sandbox/denied-modifier-nocase.input.txt | 5 + .../denied-modifier-nocase.sandbox.yaml | 6 + .../data/sandbox/denied-modifier-or.ast.txt | 1 + .../data/sandbox/denied-modifier-or.error.txt | 1 + .../data/sandbox/denied-modifier-or.input.txt | 5 + .../sandbox/denied-modifier-or.sandbox.yaml | 6 + .../tests/data/sandbox/denied-section.ast.txt | 1 + .../data/sandbox/denied-section.error.txt | 2 + .../data/sandbox/denied-section.input.txt | 3 + tools/hrw4u/tests/data/sandbox/exceptions.txt | 10 + .../data/sandbox/multiple-denials.ast.txt | 1 + .../data/sandbox/multiple-denials.error.txt | 4 + .../data/sandbox/multiple-denials.input.txt | 4 + .../data/sandbox/per-test-sandbox.error.txt | 1 + .../data/sandbox/per-test-sandbox.input.txt | 3 + .../sandbox/per-test-sandbox.sandbox.yaml | 4 + tools/hrw4u/tests/data/sandbox/sandbox.yaml | 12 ++ tools/hrw4u/tests/test_lsp.py | 84 +++++++- tools/hrw4u/tests/test_sandbox.py | 36 ++++ tools/hrw4u/tests/utils.py | 83 ++++++++ 57 files changed, 943 insertions(+), 22 deletions(-) create mode 100644 tools/hrw4u/schema/sandbox.schema.json create mode 100644 tools/hrw4u/src/sandbox.py create mode 100644 tools/hrw4u/tests/data/sandbox/allowed.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/allowed.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/allowed.output.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-function.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-function.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-function.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-break.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-break.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-break.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-elif.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-elif.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-elif.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-elif.sandbox.yaml create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-else.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-else.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-else.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-else.sandbox.yaml create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-in.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-in.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-in.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-language-in.sandbox.yaml create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.sandbox.yaml create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-or.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-or.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-or.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-modifier-or.sandbox.yaml create mode 100644 tools/hrw4u/tests/data/sandbox/denied-section.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-section.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/denied-section.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/exceptions.txt create mode 100644 tools/hrw4u/tests/data/sandbox/multiple-denials.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/multiple-denials.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/multiple-denials.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/per-test-sandbox.error.txt create mode 100644 tools/hrw4u/tests/data/sandbox/per-test-sandbox.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/per-test-sandbox.sandbox.yaml create mode 100644 tools/hrw4u/tests/data/sandbox/sandbox.yaml create mode 100644 tools/hrw4u/tests/test_sandbox.py diff --git a/doc/admin-guide/configuration/hrw4u.en.rst b/doc/admin-guide/configuration/hrw4u.en.rst index c401dafc4e6..dbcfd99818d 100644 --- a/doc/admin-guide/configuration/hrw4u.en.rst +++ b/doc/admin-guide/configuration/hrw4u.en.rst @@ -434,10 +434,58 @@ Groups ... } +Control Flow +------------ + +HRW4U conditionals use ``if``, ``elif``, and ``else`` blocks. Each branch +takes a condition expression followed by a ``{ ... }`` body of statements: + +.. code-block:: none + + if condition { + statement; + } elif other-condition { + statement; + } else { + statement; + } + +``elif`` and ``else`` are optional and can be chained. Branches can be nested +to arbitrary depth: + +.. code-block:: none + + REMAP { + if inbound.status > 399 { + if inbound.status < 500 { + if inbound.status == 404 { + inbound.resp.X-Error = "not-found"; + } elif inbound.status == 403 { + inbound.resp.X-Error = "forbidden"; + } + } else { + inbound.resp.X-Error = "server-error"; + } + } + } + +The ``break;`` statement exits the current section immediately, skipping any +remaining statements and branches: + +.. code-block:: none + + REMAP { + if inbound.req.X-Internal != "1" { + break; + } + # Only reached for internal requests + inbound.req.X-Debug = "on"; + } + Condition operators ------------------- -HRW4U supports the following condition operators, which are used in `if (...)` expressions: +HRW4U supports the following condition operators, which are used in ``if`` expressions: ==================== ========================= ============================================ Operator HRW4U Syntax Description @@ -494,6 +542,137 @@ Run with `--debug all` to trace: - Condition evaluations - State and output emission +Sandbox Policy Enforcement +========================== + +Organizations deploying HRW4U across teams can restrict which language features +are permitted using a sandbox configuration file. When a denied feature is used, +the compiler emits a clear error with a configurable message explaining the +restriction. + +Pass the sandbox file with ``--sandbox``: + +.. code-block:: none + + hrw4u --sandbox /etc/trafficserver/hrw4u-sandbox.yaml rules.hrw4u + +The sandbox file is YAML with a single top-level ``sandbox`` key. A JSON +Schema for editor validation and autocomplete is provided at +``tools/hrw4u/schema/sandbox.schema.json``. + +.. code-block:: yaml + + sandbox: + message: | # optional: shown once after all denial errors + ... + deny: + sections: [ ... ] # section names, e.g. TXN_START + functions: [ ... ] # function names, e.g. run-plugin + conditions: [ ... ] # condition keys, e.g. geo. + operators: [ ... ] # operator keys, e.g. inbound.conn.dscp + language: [ ... ] # break, variables, in, else, elif + +All deny lists are optional. An empty or missing sandbox file permits everything. + +Denied Sections +--------------- + +The ``sections`` list accepts any of the HRW4U section names listed in the +`Sections`_ table, plus ``VARS`` to deny the variable declaration block. +A denied section causes the entire block to be rejected; the body is not +validated. + +Functions +--------- + +The ``functions`` list accepts any of the statement-function names used in +HRW4U source. The complete set of deniable functions is: + +====================== ============================================= +Function Description +====================== ============================================= +``add-header`` Add a header (``+=`` operator equivalent) +``counter`` Increment an ATS statistics counter +``keep_query`` Keep only specified query parameters +``no-op`` Explicit no-op statement +``remove_query`` Remove specified query parameters +``run-plugin`` Invoke an external remap plugin +``set-body-from`` Set response body from a URL +``set-config`` Override an ATS configuration variable +``set-debug`` Enable per-transaction ATS debug logging +``set-plugin-cntl`` Set a plugin control flag +``set-redirect`` Issue an HTTP redirect response +``skip-remap`` Skip remap processing (open proxy) +====================== ============================================= + +Conditions and Operators +------------------------ + +The ``conditions`` and ``operators`` lists use the same dot-notation keys shown +in the `Conditions`_ and `Operators`_ tables above (e.g. ``inbound.req.``, +``geo.``, ``outbound.conn.``). + +Entries ending with ``.`` use **prefix matching** — ``geo.`` denies all +``geo.*`` lookups (``geo.city``, ``geo.ASN``, etc.). Entries without a trailing +``.`` are matched exactly. + +Language Constructs +------------------- + +The ``language`` list accepts a fixed set of constructs: + +================ =================================================== +Construct What it denies +================ =================================================== +``break`` The ``break;`` statement (early section exit) +``variables`` The entire ``VARS`` section and all variable usage +``else`` The ``else { ... }`` branch of conditionals +``elif`` The ``elif ... { ... }`` branch of conditionals +``in`` The ``in [...]`` and ``!in [...]`` set membership operators +================ =================================================== + +Error Output +------------ + +When a denied feature is used the error output looks like: + +.. code-block:: none + + rules.hrw4u:3:4: error: 'set-debug' is denied by sandbox policy (function) + + This feature is restricted by CDN-SRE policy. + Contact cdn-sre@example.com for exceptions. + +The sandbox message is shown once at the end of the error summary, regardless +of how many denial errors were found. + +Example Configuration +--------------------- + +A typical policy for a CDN team where remap plugin authors should not have +access to low-level or dangerous features: + +.. code-block:: yaml + + sandbox: + message: | + This feature is not permitted by CDN-SRE policy. + To request an exception, file a ticket at https://help.example.com/cdn + + deny: + # Disallow hooks that run outside the normal remap context + sections: + - TXN_START + - TXN_CLOSE + - PRE_REMAP + + # Disallow functions that affect ATS internals or load arbitrary code + functions: + - run-plugin + - set-debug + - set-config + - skip-remap + Examples ======== diff --git a/tools/hrw4u/.gitignore b/tools/hrw4u/.gitignore index c61b1049d77..7bd07cb17d6 100644 --- a/tools/hrw4u/.gitignore +++ b/tools/hrw4u/.gitignore @@ -1,3 +1,4 @@ build/ dist/ uv.lock +*.spec diff --git a/tools/hrw4u/Makefile b/tools/hrw4u/Makefile index 33ab62873c2..9bd62be1e8e 100644 --- a/tools/hrw4u/Makefile +++ b/tools/hrw4u/Makefile @@ -51,6 +51,7 @@ UTILS_FILES=src/symbols_base.py \ SRC_FILES_HRW4U=src/visitor.py \ src/symbols.py \ src/suggestions.py \ + src/sandbox.py \ src/kg_visitor.py ALL_HRW4U_FILES=$(SHARED_FILES) $(UTILS_FILES) $(SRC_FILES_HRW4U) @@ -169,10 +170,10 @@ test: # Build standalone binaries (optional) build: gen - uv run pyinstaller --onefile --name hrw4u --strip $(SCRIPT_HRW4U) - uv run pyinstaller --onefile --name u4wrh --strip $(SCRIPT_U4WRH) - uv run pyinstaller --onefile --name hrw4u-lsp --strip $(SCRIPT_LSP) - uv run pyinstaller --onefile --name hrw4u-kg --strip $(SCRIPT_KG) + uv run pyinstaller --onedir --name hrw4u --strip $(SCRIPT_HRW4U) + uv run pyinstaller --onedir --name u4wrh --strip $(SCRIPT_U4WRH) + uv run pyinstaller --onedir --name hrw4u-lsp --strip $(SCRIPT_LSP) + uv run pyinstaller --onedir --name hrw4u-kg --strip $(SCRIPT_KG) # Wheel packaging (adjust pyproject to include both packages if desired) package: gen diff --git a/tools/hrw4u/pyproject.toml b/tools/hrw4u/pyproject.toml index ce607fd5b69..1a1a8defb2a 100644 --- a/tools/hrw4u/pyproject.toml +++ b/tools/hrw4u/pyproject.toml @@ -42,6 +42,7 @@ classifiers = [ ] dependencies = [ "antlr4-python3-runtime>=4.9,<5.0", + "pyyaml>=6.0,<7.0", "rapidfuzz>=3.0,<4.0", ] @@ -76,6 +77,7 @@ markers = [ "examples: marks tests for all header_rewrite docs examples", "reverse: marks tests for reverse conversion (header_rewrite -> hrw4u)", "ast: marks tests for AST validation", + "sandbox: marks tests for sandbox policy enforcement", ] [dependency-groups] diff --git a/tools/hrw4u/schema/sandbox.schema.json b/tools/hrw4u/schema/sandbox.schema.json new file mode 100644 index 00000000000..ecae39d5408 --- /dev/null +++ b/tools/hrw4u/schema/sandbox.schema.json @@ -0,0 +1,125 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://trafficserver.apache.org/schemas/hrw4u-sandbox.schema.json", + "title": "HRW4U Sandbox Configuration", + "description": "Policy deny-list for the hrw4u compiler (--sandbox FILE).", + "type": "object", + "additionalProperties": false, + "properties": { + "sandbox": { + "type": "object", + "additionalProperties": false, + "properties": { + "message": { + "type": "string", + "description": "Free-form text appended once after all denial errors. Use this to explain the policy and provide a contact or ticket link." + }, + "deny": { + "type": "object", + "additionalProperties": false, + "properties": { + "sections": { + "type": "array", + "description": "HRW4U section names to deny. A denied section rejects the entire block without validating its body.", + "items": { + "type": "string", + "enum": [ + "TXN_START", + "PRE_REMAP", + "REMAP", + "READ_REQUEST", + "SEND_REQUEST", + "READ_RESPONSE", + "SEND_RESPONSE", + "TXN_CLOSE", + "VARS" + ] + }, + "uniqueItems": true + }, + "functions": { + "type": "array", + "description": "Statement function names to deny.", + "items": { + "type": "string", + "enum": [ + "add-header", + "counter", + "keep_query", + "no-op", + "remove_query", + "run-plugin", + "set-body-from", + "set-config", + "set-debug", + "set-plugin-cntl", + "set-redirect", + "skip-remap" + ] + }, + "uniqueItems": true + }, + "conditions": { + "type": "array", + "description": "Condition keys to deny. Entries ending with '.' use prefix matching (e.g. 'geo.' denies all geo.* lookups). Keys match those in the HRW4U condition table.", + "items": { + "type": "string" + }, + "uniqueItems": true, + "examples": [ + ["geo.", "tcp.info", "inbound.conn.", "outbound.conn."] + ] + }, + "operators": { + "type": "array", + "description": "Operator (assignment target) keys to deny. Entries ending with '.' use prefix matching. Keys match those in the HRW4U operator table.", + "items": { + "type": "string" + }, + "uniqueItems": true, + "examples": [ + ["inbound.conn.dscp", "inbound.conn.mark", "outbound.conn.dscp", "outbound.conn.mark"] + ] + }, + "language": { + "type": "array", + "description": "Language constructs to deny.", + "items": { + "type": "string", + "enum": [ + "break", + "variables", + "in", + "else", + "elif" + ] + }, + "uniqueItems": true + }, + "modifiers": { + "type": "array", + "description": "Condition and operator modifiers to deny.", + "items": { + "type": "string", + "enum": [ + "AND", + "OR", + "NOT", + "NOCASE", + "PRE", + "SUF", + "EXT", + "MID", + "I", + "L", + "QSA" + ] + }, + "uniqueItems": true + } + } + } + } + } + } +} diff --git a/tools/hrw4u/scripts/hrw4u b/tools/hrw4u/scripts/hrw4u index 72dfc82e571..226ff6eac24 100755 --- a/tools/hrw4u/scripts/hrw4u +++ b/tools/hrw4u/scripts/hrw4u @@ -19,10 +19,26 @@ from __future__ import annotations +import argparse +from pathlib import Path +from typing import Any + from hrw4u.hrw4uLexer import hrw4uLexer from hrw4u.hrw4uParser import hrw4uParser from hrw4u.visitor import HRW4UVisitor from hrw4u.common import run_main +from hrw4u.sandbox import SandboxConfig + + +def _add_args(parser: argparse.ArgumentParser, output_group: argparse._MutuallyExclusiveGroup) -> None: + parser.add_argument("--sandbox", metavar="FILE", type=Path, help="Path to sandbox YAML configuration file") + + +def _visitor_kwargs(args: argparse.Namespace) -> dict[str, Any]: + kwargs: dict[str, Any] = {} + if args.sandbox: + kwargs['sandbox'] = SandboxConfig.load(args.sandbox) + return kwargs def main() -> None: @@ -34,7 +50,9 @@ def main() -> None: visitor_class=HRW4UVisitor, error_prefix="hrw4u", output_flag_name="hrw", - output_flag_help="Produce the HRW output (default)") + output_flag_help="Produce the HRW output (default)", + add_args=_add_args, + visitor_kwargs=_visitor_kwargs) if __name__ == "__main__": diff --git a/tools/hrw4u/scripts/hrw4u-lsp b/tools/hrw4u/scripts/hrw4u-lsp index 9b16f75eeef..78cda1fcebe 100755 --- a/tools/hrw4u/scripts/hrw4u-lsp +++ b/tools/hrw4u/scripts/hrw4u-lsp @@ -19,11 +19,13 @@ from __future__ import annotations +import argparse import json import os import sys import urllib.parse from functools import lru_cache +from pathlib import Path from typing import Any from antlr4.error.ErrorListener import ErrorListener @@ -31,6 +33,7 @@ from antlr4.error.ErrorListener import ErrorListener from hrw4u.hrw4uLexer import hrw4uLexer from hrw4u.hrw4uParser import hrw4uParser from hrw4u.visitor import HRW4UVisitor +from hrw4u.sandbox import SandboxConfig from hrw4u.common import create_parse_tree from hrw4u.tables import FUNCTION_MAP, STATEMENT_FUNCTION_MAP from hrw4u.states import SectionType @@ -82,6 +85,7 @@ class DocumentManager: self.variable_declarations: dict[str, dict[str, VariableDeclaration]] = {} self._completion_provider = CompletionProvider() self._uri_path_cache: dict[str, str] = {} + self.sandbox: SandboxConfig | None = None def _add_operator_completions(self, completions: list, base_prefix: str, current_section, context: CompletionContext) -> None: """Add operator and condition completions.""" @@ -147,7 +151,7 @@ class DocumentManager: parser_errors = list(parser._syntax_errors) if tree is not None: - visitor = HRW4UVisitor(filename=filename, error_collector=error_collector) + visitor = HRW4UVisitor(filename=filename, error_collector=error_collector, sandbox=self.sandbox) try: visitor.visit(tree) except Exception as e: @@ -465,6 +469,14 @@ class HRW4ULanguageServer: self.running = False def _handle_initialize(self, message: dict[str, Any]) -> None: + init_options = message.get("params", {}).get("initializationOptions", {}) + sandbox_path = init_options.get("sandboxPath", "") + if sandbox_path: + try: + self.document_manager.sandbox = SandboxConfig.load(Path(sandbox_path)) + except Exception as e: + print(f"hrw4u-lsp: warning: could not load sandbox config: {e}", file=sys.stderr) + response = { "jsonrpc": "2.0", "id": message["id"], @@ -603,7 +615,16 @@ class HRW4ULanguageServer: def main() -> None: """Main entry point for the LSP server.""" + parser = argparse.ArgumentParser(description="HRW4U Language Server") + parser.add_argument("--sandbox", metavar="FILE", type=Path, help="Path to sandbox YAML configuration file") + args = parser.parse_args() + server = HRW4ULanguageServer() + if args.sandbox: + try: + server.document_manager.sandbox = SandboxConfig.load(args.sandbox) + except Exception as e: + print(f"hrw4u-lsp: warning: could not load sandbox config: {e}", file=sys.stderr) server.start() diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index d694f0d58de..877d4a03c06 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -20,7 +20,7 @@ import argparse import re import sys -from typing import Final, NoReturn, Protocol, TextIO, Any +from typing import Final, NoReturn, Protocol, TextIO, Any, Callable from antlr4.error.ErrorStrategy import BailErrorStrategy, DefaultErrorStrategy from antlr4 import InputStream, CommonTokenStream @@ -199,7 +199,8 @@ def generate_output( visitor_class: type[VisitorProtocol], filename: str, args: Any, - error_collector: ErrorCollector | None = None) -> None: + error_collector: ErrorCollector | None = None, + extra_kwargs: dict[str, Any] | None = None) -> None: """Generate and print output based on mode with optional error collection.""" if args.ast: if tree is not None: @@ -209,8 +210,15 @@ def generate_output( else: if tree is not None: preserve_comments = not getattr(args, 'no_comments', False) - visitor = visitor_class( - filename=filename, debug=args.debug, error_collector=error_collector, preserve_comments=preserve_comments) + kwargs: dict[str, Any] = { + "filename": filename, + "debug": args.debug, + "error_collector": error_collector, + "preserve_comments": preserve_comments + } + if extra_kwargs: + kwargs.update(extra_kwargs) + visitor = visitor_class(**kwargs) try: result = visitor.visit(tree) if result: @@ -232,8 +240,15 @@ def generate_output( def run_main( - description: str, lexer_class: type[LexerProtocol], parser_class: type[ParserProtocol], - visitor_class: type[VisitorProtocol], error_prefix: str, output_flag_name: str, output_flag_help: str) -> None: + description: str, + lexer_class: type[LexerProtocol], + parser_class: type[ParserProtocol], + visitor_class: type[VisitorProtocol], + error_prefix: str, + output_flag_name: str, + output_flag_help: str, + add_args: Callable[[argparse.ArgumentParser, Any], None] | None = None, + visitor_kwargs: Callable[[argparse.Namespace], dict[str, Any]] | None = None) -> None: """ Generic main function for hrw4u and u4wrh scripts with bulk compilation support. @@ -245,6 +260,8 @@ def run_main( error_prefix: Error prefix for error messages output_flag_name: Name of output flag (e.g., "hrw", "hrw4u") output_flag_help: Help text for output flag + add_args: Optional callback(parser, output_group) to add extra CLI arguments + visitor_kwargs: Optional callback(args) -> dict of extra kwargs for the visitor """ parser = argparse.ArgumentParser( description=description, @@ -263,6 +280,9 @@ def run_main( parser.add_argument( "--stop-on-error", action="store_true", help="Stop processing on first error (default: collect and report multiple errors)") + if add_args: + add_args(parser, output_group) + args = parser.parse_args() if not hasattr(args, output_flag_name): @@ -271,11 +291,13 @@ def run_main( if not (args.ast or getattr(args, output_flag_name)): setattr(args, output_flag_name, True) + extra_kwargs = visitor_kwargs(args) if visitor_kwargs else None + if not args.files: content, filename = process_input(sys.stdin) tree, parser_obj, error_collector = create_parse_tree( content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error) - generate_output(tree, parser_obj, visitor_class, filename, args, error_collector) + generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs) return if any(':' in f for f in args.files): @@ -307,7 +329,7 @@ def run_main( original_stdout = sys.stdout try: sys.stdout = output_file - generate_output(tree, parser_obj, visitor_class, filename, args, error_collector) + generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs) finally: sys.stdout = original_stdout except Exception as e: @@ -332,4 +354,4 @@ def run_main( tree, parser_obj, error_collector = create_parse_tree( content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error) - generate_output(tree, parser_obj, visitor_class, filename, args, error_collector) + generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs) diff --git a/tools/hrw4u/src/errors.py b/tools/hrw4u/src/errors.py index a2710fb840f..e3e6ed2f8f2 100644 --- a/tools/hrw4u/src/errors.py +++ b/tools/hrw4u/src/errors.py @@ -112,6 +112,7 @@ class ErrorCollector: def __init__(self) -> None: """Initialize an empty error collector.""" self.errors: list[Hrw4uSyntaxError] = [] + self._sandbox_message: str | None = None def add_error(self, error: Hrw4uSyntaxError) -> None: """ @@ -119,6 +120,11 @@ def add_error(self, error: Hrw4uSyntaxError) -> None: """ self.errors.append(error) + def set_sandbox_message(self, message: str) -> None: + """Record the sandbox policy message to display once at the end.""" + if message and self._sandbox_message is None: + self._sandbox_message = message + def has_errors(self) -> bool: """ Check if any errors have been collected. @@ -137,6 +143,10 @@ def get_error_summary(self) -> str: if hasattr(error, '__notes__') and error.__notes__: lines.extend(error.__notes__) + if self._sandbox_message: + lines.append("") + lines.append(self._sandbox_message) + return "\n".join(lines) diff --git a/tools/hrw4u/src/sandbox.py b/tools/hrw4u/src/sandbox.py new file mode 100644 index 00000000000..6e3fdb5829c --- /dev/null +++ b/tools/hrw4u/src/sandbox.py @@ -0,0 +1,159 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Sandbox configuration for restricting hrw4u language features.""" + +from __future__ import annotations + +import yaml +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from hrw4u.errors import SymbolResolutionError + + +class SandboxDenialError(SymbolResolutionError): + """Raised when a feature is denied by sandbox policy.""" + + def __init__(self, name: str, category: str, message: str) -> None: + super().__init__(name, f"'{name}' is denied by sandbox policy ({category})") + self.sandbox_message = message + + +_VALID_DENY_KEYS = frozenset({"sections", "functions", "conditions", "operators", "language", "modifiers"}) +_VALID_LANGUAGE_CONSTRUCTS = frozenset({"break", "variables", "in", "else", "elif"}) +_VALID_MODIFIERS = frozenset({"AND", "OR", "NOT", "NOCASE", "PRE", "SUF", "EXT", "MID", "I", "L", "QSA"}) + + +def _load_deny_set(deny: dict[str, Any], key: str) -> frozenset[str]: + raw = deny.get(key) + + if raw is None: + return frozenset() + if not isinstance(raw, list): + raise ValueError(f"sandbox.deny.{key} must be a list, got {type(raw).__name__}") + return frozenset(str(item).strip() for item in raw if item) + + +def _is_denied(name: str, deny_set: frozenset[str]) -> bool: + if name in deny_set: + return True + + for entry in deny_set: + if entry.endswith(".") and name.startswith(entry): + return True + + return False + + +@dataclass(frozen=True) +class SandboxConfig: + message: str + denied_sections: frozenset[str] + denied_functions: frozenset[str] + denied_conditions: frozenset[str] + denied_operators: frozenset[str] + denied_language: frozenset[str] + denied_modifiers: frozenset[str] + + @classmethod + def load(cls, path: Path) -> SandboxConfig: + with open(path, encoding="utf-8") as f: + raw = yaml.safe_load(f) + + if not isinstance(raw, dict) or "sandbox" not in raw: + raise ValueError(f"Sandbox config must have a top-level 'sandbox' key: {path}") + + sandbox = raw["sandbox"] + if not isinstance(sandbox, dict): + raise ValueError(f"sandbox must be a mapping: {path}") + + message = str(sandbox.get("message", "")).strip() + + deny = sandbox.get("deny", {}) + if not isinstance(deny, dict): + raise ValueError(f"sandbox.deny must be a mapping: {path}") + + unknown_keys = set(deny.keys()) - _VALID_DENY_KEYS + if unknown_keys: + raise ValueError(f"Unknown keys in sandbox.deny: {', '.join(sorted(unknown_keys))}") + + language = _load_deny_set(deny, "language") + unknown_lang = language - _VALID_LANGUAGE_CONSTRUCTS + if unknown_lang: + raise ValueError( + f"Unknown language constructs: {', '.join(sorted(unknown_lang))}. " + f"Valid: {', '.join(sorted(_VALID_LANGUAGE_CONSTRUCTS))}") + + modifiers = frozenset(s.upper() for s in _load_deny_set(deny, "modifiers")) + unknown_mods = modifiers - _VALID_MODIFIERS + if unknown_mods: + raise ValueError( + f"Unknown modifiers: {', '.join(sorted(unknown_mods))}. " + f"Valid: {', '.join(sorted(_VALID_MODIFIERS))}") + + return cls( + message=message, + denied_sections=_load_deny_set(deny, "sections"), + denied_functions=_load_deny_set(deny, "functions"), + denied_conditions=_load_deny_set(deny, "conditions"), + denied_operators=_load_deny_set(deny, "operators"), + denied_language=language, + denied_modifiers=modifiers, + ) + + @classmethod + def empty(cls) -> SandboxConfig: + return cls( + message="", + denied_sections=frozenset(), + denied_functions=frozenset(), + denied_conditions=frozenset(), + denied_operators=frozenset(), + denied_language=frozenset(), + denied_modifiers=frozenset(), + ) + + @property + def is_active(self) -> bool: + return bool( + self.denied_sections or self.denied_functions or self.denied_conditions or self.denied_operators or + self.denied_language or self.denied_modifiers) + + def check_section(self, section_name: str) -> None: + if _is_denied(section_name, self.denied_sections): + raise SandboxDenialError(section_name, "section", self.message) + + def check_function(self, func_name: str) -> None: + if _is_denied(func_name, self.denied_functions): + raise SandboxDenialError(func_name, "function", self.message) + + def check_condition(self, condition_key: str) -> None: + if _is_denied(condition_key, self.denied_conditions): + raise SandboxDenialError(condition_key, "condition", self.message) + + def check_operator(self, operator_key: str) -> None: + if _is_denied(operator_key, self.denied_operators): + raise SandboxDenialError(operator_key, "operator", self.message) + + def check_language(self, construct: str) -> None: + if construct in self.denied_language: + raise SandboxDenialError(construct, "language", self.message) + + def check_modifier(self, modifier: str) -> None: + if modifier.upper() in self.denied_modifiers: + raise SandboxDenialError(modifier, "modifier", self.message) diff --git a/tools/hrw4u/src/symbols.py b/tools/hrw4u/src/symbols.py index bb5210923f1..f341e380f48 100644 --- a/tools/hrw4u/src/symbols.py +++ b/tools/hrw4u/src/symbols.py @@ -24,12 +24,13 @@ from hrw4u.common import SystemDefaults from hrw4u.symbols_base import SymbolResolverBase from hrw4u.suggestions import SuggestionEngine +from hrw4u.sandbox import SandboxConfig class SymbolResolver(SymbolResolverBase): - def __init__(self, debug: bool = SystemDefaults.DEFAULT_DEBUG) -> None: - super().__init__(debug) + def __init__(self, debug: bool = SystemDefaults.DEFAULT_DEBUG, sandbox: SandboxConfig | None = None) -> None: + super().__init__(debug, sandbox=sandbox) self._symbols: dict[str, types.Symbol] = {} self._var_counter = {vt: 0 for vt in types.VarType} self._suggestion_engine = SuggestionEngine() @@ -74,6 +75,8 @@ def declare_variable(self, name: str, type_name: str, explicit_slot: int | None def resolve_assignment(self, name: str, value: str, section: SectionType | None = None) -> str: with self.debug_context("resolve_assignment", name, value, section): + self._sandbox.check_operator(name) + for op_key, params in self._operator_map.items(): if op_key.endswith("."): if name.startswith(op_key): @@ -118,6 +121,8 @@ def resolve_assignment(self, name: str, value: str, section: SectionType | None def resolve_add_assignment(self, name: str, value: str, section: SectionType | None = None) -> str: """Resolve += assignment, if it is supported for the given operator.""" with self.debug_context("resolve_add_assignment", name, value, section): + self._sandbox.check_operator(name) + for op_key, params in self._operator_map.items(): if op_key.endswith(".") and name.startswith(op_key) and params and params.add: self.validate_section_access(name, section, params.sections) @@ -138,6 +143,8 @@ def resolve_condition(self, name: str, section: SectionType | None = None) -> tu if symbol := self.symbol_for(name): return symbol.as_cond(), False + self._sandbox.check_condition(name) + if params := self._lookup_condition_cached(name): tag = params.target if params else None allowed_sections = params.sections if params else None @@ -192,6 +199,8 @@ def resolve_function(self, func_name: str, args: list[str], strip_quotes: bool = def resolve_statement_func(self, func_name: str, args: list[str], section: SectionType | None = None) -> str: with self.debug_context("resolve_statement_func", func_name, args, section): + self._sandbox.check_function(func_name) + if params := self._lookup_statement_function_cached(func_name): allowed_sections = params.sections if params else None self.validate_section_access(func_name, section, allowed_sections) diff --git a/tools/hrw4u/src/symbols_base.py b/tools/hrw4u/src/symbols_base.py index 1fb95cd60c0..d929d576d21 100644 --- a/tools/hrw4u/src/symbols_base.py +++ b/tools/hrw4u/src/symbols_base.py @@ -22,14 +22,16 @@ from hrw4u.states import SectionType from hrw4u.common import SystemDefaults from hrw4u.errors import SymbolResolutionError +from hrw4u.sandbox import SandboxConfig import hrw4u.tables as tables import hrw4u.types as types class SymbolResolverBase: - def __init__(self, debug: bool = SystemDefaults.DEFAULT_DEBUG) -> None: + def __init__(self, debug: bool = SystemDefaults.DEFAULT_DEBUG, sandbox: SandboxConfig | None = None) -> None: self._dbg = Dbg(debug) + self._sandbox = sandbox or SandboxConfig.empty() # Clear caches when debug status changes to ensure consistency if hasattr(self, '_condition_cache'): self._condition_cache.cache_clear() diff --git a/tools/hrw4u/src/visitor.py b/tools/hrw4u/src/visitor.py index e62f2220784..a1b441fad1b 100644 --- a/tools/hrw4u/src/visitor.py +++ b/tools/hrw4u/src/visitor.py @@ -29,6 +29,7 @@ from hrw4u.common import RegexPatterns, SystemDefaults from hrw4u.visitor_base import BaseHRWVisitor from hrw4u.validation import Validator +from hrw4u.sandbox import SandboxConfig # Cache regex validator at module level for efficiency _regex_validator = Validator.regex_pattern() @@ -49,14 +50,29 @@ def __init__( filename: str = SystemDefaults.DEFAULT_FILENAME, debug: bool = SystemDefaults.DEFAULT_DEBUG, error_collector=None, - preserve_comments: bool = True) -> None: + preserve_comments: bool = True, + sandbox: SandboxConfig | None = None) -> None: super().__init__(filename, debug, error_collector) self._cond_state = CondState() self._queued: QueuedItem | None = None self.preserve_comments = preserve_comments + self._sandbox = sandbox or SandboxConfig.empty() - self.symbol_resolver = SymbolResolver(debug) + self.symbol_resolver = SymbolResolver(debug, sandbox=self._sandbox) + + def _sandbox_check(self, ctx, check_fn) -> bool: + """Run a sandbox check, trapping any denial error into the error collector. + + Returns True if the check passed, False if denied (error was collected). + """ + try: + check_fn() + return True + except Exception as exc: + with self.trap(ctx): + raise exc + return False @lru_cache(maxsize=256) def _cached_symbol_resolution(self, symbol_text: str, section_name: str) -> tuple[str, bool]: @@ -250,6 +266,8 @@ def _prepare_section(self, ctx): raise SymbolResolutionError("section", "Missing section name") section_name = ctx.name.text + self._sandbox.check_section(section_name) + try: self.current_section = SectionType(section_name) except ValueError: @@ -318,6 +336,7 @@ def visitVarSection(self, ctx) -> None: else: raise error with self.debug_context("visitVarSection"): + self._sandbox.check_language("variables") self.visit(ctx.variables()) def visitCommentLine(self, ctx) -> None: @@ -332,6 +351,7 @@ def visitStatement(self, ctx) -> None: with self.debug_context("visitStatement"), self.trap(ctx): match ctx: case _ if ctx.BREAK(): + self._sandbox.check_language("break") self._dbg("BREAK") self.emit_statement("no-op [L]") return @@ -434,11 +454,15 @@ def visitIfStatement(self, ctx) -> None: def visitElseClause(self, ctx) -> None: with self.debug_context("visitElseClause"): + if not self._sandbox_check(ctx, lambda: self._sandbox.check_language("else")): + return self.emit_condition("else", final=True) self.visit(ctx.block()) def visitElifClause(self, ctx) -> None: with self.debug_context("visitElifClause"): + if not self._sandbox_check(ctx, lambda: self._sandbox.check_language("elif")): + return self.emit_condition("elif", final=True) with self.stmt_indented(), self.cond_indented(): self.visit(ctx.condition()) @@ -514,6 +538,8 @@ def visitComparison(self, ctx, *, last: bool = False) -> None: cond_txt = f"{lhs} {ctx.iprange().getText()}" case _ if ctx.set_(): + if not self._sandbox_check(ctx, lambda: self._sandbox.check_language("in")): + return inner = ctx.set_().getText()[1:-1] # We no longer strip the quotes here for sets, fixed in #12256 cond_txt = f"{lhs} ({inner})" @@ -531,6 +557,7 @@ def visitModifier(self, ctx) -> None: for token in ctx.modifierList().mods: try: mod = token.text.upper() + self._sandbox.check_modifier(mod) self._cond_state.add_modifier(mod) except Exception as exc: with self.trap(ctx): @@ -575,6 +602,8 @@ def _end_lhs_then_emit_rhs(self, set_and_or: bool, rhs_emitter) -> None: def emit_expression(self, ctx, *, nested: bool = False, last: bool = False, grouped: bool = False) -> None: with self.debug_context("emit_expression"): if ctx.OR(): + if not self._sandbox_check(ctx, lambda: self._sandbox.check_modifier("OR")): + return self.debug_log("`OR' detected") if grouped: self.debug_log("GROUP-START") @@ -592,6 +621,8 @@ def emit_expression(self, ctx, *, nested: bool = False, last: bool = False, grou def emit_term(self, ctx, *, last: bool = False) -> None: with self.debug_context("emit_term"): if ctx.AND(): + if not self._sandbox_check(ctx, lambda: self._sandbox.check_modifier("AND")): + return self.debug_log("`AND' detected") self.emit_term(ctx.term(), last=False) self._end_lhs_then_emit_rhs(False, lambda: self.emit_factor(ctx.factor(), last=last)) diff --git a/tools/hrw4u/src/visitor_base.py b/tools/hrw4u/src/visitor_base.py index e7d1e88d2e0..94bce52470f 100644 --- a/tools/hrw4u/src/visitor_base.py +++ b/tools/hrw4u/src/visitor_base.py @@ -23,6 +23,7 @@ from hrw4u.states import SectionType from hrw4u.common import SystemDefaults from hrw4u.errors import hrw4u_error +from hrw4u.sandbox import SandboxConfig, SandboxDenialError class VisitorMixin: @@ -84,6 +85,7 @@ def __init__( self.filename = filename self.error_collector = error_collector self.output: list[str] = [] + self._sandbox = SandboxConfig.empty() self._state = VisitorState() self._dbg = Dbg(debug) @@ -290,6 +292,8 @@ def __exit__(_, exc_type, exc, tb): if self.error_collector: self.error_collector.add_error(error) + if isinstance(exc, SandboxDenialError) and exc.sandbox_message: + self.error_collector.set_sandbox_message(exc.sandbox_message) return True else: raise error from exc @@ -434,6 +438,9 @@ def _parse_op_tails(self, node, ctx=None) -> tuple[list[str], object, object]: raise Exception(f"Unknown modifier: {flag_text}") else: raise Exception(f"Unknown modifier: {flag_text}") + sandbox = self._sandbox + if sandbox is not None: + sandbox.check_modifier(flag_text) continue for kind in ("IDENT", "NUMBER", "STRING", "PERCENT_BLOCK", "COMPLEX_STRING"): diff --git a/tools/hrw4u/tests/data/sandbox/allowed.ast.txt b/tools/hrw4u/tests/data/sandbox/allowed.ast.txt new file mode 100644 index 00000000000..ff52d36c713 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/allowed.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (statement inbound.req.X-Foo = (value "allowed") ;)) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/allowed.input.txt b/tools/hrw4u/tests/data/sandbox/allowed.input.txt new file mode 100644 index 00000000000..03d0ea6e525 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/allowed.input.txt @@ -0,0 +1,3 @@ +REMAP { + inbound.req.X-Foo = "allowed"; +} diff --git a/tools/hrw4u/tests/data/sandbox/allowed.output.txt b/tools/hrw4u/tests/data/sandbox/allowed.output.txt new file mode 100644 index 00000000000..4c68298572b --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/allowed.output.txt @@ -0,0 +1,2 @@ +cond %{REMAP_PSEUDO_HOOK} [AND] + set-header X-Foo "allowed" diff --git a/tools/hrw4u/tests/data/sandbox/denied-function.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-function.ast.txt new file mode 100644 index 00000000000..b1df2741276 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-function.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (statement (functionCall set-debug ( )) ;)) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-function.error.txt b/tools/hrw4u/tests/data/sandbox/denied-function.error.txt new file mode 100644 index 00000000000..4054fd4a899 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-function.error.txt @@ -0,0 +1,2 @@ +'set-debug' is denied by sandbox policy (function) +Feature denied by sandbox policy. Contact platform team. diff --git a/tools/hrw4u/tests/data/sandbox/denied-function.input.txt b/tools/hrw4u/tests/data/sandbox/denied-function.input.txt new file mode 100644 index 00000000000..8954b692226 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-function.input.txt @@ -0,0 +1,3 @@ +REMAP { + set-debug(); +} diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-break.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-language-break.ast.txt new file mode 100644 index 00000000000..f6e1872e715 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-break.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (statement inbound.req.X-Foo = (value "test") ;)) (sectionBody (statement break ;)) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-break.error.txt b/tools/hrw4u/tests/data/sandbox/denied-language-break.error.txt new file mode 100644 index 00000000000..9df5faeb5c9 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-break.error.txt @@ -0,0 +1,2 @@ +'break' is denied by sandbox policy (language) +Feature denied by sandbox policy. Contact platform team. diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-break.input.txt b/tools/hrw4u/tests/data/sandbox/denied-language-break.input.txt new file mode 100644 index 00000000000..773d2e80798 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-break.input.txt @@ -0,0 +1,4 @@ +REMAP { + inbound.req.X-Foo = "test"; + break; +} diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-elif.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-language-elif.ast.txt new file mode 100644 index 00000000000..475169068b8 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-elif.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (conditional (ifStatement if (condition (expression (term (factor (comparison (comparable inbound.req.X-Foo) == (value "a")))))) (block { (blockItem (statement inbound.req.X-Result = (value "a") ;)) })) (elifClause elif (condition (expression (term (factor (comparison (comparable inbound.req.X-Foo) == (value "b")))))) (block { (blockItem (statement inbound.req.X-Result = (value "b") ;)) })))) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-elif.error.txt b/tools/hrw4u/tests/data/sandbox/denied-language-elif.error.txt new file mode 100644 index 00000000000..ea2490396ad --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-elif.error.txt @@ -0,0 +1 @@ +'elif' is denied by sandbox policy (language) diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-elif.input.txt b/tools/hrw4u/tests/data/sandbox/denied-language-elif.input.txt new file mode 100644 index 00000000000..96d094b6750 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-elif.input.txt @@ -0,0 +1,7 @@ +REMAP { + if inbound.req.X-Foo == "a" { + inbound.req.X-Result = "a"; + } elif inbound.req.X-Foo == "b" { + inbound.req.X-Result = "b"; + } +} diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-elif.sandbox.yaml b/tools/hrw4u/tests/data/sandbox/denied-language-elif.sandbox.yaml new file mode 100644 index 00000000000..be279a696d8 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-elif.sandbox.yaml @@ -0,0 +1,6 @@ +sandbox: + message: "Feature denied by sandbox policy. Contact platform team." + + deny: + language: + - elif diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-else.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-language-else.ast.txt new file mode 100644 index 00000000000..45256070144 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-else.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (conditional (ifStatement if (condition (expression (term (factor (comparison (comparable inbound.req.X-Foo) == (value "a")))))) (block { (blockItem (statement inbound.req.X-Result = (value "yes") ;)) })) (elseClause else (block { (blockItem (statement inbound.req.X-Result = (value "no") ;)) })))) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-else.error.txt b/tools/hrw4u/tests/data/sandbox/denied-language-else.error.txt new file mode 100644 index 00000000000..4aa2d20b1d5 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-else.error.txt @@ -0,0 +1 @@ +'else' is denied by sandbox policy (language) diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-else.input.txt b/tools/hrw4u/tests/data/sandbox/denied-language-else.input.txt new file mode 100644 index 00000000000..35261cb4f7a --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-else.input.txt @@ -0,0 +1,7 @@ +REMAP { + if inbound.req.X-Foo == "a" { + inbound.req.X-Result = "yes"; + } else { + inbound.req.X-Result = "no"; + } +} diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-else.sandbox.yaml b/tools/hrw4u/tests/data/sandbox/denied-language-else.sandbox.yaml new file mode 100644 index 00000000000..58c2bde66b0 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-else.sandbox.yaml @@ -0,0 +1,6 @@ +sandbox: + message: "Feature denied by sandbox policy. Contact platform team." + + deny: + language: + - else diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-in.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-language-in.ast.txt new file mode 100644 index 00000000000..63ef38354c8 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-in.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (conditional (ifStatement if (condition (expression (term (factor (comparison (comparable inbound.url.path) in (set [ (value "php") , (value "html") ])))))) (block { (blockItem (statement inbound.req.X-Result = (value "yes") ;)) })))) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-in.error.txt b/tools/hrw4u/tests/data/sandbox/denied-language-in.error.txt new file mode 100644 index 00000000000..539bb7d50eb --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-in.error.txt @@ -0,0 +1 @@ +'in' is denied by sandbox policy (language) diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-in.input.txt b/tools/hrw4u/tests/data/sandbox/denied-language-in.input.txt new file mode 100644 index 00000000000..7deb8550973 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-in.input.txt @@ -0,0 +1,5 @@ +REMAP { + if inbound.url.path in ["php", "html"] { + inbound.req.X-Result = "yes"; + } +} diff --git a/tools/hrw4u/tests/data/sandbox/denied-language-in.sandbox.yaml b/tools/hrw4u/tests/data/sandbox/denied-language-in.sandbox.yaml new file mode 100644 index 00000000000..f10fef32e15 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-language-in.sandbox.yaml @@ -0,0 +1,6 @@ +sandbox: + message: "Feature denied by sandbox policy. Contact platform team." + + deny: + language: + - in diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.ast.txt new file mode 100644 index 00000000000..abdf80daea9 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (conditional (ifStatement if (condition (expression (term (factor (comparison (comparable inbound.req.X-Foo) == (value "bar") (modifier with (modifierList NOCASE))))))) (block { (blockItem (statement inbound.req.X-Result = (value "yes") ;)) })))) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.error.txt b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.error.txt new file mode 100644 index 00000000000..3177ef0613f --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.error.txt @@ -0,0 +1 @@ +'NOCASE' is denied by sandbox policy (modifier) diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.input.txt b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.input.txt new file mode 100644 index 00000000000..59d52c70ef9 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.input.txt @@ -0,0 +1,5 @@ +REMAP { + if inbound.req.X-Foo == "bar" with NOCASE { + inbound.req.X-Result = "yes"; + } +} diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.sandbox.yaml b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.sandbox.yaml new file mode 100644 index 00000000000..8f849222ad4 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-nocase.sandbox.yaml @@ -0,0 +1,6 @@ +sandbox: + message: "Modifier denied by sandbox policy. Contact platform team." + + deny: + modifiers: + - NOCASE diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-or.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.ast.txt new file mode 100644 index 00000000000..e2741a07bf3 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (conditional (ifStatement if (condition (expression (expression (term (factor ( (expression (term (factor (comparison (comparable inbound.req.X-A) == (value "a"))))) )))) || (term (factor ( (expression (term (factor (comparison (comparable inbound.req.X-B) == (value "b"))))) ))))) (block { (blockItem (statement inbound.req.X-Result = (value "yes") ;)) })))) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-or.error.txt b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.error.txt new file mode 100644 index 00000000000..687b6244302 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.error.txt @@ -0,0 +1 @@ +'OR' is denied by sandbox policy (modifier) diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-or.input.txt b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.input.txt new file mode 100644 index 00000000000..3a2041d9208 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.input.txt @@ -0,0 +1,5 @@ +REMAP { + if (inbound.req.X-A == "a") || (inbound.req.X-B == "b") { + inbound.req.X-Result = "yes"; + } +} diff --git a/tools/hrw4u/tests/data/sandbox/denied-modifier-or.sandbox.yaml b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.sandbox.yaml new file mode 100644 index 00000000000..11954a30621 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-modifier-or.sandbox.yaml @@ -0,0 +1,6 @@ +sandbox: + message: "Modifier denied by sandbox policy. Contact platform team." + + deny: + modifiers: + - OR diff --git a/tools/hrw4u/tests/data/sandbox/denied-section.ast.txt b/tools/hrw4u/tests/data/sandbox/denied-section.ast.txt new file mode 100644 index 00000000000..1e85d1675a7 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-section.ast.txt @@ -0,0 +1 @@ +(program (programItem (section PRE_REMAP { (sectionBody (statement inbound.req.X-Foo = (value "test") ;)) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/denied-section.error.txt b/tools/hrw4u/tests/data/sandbox/denied-section.error.txt new file mode 100644 index 00000000000..04d8c28c8f2 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-section.error.txt @@ -0,0 +1,2 @@ +'PRE_REMAP' is denied by sandbox policy (section) +Feature denied by sandbox policy. Contact platform team. diff --git a/tools/hrw4u/tests/data/sandbox/denied-section.input.txt b/tools/hrw4u/tests/data/sandbox/denied-section.input.txt new file mode 100644 index 00000000000..637d71a216e --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/denied-section.input.txt @@ -0,0 +1,3 @@ +PRE_REMAP { + inbound.req.X-Foo = "test"; +} diff --git a/tools/hrw4u/tests/data/sandbox/exceptions.txt b/tools/hrw4u/tests/data/sandbox/exceptions.txt new file mode 100644 index 00000000000..7d3a5c986c1 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/exceptions.txt @@ -0,0 +1,10 @@ +# Sandbox test exceptions +# Format: test_name: direction +# +# Sandbox deny tests run WITH a sandbox config via the test suite; +# testcase.py runs without one. Mark deny tests that would produce +# symbol errors without a sandbox as u4wrh so testcase.py skips them. +# +# per-test-sandbox uses TXN_START, which rejects inbound.req.X-Foo +# without a sandbox (section denial fires first in the real test). +per-test-sandbox.input: u4wrh diff --git a/tools/hrw4u/tests/data/sandbox/multiple-denials.ast.txt b/tools/hrw4u/tests/data/sandbox/multiple-denials.ast.txt new file mode 100644 index 00000000000..f7a07055cae --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/multiple-denials.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (statement (functionCall set-debug ( )) ;)) (sectionBody (statement break ;)) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/multiple-denials.error.txt b/tools/hrw4u/tests/data/sandbox/multiple-denials.error.txt new file mode 100644 index 00000000000..ab4a265cdd1 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/multiple-denials.error.txt @@ -0,0 +1,4 @@ +Found 2 errors: +'set-debug' is denied by sandbox policy (function) +'break' is denied by sandbox policy (language) +Feature denied by sandbox policy. Contact platform team. diff --git a/tools/hrw4u/tests/data/sandbox/multiple-denials.input.txt b/tools/hrw4u/tests/data/sandbox/multiple-denials.input.txt new file mode 100644 index 00000000000..42dc9dc0611 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/multiple-denials.input.txt @@ -0,0 +1,4 @@ +REMAP { + set-debug(); + break; +} diff --git a/tools/hrw4u/tests/data/sandbox/per-test-sandbox.error.txt b/tools/hrw4u/tests/data/sandbox/per-test-sandbox.error.txt new file mode 100644 index 00000000000..9ac39a0dffb --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/per-test-sandbox.error.txt @@ -0,0 +1 @@ +'TXN_START' is denied by sandbox policy (section) diff --git a/tools/hrw4u/tests/data/sandbox/per-test-sandbox.input.txt b/tools/hrw4u/tests/data/sandbox/per-test-sandbox.input.txt new file mode 100644 index 00000000000..bdfdf647a71 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/per-test-sandbox.input.txt @@ -0,0 +1,3 @@ +TXN_START { + inbound.req.X-Foo = "test"; +} diff --git a/tools/hrw4u/tests/data/sandbox/per-test-sandbox.sandbox.yaml b/tools/hrw4u/tests/data/sandbox/per-test-sandbox.sandbox.yaml new file mode 100644 index 00000000000..39dcd84eb5e --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/per-test-sandbox.sandbox.yaml @@ -0,0 +1,4 @@ +sandbox: + deny: + sections: + - TXN_START diff --git a/tools/hrw4u/tests/data/sandbox/sandbox.yaml b/tools/hrw4u/tests/data/sandbox/sandbox.yaml new file mode 100644 index 00000000000..f70a2fcae9e --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/sandbox.yaml @@ -0,0 +1,12 @@ +sandbox: + message: "Feature denied by sandbox policy. Contact platform team." + + deny: + sections: + - TXN_START + - TXN_CLOSE + - PRE_REMAP + functions: + - set-debug + language: + - break diff --git a/tools/hrw4u/tests/test_lsp.py b/tools/hrw4u/tests/test_lsp.py index 5f2896bd9e4..a3881456f51 100644 --- a/tools/hrw4u/tests/test_lsp.py +++ b/tools/hrw4u/tests/test_lsp.py @@ -333,9 +333,33 @@ def stop_server(self) -> None: if self.stderr_thread and self.stderr_thread.is_alive(): self.stderr_thread.join(timeout=1.0) + def wait_for_diagnostics(self, uri: str, timeout: float = 3.0) -> list[dict[str, Any]]: + """Wait for a publishDiagnostics notification for the given URI.""" + start_time = time.time() + stashed: list[dict[str, Any]] = [] + + while time.time() - start_time < timeout: + try: + msg = self.response_queue.get(timeout=0.1) + if msg.get("method") == "textDocument/publishDiagnostics": + if msg.get("params", {}).get("uri") == uri: + # Put stashed messages back + for m in stashed: + self.response_queue.put(m) + return msg["params"].get("diagnostics", []) + else: + stashed.append(msg) + else: + stashed.append(msg) + except queue.Empty: + continue + + for m in stashed: + self.response_queue.put(m) + return [] + def _create_test_document(client, content: str, test_name: str) -> str: - """Helper to create and open a test document.""" uri = f"file:///test_{test_name}.hrw4u" client.open_document(uri, content) return uri @@ -638,3 +662,61 @@ def test_unknown_namespace_fallback(shared_lsp_client) -> None: content = response["result"]["contents"]["value"] assert "HRW4U symbol" in content + + +# --------------------------------------------------------------------------- +# Sandbox LSP tests +# --------------------------------------------------------------------------- + +SANDBOX_YAML = Path(__file__).parent / "data" / "sandbox" / "sandbox.yaml" + + +@pytest.fixture(scope="module") +def sandbox_lsp_client(): + """LSP client started with --sandbox flag.""" + lsp_script = Path(__file__).parent.parent / "scripts" / "hrw4u-lsp" + if not lsp_script.exists(): + pytest.skip("hrw4u-lsp script not found - run 'make' first") + if not SANDBOX_YAML.exists(): + pytest.skip("sandbox.yaml not found") + + client = LSPClient([str(lsp_script), "--sandbox", str(SANDBOX_YAML)]) + client.start_server() + yield client + client.stop_server() + + +@pytest.mark.sandbox +def test_sandbox_denied_function_produces_diagnostic(sandbox_lsp_client) -> None: + """set-debug denied by sandbox should appear as a diagnostic error.""" + content = 'REMAP {\n set-debug();\n}\n' + uri = "file:///test_sandbox_denied_function.hrw4u" + sandbox_lsp_client.open_document(uri, content) + diagnostics = sandbox_lsp_client.wait_for_diagnostics(uri) + + assert any("denied by sandbox policy" in d.get("message", "") for d in diagnostics), \ + f"Expected sandbox denial diagnostic, got: {diagnostics}" + + +@pytest.mark.sandbox +def test_sandbox_denied_section_produces_diagnostic(sandbox_lsp_client) -> None: + """PRE_REMAP denied by sandbox should appear as a diagnostic error.""" + content = 'PRE_REMAP {\n inbound.req.X-Foo = "test";\n}\n' + uri = "file:///test_sandbox_denied_section.hrw4u" + sandbox_lsp_client.open_document(uri, content) + diagnostics = sandbox_lsp_client.wait_for_diagnostics(uri) + + assert any("denied by sandbox policy" in d.get("message", "") for d in diagnostics), \ + f"Expected sandbox denial diagnostic, got: {diagnostics}" + + +@pytest.mark.sandbox +def test_sandbox_allowed_content_has_no_denial(sandbox_lsp_client) -> None: + """Content using no denied features should produce no sandbox diagnostics.""" + content = 'REMAP {\n inbound.req.X-Foo = "allowed";\n}\n' + uri = "file:///test_sandbox_allowed.hrw4u" + sandbox_lsp_client.open_document(uri, content) + diagnostics = sandbox_lsp_client.wait_for_diagnostics(uri) + + sandbox_errors = [d for d in diagnostics if "denied by sandbox policy" in d.get("message", "")] + assert not sandbox_errors, f"Expected no sandbox denials, got: {sandbox_errors}" diff --git a/tools/hrw4u/tests/test_sandbox.py b/tools/hrw4u/tests/test_sandbox.py new file mode 100644 index 00000000000..75016c8aa19 --- /dev/null +++ b/tools/hrw4u/tests/test_sandbox.py @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +from pathlib import Path + +import pytest +import utils + + +@pytest.mark.sandbox +@pytest.mark.parametrize("input_file,error_file,sandbox_file", utils.collect_sandbox_deny_test_files("sandbox")) +def test_sandbox_denials(input_file: Path, error_file: Path, sandbox_file: Path) -> None: + """Test that sandbox-denied features produce expected errors.""" + utils.run_sandbox_deny_test(input_file, error_file, sandbox_file) + + +@pytest.mark.sandbox +@pytest.mark.parametrize("input_file,output_file,sandbox_file", utils.collect_sandbox_allow_test_files("sandbox")) +def test_sandbox_allowed(input_file: Path, output_file: Path, sandbox_file: Path) -> None: + """Test that features not in the deny list compile normally under a sandbox.""" + utils.run_sandbox_allow_test(input_file, output_file, sandbox_file) diff --git a/tools/hrw4u/tests/utils.py b/tools/hrw4u/tests/utils.py index fac320fbcd0..6f486e7ab48 100644 --- a/tools/hrw4u/tests/utils.py +++ b/tools/hrw4u/tests/utils.py @@ -28,6 +28,8 @@ from hrw4u.hrw4uLexer import hrw4uLexer from hrw4u.hrw4uParser import hrw4uParser from hrw4u.visitor import HRW4UVisitor +from hrw4u.sandbox import SandboxConfig +from hrw4u.errors import ErrorCollector from u4wrh.u4wrhLexer import u4wrhLexer from u4wrh.u4wrhParser import u4wrhParser from u4wrh.hrw_visitor import HRWInverseVisitor @@ -51,9 +53,13 @@ def __init__(self, filename: str, line: int, column: int, message: str, source_l "collect_output_test_files", "collect_ast_test_files", "collect_failing_inputs", + "collect_sandbox_deny_test_files", + "collect_sandbox_allow_test_files", "run_output_test", "run_ast_test", "run_failing_test", + "run_sandbox_deny_test", + "run_sandbox_allow_test", "run_reverse_test", "run_bulk_test", ] @@ -144,6 +150,41 @@ def collect_failing_inputs(group: str) -> Iterator[pytest.param]: yield pytest.param(input_file, id=test_id) +def _collect_sandbox_test_files(group: str, result_suffix: str) -> Iterator[pytest.param]: + """Collect sandbox test files: (input, result, sandbox_config). + + Uses a per-test `{name}.sandbox.yaml` if present, otherwise falls back + to a shared `sandbox.yaml` in the same directory. + """ + base_dir = Path("tests/data") / group + shared_sandbox = base_dir / "sandbox.yaml" + + for input_file in sorted(base_dir.glob("*.input.txt")): + base = input_file.with_suffix("").with_suffix("") + result_file = base.with_suffix(result_suffix) + + if not result_file.exists(): + continue + + per_test_sandbox = base.with_suffix(".sandbox.yaml") + sandbox_file = per_test_sandbox if per_test_sandbox.exists() else shared_sandbox + + if not sandbox_file.exists(): + continue + + yield pytest.param(input_file, result_file, sandbox_file, id=base.name) + + +def collect_sandbox_deny_test_files(group: str) -> Iterator[pytest.param]: + """Collect sandbox denial test files: (input, error, sandbox_config).""" + yield from _collect_sandbox_test_files(group, ".error.txt") + + +def collect_sandbox_allow_test_files(group: str) -> Iterator[pytest.param]: + """Collect sandbox allow test files: (input, output, sandbox_config).""" + yield from _collect_sandbox_test_files(group, ".output.txt") + + def run_output_test(input_file: Path, output_file: Path) -> None: """Run output validation test comparing generated output with expected.""" input_text = input_file.read_text() @@ -257,6 +298,48 @@ def _assert_structured_error_fields( f"Actual full error:\n{actual_full_error}") +def run_sandbox_deny_test(input_file: Path, error_file: Path, sandbox_file: Path) -> None: + """Run a sandbox denial test, verifying that denied features produce expected errors.""" + text = input_file.read_text() + parser, tree = parse_input_text(text) + + sandbox = SandboxConfig.load(sandbox_file) + error_collector = ErrorCollector() + visitor = HRW4UVisitor(filename=str(input_file), error_collector=error_collector, sandbox=sandbox) + visitor.visit(tree) + + assert error_collector.has_errors(), f"Expected sandbox errors but none were raised for {input_file}" + + actual_summary = error_collector.get_error_summary() + expected_content = error_file.read_text().strip() + + for line in expected_content.splitlines(): + line = line.strip() + if line: + assert line in actual_summary, ( + f"Expected phrase not found in error summary for {input_file}:\n" + f" Missing: {line!r}\n" + f"Actual summary:\n{actual_summary}") + + +def run_sandbox_allow_test(input_file: Path, output_file: Path, sandbox_file: Path) -> None: + """Run a sandbox allow test, verifying that non-denied features compile normally.""" + text = input_file.read_text() + parser, tree = parse_input_text(text) + + sandbox = SandboxConfig.load(sandbox_file) + error_collector = ErrorCollector() + visitor = HRW4UVisitor(filename=str(input_file), error_collector=error_collector, sandbox=sandbox) + actual_output = "\n".join(visitor.visit(tree) or []).strip() + + assert not error_collector.has_errors(), ( + f"Expected no errors but sandbox denied something in {input_file}:\n" + f"{error_collector.get_error_summary()}") + + expected_output = output_file.read_text().strip() + assert actual_output == expected_output, f"Output mismatch in {input_file}" + + def run_reverse_test(input_file: Path, output_file: Path) -> None: """Run u4wrh on output.txt and compare with input.txt (round-trip test).""" output_text = output_file.read_text() From dc830d84286f02fc3f79c1b90b9230b7e09678f2 Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Mon, 9 Mar 2026 23:55:43 -0700 Subject: [PATCH 2/5] Address Copilot's review comments - Check sandbox.check_language("variables") when a declared variable is referenced as a condition, for defense-in-depth. - Wrap visitVarSection()'s variables check in _sandbox_check() so denials are collected rather than propagated. - Narrow _sandbox_check() to catch SandboxDenialError only (not broad Exception) and use bare raise to preserve traceback. - Add "required": ["sandbox"] to schema to match loader behavior. --- tools/hrw4u/schema/sandbox.schema.json | 1 + tools/hrw4u/src/symbols.py | 1 + tools/hrw4u/src/visitor.py | 9 +++++---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/hrw4u/schema/sandbox.schema.json b/tools/hrw4u/schema/sandbox.schema.json index ecae39d5408..5ef0c2c06e6 100644 --- a/tools/hrw4u/schema/sandbox.schema.json +++ b/tools/hrw4u/schema/sandbox.schema.json @@ -5,6 +5,7 @@ "description": "Policy deny-list for the hrw4u compiler (--sandbox FILE).", "type": "object", "additionalProperties": false, + "required": ["sandbox"], "properties": { "sandbox": { "type": "object", diff --git a/tools/hrw4u/src/symbols.py b/tools/hrw4u/src/symbols.py index f341e380f48..28b59c7c326 100644 --- a/tools/hrw4u/src/symbols.py +++ b/tools/hrw4u/src/symbols.py @@ -141,6 +141,7 @@ def resolve_add_assignment(self, name: str, value: str, section: SectionType | N def resolve_condition(self, name: str, section: SectionType | None = None) -> tuple[str, bool]: with self.debug_context("resolve_condition", name, section): if symbol := self.symbol_for(name): + self._sandbox.check_language("variables") return symbol.as_cond(), False self._sandbox.check_condition(name) diff --git a/tools/hrw4u/src/visitor.py b/tools/hrw4u/src/visitor.py index a1b441fad1b..b3d0755bcdd 100644 --- a/tools/hrw4u/src/visitor.py +++ b/tools/hrw4u/src/visitor.py @@ -29,7 +29,7 @@ from hrw4u.common import RegexPatterns, SystemDefaults from hrw4u.visitor_base import BaseHRWVisitor from hrw4u.validation import Validator -from hrw4u.sandbox import SandboxConfig +from hrw4u.sandbox import SandboxConfig, SandboxDenialError # Cache regex validator at module level for efficiency _regex_validator = Validator.regex_pattern() @@ -69,9 +69,9 @@ def _sandbox_check(self, ctx, check_fn) -> bool: try: check_fn() return True - except Exception as exc: + except SandboxDenialError: with self.trap(ctx): - raise exc + raise return False @lru_cache(maxsize=256) @@ -336,7 +336,8 @@ def visitVarSection(self, ctx) -> None: else: raise error with self.debug_context("visitVarSection"): - self._sandbox.check_language("variables") + if not self._sandbox_check(ctx, lambda: self._sandbox.check_language("variables")): + return self.visit(ctx.variables()) def visitCommentLine(self, ctx) -> None: From cf55d0a2f87d16f10685bcb4663e784f97902d6d Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Tue, 10 Mar 2026 23:38:06 -0700 Subject: [PATCH 3/5] hrw4u: Add sandbox warn mode and refactor policy internals Add a warn: block alongside deny: in sandbox YAML. Warned features compile successfully but emit warnings to stderr. Refactor SandboxConfig to use a PolicySets dataclass, unify diagnostic formatting, and eliminate duplicated check logic. Ideas-by: Juan Posadas Castillo --- doc/admin-guide/configuration/hrw4u.en.rst | 71 ++++-- tools/hrw4u/schema/sandbox.schema.json | 220 +++++++++--------- tools/hrw4u/src/common.py | 2 + tools/hrw4u/src/errors.py | 86 ++++--- tools/hrw4u/src/sandbox.py | 163 +++++++------ tools/hrw4u/src/symbols.py | 10 +- tools/hrw4u/src/symbols_base.py | 10 + tools/hrw4u/src/visitor.py | 43 +++- .../data/sandbox/warned-function.ast.txt | 1 + .../data/sandbox/warned-function.input.txt | 3 + .../data/sandbox/warned-function.output.txt | 2 + .../data/sandbox/warned-function.sandbox.yaml | 6 + .../data/sandbox/warned-function.warning.txt | 2 + tools/hrw4u/tests/test_sandbox.py | 7 + tools/hrw4u/tests/utils.py | 69 +++++- 15 files changed, 458 insertions(+), 237 deletions(-) create mode 100644 tools/hrw4u/tests/data/sandbox/warned-function.ast.txt create mode 100644 tools/hrw4u/tests/data/sandbox/warned-function.input.txt create mode 100644 tools/hrw4u/tests/data/sandbox/warned-function.output.txt create mode 100644 tools/hrw4u/tests/data/sandbox/warned-function.sandbox.yaml create mode 100644 tools/hrw4u/tests/data/sandbox/warned-function.warning.txt diff --git a/doc/admin-guide/configuration/hrw4u.en.rst b/doc/admin-guide/configuration/hrw4u.en.rst index dbcfd99818d..99f55d0463e 100644 --- a/doc/admin-guide/configuration/hrw4u.en.rst +++ b/doc/admin-guide/configuration/hrw4u.en.rst @@ -546,9 +546,9 @@ Sandbox Policy Enforcement ========================== Organizations deploying HRW4U across teams can restrict which language features -are permitted using a sandbox configuration file. When a denied feature is used, -the compiler emits a clear error with a configurable message explaining the -restriction. +are permitted using a sandbox configuration file. Features can be **denied** +(compilation fails with an error) or **warned** (compilation succeeds but a +warning is emitted). Both modes support the same feature categories. Pass the sandbox file with ``--sandbox``: @@ -563,7 +563,7 @@ Schema for editor validation and autocomplete is provided at .. code-block:: yaml sandbox: - message: | # optional: shown once after all denial errors + message: | # optional: shown once after all errors/warnings ... deny: sections: [ ... ] # section names, e.g. TXN_START @@ -571,8 +571,12 @@ Schema for editor validation and autocomplete is provided at conditions: [ ... ] # condition keys, e.g. geo. operators: [ ... ] # operator keys, e.g. inbound.conn.dscp language: [ ... ] # break, variables, in, else, elif + warn: + functions: [ ... ] # same categories as deny + conditions: [ ... ] -All deny lists are optional. An empty or missing sandbox file permits everything. +All lists are optional. An empty or missing sandbox file permits everything. +A feature may not appear in both ``deny`` and ``warn``. Denied Sections --------------- @@ -614,7 +618,25 @@ in the `Conditions`_ and `Operators`_ tables above (e.g. ``inbound.req.``, Entries ending with ``.`` use **prefix matching** — ``geo.`` denies all ``geo.*`` lookups (``geo.city``, ``geo.ASN``, etc.). Entries without a trailing -``.`` are matched exactly. +``.`` are matched exactly, which allows fine-grained control over sub-values: + +.. code-block:: yaml + + sandbox: + deny: + operators: + - http.cntl.SKIP_REMAP # deny just this sub-value + conditions: + - geo.ASN # deny ASN lookups specifically + warn: + operators: + - http.cntl. # warn on all other http.cntl.* usage + conditions: + - geo. # warn on remaining geo.* lookups + +Prefix entries and exact entries can be combined across ``deny`` and ``warn`` +to create graduated policies — deny the dangerous sub-values while warning on +the rest. Language Constructs ------------------- @@ -622,7 +644,7 @@ Language Constructs The ``language`` list accepts a fixed set of constructs: ================ =================================================== -Construct What it denies +Construct What it controls ================ =================================================== ``break`` The ``break;`` statement (early section exit) ``variables`` The entire ``VARS`` section and all variable usage @@ -631,8 +653,8 @@ Construct What it denies ``in`` The ``in [...]`` and ``!in [...]`` set membership operators ================ =================================================== -Error Output ------------- +Output +------ When a denied feature is used the error output looks like: @@ -643,14 +665,25 @@ When a denied feature is used the error output looks like: This feature is restricted by CDN-SRE policy. Contact cdn-sre@example.com for exceptions. -The sandbox message is shown once at the end of the error summary, regardless -of how many denial errors were found. +When a warned feature is used the compiler emits a warning but succeeds: + +.. code-block:: none + + rules.hrw4u:5:4: warning: 'set-config' is warned by sandbox policy (function) + + This feature is restricted by CDN-SRE policy. + Contact cdn-sre@example.com for exceptions. + +The sandbox message is shown once at the end of the output, regardless of how +many denial errors or warnings were found. Warnings alone do not cause a +non-zero exit code. Example Configuration --------------------- A typical policy for a CDN team where remap plugin authors should not have -access to low-level or dangerous features: +access to low-level or dangerous features, with transitional warnings for +features being phased out: .. code-block:: yaml @@ -669,9 +702,21 @@ access to low-level or dangerous features: # Disallow functions that affect ATS internals or load arbitrary code functions: - run-plugin + - skip-remap + + # Deny a specific dangerous sub-value + operators: + - http.cntl.SKIP_REMAP + + warn: + # These functions will be denied in a future release + functions: - set-debug - set-config - - skip-remap + + # Warn on all remaining http.cntl usage + operators: + - http.cntl. Examples ======== diff --git a/tools/hrw4u/schema/sandbox.schema.json b/tools/hrw4u/schema/sandbox.schema.json index 5ef0c2c06e6..516064ceee1 100644 --- a/tools/hrw4u/schema/sandbox.schema.json +++ b/tools/hrw4u/schema/sandbox.schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://trafficserver.apache.org/schemas/hrw4u-sandbox.schema.json", "title": "HRW4U Sandbox Configuration", - "description": "Policy deny-list for the hrw4u compiler (--sandbox FILE).", + "description": "Policy deny-list and warn-list for the hrw4u compiler (--sandbox FILE).", "type": "object", "additionalProperties": false, "required": ["sandbox"], @@ -13,112 +13,122 @@ "properties": { "message": { "type": "string", - "description": "Free-form text appended once after all denial errors. Use this to explain the policy and provide a contact or ticket link." + "description": "Free-form text appended once after all denial errors and warnings. Use this to explain the policy and provide a contact or ticket link." }, "deny": { - "type": "object", - "additionalProperties": false, - "properties": { - "sections": { - "type": "array", - "description": "HRW4U section names to deny. A denied section rejects the entire block without validating its body.", - "items": { - "type": "string", - "enum": [ - "TXN_START", - "PRE_REMAP", - "REMAP", - "READ_REQUEST", - "SEND_REQUEST", - "READ_RESPONSE", - "SEND_RESPONSE", - "TXN_CLOSE", - "VARS" - ] - }, - "uniqueItems": true - }, - "functions": { - "type": "array", - "description": "Statement function names to deny.", - "items": { - "type": "string", - "enum": [ - "add-header", - "counter", - "keep_query", - "no-op", - "remove_query", - "run-plugin", - "set-body-from", - "set-config", - "set-debug", - "set-plugin-cntl", - "set-redirect", - "skip-remap" - ] - }, - "uniqueItems": true - }, - "conditions": { - "type": "array", - "description": "Condition keys to deny. Entries ending with '.' use prefix matching (e.g. 'geo.' denies all geo.* lookups). Keys match those in the HRW4U condition table.", - "items": { - "type": "string" - }, - "uniqueItems": true, - "examples": [ - ["geo.", "tcp.info", "inbound.conn.", "outbound.conn."] - ] - }, - "operators": { - "type": "array", - "description": "Operator (assignment target) keys to deny. Entries ending with '.' use prefix matching. Keys match those in the HRW4U operator table.", - "items": { - "type": "string" - }, - "uniqueItems": true, - "examples": [ - ["inbound.conn.dscp", "inbound.conn.mark", "outbound.conn.dscp", "outbound.conn.mark"] - ] - }, - "language": { - "type": "array", - "description": "Language constructs to deny.", - "items": { - "type": "string", - "enum": [ - "break", - "variables", - "in", - "else", - "elif" - ] - }, - "uniqueItems": true - }, - "modifiers": { - "type": "array", - "description": "Condition and operator modifiers to deny.", - "items": { - "type": "string", - "enum": [ - "AND", - "OR", - "NOT", - "NOCASE", - "PRE", - "SUF", - "EXT", - "MID", - "I", - "L", - "QSA" - ] - }, - "uniqueItems": true - } - } + "$ref": "#/$defs/categoryBlock", + "description": "Features listed here are denied: compilation fails with an error." + }, + "warn": { + "$ref": "#/$defs/categoryBlock", + "description": "Features listed here produce warnings but compilation succeeds." + } + } + } + }, + "$defs": { + "categoryBlock": { + "type": "object", + "additionalProperties": false, + "properties": { + "sections": { + "type": "array", + "description": "HRW4U section names. A denied section rejects the entire block; a warned section emits a warning.", + "items": { + "type": "string", + "enum": [ + "TXN_START", + "PRE_REMAP", + "REMAP", + "READ_REQUEST", + "SEND_REQUEST", + "READ_RESPONSE", + "SEND_RESPONSE", + "TXN_CLOSE", + "VARS" + ] + }, + "uniqueItems": true + }, + "functions": { + "type": "array", + "description": "Statement function names.", + "items": { + "type": "string", + "enum": [ + "add-header", + "counter", + "keep_query", + "no-op", + "remove_query", + "run-plugin", + "set-body-from", + "set-config", + "set-debug", + "set-plugin-cntl", + "set-redirect", + "skip-remap" + ] + }, + "uniqueItems": true + }, + "conditions": { + "type": "array", + "description": "Condition keys. Entries ending with '.' use prefix matching (e.g. 'geo.' matches all geo.* lookups).", + "items": { + "type": "string" + }, + "uniqueItems": true, + "examples": [ + ["geo.", "tcp.info", "inbound.conn.", "outbound.conn."] + ] + }, + "operators": { + "type": "array", + "description": "Operator (assignment target) keys. Entries ending with '.' use prefix matching.", + "items": { + "type": "string" + }, + "uniqueItems": true, + "examples": [ + ["inbound.conn.dscp", "inbound.conn.mark", "outbound.conn.dscp", "outbound.conn.mark"] + ] + }, + "language": { + "type": "array", + "description": "Language constructs.", + "items": { + "type": "string", + "enum": [ + "break", + "variables", + "in", + "else", + "elif" + ] + }, + "uniqueItems": true + }, + "modifiers": { + "type": "array", + "description": "Condition and operator modifiers.", + "items": { + "type": "string", + "enum": [ + "AND", + "OR", + "NOT", + "NOCASE", + "PRE", + "SUF", + "EXT", + "MID", + "I", + "L", + "QSA" + ] + }, + "uniqueItems": true } } } diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index 877d4a03c06..31ca386ed76 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -237,6 +237,8 @@ def generate_output( print(error_collector.get_error_summary(), file=sys.stderr) if not args.ast and tree is None: sys.exit(1) + elif error_collector and error_collector.has_warnings(): + print(error_collector.get_error_summary(), file=sys.stderr) def run_main( diff --git a/tools/hrw4u/src/errors.py b/tools/hrw4u/src/errors.py index e3e6ed2f8f2..b0011daf0fe 100644 --- a/tools/hrw4u/src/errors.py +++ b/tools/hrw4u/src/errors.py @@ -20,6 +20,24 @@ from antlr4.error.ErrorListener import ErrorListener +def _format_diagnostic(filename: str, line: int, col: int, severity: str, message: str, source_line: str) -> str: + header = f"{filename}:{line}:{col}: {severity}: {message}" + + lineno = f"{line:4d}" + code_line = f"{lineno} | {source_line}" + pointer_line = f"{' ' * 4} | {' ' * col}^" + return f"{header}\n{code_line}\n{pointer_line}" + + +def _extract_source_context(ctx: object) -> tuple[int, int, str]: + try: + input_stream = ctx.start.getInputStream() + source_line = input_stream.strdata.splitlines()[ctx.start.line - 1] + return ctx.start.line, ctx.start.column, source_line + except Exception: + return 0, 0, "" + + class ThrowingErrorListener(ErrorListener): """ANTLR error listener that throws exceptions on syntax errors.""" @@ -49,7 +67,7 @@ class Hrw4uSyntaxError(Exception): """Formatted syntax error with source context and Python 3.11+ exception notes.""" def __init__(self, filename: str, line: int, column: int, message: str, source_line: str) -> None: - super().__init__(self._format_error(filename, line, column, message, source_line)) + super().__init__(_format_diagnostic(filename, line, column, "error", message, source_line)) self.filename = filename self.line = line self.column = column @@ -63,14 +81,6 @@ def add_resolution_hint(self, hint: str) -> None: """Add resolution hint using Python 3.11+ exception notes.""" self.add_note(f"Hint: {hint}") - def _format_error(self, filename: str, line: int, col: int, message: str, source_line: str) -> str: - error = f"{filename}:{line}:{col}: error: {message}" - - lineno = f"{line:4d}" - code_line = f"{lineno} | {source_line}" - pointer_line = f"{' ' * 4} | {' ' * col}^" - return f"{error}\n{code_line}\n{pointer_line}" - class SymbolResolutionError(Exception): @@ -88,16 +98,8 @@ def hrw4u_error(filename: str, ctx: object, exc: Exception) -> Hrw4uSyntaxError: if isinstance(exc, Hrw4uSyntaxError): return exc - if ctx is None: - error = Hrw4uSyntaxError(filename, 0, 0, str(exc), "") - else: - try: - input_stream = ctx.start.getInputStream() - source_line = input_stream.strdata.splitlines()[ctx.start.line - 1] - except Exception: - source_line = "" - - error = Hrw4uSyntaxError(filename, ctx.start.line, ctx.start.column, str(exc), source_line) + line, col, source_line = _extract_source_context(ctx) if ctx else (0, 0, "") + error = Hrw4uSyntaxError(filename, line, col, str(exc), source_line) if hasattr(exc, '__notes__') and exc.__notes__: for note in exc.__notes__: @@ -106,42 +108,58 @@ def hrw4u_error(filename: str, ctx: object, exc: Exception) -> Hrw4uSyntaxError: return error +def format_diagnostic(filename: str, ctx: object, severity: str, message: str) -> str: + """Format a diagnostic message (error/warning) with source context from a parser ctx.""" + line, col, source_line = _extract_source_context(ctx) + return _format_diagnostic(filename, line, col, severity, message, source_line) + + class ErrorCollector: - """Collects multiple syntax errors for comprehensive error reporting.""" + """Collects multiple syntax errors and warnings for comprehensive reporting.""" def __init__(self) -> None: - """Initialize an empty error collector.""" self.errors: list[Hrw4uSyntaxError] = [] + self.warnings: list[str] = [] self._sandbox_message: str | None = None def add_error(self, error: Hrw4uSyntaxError) -> None: - """ - Add a syntax error to the collection. - """ self.errors.append(error) + def add_warning(self, warning: str) -> None: + self.warnings.append(warning) + def set_sandbox_message(self, message: str) -> None: """Record the sandbox policy message to display once at the end.""" if message and self._sandbox_message is None: self._sandbox_message = message def has_errors(self) -> bool: - """ - Check if any errors have been collected. - """ return bool(self.errors) + def has_warnings(self) -> bool: + return bool(self.warnings) + def get_error_summary(self) -> str: - if not self.errors: + if not self.errors and not self.warnings: return "No errors found." - count = len(self.errors) - lines = [f"Found {count} error{'s' if count > 1 else ''}:"] + lines: list[str] = [] + + if self.errors: + count = len(self.errors) + lines.append(f"Found {count} error{'s' if count > 1 else ''}:") + + for error in self.errors: + lines.append(str(error)) + if hasattr(error, '__notes__') and error.__notes__: + lines.extend(error.__notes__) - for error in self.errors: - lines.append(str(error)) - if hasattr(error, '__notes__') and error.__notes__: - lines.extend(error.__notes__) + if self.warnings: + if self.errors: + lines.append("") + count = len(self.warnings) + lines.append(f"{count} warning{'s' if count > 1 else ''}:") + lines.extend(self.warnings) if self._sandbox_message: lines.append("") diff --git a/tools/hrw4u/src/sandbox.py b/tools/hrw4u/src/sandbox.py index 6e3fdb5829c..75ceb76593e 100644 --- a/tools/hrw4u/src/sandbox.py +++ b/tools/hrw4u/src/sandbox.py @@ -19,7 +19,7 @@ from __future__ import annotations import yaml -from dataclasses import dataclass +from dataclasses import dataclass, fields from pathlib import Path from typing import Any @@ -34,41 +34,84 @@ def __init__(self, name: str, category: str, message: str) -> None: self.sandbox_message = message -_VALID_DENY_KEYS = frozenset({"sections", "functions", "conditions", "operators", "language", "modifiers"}) +_VALID_CATEGORY_KEYS = frozenset({"sections", "functions", "conditions", "operators", "language", "modifiers"}) _VALID_LANGUAGE_CONSTRUCTS = frozenset({"break", "variables", "in", "else", "elif"}) _VALID_MODIFIERS = frozenset({"AND", "OR", "NOT", "NOCASE", "PRE", "SUF", "EXT", "MID", "I", "L", "QSA"}) -def _load_deny_set(deny: dict[str, Any], key: str) -> frozenset[str]: - raw = deny.get(key) +def _load_set(data: dict[str, Any], key: str, prefix: str) -> frozenset[str]: + raw = data.get(key) if raw is None: return frozenset() if not isinstance(raw, list): - raise ValueError(f"sandbox.deny.{key} must be a list, got {type(raw).__name__}") + raise ValueError(f"sandbox.{prefix}.{key} must be a list, got {type(raw).__name__}") return frozenset(str(item).strip() for item in raw if item) -def _is_denied(name: str, deny_set: frozenset[str]) -> bool: - if name in deny_set: +def _is_matched(name: str, name_set: frozenset[str]) -> bool: + if name in name_set: return True - for entry in deny_set: + for entry in name_set: if entry.endswith(".") and name.startswith(entry): return True return False +@dataclass(frozen=True) +class PolicySets: + """A set of sandbox policy entries for one severity level (deny or warn).""" + sections: frozenset[str] = frozenset() + functions: frozenset[str] = frozenset() + conditions: frozenset[str] = frozenset() + operators: frozenset[str] = frozenset() + language: frozenset[str] = frozenset() + modifiers: frozenset[str] = frozenset() + + @classmethod + def load(cls, data: dict[str, Any], prefix: str) -> PolicySets: + if not isinstance(data, dict): + raise ValueError(f"sandbox.{prefix} must be a mapping") + + unknown_keys = set(data.keys()) - _VALID_CATEGORY_KEYS + if unknown_keys: + raise ValueError(f"Unknown keys in sandbox.{prefix}: {', '.join(sorted(unknown_keys))}") + + language = _load_set(data, "language", prefix) + unknown_lang = language - _VALID_LANGUAGE_CONSTRUCTS + if unknown_lang: + raise ValueError( + f"Unknown language constructs in sandbox.{prefix}: {', '.join(sorted(unknown_lang))}. " + f"Valid: {', '.join(sorted(_VALID_LANGUAGE_CONSTRUCTS))}") + + modifiers = frozenset(s.upper() for s in _load_set(data, "modifiers", prefix)) + unknown_mods = modifiers - _VALID_MODIFIERS + if unknown_mods: + raise ValueError( + f"Unknown modifiers in sandbox.{prefix}: {', '.join(sorted(unknown_mods))}. " + f"Valid: {', '.join(sorted(_VALID_MODIFIERS))}") + + return cls( + sections=_load_set(data, "sections", prefix), + functions=_load_set(data, "functions", prefix), + conditions=_load_set(data, "conditions", prefix), + operators=_load_set(data, "operators", prefix), + language=language, + modifiers=modifiers, + ) + + @property + def is_active(self) -> bool: + return any(getattr(self, f.name) for f in fields(self)) + + @dataclass(frozen=True) class SandboxConfig: message: str - denied_sections: frozenset[str] - denied_functions: frozenset[str] - denied_conditions: frozenset[str] - denied_operators: frozenset[str] - denied_language: frozenset[str] - denied_modifiers: frozenset[str] + deny: PolicySets + warn: PolicySets @classmethod def load(cls, path: Path) -> SandboxConfig: @@ -84,76 +127,54 @@ def load(cls, path: Path) -> SandboxConfig: message = str(sandbox.get("message", "")).strip() - deny = sandbox.get("deny", {}) - if not isinstance(deny, dict): + deny_data = sandbox.get("deny", {}) + if not isinstance(deny_data, dict): raise ValueError(f"sandbox.deny must be a mapping: {path}") + deny = PolicySets.load(deny_data, "deny") - unknown_keys = set(deny.keys()) - _VALID_DENY_KEYS - if unknown_keys: - raise ValueError(f"Unknown keys in sandbox.deny: {', '.join(sorted(unknown_keys))}") + warn_data = sandbox.get("warn", {}) + if not isinstance(warn_data, dict): + raise ValueError(f"sandbox.warn must be a mapping: {path}") + warn = PolicySets.load(warn_data, "warn") - language = _load_deny_set(deny, "language") - unknown_lang = language - _VALID_LANGUAGE_CONSTRUCTS - if unknown_lang: - raise ValueError( - f"Unknown language constructs: {', '.join(sorted(unknown_lang))}. " - f"Valid: {', '.join(sorted(_VALID_LANGUAGE_CONSTRUCTS))}") + for f in fields(PolicySets): + overlap = getattr(deny, f.name) & getattr(warn, f.name) + if overlap: + raise ValueError(f"sandbox.deny.{f.name} and sandbox.warn.{f.name} overlap: {', '.join(sorted(overlap))}") - modifiers = frozenset(s.upper() for s in _load_deny_set(deny, "modifiers")) - unknown_mods = modifiers - _VALID_MODIFIERS - if unknown_mods: - raise ValueError( - f"Unknown modifiers: {', '.join(sorted(unknown_mods))}. " - f"Valid: {', '.join(sorted(_VALID_MODIFIERS))}") - - return cls( - message=message, - denied_sections=_load_deny_set(deny, "sections"), - denied_functions=_load_deny_set(deny, "functions"), - denied_conditions=_load_deny_set(deny, "conditions"), - denied_operators=_load_deny_set(deny, "operators"), - denied_language=language, - denied_modifiers=modifiers, - ) + return cls(message=message, deny=deny, warn=warn) @classmethod def empty(cls) -> SandboxConfig: - return cls( - message="", - denied_sections=frozenset(), - denied_functions=frozenset(), - denied_conditions=frozenset(), - denied_operators=frozenset(), - denied_language=frozenset(), - denied_modifiers=frozenset(), - ) + return cls(message="", deny=PolicySets(), warn=PolicySets()) @property def is_active(self) -> bool: - return bool( - self.denied_sections or self.denied_functions or self.denied_conditions or self.denied_operators or - self.denied_language or self.denied_modifiers) + return self.deny.is_active or self.warn.is_active + + def _check(self, name: str, category: str) -> str | None: + display = category.rstrip("s") + + if _is_matched(name, getattr(self.deny, category)): + raise SandboxDenialError(name, display, self.message) + if _is_matched(name, getattr(self.warn, category)): + return f"'{name}' is warned by sandbox policy ({display})" + return None - def check_section(self, section_name: str) -> None: - if _is_denied(section_name, self.denied_sections): - raise SandboxDenialError(section_name, "section", self.message) + def check_section(self, section_name: str) -> str | None: + return self._check(section_name, "sections") - def check_function(self, func_name: str) -> None: - if _is_denied(func_name, self.denied_functions): - raise SandboxDenialError(func_name, "function", self.message) + def check_function(self, func_name: str) -> str | None: + return self._check(func_name, "functions") - def check_condition(self, condition_key: str) -> None: - if _is_denied(condition_key, self.denied_conditions): - raise SandboxDenialError(condition_key, "condition", self.message) + def check_condition(self, condition_key: str) -> str | None: + return self._check(condition_key, "conditions") - def check_operator(self, operator_key: str) -> None: - if _is_denied(operator_key, self.denied_operators): - raise SandboxDenialError(operator_key, "operator", self.message) + def check_operator(self, operator_key: str) -> str | None: + return self._check(operator_key, "operators") - def check_language(self, construct: str) -> None: - if construct in self.denied_language: - raise SandboxDenialError(construct, "language", self.message) + def check_language(self, construct: str) -> str | None: + return self._check(construct, "language") - def check_modifier(self, modifier: str) -> None: - if modifier.upper() in self.denied_modifiers: - raise SandboxDenialError(modifier, "modifier", self.message) + def check_modifier(self, modifier: str) -> str | None: + return self._check(modifier.upper(), "modifiers") diff --git a/tools/hrw4u/src/symbols.py b/tools/hrw4u/src/symbols.py index 28b59c7c326..63504042546 100644 --- a/tools/hrw4u/src/symbols.py +++ b/tools/hrw4u/src/symbols.py @@ -75,7 +75,7 @@ def declare_variable(self, name: str, type_name: str, explicit_slot: int | None def resolve_assignment(self, name: str, value: str, section: SectionType | None = None) -> str: with self.debug_context("resolve_assignment", name, value, section): - self._sandbox.check_operator(name) + self._collect_warning(self._sandbox.check_operator(name)) for op_key, params in self._operator_map.items(): if op_key.endswith("."): @@ -121,7 +121,7 @@ def resolve_assignment(self, name: str, value: str, section: SectionType | None def resolve_add_assignment(self, name: str, value: str, section: SectionType | None = None) -> str: """Resolve += assignment, if it is supported for the given operator.""" with self.debug_context("resolve_add_assignment", name, value, section): - self._sandbox.check_operator(name) + self._collect_warning(self._sandbox.check_operator(name)) for op_key, params in self._operator_map.items(): if op_key.endswith(".") and name.startswith(op_key) and params and params.add: @@ -141,10 +141,10 @@ def resolve_add_assignment(self, name: str, value: str, section: SectionType | N def resolve_condition(self, name: str, section: SectionType | None = None) -> tuple[str, bool]: with self.debug_context("resolve_condition", name, section): if symbol := self.symbol_for(name): - self._sandbox.check_language("variables") + self._collect_warning(self._sandbox.check_language("variables")) return symbol.as_cond(), False - self._sandbox.check_condition(name) + self._collect_warning(self._sandbox.check_condition(name)) if params := self._lookup_condition_cached(name): tag = params.target if params else None @@ -200,7 +200,7 @@ def resolve_function(self, func_name: str, args: list[str], strip_quotes: bool = def resolve_statement_func(self, func_name: str, args: list[str], section: SectionType | None = None) -> str: with self.debug_context("resolve_statement_func", func_name, args, section): - self._sandbox.check_function(func_name) + self._collect_warning(self._sandbox.check_function(func_name)) if params := self._lookup_statement_function_cached(func_name): allowed_sections = params.sections if params else None diff --git a/tools/hrw4u/src/symbols_base.py b/tools/hrw4u/src/symbols_base.py index d929d576d21..cb58bb86952 100644 --- a/tools/hrw4u/src/symbols_base.py +++ b/tools/hrw4u/src/symbols_base.py @@ -32,6 +32,7 @@ class SymbolResolverBase: def __init__(self, debug: bool = SystemDefaults.DEFAULT_DEBUG, sandbox: SandboxConfig | None = None) -> None: self._dbg = Dbg(debug) self._sandbox = sandbox or SandboxConfig.empty() + self._sandbox_warnings: list[str] = [] # Clear caches when debug status changes to ensure consistency if hasattr(self, '_condition_cache'): self._condition_cache.cache_clear() @@ -43,6 +44,15 @@ def __init__(self, debug: bool = SystemDefaults.DEFAULT_DEBUG, sandbox: SandboxC def _condition_map(self) -> dict[str, types.MapParams]: return tables.CONDITION_MAP + def _collect_warning(self, warning: str | None) -> None: + if warning: + self._sandbox_warnings.append(warning) + + def drain_warnings(self) -> list[str]: + warnings = self._sandbox_warnings[:] + self._sandbox_warnings.clear() + return warnings + @cached_property def _operator_map(self) -> dict[str, types.MapParams]: return tables.OPERATOR_MAP diff --git a/tools/hrw4u/src/visitor.py b/tools/hrw4u/src/visitor.py index b3d0755bcdd..21fb06d3f75 100644 --- a/tools/hrw4u/src/visitor.py +++ b/tools/hrw4u/src/visitor.py @@ -24,7 +24,7 @@ from hrw4u.hrw4uVisitor import hrw4uVisitor from hrw4u.hrw4uParser import hrw4uParser from hrw4u.symbols import SymbolResolver, SymbolResolutionError -from hrw4u.errors import hrw4u_error +from hrw4u.errors import hrw4u_error, format_diagnostic from hrw4u.states import CondState, SectionType from hrw4u.common import RegexPatterns, SystemDefaults from hrw4u.visitor_base import BaseHRWVisitor @@ -64,16 +64,30 @@ def __init__( def _sandbox_check(self, ctx, check_fn) -> bool: """Run a sandbox check, trapping any denial error into the error collector. - Returns True if the check passed, False if denied (error was collected). + Returns True if the check passed (or warned), False if denied. """ try: - check_fn() + warning = check_fn() + if warning: + self._add_sandbox_warning(ctx, warning) return True except SandboxDenialError: with self.trap(ctx): raise return False + def _add_sandbox_warning(self, ctx, message: str) -> None: + """Format and collect a sandbox warning with source context.""" + if self.error_collector: + self.error_collector.add_warning(format_diagnostic(self.filename, ctx, "warning", message)) + if self._sandbox.message: + self.error_collector.set_sandbox_message(self._sandbox.message) + + def _drain_resolver_warnings(self, ctx) -> None: + """Drain any warnings accumulated in the symbol resolver.""" + for warning in self.symbol_resolver.drain_warnings(): + self._add_sandbox_warning(ctx, warning) + @lru_cache(maxsize=256) def _cached_symbol_resolution(self, symbol_text: str, section_name: str) -> tuple[str, bool]: """Cache expensive symbol resolution operations.""" @@ -174,12 +188,14 @@ def repl(m: re.Match) -> str: arg_str = m.group("args").strip() args = self._parse_function_args(arg_str) if arg_str else [] replacement = self.symbol_resolver.resolve_function(func_name, args, strip_quotes=False) + self._drain_resolver_warnings(ctx) self.debug_log(f"substitute: {{{func_name}({arg_str})}} -> {replacement}") return replacement if m.group("var"): var_name = m.group("var").strip() # Use resolve_condition directly to properly validate section restrictions replacement, _ = self.symbol_resolver.resolve_condition(var_name, self.current_section) + self._drain_resolver_warnings(ctx) self.debug_log(f"substitute: {{{var_name}}} -> {replacement}") return replacement raise SymbolResolutionError(m.group(0), "Unrecognized substitution format") @@ -266,7 +282,9 @@ def _prepare_section(self, ctx): raise SymbolResolutionError("section", "Missing section name") section_name = ctx.name.text - self._sandbox.check_section(section_name) + warning = self._sandbox.check_section(section_name) + if warning: + self._add_sandbox_warning(ctx, warning) try: self.current_section = SectionType(section_name) @@ -352,7 +370,9 @@ def visitStatement(self, ctx) -> None: with self.debug_context("visitStatement"), self.trap(ctx): match ctx: case _ if ctx.BREAK(): - self._sandbox.check_language("break") + warning = self._sandbox.check_language("break") + if warning: + self._add_sandbox_warning(ctx, warning) self._dbg("BREAK") self.emit_statement("no-op [L]") return @@ -363,6 +383,7 @@ def visitStatement(self, ctx) -> None: self._substitute_strings(arg, ctx) if arg.startswith('"') and arg.endswith('"') else arg for arg in args ] symbol = self.symbol_resolver.resolve_statement_func(func, subst_args, self.current_section) + self._drain_resolver_warnings(ctx) self.emit_statement(symbol) return @@ -375,6 +396,7 @@ def visitStatement(self, ctx) -> None: rhs = self._substitute_strings(rhs, ctx) self._dbg(f"assignment: {lhs} = {rhs}") out = self.symbol_resolver.resolve_assignment(lhs, rhs, self.current_section) + self._drain_resolver_warnings(ctx) self.emit_statement(out) return @@ -387,6 +409,7 @@ def visitStatement(self, ctx) -> None: rhs = self._substitute_strings(rhs, ctx) self._dbg(f"add assignment: {lhs} += {rhs}") out = self.symbol_resolver.resolve_add_assignment(lhs, rhs, self.current_section) + self._drain_resolver_warnings(ctx) self.emit_statement(out) return @@ -500,6 +523,7 @@ def visitComparison(self, ctx, *, last: bool = False) -> None: if comp.ident: ident_name = comp.ident.text lhs, _ = self._resolve_identifier_with_validation(ident_name) + self._drain_resolver_warnings(ctx) else: lhs = self.visitFunctionCall(comp.functionCall()) if not lhs: @@ -558,7 +582,9 @@ def visitModifier(self, ctx) -> None: for token in ctx.modifierList().mods: try: mod = token.text.upper() - self._sandbox.check_modifier(mod) + warning = self._sandbox.check_modifier(mod) + if warning: + self._add_sandbox_warning(ctx, warning) self._cond_state.add_modifier(mod) except Exception as exc: with self.trap(ctx): @@ -569,7 +595,9 @@ def visitFunctionCall(self, ctx) -> str: with self.trap(ctx): func, raw_args = self._parse_function_call(ctx) self._dbg(f"function: {func}({', '.join(raw_args)})") - return self.symbol_resolver.resolve_function(func, raw_args, strip_quotes=True) + result = self.symbol_resolver.resolve_function(func, raw_args, strip_quotes=True) + self._drain_resolver_warnings(ctx) + return result return "ERROR" def emit_condition(self, text: str, *, final: bool = False) -> None: @@ -677,6 +705,7 @@ def emit_factor(self, ctx, *, last: bool = False) -> None: case _ if ctx.ident: name = ctx.ident.text symbol, default_expr = self._resolve_identifier_with_validation(name) + self._drain_resolver_warnings(ctx) if default_expr: cond_txt = f"{symbol} =\"\"" diff --git a/tools/hrw4u/tests/data/sandbox/warned-function.ast.txt b/tools/hrw4u/tests/data/sandbox/warned-function.ast.txt new file mode 100644 index 00000000000..fda638a730c --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/warned-function.ast.txt @@ -0,0 +1 @@ +(program (programItem (section REMAP { (sectionBody (statement (functionCall set-config ( (argumentList (value "proxy.config.http.insert_age_in_response") , (value "0")) )) ;)) })) ) diff --git a/tools/hrw4u/tests/data/sandbox/warned-function.input.txt b/tools/hrw4u/tests/data/sandbox/warned-function.input.txt new file mode 100644 index 00000000000..a4ec2ee976f --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/warned-function.input.txt @@ -0,0 +1,3 @@ +REMAP { + set-config("proxy.config.http.insert_age_in_response", "0"); +} diff --git a/tools/hrw4u/tests/data/sandbox/warned-function.output.txt b/tools/hrw4u/tests/data/sandbox/warned-function.output.txt new file mode 100644 index 00000000000..8e97ce7d990 --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/warned-function.output.txt @@ -0,0 +1,2 @@ +cond %{REMAP_PSEUDO_HOOK} [AND] + set-config "proxy.config.http.insert_age_in_response" "0" diff --git a/tools/hrw4u/tests/data/sandbox/warned-function.sandbox.yaml b/tools/hrw4u/tests/data/sandbox/warned-function.sandbox.yaml new file mode 100644 index 00000000000..fd4ce1fadac --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/warned-function.sandbox.yaml @@ -0,0 +1,6 @@ +sandbox: + message: "This feature will be denied in a future release. Contact platform team." + + warn: + functions: + - set-config diff --git a/tools/hrw4u/tests/data/sandbox/warned-function.warning.txt b/tools/hrw4u/tests/data/sandbox/warned-function.warning.txt new file mode 100644 index 00000000000..12f6fdeb3ad --- /dev/null +++ b/tools/hrw4u/tests/data/sandbox/warned-function.warning.txt @@ -0,0 +1,2 @@ +'set-config' is warned by sandbox policy (function) +This feature will be denied in a future release. Contact platform team. diff --git a/tools/hrw4u/tests/test_sandbox.py b/tools/hrw4u/tests/test_sandbox.py index 75016c8aa19..fbc0e7a025d 100644 --- a/tools/hrw4u/tests/test_sandbox.py +++ b/tools/hrw4u/tests/test_sandbox.py @@ -34,3 +34,10 @@ def test_sandbox_denials(input_file: Path, error_file: Path, sandbox_file: Path) def test_sandbox_allowed(input_file: Path, output_file: Path, sandbox_file: Path) -> None: """Test that features not in the deny list compile normally under a sandbox.""" utils.run_sandbox_allow_test(input_file, output_file, sandbox_file) + + +@pytest.mark.sandbox +@pytest.mark.parametrize("input_file,warning_file,output_file,sandbox_file", utils.collect_sandbox_warn_test_files("sandbox")) +def test_sandbox_warnings(input_file: Path, warning_file: Path, output_file: Path, sandbox_file: Path) -> None: + """Test that sandbox-warned features produce warnings but compile successfully.""" + utils.run_sandbox_warn_test(input_file, warning_file, output_file, sandbox_file) diff --git a/tools/hrw4u/tests/utils.py b/tools/hrw4u/tests/utils.py index 6f486e7ab48..16b4c518719 100644 --- a/tools/hrw4u/tests/utils.py +++ b/tools/hrw4u/tests/utils.py @@ -55,11 +55,13 @@ def __init__(self, filename: str, line: int, column: int, message: str, source_l "collect_failing_inputs", "collect_sandbox_deny_test_files", "collect_sandbox_allow_test_files", + "collect_sandbox_warn_test_files", "run_output_test", "run_ast_test", "run_failing_test", "run_sandbox_deny_test", "run_sandbox_allow_test", + "run_sandbox_warn_test", "run_reverse_test", "run_bulk_test", ] @@ -181,8 +183,40 @@ def collect_sandbox_deny_test_files(group: str) -> Iterator[pytest.param]: def collect_sandbox_allow_test_files(group: str) -> Iterator[pytest.param]: - """Collect sandbox allow test files: (input, output, sandbox_config).""" - yield from _collect_sandbox_test_files(group, ".output.txt") + """Collect sandbox allow test files: (input, output, sandbox_config). + + Skips warned-* files which are handled by collect_sandbox_warn_test_files. + """ + for param in _collect_sandbox_test_files(group, ".output.txt"): + input_file = param.values[0] + if not input_file.name.startswith("warned-"): + yield param + + +def collect_sandbox_warn_test_files(group: str) -> Iterator[pytest.param]: + """Collect sandbox warning test files: (input, warning, output, sandbox_config). + + Warning tests have both a .warning.txt (expected warning phrases) and a + .output.txt (expected compiled output), since compilation should succeed. + """ + base_dir = Path("tests/data") / group + shared_sandbox = base_dir / "sandbox.yaml" + + for input_file in sorted(base_dir.glob("warned-*.input.txt")): + base = input_file.with_suffix("").with_suffix("") + warning_file = base.with_suffix(".warning.txt") + output_file = base.with_suffix(".output.txt") + + if not warning_file.exists() or not output_file.exists(): + continue + + per_test_sandbox = base.with_suffix(".sandbox.yaml") + sandbox_file = per_test_sandbox if per_test_sandbox.exists() else shared_sandbox + + if not sandbox_file.exists(): + continue + + yield pytest.param(input_file, warning_file, output_file, sandbox_file, id=base.name) def run_output_test(input_file: Path, output_file: Path) -> None: @@ -340,6 +374,37 @@ def run_sandbox_allow_test(input_file: Path, output_file: Path, sandbox_file: Pa assert actual_output == expected_output, f"Output mismatch in {input_file}" +def run_sandbox_warn_test(input_file: Path, warning_file: Path, output_file: Path, sandbox_file: Path) -> None: + """Run a sandbox warning test: compilation succeeds, warnings are emitted, output matches.""" + text = input_file.read_text() + parser, tree = parse_input_text(text) + + sandbox = SandboxConfig.load(sandbox_file) + error_collector = ErrorCollector() + visitor = HRW4UVisitor(filename=str(input_file), error_collector=error_collector, sandbox=sandbox) + actual_output = "\n".join(visitor.visit(tree) or []).strip() + + assert not error_collector.has_errors(), ( + f"Expected no errors but got errors in {input_file}:\n" + f"{error_collector.get_error_summary()}") + + assert error_collector.has_warnings(), f"Expected warnings but none were emitted for {input_file}" + + actual_summary = error_collector.get_error_summary() + expected_warnings = warning_file.read_text().strip() + + for line in expected_warnings.splitlines(): + line = line.strip() + if line: + assert line in actual_summary, ( + f"Expected warning phrase not found for {input_file}:\n" + f" Missing: {line!r}\n" + f"Actual summary:\n{actual_summary}") + + expected_output = output_file.read_text().strip() + assert actual_output == expected_output, f"Output mismatch in {input_file}" + + def run_reverse_test(input_file: Path, output_file: Path) -> None: """Run u4wrh on output.txt and compare with input.txt (round-trip test).""" output_text = output_file.read_text() From 392c564905ca8c6c93256356f9e2a8b4f49b3c8e Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Wed, 11 Mar 2026 09:29:14 -0700 Subject: [PATCH 4/5] Address Copilot's review comments - Surface bracket-modifier warnings from _parse_op_tails() to the error_collector via format_diagnostic. - Clarify docs: omitting --sandbox permits everything; a provided file must have a 'sandbox:' key. - Emit sandbox warnings as LSP severity=2 diagnostics in hrw4u-lsp. - Use parse_known_args() in hrw4u-lsp to tolerate editor-injected flags. - Enforce check_section("VARS") in visitVarSection() so deny/warn.sections: [VARS] works as documented. --- doc/admin-guide/configuration/hrw4u.en.rst | 4 +++- tools/hrw4u/scripts/hrw4u-lsp | 22 +++++++++++++++++++++- tools/hrw4u/src/visitor.py | 2 ++ tools/hrw4u/src/visitor_base.py | 8 ++++++-- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/doc/admin-guide/configuration/hrw4u.en.rst b/doc/admin-guide/configuration/hrw4u.en.rst index 99f55d0463e..004237f7661 100644 --- a/doc/admin-guide/configuration/hrw4u.en.rst +++ b/doc/admin-guide/configuration/hrw4u.en.rst @@ -575,7 +575,9 @@ Schema for editor validation and autocomplete is provided at functions: [ ... ] # same categories as deny conditions: [ ... ] -All lists are optional. An empty or missing sandbox file permits everything. +All lists are optional. If ``--sandbox`` is omitted, all features are permitted. +When a sandbox file is provided it must contain a top-level ``sandbox:`` key; +an empty policy can be expressed as ``sandbox: {}``. A feature may not appear in both ``deny`` and ``warn``. Denied Sections diff --git a/tools/hrw4u/scripts/hrw4u-lsp b/tools/hrw4u/scripts/hrw4u-lsp index 78cda1fcebe..aa6c74b6d9c 100755 --- a/tools/hrw4u/scripts/hrw4u-lsp +++ b/tools/hrw4u/scripts/hrw4u-lsp @@ -22,6 +22,7 @@ from __future__ import annotations import argparse import json import os +import re import sys import urllib.parse from functools import lru_cache @@ -248,6 +249,25 @@ class DocumentManager: "source": "hrw4u" }) + # Emit sandbox warnings as LSP severity=2 (Warning) diagnostics + if error_collector and error_collector.has_warnings(): + for warning_str in error_collector.warnings: + first_line = warning_str.split('\n')[0] + m = re.match(r'^.*?:(\d+):(\d+):\s+warning:\s+(.*)', first_line) + if m: + line_num = max(0, int(m.group(1)) - 1) + col_num = max(0, int(m.group(2))) + message = m.group(3) + diagnostics.append({ + "range": { + "start": {"line": line_num, "character": col_num}, + "end": {"line": line_num, "character": col_num + 1} + }, + "severity": 2, + "message": message, + "source": "hrw4u-sandbox" + }) + # Add parser errors only if they don't overlap with semantic errors for parser_error in parser_errors: error_line = parser_error.get("range", {}).get("start", {}).get("line", -1) @@ -617,7 +637,7 @@ def main() -> None: """Main entry point for the LSP server.""" parser = argparse.ArgumentParser(description="HRW4U Language Server") parser.add_argument("--sandbox", metavar="FILE", type=Path, help="Path to sandbox YAML configuration file") - args = parser.parse_args() + args, _unknown = parser.parse_known_args() server = HRW4ULanguageServer() if args.sandbox: diff --git a/tools/hrw4u/src/visitor.py b/tools/hrw4u/src/visitor.py index 21fb06d3f75..fe231c54564 100644 --- a/tools/hrw4u/src/visitor.py +++ b/tools/hrw4u/src/visitor.py @@ -354,6 +354,8 @@ def visitVarSection(self, ctx) -> None: else: raise error with self.debug_context("visitVarSection"): + if not self._sandbox_check(ctx, lambda: self._sandbox.check_section("VARS")): + return if not self._sandbox_check(ctx, lambda: self._sandbox.check_language("variables")): return self.visit(ctx.variables()) diff --git a/tools/hrw4u/src/visitor_base.py b/tools/hrw4u/src/visitor_base.py index 94bce52470f..63a05ba2edb 100644 --- a/tools/hrw4u/src/visitor_base.py +++ b/tools/hrw4u/src/visitor_base.py @@ -22,7 +22,7 @@ from hrw4u.debugging import Dbg from hrw4u.states import SectionType from hrw4u.common import SystemDefaults -from hrw4u.errors import hrw4u_error +from hrw4u.errors import hrw4u_error, format_diagnostic from hrw4u.sandbox import SandboxConfig, SandboxDenialError @@ -440,7 +440,11 @@ def _parse_op_tails(self, node, ctx=None) -> tuple[list[str], object, object]: raise Exception(f"Unknown modifier: {flag_text}") sandbox = self._sandbox if sandbox is not None: - sandbox.check_modifier(flag_text) + warning = sandbox.check_modifier(flag_text) + if warning and ctx and self.error_collector: + self.error_collector.add_warning(format_diagnostic(self.filename, ctx, "warning", warning)) + if sandbox.message: + self.error_collector.set_sandbox_message(sandbox.message) continue for kind in ("IDENT", "NUMBER", "STRING", "PERCENT_BLOCK", "COMPLEX_STRING"): From 0a99202d550dc33e4a8895038b82305491c568da Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Wed, 11 Mar 2026 10:21:53 -0700 Subject: [PATCH 5/5] Some more cleanup per Claude --- tools/hrw4u/scripts/hrw4u-lsp | 31 +++++++++++++++++-------------- tools/hrw4u/src/common.py | 6 ++---- tools/hrw4u/src/errors.py | 25 ++++++++++++++++++++++--- tools/hrw4u/src/visitor.py | 9 +-------- tools/hrw4u/src/visitor_base.py | 15 ++++++++++----- 5 files changed, 52 insertions(+), 34 deletions(-) diff --git a/tools/hrw4u/scripts/hrw4u-lsp b/tools/hrw4u/scripts/hrw4u-lsp index aa6c74b6d9c..fc27c6b5e15 100755 --- a/tools/hrw4u/scripts/hrw4u-lsp +++ b/tools/hrw4u/scripts/hrw4u-lsp @@ -22,7 +22,6 @@ from __future__ import annotations import argparse import json import os -import re import sys import urllib.parse from functools import lru_cache @@ -251,20 +250,24 @@ class DocumentManager: # Emit sandbox warnings as LSP severity=2 (Warning) diagnostics if error_collector and error_collector.has_warnings(): - for warning_str in error_collector.warnings: - first_line = warning_str.split('\n')[0] - m = re.match(r'^.*?:(\d+):(\d+):\s+warning:\s+(.*)', first_line) - if m: - line_num = max(0, int(m.group(1)) - 1) - col_num = max(0, int(m.group(2))) - message = m.group(3) - diagnostics.append({ - "range": { - "start": {"line": line_num, "character": col_num}, - "end": {"line": line_num, "character": col_num + 1} - }, + for w in error_collector.warnings: + line_num = max(0, w.line - 1) + col_num = max(0, w.column) + diagnostics.append( + { + "range": + { + "start": { + "line": line_num, + "character": col_num + }, + "end": { + "line": line_num, + "character": col_num + 1 + } + }, "severity": 2, - "message": message, + "message": w.message, "source": "hrw4u-sandbox" }) diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index 31ca386ed76..bcf2a44ea3f 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -233,12 +233,10 @@ def generate_output( else: fatal(str(e)) - if error_collector and error_collector.has_errors(): + if error_collector and (error_collector.has_errors() or error_collector.has_warnings()): print(error_collector.get_error_summary(), file=sys.stderr) - if not args.ast and tree is None: + if error_collector.has_errors() and not args.ast and tree is None: sys.exit(1) - elif error_collector and error_collector.has_warnings(): - print(error_collector.get_error_summary(), file=sys.stderr) def run_main( diff --git a/tools/hrw4u/src/errors.py b/tools/hrw4u/src/errors.py index b0011daf0fe..4d0bacc05fb 100644 --- a/tools/hrw4u/src/errors.py +++ b/tools/hrw4u/src/errors.py @@ -17,6 +17,7 @@ from __future__ import annotations +from dataclasses import dataclass from antlr4.error.ErrorListener import ErrorListener @@ -114,18 +115,36 @@ def format_diagnostic(filename: str, ctx: object, severity: str, message: str) - return _format_diagnostic(filename, line, col, severity, message, source_line) +@dataclass(frozen=True, slots=True) +class Warning: + """Structured warning with source location for use by both CLI and LSP.""" + filename: str + line: int + column: int + message: str + source_line: str + + def format(self) -> str: + return _format_diagnostic(self.filename, self.line, self.column, "warning", self.message, self.source_line) + + @classmethod + def from_ctx(cls, filename: str, ctx: object, message: str) -> Warning: + line, col, source_line = _extract_source_context(ctx) + return cls(filename=filename, line=line, column=col, message=message, source_line=source_line) + + class ErrorCollector: """Collects multiple syntax errors and warnings for comprehensive reporting.""" def __init__(self) -> None: self.errors: list[Hrw4uSyntaxError] = [] - self.warnings: list[str] = [] + self.warnings: list[Warning] = [] self._sandbox_message: str | None = None def add_error(self, error: Hrw4uSyntaxError) -> None: self.errors.append(error) - def add_warning(self, warning: str) -> None: + def add_warning(self, warning: Warning) -> None: self.warnings.append(warning) def set_sandbox_message(self, message: str) -> None: @@ -159,7 +178,7 @@ def get_error_summary(self) -> str: lines.append("") count = len(self.warnings) lines.append(f"{count} warning{'s' if count > 1 else ''}:") - lines.extend(self.warnings) + lines.extend(w.format() for w in self.warnings) if self._sandbox_message: lines.append("") diff --git a/tools/hrw4u/src/visitor.py b/tools/hrw4u/src/visitor.py index fe231c54564..fa3cc28e089 100644 --- a/tools/hrw4u/src/visitor.py +++ b/tools/hrw4u/src/visitor.py @@ -24,7 +24,7 @@ from hrw4u.hrw4uVisitor import hrw4uVisitor from hrw4u.hrw4uParser import hrw4uParser from hrw4u.symbols import SymbolResolver, SymbolResolutionError -from hrw4u.errors import hrw4u_error, format_diagnostic +from hrw4u.errors import hrw4u_error from hrw4u.states import CondState, SectionType from hrw4u.common import RegexPatterns, SystemDefaults from hrw4u.visitor_base import BaseHRWVisitor @@ -76,13 +76,6 @@ def _sandbox_check(self, ctx, check_fn) -> bool: raise return False - def _add_sandbox_warning(self, ctx, message: str) -> None: - """Format and collect a sandbox warning with source context.""" - if self.error_collector: - self.error_collector.add_warning(format_diagnostic(self.filename, ctx, "warning", message)) - if self._sandbox.message: - self.error_collector.set_sandbox_message(self._sandbox.message) - def _drain_resolver_warnings(self, ctx) -> None: """Drain any warnings accumulated in the symbol resolver.""" for warning in self.symbol_resolver.drain_warnings(): diff --git a/tools/hrw4u/src/visitor_base.py b/tools/hrw4u/src/visitor_base.py index 63a05ba2edb..a0e7f52fbbe 100644 --- a/tools/hrw4u/src/visitor_base.py +++ b/tools/hrw4u/src/visitor_base.py @@ -22,7 +22,7 @@ from hrw4u.debugging import Dbg from hrw4u.states import SectionType from hrw4u.common import SystemDefaults -from hrw4u.errors import hrw4u_error, format_diagnostic +from hrw4u.errors import hrw4u_error, Warning from hrw4u.sandbox import SandboxConfig, SandboxDenialError @@ -272,6 +272,13 @@ def __exit__(self, exc_type, exc_val, exc_tb): return DebugContext(self, method_name, args) + def _add_sandbox_warning(self, ctx, message: str) -> None: + """Format and collect a sandbox warning with source context.""" + if self.error_collector: + self.error_collector.add_warning(Warning.from_ctx(self.filename, ctx, message)) + if self._sandbox.message: + self.error_collector.set_sandbox_message(self._sandbox.message) + def trap(self, ctx, *, note: str | None = None): """ Context manager for consistent error handling across visitor methods. @@ -441,10 +448,8 @@ def _parse_op_tails(self, node, ctx=None) -> tuple[list[str], object, object]: sandbox = self._sandbox if sandbox is not None: warning = sandbox.check_modifier(flag_text) - if warning and ctx and self.error_collector: - self.error_collector.add_warning(format_diagnostic(self.filename, ctx, "warning", warning)) - if sandbox.message: - self.error_collector.set_sandbox_message(sandbox.message) + if warning and ctx: + self._add_sandbox_warning(ctx, warning) continue for kind in ("IDENT", "NUMBER", "STRING", "PERCENT_BLOCK", "COMPLEX_STRING"):