From dbf9cf4aa1dfcbf22c4d4f7cf5a000a364035142 Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Sat, 30 May 2026 21:07:50 +0100 Subject: [PATCH] =?UTF-8?q?feat(query):=20accept=20issue=20#33=20literal?= =?UTF-8?q?=20examples=20=E2=80=94=20`diff`=20head=20+=20inline=20kwargs?= =?UTF-8?q?=20+=20PA-id=20auto-route?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #33's body advertises three example query expressions. Two of them parsed-then-errored or parsed-then-matched-nothing under the existing grammar: (category PA001 :severity Critical :pr-state nil) → parse error: "expected ')', got Atom(":severity")" (diff :since 2026-04-12 :category PA022) → parse error: "unknown query head: diff" (category PA001) → parsed, but matched zero findings — `category` expects the `WeakPointCategory` Debug name (e.g. "UnsafeCode"), not a rule ID This patch makes all three literal expressions in the issue work as documented: * New `diff` head — keyword-only sugar for `(and (kw VALUE) ...)`, unwrapped to the single clause when only one kwarg is supplied. * Inline `:keyword VALUE` kwargs accepted on every unary head (`category`, `rule-id`, `severity`, `repo`, `file`, `pr-state`, `since`) via a shared `parse_trailing_kwargs` helper. Desugars to `(and head-positional kw-clauses…)`. Existing single-arg forms keep parsing to the same atomic `Query` variants. * `(category PAxxx)` auto-routes to `Query::RuleId` so the issue's literal expression actually selects findings. Shared `query_for_keyword` dispatch keeps inline-kwargs and `diff` in lock-step — adding a new unary head requires a single row. Docs updated in the `src/query/mod.rs` module header with the new supported forms and a worked desugaring example. 12 new unit tests (300 total, was 288): parse_category_pa_id_auto_routes_to_rule_id parse_category_word_does_not_auto_route parse_issue_example_category_with_inline_kwargs parse_issue_example_diff_form parse_diff_with_single_kwarg_unwraps parse_diff_empty_errors parse_diff_rejects_unknown_keyword parse_diff_rejects_positional_arg parse_inline_kwargs_on_severity_head parse_inline_kwargs_on_since_head run_issue_example_category_with_kwargs_matches run_diff_form_matches_recent_findings End-to-end smoke against the three issue examples: all parse, all run, all return "No matches" against an empty store (correct). `cargo test --lib`: 300 passed. `cargo clippy --all-targets --no-default-features -- -D warnings`: clean. Refs #33. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 18 +++ src/query/mod.rs | 281 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 280 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c7c79d..472c762 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## [Unreleased] — 2026-05-30 + +### Added +- **Query parser: `(diff :since :category ...)` head + inline `:keyword + VALUE` kwargs on every unary head** (`src/query/mod.rs`, issue #33). The + issue body's three literal example expressions now parse verbatim: + - `(crosslang :from FFI :to ProofDrift)` — already worked. + - `(category PA001 :severity Critical :pr-state nil)` — now parses as + `(and (rule-id PA001) (severity Critical) (pr-state nil))`, with + PA-prefixed values on `category` auto-routed to `rule-id` so the + query actually matches findings. + - `(diff :since 2026-04-12 :category PA022)` — new `diff` head is + keyword-only sugar for an `(and ...)` over its kwarg pairs. + Inline kwargs are accepted on `category`, `rule-id`, `severity`, `repo`, + `file`, `pr-state`, and `since` — adding a `:keyword VALUE` after the + positional value desugars to `(and (head positional) (kw value) ...)`. + Behaviour unchanged for existing query expressions; 12 new unit tests. + ## [Unreleased] — 2026-04-18 ### Added diff --git a/src/query/mod.rs b/src/query/mod.rs index 05e1b5d..5463c4f 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -11,6 +11,7 @@ //! //! ```text //! (category UnsafeCode) +//! (category PA004) ; auto-routes to (rule-id PA004) //! (rule-id PA004) //! (severity Critical) //! (repo ) @@ -22,6 +23,22 @@ //! (and ...) //! (or ...) //! (not ) +//! (diff :since DATE :category C ...) ; sugar for (and (since DATE) (category C) ...) +//! ``` +//! +//! ### Inline keyword args on unary heads +//! +//! Any unary head (`category`, `rule-id`, `severity`, `repo`, `file`, +//! `pr-state`, `since`) accepts trailing `:keyword VALUE` pairs that are +//! desugared into an `(and ...)` conjunction with the positional clause. +//! This makes the issue #33 examples parse verbatim: +//! +//! ```text +//! (category PA001 :severity Critical :pr-state nil) +//! ≡ (and (rule-id PA001) (severity Critical) (pr-state nil)) +//! +//! (diff :since 2026-04-12 :category PA022) +//! ≡ (and (since 2026-04-12) (rule-id PA022)) //! ``` //! //! ## Semantic notes @@ -236,38 +253,27 @@ fn parse_form(tokens: &[Token], cursor: &mut usize) -> Result { match head_lower.as_str() { "category" => { let v = parse_value(tokens, cursor)?; - close_paren(tokens, cursor)?; - Ok(Query::Category(v)) + finish_unary_with_kwargs(category_or_rule_id(v), tokens, cursor) } "rule-id" | "ruleid" => { let v = parse_value(tokens, cursor)?; - close_paren(tokens, cursor)?; - Ok(Query::RuleId(v)) + finish_unary_with_kwargs(Query::RuleId(v), tokens, cursor) } "severity" => { let v = parse_value(tokens, cursor)?; - close_paren(tokens, cursor)?; - Ok(Query::Severity(v)) + finish_unary_with_kwargs(Query::Severity(v), tokens, cursor) } "repo" => { let v = parse_value(tokens, cursor)?; - close_paren(tokens, cursor)?; - Ok(Query::Repo(v)) + finish_unary_with_kwargs(Query::Repo(v), tokens, cursor) } "file" => { let v = parse_value(tokens, cursor)?; - close_paren(tokens, cursor)?; - Ok(Query::File(v)) + finish_unary_with_kwargs(Query::File(v), tokens, cursor) } "pr-state" | "prstate" => { let v = parse_value(tokens, cursor)?; - close_paren(tokens, cursor)?; - let val = if v.eq_ignore_ascii_case("nil") || v.eq_ignore_ascii_case("none") { - None - } else { - Some(v) - }; - Ok(Query::PrState(val)) + finish_unary_with_kwargs(Query::PrState(pr_state_value(v)), tokens, cursor) } "and" => { let children = parse_children(tokens, cursor)?; @@ -290,8 +296,21 @@ fn parse_form(tokens: &[Token], cursor: &mut usize) -> Result { } "since" => { let v = parse_value(tokens, cursor)?; - close_paren(tokens, cursor)?; - Ok(Query::Since(v)) + finish_unary_with_kwargs(Query::Since(v), tokens, cursor) + } + "diff" => { + // `(diff :kw VALUE ...)` — sugar for `(and (kw VALUE) ...)`. + // Keyword-only; no positional. Matches the issue #33 example + // `(diff :since 2026-04-12 :category PA022)` verbatim. + let kwargs = parse_trailing_kwargs(tokens, cursor)?; + if kwargs.is_empty() { + bail!("(diff ...) requires at least one ':keyword VALUE' pair"); + } + if kwargs.len() == 1 { + Ok(kwargs.into_iter().next().unwrap()) + } else { + Ok(Query::And(kwargs)) + } } "crosslang" => { // Two accepted shapes: @@ -388,6 +407,95 @@ fn close_paren(tokens: &[Token], cursor: &mut usize) -> Result<()> { } } +/// `(category PA001)` should mean "rule-id PA001", since `category` matches +/// the `WeakPointCategory` Debug name and the PA-prefixed strings are rule +/// IDs (`PA001`..`PA025`). Issue #33's literal example +/// `(category PA001 :severity Critical :pr-state nil)` relies on this +/// routing — without it the head matches zero findings. +fn category_or_rule_id(v: String) -> Query { + let bytes = v.as_bytes(); + let looks_like_rule_id = bytes.len() > 2 + && bytes[0].eq_ignore_ascii_case(&b'P') + && bytes[1].eq_ignore_ascii_case(&b'A') + && bytes[2..].iter().all(|b| b.is_ascii_digit()); + if looks_like_rule_id { + Query::RuleId(v) + } else { + Query::Category(v) + } +} + +/// Translate the surface `pr-state` value into the `Option` used +/// by `Query::PrState`. `nil` / `none` (case-insensitive) → `None`. +fn pr_state_value(v: String) -> Option { + if v.eq_ignore_ascii_case("nil") || v.eq_ignore_ascii_case("none") { + None + } else { + Some(v) + } +} + +/// Dispatch a `:keyword VALUE` pair to the corresponding atomic Query. +/// +/// Used by both `parse_trailing_kwargs` (after a positional head value) and +/// `diff` (keyword-only). Keeps the inline-kwargs surface in lock-step with +/// the corresponding unary heads — adding a new unary head requires a row +/// here so it can also be used as an inline-kwarg or `diff` keyword. +fn query_for_keyword(kw: &str, v: String) -> Result { + match kw { + "category" => Ok(category_or_rule_id(v)), + "rule-id" | "ruleid" => Ok(Query::RuleId(v)), + "severity" => Ok(Query::Severity(v)), + "repo" => Ok(Query::Repo(v)), + "file" => Ok(Query::File(v)), + "pr-state" | "prstate" => Ok(Query::PrState(pr_state_value(v))), + "since" => Ok(Query::Since(v)), + other => Err(anyhow!("unknown query keyword: :{}", other)), + } +} + +/// Consume an optional sequence of `:keyword VALUE` pairs followed by the +/// closing `)`. Returns each pair as a sub-`Query`. +fn parse_trailing_kwargs(tokens: &[Token], cursor: &mut usize) -> Result> { + let mut out = Vec::new(); + loop { + match tokens.get(*cursor) { + Some(Token::RParen) => { + *cursor += 1; + return Ok(out); + } + Some(Token::Atom(a)) if a.starts_with(':') => { + let kw = a[1..].to_ascii_lowercase(); + *cursor += 1; + let v = parse_value(tokens, cursor)?; + out.push(query_for_keyword(&kw, v)?); + } + Some(other) => bail!("expected ':keyword VALUE' or ')', got {:?}", other), + None => bail!("missing ')'"), + } + } +} + +/// After a unary head's positional value has been parsed, attach any +/// trailing `:keyword VALUE` pairs by wrapping the positional clause and +/// the keyword clauses into an `(and ...)`. If there are no kwargs, the +/// positional clause is returned as-is. +fn finish_unary_with_kwargs( + positional: Query, + tokens: &[Token], + cursor: &mut usize, +) -> Result { + let extras = parse_trailing_kwargs(tokens, cursor)?; + if extras.is_empty() { + Ok(positional) + } else { + let mut all = Vec::with_capacity(1 + extras.len()); + all.push(positional); + all.extend(extras); + Ok(Query::And(all)) + } +} + // =========================================================================== // Evaluation // =========================================================================== @@ -1111,4 +1219,139 @@ mod tests { assert!(s.contains("demo")); assert!(s.contains("pr-filed")); } + + // ----------------------------------------------------------------------- + // Issue #33 literal examples — verbatim from the issue body. + // ----------------------------------------------------------------------- + + #[test] + fn parse_category_pa_id_auto_routes_to_rule_id() { + // `(category PA001)` should mean "rule-id PA001" so the issue's + // literal example matches findings. + let q = parse("(category PA001)").unwrap(); + assert_eq!(q, Query::RuleId("PA001".to_string())); + } + + #[test] + fn parse_category_word_does_not_auto_route() { + let q = parse("(category UnsafeCode)").unwrap(); + assert_eq!(q, Query::Category("UnsafeCode".to_string())); + } + + #[test] + fn parse_issue_example_category_with_inline_kwargs() { + // Issue #33 example: `(category PA001 :severity Critical :pr-state nil)` + let q = parse("(category PA001 :severity Critical :pr-state nil)").unwrap(); + assert_eq!( + q, + Query::And(vec![ + Query::RuleId("PA001".to_string()), + Query::Severity("Critical".to_string()), + Query::PrState(None), + ]) + ); + } + + #[test] + fn parse_issue_example_diff_form() { + // Issue #33 example: `(diff :since 2026-04-12 :category PA022)` + let q = parse("(diff :since 2026-04-12 :category PA022)").unwrap(); + assert_eq!( + q, + Query::And(vec![ + Query::Since("2026-04-12".to_string()), + Query::RuleId("PA022".to_string()), + ]) + ); + } + + #[test] + fn parse_diff_with_single_kwarg_unwraps() { + // `(diff :since X)` is just `(since X)` — no point wrapping a single + // clause in `(and ...)`. + let q = parse("(diff :since 2026-04-12)").unwrap(); + assert_eq!(q, Query::Since("2026-04-12".to_string())); + } + + #[test] + fn parse_diff_empty_errors() { + let err = parse("(diff)").unwrap_err().to_string(); + assert!( + err.contains("(diff ...) requires at least one"), + "got: {}", + err + ); + } + + #[test] + fn parse_diff_rejects_unknown_keyword() { + let err = parse("(diff :nonsense X)").unwrap_err().to_string(); + assert!( + err.contains("unknown query keyword: :nonsense"), + "got: {}", + err + ); + } + + #[test] + fn parse_diff_rejects_positional_arg() { + // `(diff X)` — bare positional before any kwarg should fail. + let err = parse("(diff X)").unwrap_err().to_string(); + assert!(err.contains("expected ':keyword VALUE'"), "got: {}", err); + } + + #[test] + fn parse_inline_kwargs_on_severity_head() { + let q = parse("(severity Critical :pr-state nil)").unwrap(); + assert_eq!( + q, + Query::And(vec![ + Query::Severity("Critical".to_string()), + Query::PrState(None), + ]) + ); + } + + #[test] + fn parse_inline_kwargs_on_since_head() { + let q = parse("(since 2026-04-12 :category PA022)").unwrap(); + assert_eq!( + q, + Query::And(vec![ + Query::Since("2026-04-12".to_string()), + Query::RuleId("PA022".to_string()), + ]) + ); + } + + #[test] + fn run_issue_example_category_with_kwargs_matches() { + // End-to-end: write findings then run the verbatim issue example. + let dir = tempdir().unwrap(); + write_test_findings(dir.path()); + // None of the synthetic findings are PA001 (UnsafeCode → PA004), + // so the verbatim issue expression yields zero matches without + // erroring — proves the parser path is wired in. + let q = parse("(category PA001 :severity Critical :pr-state nil)").unwrap(); + let hits = run(&q, dir.path()).unwrap(); + assert!(hits.is_empty()); + } + + #[test] + fn run_diff_form_matches_recent_findings() { + let dir = tempdir().unwrap(); + write_test_findings(dir.path()); + // The synthetic findings carry `first_seen_run = "test-run"` which + // is NOT a valid timestamp, so `(since ...)` falls back to the + // hexad's `created_at`. We use a date well before the test run so + // every finding qualifies; then category PA004 selects the + // UnsafeCode finding from `alpha`. + let q = parse("(diff :since 2020-01-01 :category PA004)").unwrap(); + let hits = run(&q, dir.path()).unwrap(); + assert!( + !hits.is_empty(), + "diff form should find UnsafeCode/PA004 findings" + ); + assert!(hits.iter().all(|h| h.rule_id == "PA004")); + } }