From 34e0e2f0d6aa92221b787dd3d0de67c59d17ad1e Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Sat, 30 May 2026 15:10:56 +0100 Subject: [PATCH] feat(typecheck): String relational ops (<, >, <=, >=) (closes #458) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a `TCon "String"` case to the comparison branch in `Typecheck.synth_expr`'s `ExprBinary` handler, alongside the existing Int and Float dispatches. Lex-compare semantics — JS's native string comparison gives lexicographic order over UTF-16 code units out of the box (identical to byte-wise order for ASCII), so the JS-family backends (codegen_deno, js_codegen) need no change: the existing `<` / `>` / `<=` / `>=` op emission already does the right thing once typecheck stops rejecting String operands. Pre-fix, `let _ = "a" < "b"` raised `TypeMismatch (String, Int)`, forcing downstream ports (e.g. standards#284's `check-ts-allowlist` port) to inline byte-wise `str_lt` helpers using `char_to_int(string_get(...))` at every use site. Regression fixture `tests/codegen-deno/string_lex_cmp.affine` + harness exercise 18 assertions covering all four ops (functional form + literal form), equal-string corner cases (<, <=, and >= applied to identical operands), empty strings, and prefix-relations. Non-ASCII string comparison is naturally exercised once #463 (the companion Unicode-escape codegen fix for #460) lands — until then, this fixture stays ASCII-only so it doesn't inherit the octal-escape ESM SyntaxError. The relational typecheck is orthogonal to the literal encoding; both ship together once both PRs are in. Verified: `tools/run_codegen_deno_tests.sh` (14/14 harnesses green); `dune test` (352/352 green). 
Closes #458 Refs hyperpolymath/standards#284 (the seam-analyst PR that surfaced this — `str_lt` workaround) Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/typecheck.ml | 6 ++++ tests/codegen-deno/string_lex_cmp.affine | 22 +++++++++++++ tests/codegen-deno/string_lex_cmp.harness.mjs | 33 +++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 tests/codegen-deno/string_lex_cmp.affine create mode 100644 tests/codegen-deno/string_lex_cmp.harness.mjs diff --git a/lib/typecheck.ml b/lib/typecheck.ml index 676f29a..1ed29a4 100644 --- a/lib/typecheck.ml +++ b/lib/typecheck.ml @@ -1024,6 +1024,12 @@ let rec synth (ctx : context) (expr : expr) : ty result = | TCon "Float" -> let* () = check ctx rhs ty_float in Ok ty_bool + | TCon "String" -> + (* String relational ops are lexicographic; the JS backends compare + UTF-16 code units (= byte order for ASCII). Surfaced by #458 (was the + rate-limiter for several TS→AS ports). *) + let* () = check ctx rhs ty_string in + Ok ty_bool | _ -> let (lhs_ty', rhs_ty, result_ty) = type_of_binop op in let* () = unify_or_err lhs_ty lhs_ty' in diff --git a/tests/codegen-deno/string_lex_cmp.affine b/tests/codegen-deno/string_lex_cmp.affine new file mode 100644 index 0000000..99491fa --- /dev/null +++ b/tests/codegen-deno/string_lex_cmp.affine @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MPL-2.0 +// issue #458 — String relational ops (<, >, <=, >=) must be built-in +// for lexicographic comparison. Pre-fix, `a < b` for `String` raised +// `TypeMismatch (String, Int)` because the comparison branch in +// Typecheck dispatched only on Float / Int. Now String dispatches +// alongside, lowering to JS `<` (which already does lex compare). 
+ +pub fn lt(a: String, b: String) -> Bool { return a < b; } +pub fn gt(a: String, b: String) -> Bool { return a > b; } +pub fn le(a: String, b: String) -> Bool { return a <= b; } +pub fn ge(a: String, b: String) -> Bool { return a >= b; } + +// Common derived: equal-length lex compare, single-character pivot, +// empty-string handling. +pub fn first_lt() -> Bool { return "abc" < "abd"; } +pub fn first_gt() -> Bool { return "z" > "a"; } +pub fn equal_strings_le() -> Bool { return "x" <= "x"; } +pub fn equal_strings_ge() -> Bool { return "x" >= "x"; } +pub fn equal_strings_lt() -> Bool { return "x" < "x"; } +pub fn empty_lt() -> Bool { return "" < "a"; } +pub fn empty_le() -> Bool { return "" <= ""; } +pub fn prefix_lt() -> Bool { return "abc" < "abcd"; } diff --git a/tests/codegen-deno/string_lex_cmp.harness.mjs b/tests/codegen-deno/string_lex_cmp.harness.mjs new file mode 100644 index 0000000..3f28454 --- /dev/null +++ b/tests/codegen-deno/string_lex_cmp.harness.mjs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: MPL-2.0 +// issue #458 — exercises String relational ops via the Deno-ESM +// backend. JS's <, >, <=, >= on strings are lex compare natively, so +// the typecheck fix is enough — codegen needs no special case. 
+import assert from "node:assert/strict"; +import { + lt, gt, le, ge, + first_lt, first_gt, + equal_strings_le, equal_strings_ge, equal_strings_lt, + empty_lt, empty_le, prefix_lt, +} from "./string_lex_cmp.deno.js"; + +assert.equal(lt("abc", "abd"), true, "abc < abd"); +assert.equal(lt("abd", "abc"), false, "abd not < abc"); +assert.equal(gt("z", "a"), true, "z > a"); +assert.equal(gt("a", "z"), false, "a not > z"); +assert.equal(le("x", "x"), true, "x <= x (equal)"); +assert.equal(le("x", "y"), true, "x <= y"); +assert.equal(le("y", "x"), false, "y not <= x"); +assert.equal(ge("x", "x"), true, "x >= x (equal)"); +assert.equal(ge("y", "x"), true, "y >= x"); +assert.equal(ge("x", "y"), false, "x not >= y"); + +assert.equal(first_lt(), true, "lit: abc < abd"); +assert.equal(first_gt(), true, "lit: z > a"); +assert.equal(equal_strings_le(), true, "equal le"); +assert.equal(equal_strings_ge(), true, "equal ge"); +assert.equal(equal_strings_lt(), false, "equal lt is false"); +assert.equal(empty_lt(), true, "empty < non-empty"); +assert.equal(empty_le(), true, "empty <= empty"); +assert.equal(prefix_lt(), true, "prefix < longer"); + +console.log("string_lex_cmp.harness.mjs OK");