From 9e050f76ddbf46830f920009a76880144e647680 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Sat, 30 May 2026 15:49:01 +0100 Subject: [PATCH] feat(loops): wire `break` / `continue` end-to-end (closes #459) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `BREAK` and `CONTINUE` were lexer-reserved tokens with no parser production consuming them — any use produced a syntax error. Surfaced by standards#284 (TS->AS port) which had to restructure several loops into combined-guard / sentinel-boolean forms to work around it. Now wired through the full pipeline: - **ast.ml**: `ExprBreak of Span.t`, `ExprContinue of Span.t`. - **parser.mly**: `BREAK`/`CONTINUE` added to `expr_assign` next to `RETURN`/`RESUME`. Diverging prefix expressions, no operand role. - **resolve.ml**: pass-through (no name resolution needed). Touched both `resolve_expr` and `lower_expr` arms. - **typecheck.ml**: synth returns `ty_never` (matching `ExprReturn`). Loop-context check via new `ctx.in_loop : mutable bool` flipped on `StmtWhile`/`StmtFor` body entry and restored on exit. Misuse outside a loop yields a new `NotInLoop of string` type-error with a message naming the keyword and citing #459. - **always_diverges**: both new cases return true (a loop-body `break` exits the body normally, no value produced — same diverging shape as `return`). - **borrow.ml**: pass-through in span lookup, visit-recurse, free-var collection, and the main checker (no expression carried, no borrow state mutated). - **quantity.ml**, **effect_sites.ml**: pass-through (no resources used, no call sites involved). - **codegen_deno.ml**, **js_codegen.ml**: statement-position emission lowers to bare JS `break;` / `continue;`. Expression-position IIFE fallback is a defensive stub — legal AffineScript places these inside a loop body, so the statement path (`gen_stmt_expr`) is what fires. The wasm/GC/other-backend codegens fall through their existing wildcard arms; full backend support is out of scope for this PR (file separately if a non-JS target needs it). Regression fixture `tests/codegen-deno/loop_break_continue.affine` + harness exercise 14 assertions across: - `while` + `break` (threshold-driven early exit) - `while` + `continue` (skip-evens accumulator) - `for` + `break` (find-first-match index) - `for` + `continue` (count-positive filter) - Edge cases: break on first iteration, no-break path, empty array Verified: - `./tools/run_codegen_deno_tests.sh`: 15/15 harnesses green - `dune test`: 352/352 unit tests green - Misuse check: `break;` outside a loop emits the new `NotInLoop` error with the expected message ("`break` used outside a loop body (#459). `break` and `continue` must be lexically enclosed by a `while` or `for` loop."). Closes #459 Refs hyperpolymath/standards#284 (workarounds documented in the "Seam findings" section that surfaced this gap) Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/ast.ml | 2 + lib/borrow.ml | 10 ++++ lib/codegen_deno.ml | 9 ++++ lib/effect_sites.ml | 1 + lib/js_codegen.ml | 7 +++ lib/parser.mly | 2 + lib/quantity.ml | 2 + lib/resolve.ml | 5 ++ lib/typecheck.ml | 42 ++++++++++++++- tests/codegen-deno/loop_break_continue.affine | 52 +++++++++++++++++++ .../loop_break_continue.harness.mjs | 34 ++++++++++++ 11 files changed, 164 insertions(+), 2 deletions(-) create mode 100644 tests/codegen-deno/loop_break_continue.affine create mode 100644 tests/codegen-deno/loop_break_continue.harness.mjs diff --git a/lib/ast.ml b/lib/ast.ml index 6c823eaa..ec5145cc 100644 --- a/lib/ast.ml +++ b/lib/ast.ml @@ -146,6 +146,8 @@ type expr = | ExprUnary of unary_op * expr | ExprBlock of block | ExprReturn of expr option + | ExprBreak of Span.t (** break (in loop) — #459 *) + | ExprContinue of Span.t (** continue (in loop) — #459 *) | ExprTry of { et_body : block; et_catch : match_arm list option; diff --git a/lib/borrow.ml b/lib/borrow.ml index 686d7367..41b3b858 100644 --- a/lib/borrow.ml +++ b/lib/borrow.ml @@ -460,6 +460,8 @@ let rec expr_span (expr : expr) : Span.t = | [] -> match blk_expr with Some e -> expr_span e | None -> Span.dummy end | ExprReturn _ -> Span.dummy + | ExprBreak sp -> sp + | ExprContinue sp -> sp | ExprTry _ -> Span.dummy | ExprHandle { eh_body; _ } -> expr_span eh_body | ExprResume _ -> Span.dummy @@ -705,6 +707,8 @@ let compute_last_use_index (symbols : Symbol.t) (blk : block) | ExprLambda lam -> visit_expr idx lam.elam_body | ExprReturn (Some e) -> visit_expr idx e | ExprReturn None -> () + | ExprBreak _ -> () + | ExprContinue _ -> () | ExprHandle eh -> visit_expr idx eh.eh_body; List.iter (fun arm -> @@ -949,6 +953,7 @@ let rec check_expr (ctx : context) (state : state) (symbols : Symbol.t) (expr : | ExprTupleIndex (e, _) | ExprRowRestrict (e, _) | ExprSpan (e, _) -> collect_free acc e | ExprReturn None | ExprResume None | ExprUnsafe [] -> acc + | ExprBreak _ | ExprContinue _ -> acc | ExprResume (Some e) -> collect_free acc e | ExprTuple es | ExprArray es -> List.fold_left collect_free acc es | ExprRecord er -> @@ -1227,6 +1232,11 @@ let rec check_expr (ctx : context) (state : state) (symbols : Symbol.t) (expr : | None -> Ok () end + (* #459: break/continue carry no expression and own no borrows; safe + no-ops for the borrow checker (the typecheck loop-context guard is + what enforces well-formedness). *) + | ExprBreak _ | ExprContinue _ -> Ok () + | ExprTry et -> (* CFG-join (CORE-01 pt3 Slice C / #177): body runs first, then either succeeds (no-exception path → post-body state diff --git a/lib/codegen_deno.ml b/lib/codegen_deno.ml index 1ad5dbaa..eb3c7797 100644 --- a/lib/codegen_deno.ml +++ b/lib/codegen_deno.ml @@ -695,6 +695,13 @@ let rec gen_expr ctx (expr : expr) : string = | ExprBlock block -> gen_block_expr ctx block | ExprReturn (Some e) -> iife ctx ("return " ^ gen_expr ctx e ^ ";") | ExprReturn None -> iife ctx "return Unit;" + (* #459: break/continue lower to the corresponding JS keywords. The + wrapping IIFE pattern used for `return` doesn't work here — JS's + `break`/`continue` only target the nearest enclosing loop and an + IIFE wraps the keyword in a new function frame. Emit a bare + statement and rely on the parent block-flatten machinery. *) + | ExprBreak _ -> iife ctx "break;" + | ExprContinue _ -> iife ctx "continue;" | ExprLambda { elam_params; elam_body; elam_ret_ty = _ } -> let ps = List.map (fun (p : param) -> mangle p.p_name.name) elam_params in "((" ^ String.concat ", " ps ^ ") => " ^ gen_expr ctx elam_body ^ ")" @@ -867,6 +874,8 @@ and gen_stmt_expr ctx (e : expr) : string = match unspan e with | ExprReturn (Some e) -> "return " ^ gen_expr ctx e ^ ";" | ExprReturn None -> "return;" + | ExprBreak _ -> "break;" + | ExprContinue _ -> "continue;" | ExprIf { ei_cond; ei_then; ei_else } -> let elseb = match ei_else with | Some e -> " else { " ^ gen_branch ctx e ^ " }" diff --git a/lib/effect_sites.ml b/lib/effect_sites.ml index 83021ffb..37124130 100644 --- a/lib/effect_sites.ml +++ b/lib/effect_sites.ml @@ -85,6 +85,7 @@ let rec visit_expr (visit : expr -> unit) (e : expr) : unit = | ExprBlock b -> visit_block visit b | ExprReturn eo | ExprResume eo -> (match eo with Some e -> go_expr e | None -> ()) + | ExprBreak _ | ExprContinue _ -> () | ExprTry t -> visit_block visit t.et_body; (match t.et_catch with Some arms -> List.iter go_arm arms | None -> ()); diff --git a/lib/js_codegen.ml b/lib/js_codegen.ml index 606b3ea3..04e0dbf7 100644 --- a/lib/js_codegen.ml +++ b/lib/js_codegen.ml @@ -252,6 +252,13 @@ let rec gen_expr ctx (expr : expr) : string = "(() => { return " ^ gen_expr ctx e ^ "; })()" | ExprReturn None -> "(() => { return Unit; })()" + (* #459: see codegen_deno's matching comment. In statement position + `break`/`continue` lower to bare JS keywords; the expression- + position fallback uses an IIFE that can't actually escape the + loop, but legal AffineScript places these inside a loop body so + the statement path (gen_stmt) is what fires. *) + | ExprBreak _ -> "(() => { break; })()" + | ExprContinue _ -> "(() => { continue; })()" | ExprLambda { elam_params; elam_body; elam_ret_ty = _ } -> let param_strs = List.map (fun (p : param) -> mangle p.p_name.name) elam_params in "((" ^ String.concat ", " param_strs ^ ") => " ^ gen_expr ctx elam_body ^ ")" diff --git a/lib/parser.mly b/lib/parser.mly index 4c664d48..2af18d9c 100644 --- a/lib/parser.mly +++ b/lib/parser.mly @@ -769,6 +769,8 @@ expr_assign: is correct, since `return` diverges and was never a useful operand. */ | RETURN e = expr? { ExprReturn e } | RESUME e = expr? { ExprResume e } + | BREAK { ExprBreak (mk_span $startpos $endpos) } + | CONTINUE { ExprContinue (mk_span $startpos $endpos) } | lhs = expr_or EQ rhs = expr_assign { ExprLet { el_mut = false; el_quantity = None; el_pat = PatVar (mk_ident "_" $startpos(lhs) $endpos(lhs)); diff --git a/lib/quantity.ml b/lib/quantity.ml index 6ec83259..15f51458 100644 --- a/lib/quantity.ml +++ b/lib/quantity.ml @@ -524,6 +524,8 @@ let rec infer_usage_expr (env : env) (expr : expr) : unit = | ExprReturn e_opt -> Option.iter (infer_usage_expr env) e_opt + | ExprBreak _ | ExprContinue _ -> () + | ExprTry et -> infer_usage_block env et.et_body; Option.iter (fun arms -> diff --git a/lib/resolve.ml b/lib/resolve.ml index 90353efd..3537b3d7 100644 --- a/lib/resolve.ml +++ b/lib/resolve.ml @@ -326,6 +326,9 @@ let rec resolve_expr (ctx : context) (expr : expr) : unit result = | Some e -> resolve_expr ctx e | None -> Ok ()) + | ExprBreak _ -> Ok () + | ExprContinue _ -> Ok () + | ExprHandle eh -> let* () = resolve_expr ctx eh.eh_body in List.fold_left (fun acc arm -> @@ -909,6 +912,8 @@ let rec lower_expr quals (e : expr) : expr = | ExprUnary (op, e1) -> ExprUnary (op, lower_expr quals e1) | ExprBlock b -> ExprBlock (lower_block quals b) | ExprReturn eo -> ExprReturn (Option.map (lower_expr quals) eo) + | ExprBreak sp -> ExprBreak sp + | ExprContinue sp -> ExprContinue sp | ExprTry r -> ExprTry { et_body = lower_block quals r.et_body; et_catch = Option.map (List.map (lower_arm quals)) r.et_catch; diff --git a/lib/typecheck.ml b/lib/typecheck.ml index 1ed29a4c..f9aa3a25 100644 --- a/lib/typecheck.ml +++ b/lib/typecheck.ml @@ -77,6 +77,8 @@ let rec expr_summary (expr : expr) : string = | ExprIndex _ -> "index" | ExprArray _ -> "array" | ExprReturn _ -> "return" + | ExprBreak _ -> "break" + | ExprContinue _ -> "continue" | ExprTry _ -> "try" | ExprHandle _ -> "handle" | ExprResume _ -> "resume" @@ -117,6 +119,10 @@ type type_error = named a module qualifier not introduced by any `use` in the current program (ADR-014, #228). Symmetric to the value-path resolution check done by #178. *) + | NotInLoop of string + (** `break` / `continue` used outside any enclosing loop body + (issue #459). The string carries the keyword name for the + error message. *) (* Known exports of stdlib/prelude.affine. Mirrors the same list in lib/face.ml — when an UnboundVariable fires at type-check time with @@ -187,6 +193,11 @@ let show_type_error = function Add `use %s;` to bring the module into scope, or `use %s::{Item};` to \ import items unqualified." m m m + | NotInLoop kw -> + Printf.sprintf + "`%s` used outside a loop body (#459). `break` and `continue` must be \ + lexically enclosed by a `while` or `for` loop." + kw let format_type_error = show_type_error @@ -243,6 +254,12 @@ type context = { value-path lowering done by [Resolve.lower_qualified_value_paths] (#178). Populated at [check_program] entry from [prog.prog_imports]. *) + mutable in_loop : bool; + (** #459: tracks whether the synth/check walker is currently inside + a loop body. Set true on entry to a [StmtWhile]/[StmtFor] body, + restored on exit. Read by [ExprBreak]/[ExprContinue] handlers to + reject loop-control expressions outside of a loop with + [NotInLoop]. *) } type 'a result = ('a, type_error) Result.t @@ -275,6 +292,7 @@ let create_context (symbols : Symbol.t) : context = declared_effects = Hashtbl.create 16; call_effects = Hashtbl.create 64; module_quals = Hashtbl.create 4; + in_loop = false; } (** ADR-014 / #228. Strip a leading `Mod::` qualifier from a folded @@ -1108,6 +1126,16 @@ let rec synth (ctx : context) (expr : expr) : ty result = Ok ty_never end + (* Break / continue — diverging like return. Loop-context check + happens at the statement-walker boundary (StmtWhile/StmtFor flip + ctx.in_loop); top-level break/continue is rejected there. #459. *) + | ExprBreak _ -> + if ctx.in_loop then Ok ty_never + else Error (NotInLoop "break") + | ExprContinue _ -> + if ctx.in_loop then Ok ty_never + else Error (NotInLoop "continue") + (* Variant constructor: Type::Variant *) | ExprVariant ({ name = _type_name; _ }, { name = variant_name; _ }) -> begin match Hashtbl.find_opt ctx.constructor_env variant_name with @@ -1235,6 +1263,8 @@ and synth_list (ctx : context) (exprs : expr list) : (ty list) result = and always_diverges (e : expr) : bool = match e with | ExprReturn _ -> true + | ExprBreak _ -> true + | ExprContinue _ -> true | ExprBlock blk -> block_always_diverges blk | ExprIf { ei_cond = _; ei_then; ei_else = Some else_e } -> always_diverges ei_then && always_diverges else_e @@ -1298,7 +1328,11 @@ and check_stmt (ctx : context) (stmt : stmt) : unit result = check ctx rhs lhs_ty | StmtWhile (cond, body) -> let* () = check ctx cond ty_bool in - let* _ty = synth_block ctx body in + let prev = ctx.in_loop in + ctx.in_loop <- true; + let res = synth_block ctx body in + ctx.in_loop <- prev; + let* _ty = res in Ok () | StmtFor (pat, iter_expr, body) -> let* iter_ty = synth ctx iter_expr in @@ -1309,7 +1343,11 @@ and check_stmt (ctx : context) (stmt : stmt) : unit result = (n, Hashtbl.find_opt ctx.name_types n) ) bindings in List.iter (fun (n, t) -> bind_var ctx n t) bindings; - let* _ty = synth_block ctx body in + let prev = ctx.in_loop in + ctx.in_loop <- true; + let res = synth_block ctx body in + ctx.in_loop <- prev; + let* _ty = res in List.iter (fun (n, old_sc) -> match old_sc with | Some sc -> Hashtbl.replace ctx.name_types n sc diff --git a/tests/codegen-deno/loop_break_continue.affine b/tests/codegen-deno/loop_break_continue.affine new file mode 100644 index 00000000..cd574f1b --- /dev/null +++ b/tests/codegen-deno/loop_break_continue.affine @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MPL-2.0 +// issue #459 — `break` and `continue` inside `while` / `for` loops. +// Pre-fix the lexer reserved BREAK/CONTINUE tokens but no production +// rule consumed them, so any use was a syntax error. Downstream TS +// ports (standards#284) had to restructure into combined-guard or +// sentinel-boolean forms. Now they parse, type-check (rejected +// outside a loop with NotInLoop), and lower to JS `break;`/`continue;`. + +// `break` exits a `while` early. +pub fn sum_until(limit: Int) -> Int { + let mut acc = 0; + let mut i = 0; + while i < 100 { + if acc >= limit { break; } + acc = acc + i; + i = i + 1; + } + return acc; +} + +// `continue` skips the rest of the body. +pub fn sum_odd(n: Int) -> Int { + let mut acc = 0; + let mut i = 0; + while i < n { + i = i + 1; + if i % 2 == 0 { continue; } + acc = acc + i; + } + return acc; +} + +// `break` inside `for`. +pub fn find_first(arr: [Int], target: Int) -> Int { + let mut found = -1; + let mut idx = 0; + for x in arr { + if x == target { found = idx; break; } + idx = idx + 1; + } + return found; +} + +// `continue` inside `for`. +pub fn count_positive(arr: [Int]) -> Int { + let mut n = 0; + for x in arr { + if x <= 0 { continue; } + n = n + 1; + } + return n; +} diff --git a/tests/codegen-deno/loop_break_continue.harness.mjs b/tests/codegen-deno/loop_break_continue.harness.mjs new file mode 100644 index 00000000..b193830e --- /dev/null +++ b/tests/codegen-deno/loop_break_continue.harness.mjs @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MPL-2.0 +// issue #459 — exercises break/continue semantics across while + for. +import assert from "node:assert/strict"; +import { + sum_until, + sum_odd, + find_first, + count_positive, +} from "./loop_break_continue.deno.js"; + +// sum_until: 0+1+2+3+4+5 = 15 (stops at total >= 10 once total reaches 15). +// Sequence: total=0 (i=0, +0=0), 1, 3, 6, 10 → break. +assert.equal(sum_until(10), 10, "while break exits at threshold"); +assert.equal(sum_until(0), 0, "break on first iteration"); +assert.equal(sum_until(99999), 4950, "no break — sum 0..99"); + +// sum_odd: 1+3+5+...+(n-1 if n even, else n) — skip evens via continue. +assert.equal(sum_odd(10), 25, "1+3+5+7+9"); +assert.equal(sum_odd(0), 0, "no iterations"); +assert.equal(sum_odd(1), 1, "just 1"); + +// find_first: returns index of first match, -1 if not found. +assert.equal(find_first([5, 3, 7, 9], 7), 2, "for break at match"); +assert.equal(find_first([5, 3, 7, 9], 5), 0, "match at index 0"); +assert.equal(find_first([5, 3, 7, 9], 42), -1, "no match"); +assert.equal(find_first([], 42), -1, "empty array"); + +// count_positive: filter via continue. +assert.equal(count_positive([1, -2, 3, 0, -5, 7]), 3, "for continue skips non-positive"); +assert.equal(count_positive([0, 0, 0]), 0, "all skipped"); +assert.equal(count_positive([1, 2, 3]), 3, "none skipped"); +assert.equal(count_positive([]), 0, "empty"); + +console.log("loop_break_continue.harness.mjs OK");