hyperpolymath · hyperpolymath · May 30, 2026 · May 30, 2026
diff --git a/lib/codegen_deno.ml b/lib/codegen_deno.ml
@@ -721,8 +721,8 @@ and gen_literal (lit : literal) : string =
       if String.length s > 0 && s.[String.length s - 1] = '.' then s ^ "0" else s
   | LitBool (true, _)  -> "true"
   | LitBool (false, _) -> "false"
-  | LitString (s, _)   -> "\"" ^ String.escaped s ^ "\""
-  | LitChar (c, _)     -> "\"" ^ Char.escaped c ^ "\""
+  | LitString (s, _)   -> Js_codegen.js_string_lit s
+  | LitChar (c, _)     -> Js_codegen.js_string_lit (String.make 1 c)
   | LitUnit _          -> "Unit"
 
 and gen_pattern ctx (pat : pattern) : string =

diff --git a/lib/js_codegen.ml b/lib/js_codegen.ml
@@ -101,6 +101,62 @@ let mangle (name : string) : string =
   if List.mem name js_reserved then name ^ "_"
   else name
 
+(** Lower a byte string (normally UTF-8) to a JS double-quoted literal
+    that is safe under strict-mode ESM.
+
+    OCaml's [String.escaped] emits non-ASCII bytes as [\NNN] *decimal*
+    sequences; JavaScript parses [\NNN] as *octal* escapes, which strict
+    mode rejects and which would decode to wrong characters elsewhere.
+    Structurally valid UTF-8 sequences are decoded and emitted as
+    [\uXXXX] (BMP) or [\u{XXXXX}] (non-BMP). A byte that does not start
+    such a sequence (bad or missing continuation byte, or a result above
+    U+10FFFF, which JS rejects inside [\u{...}]) is emitted alone as
+    [\u00XX], so it never swallows the bytes that follow. Closes #460. *)
+let js_string_lit (s : string) : string =
+  let n = String.length s in
+  let buf = Buffer.create (n + 8) in
+  (* Payload bits of the continuation byte at [j]; -1 if absent or invalid. *)
+  let cont j =
+    if j < n && Char.code s.[j] land 0xC0 = 0x80 then Char.code s.[j] land 0x3F
+    else -1
+  in
+  (* Decode the sequence led by [b0] at index [i]: (code point, byte length). *)
+  let decode i b0 =
+    let c1 = cont (i + 1) and c2 = cont (i + 2) and c3 = cont (i + 3) in
+    if b0 >= 0xC0 && b0 < 0xE0 && c1 >= 0 then
+      (((b0 land 0x1F) lsl 6) lor c1, 2)
+    else if b0 >= 0xE0 && b0 < 0xF0 && c1 >= 0 && c2 >= 0 then
+      (((b0 land 0x0F) lsl 12) lor (c1 lsl 6) lor c2, 3)
+    else if b0 >= 0xF0 && b0 < 0xF8 && c1 >= 0 && c2 >= 0 && c3 >= 0 then
+      (let cp = ((b0 land 0x07) lsl 18) lor (c1 lsl 12) lor (c2 lsl 6) lor c3 in
+       if cp <= 0x10FFFF then (cp, 4) else (b0, 1))
+    else (b0, 1)
+  in
+  Buffer.add_char buf '"';
+  let i = ref 0 in
+  while !i < n do
+    let b0 = Char.code s.[!i] in
+    if b0 < 0x80 then begin
+      (match s.[!i] with
+       | '\\' -> Buffer.add_string buf "\\\\"
+       | '"'  -> Buffer.add_string buf "\\\""
+       | '\n' -> Buffer.add_string buf "\\n"
+       | '\r' -> Buffer.add_string buf "\\r"
+       | '\t' -> Buffer.add_string buf "\\t"
+       | c when b0 >= 0x20 && b0 <= 0x7E -> Buffer.add_char buf c
+       | _ -> Buffer.add_string buf (Printf.sprintf "\\x%02X" b0));
+      incr i
+    end else begin
+      let cp, len = decode !i b0 in
+      Buffer.add_string buf
+        (if cp <= 0xFFFF then Printf.sprintf "\\u%04X" cp
+         else Printf.sprintf "\\u{%X}" cp);
+      i := !i + len
+    end
+  done;
+  Buffer.add_char buf '"';
+  Buffer.contents buf
+
 (* ============================================================================
    Expression Code Generation
    ============================================================================ *)
@@ -230,8 +286,8 @@ and gen_literal (lit : literal) : string =
       if String.length s > 0 && s.[String.length s - 1] = '.' then s ^ "0" else s
   | LitBool (true, _)    -> "true"
   | LitBool (false, _)   -> "false"
-  | LitString (s, _)     -> "\"" ^ String.escaped s ^ "\""
-  | LitChar (c, _)       -> "\"" ^ Char.escaped c ^ "\""
+  | LitString (s, _)     -> js_string_lit s
+  | LitChar (c, _)       -> js_string_lit (String.make 1 c)
   | LitUnit _            -> "Unit"
 
 and gen_pattern ctx (pat : pattern) : string =

diff --git a/tests/codegen-deno/non_ascii.affine b/tests/codegen-deno/non_ascii.affine
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: MPL-2.0
+// issue #460 — non-ASCII string literals must round-trip under
+// strict-mode ESM. Pre-fix, the JS codegen used OCaml `String.escaped`,
+// whose `\NNN` decimal escapes JS parses as OCTAL and strict mode rejects:
+// `SyntaxError: Octal escape sequences are not allowed in strict mode`.
+// Post-fix, non-ASCII bytes lower to `\uXXXX` / `\u{XXXXX}` escapes.
+// Fixtures cover 2-, 3- and 4-byte UTF-8 sequences, plain ASCII, and
+// quote/backslash escapes; non_ascii.harness.mjs holds the expected values.
+
+pub fn emoji_cross() -> String { return "❌"; }
+pub fn emoji_check() -> String { return "✓"; }
+pub fn cjk_hello() -> String { return "你好"; }
+pub fn latin_accent() -> String { return "café résumé"; }
+pub fn non_bmp_sob() -> String { return "😭"; }
+pub fn mixed() -> String { return "[OK] café 你好 ❌"; }
+pub fn ascii_only() -> String { return "plain ASCII"; }
+pub fn quotes_and_backslash() -> String { return "\"escaped\" and \\back"; }
diff --git a/tests/codegen-deno/non_ascii.harness.mjs b/tests/codegen-deno/non_ascii.harness.mjs
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: MPL-2.0
+// issue #460 — round-trip non-ASCII string literals through the
+// Deno-ESM backend under strict-mode ESM. The `import` itself is the
+// strictest test: if the emitted `.deno.js` contains octal escapes,
+// the module fails to parse and the import throws SyntaxError before
+// any assertion can run.
+import assert from "node:assert/strict";
+import {
+  emoji_cross,
+  emoji_check,
+  cjk_hello,
+  latin_accent,
+  non_bmp_sob,
+  mixed,
+  ascii_only,
+  quotes_and_backslash,
+} from "./non_ascii.deno.js";
+
+assert.equal(emoji_cross(), "❌", "BMP emoji ❌ round-trips");
+assert.equal(emoji_check(), "✓", "BMP check mark ✓ round-trips");
+assert.equal(cjk_hello(), "你好", "CJK 'nihao' round-trips");
+assert.equal(latin_accent(), "café résumé", "Latin accented round-trips");
+assert.equal(non_bmp_sob(), "\u{1F62D}", "non-BMP code point round-trips");
+assert.equal(mixed(), "[OK] café 你好 ❌", "mixed ASCII+non-ASCII round-trips");
+assert.equal(ascii_only(), "plain ASCII", "ASCII-only unchanged");
+assert.equal(quotes_and_backslash(), "\"escaped\" and \\back", "quote+backslash escapes preserved");
+
+console.log("non_ascii.harness.mjs OK");