Skip to content

Commit 506f032

Browse files
Revert all commits after 7905959 (fuzzer changes and dependent fixes)
Reverts 19f6476, 44668b2, 5544004, 04472a5, 737b687, bb9adbd, returning fuzz.rs, parser, VM string builtins, Cargo.toml, vm.json and the fuzzing doc to the state at 7905959.
1 parent bb9adbd commit 506f032

8 files changed

Lines changed: 38 additions & 72 deletions

File tree

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,3 @@ inherits = "release"
3434
opt-level = 3
3535
strip = false
3636
debug = true
37-
panic = "unwind"

compiler/src/bin/fuzz.rs

Lines changed: 32 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@ use compiler::modules::lexer::{lex, TokenType};
22
use compiler::modules::lexer::tables::token_to_str;
33
use compiler::modules::parser::{Parser, SSAChunk};
44
use compiler::modules::vm::{VM, Val};
5-
use std::{panic, sync::mpsc, thread, time::{Duration, Instant, SystemTime}};
5+
use std::{panic, time::{Duration, Instant, SystemTime}};
66

77
const MAX_LEN: usize = 2048;
88
const SAVE_DIR: &str = "crashes";
99
const PRINT_INTERVAL: u64 = 10_000;
10-
const MAX_SECS: u64 = 600; // 10 minutes
11-
const VM_TIMEOUT: Duration = Duration::from_millis(200);
10+
const MAX_SECS: u64 = 60; // 60 seconds
1211
const SLOW_THRESHOLD: Duration = Duration::from_millis(50);
1312

1413
struct Rng(u64);
@@ -90,13 +89,11 @@ const BOUNDARIES: [i64; 13] = [
9089

9190
fn boundary_int(rng: &mut Rng) -> i64 { BOUNDARIES[rng.usize_in(BOUNDARIES.len())] }
9291

93-
/* 25% boundary values; rest are full-range random i64 */
9492
fn rand_int(rng: &mut Rng) -> String {
9593
if rng.usize_in(4) == 0 { boundary_int(rng).to_string() }
9694
else { (rng.next() as i64).to_string() }
9795
}
9896

99-
/* Picks one of ten mutation strategies at uniform random */
10097
fn mutate(src: &str, corpus: &[String], rng: &mut Rng) -> String {
10198
match rng.usize_in(10) {
10299
0 => byte_flip(src, rng),
@@ -121,14 +118,12 @@ fn byte_flip(src: &str, rng: &mut Rng) -> String {
121118
String::from_utf8_lossy(&bytes).into_owned()
122119
}
123120

124-
/* Splits into lines, applies f in place, rejoins; shared by drop/duplicate */
125121
fn with_lines(src: &str, f: impl FnOnce(&mut Vec<&str>)) -> String {
126122
let mut lines: Vec<&str> = src.lines().collect();
127123
f(&mut lines);
128124
lines.join("\n")
129125
}
130126

131-
/* Injects a keyword snippet at a random line; exercises keywords in unexpected positions */
132127
fn insert_keyword(src: &str, rng: &mut Rng) -> String {
133128
let kw = rand_keyword(rng);
134129
let name = rand_name(rng);
@@ -157,7 +152,6 @@ fn duplicate_line(src: &str, rng: &mut Rng) -> String {
157152
with_lines(src, |lines| { let idx = rng.usize_in(lines.len()); lines.insert(idx, lines[idx]); })
158153
}
159154

160-
/* Cross-seeds two corpus entries to produce novel program shapes */
161155
fn splice(src: &str, corpus: &[String], rng: &mut Rng) -> String {
162156
if corpus.is_empty() { return src.to_string(); }
163157
let other = &corpus[rng.usize_in(corpus.len())];
@@ -170,7 +164,6 @@ fn splice(src: &str, corpus: &[String], rng: &mut Rng) -> String {
170164
out.join("\n")
171165
}
172166

173-
/* Replaces the first numeric literal with a NaN-box boundary value */
174167
fn inject_boundary(src: &str, rng: &mut Rng) -> String {
175168
let boundary = boundary_int(rng).to_string();
176169
let bytes = src.as_bytes();
@@ -226,7 +219,6 @@ fn indent_bomb(rng: &mut Rng) -> String {
226219
out
227220
}
228221

229-
/* Injects a comment line to exercise lexer comment skipping */
230222
fn add_comment(src: &str, rng: &mut Rng) -> String {
231223
let comment = format!("# {}", rand_int(rng));
232224
let mut lines: Vec<&str> = src.lines().collect();
@@ -283,41 +275,34 @@ impl Perf {
283275
}
284276
}
285277

286-
enum Outcome { Crash, ParseErr, VmErr, Timeout, Clean(u128, Duration, Duration, Duration) }
278+
enum Outcome { Crash, ParseErr, VmErr, Clean(u128, Duration, Duration, Duration) }
287279

288-
/* Runs lex→parse→VM in an isolated thread; catches panics and enforces VM_TIMEOUT */
289280
fn run_once(src: &str) -> Outcome {
290-
let src = if src.len() > MAX_LEN { src[..MAX_LEN].to_string() } else { src.to_string() };
291-
let (tx, rx) = mpsc::channel();
292-
thread::Builder::new().stack_size(8 * 1024 * 1024).spawn(move || {
293-
let outcome = match panic::catch_unwind(panic::AssertUnwindSafe(|| {
294-
let t0 = Instant::now();
295-
let (tokens, _) = lex(&src);
296-
let t_lex = t0.elapsed();
297-
298-
let t1 = Instant::now();
299-
let (chunk, errs) = Parser::new(&src, tokens.into_iter()).parse();
300-
let t_parse = t1.elapsed();
301-
302-
let bm = opcode_bitmap(&chunk);
303-
304-
let t2 = Instant::now();
305-
let ok = VM::new(&chunk).run().is_ok();
306-
let t_vm = t2.elapsed();
307-
308-
(errs.is_empty(), ok, bm, t_lex, t_parse, t_vm)
309-
})) {
310-
Err(_) => Outcome::Crash,
311-
Ok((false, ..)) => Outcome::ParseErr,
312-
Ok((true, false, ..)) => Outcome::VmErr,
313-
Ok((true, true, bm, tl, tp, tv)) => Outcome::Clean(bm, tl, tp, tv),
314-
};
315-
let _ = tx.send(outcome);
316-
});
317-
rx.recv_timeout(VM_TIMEOUT).unwrap_or(Outcome::Timeout)
281+
let src = if src.len() > MAX_LEN { &src[..MAX_LEN] } else { src };
282+
match panic::catch_unwind(panic::AssertUnwindSafe(|| {
283+
let t0 = Instant::now();
284+
let (tokens, _) = lex(src);
285+
let t_lex = t0.elapsed();
286+
287+
let t1 = Instant::now();
288+
let (chunk, errs) = Parser::new(src, tokens.into_iter()).parse();
289+
let t_parse = t1.elapsed();
290+
291+
let bm = opcode_bitmap(&chunk);
292+
293+
let t2 = Instant::now();
294+
let ok = VM::new(&chunk).run().is_ok();
295+
let t_vm = t2.elapsed();
296+
297+
(errs.is_empty(), ok, bm, t_lex, t_parse, t_vm)
298+
})) {
299+
Err(_) => Outcome::Crash,
300+
Ok((false, ..)) => Outcome::ParseErr,
301+
Ok((true, false, ..)) => Outcome::VmErr,
302+
Ok((true, true, bm, tl, tp, tv)) => Outcome::Clean(bm, tl, tp, tv),
303+
}
318304
}
319305

320-
/* Coverage-guided seed pool; retains inputs that reach new opcodes */
321306
struct Corpus { entries: Vec<String>, seen: u128 }
322307

323308
impl Corpus {
@@ -333,15 +318,14 @@ impl Corpus {
333318
}
334319
}
335320

336-
/* Run counters and start time for the periodic progress display */
337-
struct Stats { iters: u64, crashes: u64, adds: u64, timeouts: u64, start: Instant }
321+
struct Stats { iters: u64, crashes: u64, adds: u64, start: Instant }
338322

339323
impl Stats {
340-
fn new() -> Self { Self { iters: 0, crashes: 0, adds: 0, timeouts: 0, start: Instant::now() } }
324+
fn new() -> Self { Self { iters: 0, crashes: 0, adds: 0, start: Instant::now() } }
341325
fn print(&self, corpus: usize, perf: &Perf) {
342326
let s = self.start.elapsed().as_secs_f64().max(0.001);
343-
eprintln!("[{:7.1}s] iters={:<9} {:.0}/s crashes={} timeouts={} corpus={} new_cov={}",
344-
s, self.iters, self.iters as f64 / s, self.crashes, self.timeouts, corpus, self.adds);
327+
eprintln!("[{:7.1}s] iters={:<9} {:.0}/s crashes={} corpus={} new_cov={}",
328+
s, self.iters, self.iters as f64 / s, self.crashes, corpus, self.adds);
345329
perf.print();
346330
}
347331
}
@@ -365,20 +349,19 @@ fn main() {
365349
stats.crashes += 1;
366350
let path = format!("{SAVE_DIR}/crash_{:06}.py", stats.crashes);
367351
let _ = std::fs::write(&path, &input);
368-
eprintln!("\n[CRASH #{:06}] -> {path}\n {:?}\n", stats.crashes, &input[..input.len().min(120)]);
352+
eprintln!("\n[CRASH #{:06}] {path}\n {:?}\n", stats.crashes, &input[..input.len().min(120)]);
369353
}
370354
Outcome::Clean(bm, t_lex, t_parse, t_vm) => {
371355
perf.record(t_lex, t_parse, t_vm);
372356
let total = t_lex + t_parse + t_vm;
373357
if total > SLOW_THRESHOLD {
374358
let path = format!("{SAVE_DIR}/slow_{:06}.py", stats.iters);
375359
let _ = std::fs::write(&path, &input);
376-
eprintln!("\n[SLOW {}ms] -> {path}\n lex={}µs parse={}µs vm={}µs\n",
360+
eprintln!("\n[SLOW {}ms] {path}\n lex={}µs parse={}µs vm={}µs\n",
377361
total.as_millis(), t_lex.as_micros(), t_parse.as_micros(), t_vm.as_micros());
378362
}
379363
if corpus.add(input, bm) { stats.adds += 1; }
380364
}
381-
Outcome::Timeout => { stats.timeouts += 1; }
382365
_ => {}
383366
}
384367

compiler/src/modules/parser/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ pub struct Parser<'src, I: Iterator<Item = Token>> {
6161
// `true=for` (PopIter on break), false=while; parallels loop_starts/loop_breaks.
6262
pub(super) loop_kinds: Vec<bool>,
6363
pub(super) expr_depth: usize,
64-
pub(super) block_depth: usize,
6564
pub(super) saw_newline: bool,
6665
/* True inside f-string brace expr; disables `=` assignment so `f"{x=}"` parses as debug form. */
6766
pub(super) in_fstring_expr: bool,
@@ -460,7 +459,6 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
460459
saw_newline: false,
461460
in_fstring_expr: false,
462461
expr_depth: 0,
463-
block_depth: 0,
464462
last_line: 0,
465463
last_end: 0,
466464
bracket_stack: Vec::new(),

compiler/src/modules/parser/stmt.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::s;
22

33
use super::Parser;
4-
use super::types::{OpCode, MAX_BLOCK_DEPTH};
4+
use super::types::OpCode;
55

66
use crate::modules::lexer::{Token, TokenType};
77

@@ -331,15 +331,6 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
331331

332332
/* Compiles Indent/Dedent block; is_body=true stops after ReturnValue to skip dead code. */
333333
fn compile_block_inner(&mut self, is_body: bool) {
334-
if self.block_depth >= MAX_BLOCK_DEPTH {
335-
self.errors.push(crate::modules::parser::types::Diagnostic {
336-
msg: crate::s!("nesting too deep"),
337-
start: self.tokens.peek().map_or(0, |t| t.start),
338-
end: self.tokens.peek().map_or(0, |t| t.end),
339-
});
340-
return;
341-
}
342-
self.block_depth += 1;
343334
let indented = self.eat_if(TokenType::Indent);
344335
loop {
345336
while self.eat_if(TokenType::Semi) {}
@@ -358,7 +349,6 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
358349
if just_returned || !matches!(self.peek(), Some(TokenType::Semi)) { break; }
359350
} else if !matches!(self.peek(), Some(TokenType::Semi)) { break; }
360351
}
361-
self.block_depth -= 1;
362352
}
363353

364354
/* Name-led statement: assign, augmented-op, attr, index, call, or tuple unpack. */

compiler/src/modules/parser/types.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use crate::modules::vm::types::ExternFn;
55
use alloc::{string::{String, ToString}, vec, vec::Vec};
66

77
pub(crate) const MAX_EXPR_DEPTH: usize = 200;
8-
pub(crate) const MAX_BLOCK_DEPTH: usize = 80;
98
pub(crate) const MAX_INSTRUCTIONS: usize = 65_535;
109

1110
#[derive(Debug, Clone, Copy, PartialEq)]

compiler/src/modules/vm/handlers/builtin_methods/string.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ pub fn rpartition(vm: &mut VM, recv: Val, pos: &[Val]) -> Result<(), VmErr> {
235235
pub fn center(vm: &mut VM, recv: Val, pos: &[Val]) -> Result<(), VmErr> {
236236
let s = recv_str(vm, recv)?;
237237
if !pos[0].is_int() { return Err(cold_type("center() width must be an integer")); }
238-
let width = pos[0].as_int().clamp(0, 1 << 20) as usize;
238+
let width = pos[0].as_int() as usize;
239239
let fill = if pos.len() > 1 {
240240
val_to_str(vm, pos[1])?.chars().next().unwrap_or(' ')
241241
} else { ' ' };
@@ -251,7 +251,7 @@ pub fn center(vm: &mut VM, recv: Val, pos: &[Val]) -> Result<(), VmErr> {
251251
pub fn zfill(vm: &mut VM, recv: Val, pos: &[Val]) -> Result<(), VmErr> {
252252
if !pos[0].is_int() { return Err(cold_type("zfill() requires an integer argument")); }
253253
let s = recv_str(vm, recv)?;
254-
let width = pos[0].as_int().clamp(0, 1 << 20) as usize;
254+
let width = pos[0].as_int() as usize;
255255
let nchars = s.chars().count();
256256
let out = if nchars >= width {
257257
s

compiler/tests/cases/vm.json

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -818,8 +818,6 @@
818818
{"src": "print('-5'.zfill(4))", "output": ["-005"]},
819819
{"src": "print('abc'.zfill(5))", "output": ["00abc"]},
820820
{"src": "print('abc'.zfill(2))", "output": ["abc"]},
821-
{"src": "print('abc'.center(-1))", "output": ["abc"]},
822-
{"src": "print('abc'.zfill(-1))", "output": ["abc"]},
823821
{"src": "print('hello'[:])", "output": ["hello"]},
824822
{"src": "print('hello'[1:])", "output": ["ello"]},
825823
{"src": "print('hello'[:3])", "output": ["hel"]},

docs/pages/implementation/fuzzing.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,9 @@ corpus -> mutate -> lex + parse + vm -> catch_unwind -> [crash | new coverage |
1818
| `Crash` | panic anywhere in the pipeline | save to `crashes/crash_NNNNNN.py` |
1919
| `ParseErr` | parser emitted one or more diagnostics | discard |
2020
| `VmErr` | VM returned a typed error | discard |
21-
| `Timeout` | VM did not finish within 200 ms | discard, increment timeout counter |
2221
| `Clean(bm)` | compiled and executed without panic | admit to corpus if `bm` covers new opcodes |
2322

24-
`ParseErr`, `VmErr`, and `Timeout` are expected outcomes — typed errors and infinite loops are not bugs. Only an unhandled panic indicates a defect.
23+
`ParseErr` and `VmErr` are expected outcomes — typed errors are not bugs. Only an unhandled panic indicates a defect.
2524

2625
## Coverage
2726

@@ -31,7 +30,7 @@ An input is admitted to the corpus only when its bitmap introduces at least one
3130

3231
## Iteration
3332

34-
Some strategies are applied uniformly at random: `byte_flip` (XOR a random byte), `insert_keyword`, `drop_line`, `duplicate_line`, `splice` (join two corpus halves), `inject_boundary` (i64 boundary literals targeting VM overflow), `deep_nest` (100–220 bracket levels, attacks `MAX_EXPR_DEPTH`), `token_shuffle`, `indent_bomb` (50–110 nested `if True:` blocks), and `add_comment`.
33+
Some strategies are applied uniformly at random: `byte_flip` (XOR a random byte), `insert_keyword`, `drop_line`, `duplicate_line`, `splice` (join two corpus halves), `inject_boundary` (i64 boundary literals targeting VM overflow), `deep_nest` (100–220 bracket levels, attacks `MAX_EXPR_DEPTH`), `token_shuffle`, `indent_bomb` (50–110 nested `if True:` blocks), and `add_comment`.
3534

3635
## Known Targets
3736

@@ -58,7 +57,7 @@ The `fuzz` profile inherits from `release` with two overrides: `panic = "unwind"
5857
Output is written to stderr every 10 000 iterations:
5958

6059
```txt
61-
[5.3s] iters=10000 1886/s crashes=0 timeouts=2 corpus=24 new_cov=4
60+
[5.3s] iters=10000 1886/s crashes=0 corpus=24 new_cov=4
6261
```
6362

6463
Crashes are saved immediately on detection. To reproduce a crash against the standard compiler binary:

0 commit comments

Comments
 (0)