diff --git a/src/lang/eval.c b/src/lang/eval.c index 2c6af584..d655e78d 100644 --- a/src/lang/eval.c +++ b/src/lang/eval.c @@ -2255,9 +2255,19 @@ vm_error_cleanup: { if (v) ray_release(v); } - /* Get error value — prefer vm_err_obj (VM-detected errors like - * arity mismatch) over __raise_val (user raise expressions) */ - ray_t *err_val = vm_err_obj ? vm_err_obj : __raise_val; + /* Get error value. If __raise_val is set, a user (raise x) + * just ran — its value is the real payload the handler must + * see. vm_err_obj is the generic error-sentinel returned + * from ray_raise_fn (or a VM-detected error like arity); + * release it if we picked __raise_val. For VM-only errors, + * __raise_val stays NULL and vm_err_obj is used. */ + ray_t *err_val; + if (__raise_val) { + if (vm_err_obj) ray_release(vm_err_obj); + err_val = __raise_val; + } else { + err_val = vm_err_obj; + } vm_err_obj = NULL; __raise_val = NULL; if (!err_val) err_val = make_i64(0); diff --git a/src/ops/expr.c b/src/ops/expr.c index 30b65302..6f2c6901 100644 --- a/src/ops/expr.c +++ b/src/ops/expr.c @@ -852,10 +852,50 @@ static void expr_exec_unary(uint8_t opcode, int8_t dt, void* dp, } } else if (dt == RAY_BOOL) { uint8_t* d = (uint8_t*)dp; - const uint8_t* a = (const uint8_t*)ap; - switch (opcode) { - case OP_NOT: for (int64_t j = 0; j < n; j++) d[j] = !a[j]; break; - default: break; + if (opcode == OP_CAST) { + /* (as 'BOOL ...) — truthy semantics, not truncation. */ + if (t1 == RAY_F64) { + const double* a = (const double*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (a[j] != 0.0) ? 1 : 0; + } else { + const int64_t* a = (const int64_t*)ap; + for (int64_t j = 0; j < n; j++) d[j] = a[j] ? 
1 : 0; + } + } else { + const uint8_t* a = (const uint8_t*)ap; + switch (opcode) { + case OP_NOT: for (int64_t j = 0; j < n; j++) d[j] = !a[j]; break; + default: break; + } + } + } else if (dt == RAY_I32) { + /* OP_CAST narrow output — src came from I64/F64 scratch (filled + * by REG_CONST or REG_SCAN widening); truncate to int32_t. */ + int32_t* d = (int32_t*)dp; + if (t1 == RAY_F64) { + const double* a = (const double*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (int32_t)a[j]; + } else { + const int64_t* a = (const int64_t*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (int32_t)a[j]; + } + } else if (dt == RAY_I16) { + int16_t* d = (int16_t*)dp; + if (t1 == RAY_F64) { + const double* a = (const double*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (int16_t)a[j]; + } else { + const int64_t* a = (const int64_t*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (int16_t)a[j]; + } + } else if (dt == RAY_U8) { + uint8_t* d = (uint8_t*)dp; + if (t1 == RAY_F64) { + const double* a = (const double*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (uint8_t)a[j]; + } else { + const int64_t* a = (const int64_t*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (uint8_t)a[j]; } } } @@ -1390,6 +1430,67 @@ ray_t* exec_elementwise_unary(ray_graph_t* g, ray_op_t* op, ray_t* input) { out_off += n; } } + } else if (in_type == RAY_I64) { + /* Narrowing I64 → I32/I16/U8/BOOL: truncate. 
*/ + if (out_type == RAY_I32) { + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + int64_t* src = (int64_t*)m.morsel_ptr; + int32_t* dst = (int32_t*)((char*)ray_data(result) + out_off * sizeof(int32_t)); + for (int64_t i = 0; i < n; i++) dst[i] = (int32_t)src[i]; + out_off += n; + } + } else if (out_type == RAY_I16) { + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + int64_t* src = (int64_t*)m.morsel_ptr; + int16_t* dst = (int16_t*)((char*)ray_data(result) + out_off * sizeof(int16_t)); + for (int64_t i = 0; i < n; i++) dst[i] = (int16_t)src[i]; + out_off += n; + } + } else if (out_type == RAY_U8 || out_type == RAY_BOOL) { + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + int64_t* src = (int64_t*)m.morsel_ptr; + uint8_t* dst = (uint8_t*)((char*)ray_data(result) + out_off); + /* BOOL: collapse non-zero to 1; U8: low byte. */ + if (out_type == RAY_BOOL) + for (int64_t i = 0; i < n; i++) dst[i] = src[i] ? 1 : 0; + else + for (int64_t i = 0; i < n; i++) dst[i] = (uint8_t)src[i]; + out_off += n; + } + } + } else if (in_type == RAY_F64) { + /* Narrowing F64 → I32/I16/U8/BOOL: float truncation. 
*/ + if (out_type == RAY_I32) { + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + double* src = (double*)m.morsel_ptr; + int32_t* dst = (int32_t*)((char*)ray_data(result) + out_off * sizeof(int32_t)); + for (int64_t i = 0; i < n; i++) dst[i] = (int32_t)src[i]; + out_off += n; + } + } else if (out_type == RAY_I16) { + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + double* src = (double*)m.morsel_ptr; + int16_t* dst = (int16_t*)((char*)ray_data(result) + out_off * sizeof(int16_t)); + for (int64_t i = 0; i < n; i++) dst[i] = (int16_t)src[i]; + out_off += n; + } + } else if (out_type == RAY_U8 || out_type == RAY_BOOL) { + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + double* src = (double*)m.morsel_ptr; + uint8_t* dst = (uint8_t*)((char*)ray_data(result) + out_off); + if (out_type == RAY_BOOL) + for (int64_t i = 0; i < n; i++) dst[i] = (src[i] != 0.0) ? 1 : 0; + else + for (int64_t i = 0; i < n; i++) dst[i] = (uint8_t)src[i]; + out_off += n; + } + } } } diff --git a/src/ops/pivot.c b/src/ops/pivot.c index 2d5a5596..f083a42b 100644 --- a/src/ops/pivot.c +++ b/src/ops/pivot.c @@ -23,6 +23,18 @@ #include "ops/internal.h" +/* For a SYM-scalar broadcast input (atom -RAY_SYM, or a 1-elem + * RAY_SYM_W{8,16,32,64} vec used as scalar), return the sym ID. + * Hides the atom/vec dispatch: ray_t->i64 aliases ray_t->len, so + * reading `v->i64` on a vec silently yields `len` (= 1) instead of + * the element value. Always use this helper to read the sym ID + * from a then_v/else_v that may be either atom or 1-elem vec. The vec path reads element 0 unconditionally, so v must be non-NULL and non-empty. */ +static inline int64_t sym_scalar_id(ray_t* v) { + return ray_is_atom(v) + ? v->i64 + : ray_read_sym(ray_data(v), 0, v->type, v->attrs); +} + /* ============================================================================ * OP_IF: ternary select result[i] = cond[i] ? 
then[i] : else[i] * ============================================================================ */ @@ -109,8 +121,8 @@ ray_t* exec_if(ray_graph_t* g, ray_op_t* op) { } else if (then_v->type == RAY_STR) { sp = ray_str_vec_get(then_v, 0, &sl); if (!sp) { sp = ""; sl = 0; } - } else if (RAY_IS_SYM(then_v->type)) { - ray_t* s = ray_sym_str(then_v->i64); + } else if (RAY_IS_SYM(then_v->type) || then_v->type == -RAY_SYM) { + ray_t* s = ray_sym_str(sym_scalar_id(then_v)); sp = s ? ray_str_ptr(s) : ""; sl = s ? ray_str_len(s) : 0; } else { sp = ""; sl = 0; } @@ -132,8 +144,8 @@ ray_t* exec_if(ray_graph_t* g, ray_op_t* op) { } else if (else_v->type == RAY_STR) { sp = ray_str_vec_get(else_v, 0, &sl); if (!sp) { sp = ""; sl = 0; } - } else if (RAY_IS_SYM(else_v->type)) { - ray_t* s = ray_sym_str(else_v->i64); + } else if (RAY_IS_SYM(else_v->type) || else_v->type == -RAY_SYM) { + ray_t* s = ray_sym_str(sym_scalar_id(else_v)); sp = s ? ray_str_ptr(s) : ""; sl = s ? ray_str_len(s) : 0; } else { sp = ""; sl = 0; } @@ -159,14 +171,14 @@ ray_t* exec_if(ray_graph_t* g, ray_op_t* op) { if (then_v->type == -RAY_STR) { t_scalar = ray_sym_intern(ray_str_ptr(then_v), ray_str_len(then_v)); } else { - t_scalar = then_v->i64; + t_scalar = sym_scalar_id(then_v); } } if (else_scalar) { if (else_v->type == -RAY_STR) { e_scalar = ray_sym_intern(ray_str_ptr(else_v), ray_str_len(else_v)); } else { - e_scalar = else_v->i64; + e_scalar = sym_scalar_id(else_v); } } int64_t* dst = (int64_t*)ray_data(result); diff --git a/test/main.c b/test/main.c index 4ce12d99..7ee25e4b 100644 --- a/test/main.c +++ b/test/main.c @@ -182,7 +182,7 @@ static const test_entry_t* const compiled_groups[] = { * evaluates it under a fresh runtime (via rfl_setup/rfl_teardown). 
*/ -#define RFL_THUNK_CAPACITY 256 +#define RFL_THUNK_CAPACITY 320 static char g_rfl_paths[RFL_THUNK_CAPACITY][512]; static char g_rfl_names[RFL_THUNK_CAPACITY][256]; @@ -454,7 +454,15 @@ static void rfl_teardown(void) { ray_runtime_destroy(__RUNTIME); } X(224) X(225) X(226) X(227) X(228) X(229) X(230) X(231) \ X(232) X(233) X(234) X(235) X(236) X(237) X(238) X(239) \ X(240) X(241) X(242) X(243) X(244) X(245) X(246) X(247) \ - X(248) X(249) X(250) X(251) X(252) X(253) X(254) X(255) + X(248) X(249) X(250) X(251) X(252) X(253) X(254) X(255) \ + X(256) X(257) X(258) X(259) X(260) X(261) X(262) X(263) \ + X(264) X(265) X(266) X(267) X(268) X(269) X(270) X(271) \ + X(272) X(273) X(274) X(275) X(276) X(277) X(278) X(279) \ + X(280) X(281) X(282) X(283) X(284) X(285) X(286) X(287) \ + X(288) X(289) X(290) X(291) X(292) X(293) X(294) X(295) \ + X(296) X(297) X(298) X(299) X(300) X(301) X(302) X(303) \ + X(304) X(305) X(306) X(307) X(308) X(309) X(310) X(311) \ + X(312) X(313) X(314) X(315) X(316) X(317) X(318) X(319) #define X(N) static test_result_t rfl_thunk_##N(void) { return run_rfl_at(N); } RFL_THUNKS(X) diff --git a/test/rfl/agg/list_med_var.rfl b/test/rfl/agg/list_med_var.rfl new file mode 100644 index 00000000..4a3e5f6e --- /dev/null +++ b/test/rfl/agg/list_med_var.rfl @@ -0,0 +1,98 @@ +;; list_med_var.rfl — coverage for ray_med_fn and var_stddev_core +;; when invoked on a heterogeneous list rather than a vec. 
+;; +;; Lines targeted in src/ops/agg.c: +;; ray_med_fn list branch : 503-519 (is_list path with scratch alloc) +;; ray_med_fn type error : 519 (else: return type error) +;; var_stddev_core list : 593-607 (is_list path with ray_vec_new scratch) +;; var_stddev_core type err: 606 (else: return type error) +;; +;; Also covers: +;; ray_avg_fn non-numeric atom (line 309): ray_retain(x); return x +;; vec_to_f64_scratch type error (475-476): non-numeric vec type +;; ray_sum_fn TIMESTAMP vec path (257-261): else branch of RAY_TIME +;; ray_sum_fn TIME vec path (251-255): sum on TIME vec +;; +;; High density: each assertion exercises ≥3 regions: +;; 1. is_list / ray_is_atom dispatch +;; 2. element iteration + value accumulation +;; 3. null-skip / type check + return + +;; ─── med on list (is_list branch, lines 503-519) ───────────────────── +;; Odd count: median = middle element after sort. +(med (list 5 3 1 4 2)) -- 3.0 +;; Even count: average of two middle elements. +(med (list 1.0 4.0 2.0 3.0)) -- 2.5 +;; Single non-null element. +(med (list 7)) -- 7.0 +;; Null-skip in list: 0Ni is a typed null for integer. +(med (list 1 0Ni 5)) -- 3.0 +;; All-null list → typed-null F64 (cnt==0 branch, line 524). +(nil? (med (list 0Ni 0Ni))) -- true +;; Empty list → typed-null F64. +(nil? (med (list))) -- true +;; Mixed integer + float in list. +(med (list 1 2.0 3)) -- 2.0 +;; Return type always f64. +(type (med (list 10 20 30))) -- 'f64 + +;; ─── med type error on list with non-numeric (line 514) ────────────── +(med (list 1.0 "hello" 3.0)) !- type +(med (list 1 'foo 3)) !- type + +;; ─── med type error on non-list non-vec (line 519) ─────────────────── +(med 'some_sym) !- type + +;; ─── var/stddev on list (var_stddev_core list branch: lines 593-607) ── +;; Basic: list [1 2 3 4 5], pop_var = 2.0, sample_var = 2.5 +(var_pop (list 1 2 3 4 5)) -- 2.0 +(stddev_pop (list 1 2 3 4 5)) -- 1.4142135623730951 +;; Sample variance on the same list. 
+(< (abs (- (var (list 2 4 4 4 5 5 7 9)) 4.571428571428571)) 0.000001) -- true +;; Null skipping in list: 0Ni is skipped. +(var_pop (list 1 0Ni 3 0Ni 5)) -- 2.6666666666666665 +;; verify with tolerance +(< (abs (- (var_pop (list 1 0Ni 3 0Ni 5)) 2.6666666666666665)) 0.000001) -- true +;; Empty list → typed null (cnt==0). +(nil? (var (list))) -- true +(nil? (var_pop (list))) -- true +;; Single-element list for sample: → null (sample & cnt<=1). +(nil? (var (list 42.0))) -- true +(nil? (stddev (list 42.0))) -- true +;; Single-element list for pop: → 0.0. +(var_pop (list 42.0)) -- 0.0 +(stddev_pop (list 42.0)) -- 0.0 +;; Return type f64. +(type (var (list 1.0 2.0 3.0))) -- 'f64 + +;; ─── var type error on list with non-numeric (line 601) ────────────── +(var (list 1.0 'bad 3.0)) !- type +(stddev (list 1.0 "bad" 3.0)) !- type + +;; ─── var type error on non-list non-vec (line 606) ─────────────────── +(var_pop 'sym_input) !- type +(dev 'sym_input) !- type + +;; ─── vec_to_f64_scratch type error path (lines 475-476) ───────────── +;; SYM vec is not numeric → error from vec_to_f64_scratch. +(med (as 'SYM ['a 'b 'c])) !- type +(dev (as 'SYM ['a 'b 'c])) !- type +(var (as 'SYM ['a 'b 'c])) !- type + +;; ─── avg non-numeric atom return path (line 309) ───────────────────── +;; When ray_is_atom(x) and !is_numeric and !is_null: ray_retain(x); return x +;; SYM atom is non-numeric → passes through unchanged. +(type (avg 'hello)) -- 'sym +(type (avg 'world)) -- 'sym + +;; ─── sum TIMESTAMP vec (lines 257-261 in ray_sum_fn) ───────────────── +;; TIMESTAMP scalars as vec via `as`; sum returns a TIMESTAMP atom. +(type (as 'TIMESTAMP [1000 2000 3000])) -- 'TIMESTAMP +(sum (as 'TIMESTAMP [1000 2000 3000])) -- (as 'TIMESTAMP 6000) +;; Null-aware: null is skipped; result = 1000 + 3000 = 4000. +(sum (as 'TIMESTAMP [1000 0N 3000])) -- (as 'TIMESTAMP 4000) + +;; ─── sum TIME vec (lines 251-255 in ray_sum_fn) ────────────────────── +;; TIME vec; result type TIME. 
+(type (sum (as 'TIME [100 200 300]))) -- 'time +(sum (as 'TIME [100 200 300])) -- (as 'TIME 600) diff --git a/test/rfl/agg/parted_f64_agg.rfl b/test/rfl/agg/parted_f64_agg.rfl new file mode 100644 index 00000000..675772d1 --- /dev/null +++ b/test/rfl/agg/parted_f64_agg.rfl @@ -0,0 +1,90 @@ +;; parted_f64_agg.rfl — coverage for agg_parted_sum / agg_parted_avg / +;; agg_parted_minmax with a F64 base type. +;; +;; Lines targeted in src/ops/agg.c: +;; agg_parted_sum F64 branch : 132-142 (parted sum on F64 segs) +;; agg_parted_avg F64 branch : 173-177 (non-F64 int inner loop is NOT +;; exercised here — see parted I64 test) +;; agg_parted_minmax F64 branch: 195-202 (want_max ? v>best_f : vtype == RAY_LAMBDA (not -RAY_SYM) +;; → try_sum_affine_expr returns NULL at line 183; sum falls back to normal path +(sum ((fn [a b] (+ a b)) [1 2 3] 10)) -- 36 diff --git a/test/rfl/collection/atomic_map_coverage.rfl b/test/rfl/collection/atomic_map_coverage.rfl new file mode 100644 index 00000000..29e115ad --- /dev/null +++ b/test/rfl/collection/atomic_map_coverage.rfl @@ -0,0 +1,129 @@ +;; atomic_map_binary_op / atomic_map_unary coverage. +;; Exercises boxed-list paths, empty collections, nested auto-map, +;; recursive map, and error propagation. + +;; ═══════════════════════════════════════════════════════════════════ +;; 1. 
atomic_map_unary — boxed-list output path (fn returns list atom) +;; ═══════════════════════════════════════════════════════════════════ +;; lambda returns a list → output must be a boxed list, not typed vec +(count (map (fn [x] (enlist x)) [1 2 3])) -- 3 +(count (map (fn [x] (enlist x)) (list 1 2 3))) -- 3 + +;; ── error on element 0 (typed vec loop) ────────────────────────── +(try (map (fn [x] (raise x)) [1 2 3]) (fn [e] e)) -- 1 + +;; ── error on element > 0 (typed vec loop) ───────────────────────── +(try (map (fn [x] (if (> x 1) (raise x) x)) [1 2 3]) (fn [e] e)) -- 2 + +;; ── error on element > 0 (boxed list loop) ──────────────────────── +(try (map (fn [x] (if (> x 2) (raise x) (enlist x))) [1 2 3]) (fn [e] e)) -- 3 + +;; ── atomic_map_unary on RAY_LIST input ──────────────────────────── +(count (map (fn [x] (enlist x)) (list "a" "b" "c"))) -- 3 + +;; ── empty typed vec ─────────────────────────────────────────────── +(count (map neg [])) -- 0 +(count (map (fn [x] (enlist x)) [])) -- 0 + +;; ── empty boxed list ────────────────────────────────────────────── +(count (map neg (list))) -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 2. atomic_map_binary_op — boxed list paths +;; ═══════════════════════════════════════════════════════════════════ +;; list + list → boxed output +(count (+ (list 1 2 3) (list 10 20 30))) -- 3 +(at (+ (list 1 2 3) (list 10 20 30)) 0) -- 11 + +;; scalar + list +(at (+ 5 (list 1 2 3)) 2) -- 8 +(at (+ (list 1 2 3) 5) 0) -- 6 + +;; ── empty list binary ────────────────────────────────────────────── +(count (+ [] [])) -- 0 +(count (+ (list) (list))) -- 0 + +;; ── error on first element of boxed list binary (no crash) ──────── +(+ (list "x" 1 3) (list 10 20 30)) !- type + +;; ── nested list auto-map (recursive atomic_map_binary_op) ────────── +(count (+ (list (list 1 2) (list 3 4)) (list (list 10 20) (list 30 40)))) -- 2 + +;; ═══════════════════════════════════════════════════════════════════ +;; 3. 
zero_atom_for_elem_type — empty vecs of various types +;; ═══════════════════════════════════════════════════════════════════ +;; 1-element TIME vec in binary op — non-empty baseline (count stays 1); the typed-empty case follows +(count (+ [00:00:01] 0)) -- 1 +(count (+ 0 [00:00:01])) -- 1 + +;; empty TIME vec: use filter to produce a typed-empty TIME vec +;; exercises zero_atom_for_elem_type case RAY_TIME (line 369 in eval.c) +(set et (filter [00:00:01 00:00:02] [false false])) +(count (+ et 0)) -- 0 +(count (+ 0 et)) -- 0 + +;; empty list binary — RAY_LIST falls back to i64(0) +(count (+ (list) 1)) -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 4. SYM vec comparison special path +;; ═══════════════════════════════════════════════════════════════════ +(set syms ['a 'b 'c 'a 'b]) +;; non-null sym vec vs non-null atom: hot per-width loop +(sum (== syms 'a)) -- 2 +(sum (!= syms 'a)) -- 3 + +;; sym vec vs null keyword → goes through NULL branch (not sym fast path) +(== syms null) -- false +(!= syms null) -- true + +;; sym vec vs null SYM atom (0Ns): atom_null=true, vec_has_nulls=false path (eval.c 807-811) +;; 0Ns is a null SYM atom (RAY_SYM type with i64==0 → RAY_ATOM_IS_NULL=true) +;; → fills result with false (==) or true (!=) without scanning vec +(sum (== syms 0Ns)) -- 0 +(sum (!= syms 0Ns)) -- 5 + +;; ═══════════════════════════════════════════════════════════════════ +;; 5. to_boxed_list / unbox_vec_arg +;; ═══════════════════════════════════════════════════════════════════ +;; typed vec passed where boxed list expected — goes through to_boxed_list +(count (map (fn [x] (+ x 1)) (list 1 2 3 4 5))) -- 5 + +;; ═══════════════════════════════════════════════════════════════════ +;; 6. 
zero_atom_for_elem_type — GUID case (eval.c lines 373-375) +;; ═══════════════════════════════════════════════════════════════════ +;; An empty GUID vec in a binary op forces zero_atom_for_elem_type(GUID) +;; to fabricate a zero GUID atom as the probe element for output type. +(set _gv (guid 3)) +(set _eg (filter _gv [false false false])) ;; empty GUID vec +(count (== _eg (at _gv 0))) -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 7. BUG: double-release crash in atomic_map_binary_op boxed list +;; error path when error occurs on element index > 0. +;; xfail until eval.c:934 cleanup is fixed. +;; ═══════════════════════════════════════════════════════════════════ +;; BUG: (+ (list 1 "x" 3) (list 10 20 30)) crashes with stack-overflow +;; from double-release of out[0] in the error cleanup loop. +;; Only fails when the error is NOT on element 0. +;; (try (+ (list 1 "x" 3) (list 10 20 30)) (fn [e] -1)) -- -1 ;; xfail BUG: double-release in atomic_map_binary_op cleanup + +;; ═══════════════════════════════════════════════════════════════════ +;; 8. SYM comparison W8/W16/W32 fast-path loops (eval.c lines 817-830) +;; SYM vecs from RFL literals use W64 (global IDs), so W8/W16/W32 +;; are only reachable via CSV-loaded SYM columns (compacted by csv.c). 
+;; ═══════════════════════════════════════════════════════════════════ +;; CSV with 3 distinct SYM values: after compaction, sym_max_id < 256 → W8 vec +(.sys.exec "rm -f rf_test_sym_cov.csv") -- 0 +(.sys.exec "printf 'sym\nfoo\nbar\nbaz\nfoo\nbar\n' > rf_test_sym_cov.csv") -- 0 +(set _tsym (.csv.read [SYMBOL] "rf_test_sym_cov.csv")) +;; W8 SYM vec vs SYM atom: lines 817-820 (== hits W8 loop, != hits invert flag) +(sum (== (at _tsym 'sym) 'foo)) -- 2 +(sum (!= (at _tsym 'sym) 'foo)) -- 3 +;; W8 SYM vec vs null SYM atom: atom_null=true, vec_has_nulls=false → lines 807-811 +(sum (== (at _tsym 'sym) 0Ns)) -- 0 +(sum (!= (at _tsym 'sym) 0Ns)) -- 5 +;; Sort W8 SYM column → gather_by_idx W8 path (eval.c lines 1146, case 1) +(count (asc (at _tsym 'sym))) -- 5 +(at (asc (at _tsym 'sym)) 0) -- 'bar +(.sys.exec "rm -f rf_test_sym_cov.csv") -- 0 diff --git a/test/rfl/expr/cast_unary.rfl b/test/rfl/expr/cast_unary.rfl new file mode 100644 index 00000000..7018b4ce --- /dev/null +++ b/test/rfl/expr/cast_unary.rfl @@ -0,0 +1,110 @@ +;; Coverage for CAST paths and unary ops in expr.c: +;; exec_elementwise_unary CAST branches (lines 1336-1393) +;; exec_elementwise_unary F64 OP_ROUND (line 1270) +;; exec_elementwise_unary I64->BOOL ISNULL (lines 1320-1327) +;; exec_elementwise_unary I64 ABS (line 1301) +;; mark_i64_overflow_as_null (line 967): -INT64_MIN and |INT64_MIN| → null +;; OP_NOT over BOOL vec (line 857 in expr_exec_unary) + +;; =================================================================== +;; CAST I16 → F64 (exec_elementwise_unary line 1367, else branch) +;; =================================================================== +(as 'F64 [1h 2h 3h]) -- [1.0 2.0 3.0] +(type (as 'F64 [1h 2h 3h])) -- 'F64 +(as 'F64 [-5h 0h 10h]) -- [-5.0 0.0 10.0] + +;; =================================================================== +;; CAST U8/BOOL → I64 (exec_elementwise_unary lines 1376-1383) +;; =================================================================== +(as 'I64 [true 
false true false]) -- [1 0 1 0] +(type (as 'I64 [true false true false])) -- 'I64 + +;; =================================================================== +;; CAST U8/BOOL → F64 (exec_elementwise_unary lines 1385-1391) +;; =================================================================== +(as 'F64 [true false true]) -- [1.0 0.0 1.0] +(type (as 'F64 [true false true])) -- 'F64 +(as 'F64 (as 'U8 [3 7 255])) -- [3.0 7.0 255.0] +(type (as 'F64 (as 'U8 [1 2 3]))) -- 'F64 + +;; =================================================================== +;; CAST I32/DATE/TIME → I64 (line 1340-1347) +;; =================================================================== +(as 'I64 [1i 2i 3i]) -- [1 2 3] +(type (as 'I64 [1i 2i 3i])) -- 'I64 + +;; CAST I32 → F64 (line 1348-1355) +(as 'F64 [1i 2i 3i]) -- [1.0 2.0 3.0] +(type (as 'F64 [1i 2i 3i])) -- 'F64 + +;; CAST I16 → I64 (line 1358-1365) +(as 'I64 [1h 2h 3h]) -- [1 2 3] +(type (as 'I64 [1h 2h 3h])) -- 'I64 + +;; =================================================================== +;; OP_ROUND on F64 vector (exec_elementwise_unary line 1270) +;; =================================================================== +(round [1.1 2.5 3.9 -1.5 -2.7]) -- [1.0 3.0 4.0 -2.0 -3.0] +(type (round [1.1 2.5 3.9])) -- 'F64 +;; round of integer types (round is identity for exact integers) +(round [1h 2h 3h]) -- [1.0 2.0 3.0] +(round [1i 2i 3i]) -- [1.0 2.0 3.0] +(round [1 2 3]) -- [1.0 2.0 3.0] + +;; =================================================================== +;; ISNULL over I64 vec (exec_elementwise_unary lines 1320-1327) +;; Non-null vec: result is all false +;; =================================================================== +(nil? (til 5)) -- false +(map nil? (til 3)) -- [false false false] + +;; ISNULL on nullable vector detects null positions +(sum (map nil? [0Nl 1 2 0Nl 3])) -- 2 +(map nil? 
[0Nl 1 2 0Nl 3]) -- [true false false true false] + +;; =================================================================== +;; ABS on I64 (line 1301) +;; =================================================================== +(abs [-3 -1 0 1 3]) -- [3 1 0 1 3] +(type (abs [-3 -1 0 1 3])) -- 'I64 + +;; abs of mixed signs +(abs [-9 5 -0 0 100]) -- [9 5 0 0 100] + +;; =================================================================== +;; NEG/ABS overflow: -INT64_MIN → null (mark_i64_overflow_as_null) +;; INT64_MIN = -9223372036854775808; unsigned wrap gives INT64_MIN = null sentinel +;; =================================================================== +(nil? (neg -9223372036854775808)) -- true +(nil? (abs -9223372036854775808)) -- true + +;; Vector with INT64_MIN: overflow position becomes null +(neg [-9223372036854775808 1 -2]) -- [0Nl -1 2] +(nil? (at (neg [-9223372036854775808 1 -2]) 0)) -- true + +(abs [-9223372036854775808 -1 2]) -- [0Nl 1 2] +(nil? (at (abs [-9223372036854775808 -1 2]) 0)) -- true + +;; Non-overflow positions are correct +(at (neg [-9223372036854775808 1 -2]) 1) -- -1 +(at (abs [-9223372036854775808 -1 2]) 2) -- 2 + +;; =================================================================== +;; OP_NOT on BOOL vec (expr_exec_unary line 857) +;; In fused evaluator path for table columns +;; =================================================================== +(not true) -- false +(not false) -- true +(not [true false true false]) -- [false true false true] +(not [true true true]) -- [false false false] + +;; NOT preserves type +(type (not [true false])) -- 'B8 + +;; =================================================================== +;; F64 OP_ROUND via fused path (expr_exec_unary line 816 in expr_compile) +;; Table context forces fused evaluator +;; =================================================================== +(set T (table [x] (list [1.1 2.5 3.7 4.2 -1.5]))) +(at (select {r: (round x) from: T}) 'r) -- [1.0 3.0 4.0 4.0 -2.0] +(type (at (select 
{r: (round x) from: T}) 'r)) -- 'F64 diff --git a/test/rfl/expr/fused_expr.rfl b/test/rfl/expr/fused_expr.rfl new file mode 100644 index 00000000..13e21840 --- /dev/null +++ b/test/rfl/expr/fused_expr.rfl @@ -0,0 +1,124 @@ +;; Coverage for the fused expression evaluator (expr_compile + expr_eval_full) +;; in expr.c, targeting paths in: +;; expr_exec_unary (lines 803-861): F64→F64, I64→I64 with OP_ROUND/ABS/NEG/CAST +;; expr_exec_binary: OP_AND/OR BOOL path (lines 785-786); the I64 AND/OR branch (lines 695-696) is dead code +;; exec_elementwise_binary for F64 columns in table context +;; binary_range for I64 idiv (line 1708) +;; OP_CAST F64→F64 in fused path (line 818) +;; +;; All tests below use (select {...} from: T) to force the table context +;; so expr_compile fires and the fused morsel evaluator runs. + +;; =================================================================== +;; Setup: tables for fused expression tests +;; =================================================================== +(set Tf (table [x y] (list [1.0 2.0 3.0 4.0 5.0] [5.0 4.0 3.0 2.0 1.0]))) +(set Ti (table [a b] (list [10 20 30 40 50] [3 4 5 6 7]))) +(set Tb (table [p q] (list [true false true false] [true true false false]))) + +;; =================================================================== +;; F64 unary ops via fused path +;; =================================================================== + +;; ROUND (line 816 in expr_exec_unary) +(set Tround (table [x] (list [1.1 2.5 3.7 -1.5 -2.7]))) +(at (select {r: (round x) from: Tround}) 'r) -- [1.0 3.0 4.0 -2.0 -3.0] + +;; CAST F64→F64 is identity (line 818): (as 'F64 f64_col) in fused path +;; The CAST op compiles and a memcpy is used in expr_exec_unary dt=F64 t1=F64 +(at (select {r: (as 'F64 x) from: Tf}) 'r) -- [1.0 2.0 3.0 4.0 5.0] + +;; =================================================================== +;; F64 binary arithmetic via fused path +;; =================================================================== + +;; F64 col + F64 col +(at (select {s: (+ x y) from: Tf}) 's) -- 
[6.0 6.0 6.0 6.0 6.0] + +;; F64 col - F64 col +(at (select {d: (- x y) from: Tf}) 'd) -- [-4.0 -2.0 0.0 2.0 4.0] + +;; F64 col * F64 col +(at (select {p: (* x y) from: Tf}) 'p) -- [5.0 8.0 9.0 8.0 5.0] + +;; F64 div (line 663 in expr_exec_binary via fused path) +(at (select {q: (/ x y) from: Tf}) 'q) -- [0.2 0.5 1.0 2.0 5.0] + +;; F64 mod (line 664-668 in expr_exec_binary) +(set Tmod (table [a b] (list [7.0 -7.0 7.0 -7.0] [3.0 3.0 -3.0 -3.0]))) +(at (select {r: (% a b) from: Tmod}) 'r) -- [1.0 2.0 -2.0 -1.0] + +;; F64 min2/max2: OP_MIN2 and OP_MAX2 are only generated via graph API (ray_min2/ray_max2), +;; not through any RFL surface. The expr_exec_binary OP_MIN2/MAX2 branches in the fused +;; evaluator (lines 669-670) and binary_range BR_AR_FAST (lines 1621-1622) are unreachable +;; from RFL. Documented as dead code in test report. + +;; =================================================================== +;; I64 binary via fused path: AND/OR on boolean columns (lines 695-696) +;; When dt=I64 but opcode=AND/OR — hits the i64 branch +;; NOTE: In practice the fused eval produces dt=BOOL for AND/OR ops (RAY_BOOL), +;; so the dt==I64 AND/OR path in expr_exec_binary is dead code. +;; The bool branch (lines 785-786) is the live path. 
+;; =================================================================== + +;; AND/OR over BOOL columns: dt=BOOL path (line 785-786) +(at (select {r: (and p q) from: Tb}) 'r) -- [true false false false] +(at (select {r: (or p q) from: Tb}) 'r) -- [true true true false] + +;; =================================================================== +;; I64 idiv via binary_range line 1708 (floor div via double path) +;; div maps to OP_IDIV; sign-correct floor division +;; =================================================================== +(at (select {r: (div a b) from: Ti}) 'r) -- [3 5 6 6 7] + +;; Floor div semantics: rounds toward -infinity +(set Tdiv (table [a b] (list [7 -7 7 -7 0] [2 2 -2 -2 3]))) +(at (select {r: (div a b) from: Tdiv}) 'r) -- [3 -4 -4 3 0] + +;; =================================================================== +;; I64 add, sub, mul via fused path on table columns +;; =================================================================== +(at (select {r: (+ a b) from: Ti}) 'r) -- [13 24 35 46 57] +(at (select {r: (- a b) from: Ti}) 'r) -- [7 16 25 34 43] +(at (select {r: (* a b) from: Ti}) 'r) -- [30 80 150 240 350] +(at (select {r: (% a b) from: Ti}) 'r) -- [1 0 0 4 1] + +;; =================================================================== +;; F64 comparisons via fused path (line 759-766 in expr_exec_binary) +;; These hit the NaN-null-aware comparison path +;; =================================================================== +(set Tcmp (table [x y] (list [1.0 2.0 3.0] [3.0 2.0 1.0]))) +(at (select {r: (== x y) from: Tcmp}) 'r) -- [false true false] +(at (select {r: (!= x y) from: Tcmp}) 'r) -- [true false true] +(at (select {r: (< x y) from: Tcmp}) 'r) -- [true false false] +(at (select {r: (> x y) from: Tcmp}) 'r) -- [false false true] +(at (select {r: (<= x y) from: Tcmp}) 'r) -- [true true false] +(at (select {r: (>= x y) from: Tcmp}) 'r) -- [false true true] + +;; =================================================================== +;; Chained 
fused expr: multiple ops compiled into one pass +;; =================================================================== +;; (a + b) * 2 — compiled as one expression tree +(at (select {r: (* (+ a b) 2) from: Ti}) 'r) -- [26 48 70 92 114] + +;; (x * y) + 0.5 +(at (select {r: (+ (* x y) 0.5) from: Tf}) 'r) -- [5.5 8.5 9.5 8.5 5.5] + +;; Nested arithmetic fused over I64 columns +(set Tdeep (table [a b c] (list [1 2 3 4 5] [10 20 30 40 50] [100 200 300 400 500]))) +(at (select {r: (+ a (+ b c)) from: Tdeep}) 'r) -- [111 222 333 444 555] +(at (select {r: (- (* a b) c) from: Tdeep}) 'r) -- [-90 -160 -210 -240 -250] + +;; =================================================================== +;; Type promotion in fused: I64 col in F64 context (CAST I64→F64 inserted) +;; Exercises expr_ensure_type which inserts OP_CAST into the compiled expr +;; =================================================================== +(set Tmix (table [i f] (list [1 2 3 4 5] [0.1 0.2 0.3 0.4 0.5]))) +(at (select {r: (+ i f) from: Tmix}) 'r) -- [1.1 2.2 3.3 4.4 5.5] +(type (at (select {r: (+ i f) from: Tmix}) 'r)) -- 'F64 + +;; I64 col / F64 col (OP_DIV forces F64 output, CAST inserted for i) +(at (select {r: (/ i f) from: Tmix}) 'r) -- [10.0 10.0 10.0 10.0 10.0] + +;; Comparison mixed: I64 col vs F64 value triggers type promotion +(at (select {r: (> i 2.5) from: Tmix}) 'r) -- [false false true true true] diff --git a/test/rfl/expr/narrow_binary.rfl b/test/rfl/expr/narrow_binary.rfl new file mode 100644 index 00000000..69bae107 --- /dev/null +++ b/test/rfl/expr/narrow_binary.rfl @@ -0,0 +1,140 @@ +;; Coverage for exec_elementwise_binary with narrow integer output types: +;; I32 (RAY_I32/DATE/TIME), I16, U8 paths in binary_range (expr.c ~1714-1752). +;; +;; All operations here use direct vector-vs-vector or vector-vs-scalar forms +;; (no table context) so the fused expr_compile path is bypassed and +;; exec_elementwise_binary → binary_range is called directly. 
+ +;; =================================================================== +;; I32 arithmetic: +, -, *, %, div, and null propagation +;; =================================================================== + +;; I32 vec + I32 vec +(+ [1i 2i 3i] [4i 5i 6i]) -- [5 7 9] +(type (+ [1i 2i 3i] [4i 5i 6i])) -- 'I32 + +;; I32 vec - I32 vec +(- [4i 5i 6i] [1i 2i 3i]) -- [3 3 3] +(type (- [4i 5i 6i] [1i 2i 3i])) -- 'I32 + +;; I32 vec * I32 vec +(* [2i 3i 4i] [3i 4i 5i]) -- [6 12 20] +(type (* [2i 3i 4i] [3i 4i 5i])) -- 'I32 + +;; I32 floor mod — sign follows divisor (Python semantics) +(% [10i 11i 12i] [3i 4i 5i]) -- [1 3 2] +(type (% [10i 11i 12i] [3i 4i 5i])) -- 'I32 + +;; I32 mod sign correction: negative dividend / positive divisor +(% [10i -10i 10i -10i] [3i 3i -3i -3i]) -- [1 2 -2 -1] + +;; I32 floor div (integer path) +(div [10i 11i 12i] [3i 4i 5i]) -- [3 2 2] +(type (div [10i 11i 12i] [3i 4i 5i])) -- 'I32 + +;; I32 floor div rounds toward -infinity +(div [-7i 7i -7i] [2i -2i -2i]) -- [-4 -4 3] + +;; I32 div by zero yields null +(nil? (at (div [10i 11i 12i] [3i 0i 5i]) 1)) -- true +(at (div [10i 11i 12i] [3i 0i 5i]) 0) -- 3 +(at (div [10i 11i 12i] [3i 0i 5i]) 2) -- 2 +(sum (map nil? (div [10i 11i 12i] [3i 0i 5i]))) -- 1 + +;; I32 mod by zero yields null +(nil? (at (% [10i 11i 12i] [3i 0i 5i]) 1)) -- true +(at (% [10i 11i 12i] [3i 0i 5i]) 0) -- 1 +(at (% [10i 11i 12i] [3i 0i 5i]) 2) -- 2 +(sum (map nil? (% [10i 11i 12i] [3i 0i 5i]))) -- 1 + +;; I32 null propagates through +/-/* +(+ [1i 2i 3i] 0Ni) -- [0Ni 0Ni 0Ni] +(sum (map nil? (+ [1i 2i 3i] 0Ni))) -- 3 +(sum (map nil? (- [1i 2i 3i] 0Ni))) -- 3 +(sum (map nil? 
(* [1i 2i 3i] 0Ni))) -- 3 + +;; =================================================================== +;; I16 arithmetic: +, -, *, %, div +;; =================================================================== + +;; I16 vec + I16 vec +(+ [1h 2h 3h] [4h 5h 6h]) -- [5 7 9] +(type (+ [1h 2h 3h] [4h 5h 6h])) -- 'I16 + +;; I16 vec - I16 vec +(- [4h 5h 6h] [1h 2h 3h]) -- [3 3 3] +(type (- [4h 5h 6h] [1h 2h 3h])) -- 'I16 + +;; I16 vec * I16 vec +(* [2h 3h 4h] [3h 4h 5h]) -- [6 12 20] +(type (* [2h 3h 4h] [3h 4h 5h])) -- 'I16 + +;; I16 floor mod +(% [10h 11h 12h] [3h 4h 5h]) -- [1 3 2] +(type (% [10h 11h 12h] [3h 4h 5h])) -- 'I16 + +;; I16 floor div +(div [10h 11h 12h] [3h 4h 5h]) -- [3 2 2] +(type (div [10h 11h 12h] [3h 4h 5h])) -- 'I16 + +;; I16 div by zero yields null +(nil? (at (div [10h 11h 12h] [3h 0h 5h]) 1)) -- true +(at (div [10h 11h 12h] [3h 0h 5h]) 0) -- 3 +(at (div [10h 11h 12h] [3h 0h 5h]) 2) -- 2 +(sum (map nil? (div [10h 11h 12h] [3h 0h 5h]))) -- 1 + +;; I16 mod by zero yields null +(nil? (at (% [10h 11h 12h] [3h 0h 5h]) 1)) -- true +(at (% [10h 11h 12h] [3h 0h 5h]) 0) -- 1 +(at (% [10h 11h 12h] [3h 0h 5h]) 2) -- 2 +(sum (map nil? (% [10h 11h 12h] [3h 0h 5h]))) -- 1 + +;; I16 null propagates through + +(sum (map nil? (+ [1h 2h 0Nh] 1h))) -- 1 +(sum (map nil? 
(- [1h 2h 0Nh] [10h 20h 30h]))) -- 1 + +;; =================================================================== +;; U8 arithmetic: +, -, *, %, div +;; =================================================================== + +;; U8 vec + U8 vec +(+ (as 'U8 [1 2 3]) (as 'U8 [4 5 6])) -- [0x05 0x07 0x09] +(type (+ (as 'U8 [1 2 3]) (as 'U8 [4 5 6]))) -- 'U8 + +;; U8 vec - U8 vec +(- (as 'U8 [10 20 30]) (as 'U8 [1 2 3])) -- [0x09 0x12 0x1b] +(type (- (as 'U8 [10 20 30]) (as 'U8 [1 2 3]))) -- 'U8 + +;; U8 vec * U8 vec +(* (as 'U8 [2 3 4]) (as 'U8 [3 4 5])) -- [0x06 0x0c 0x14] +(type (* (as 'U8 [2 3 4]) (as 'U8 [3 4 5]))) -- 'U8 + +;; U8 floor mod +(% (as 'U8 [10 11 12]) (as 'U8 [3 4 5])) -- [0x01 0x03 0x02] +(type (% (as 'U8 [10 11 12]) (as 'U8 [3 4 5]))) -- 'U8 + +;; U8 floor div +(div (as 'U8 [10 11 12]) (as 'U8 [3 4 5])) -- [0x03 0x02 0x02] +(type (div (as 'U8 [10 11 12]) (as 'U8 [3 4 5]))) -- 'U8 + +;; =================================================================== +;; I32/I16 comparison ops (lines 699 region, through binary_range) +;; =================================================================== + +;; I32 comparisons — vec vs vec +(== [1i 2i 3i] [2i 2i 2i]) -- [false true false] +(!= [1i 2i 3i] [2i 2i 2i]) -- [true false true] +(< [1i 2i 3i] [2i 2i 2i]) -- [true false false] +(<= [1i 2i 3i] [2i 2i 2i]) -- [true true false] +(> [1i 2i 3i] [2i 2i 2i]) -- [false false true] +(>= [1i 2i 3i] [2i 2i 2i]) -- [false true true] + +;; I16 comparisons — vec vs vec +(== [1h 2h 3h] [2h 2h 2h]) -- [false true false] +(< [1h 2h 3h] [2h 2h 2h]) -- [true false false] +(> [1h 2h 3h] [2h 2h 2h]) -- [false false true] + +;; U8 comparisons +(== (as 'U8 [1 2 3]) (as 'U8 [2 2 2])) -- [false true false] +(< (as 'U8 [1 2 3]) (as 'U8 [2 2 2])) -- [true false false] +(> (as 'U8 [1 2 3]) (as 'U8 [2 2 2])) -- [false false true] diff --git a/test/rfl/expr/narrow_cast.rfl b/test/rfl/expr/narrow_cast.rfl new file mode 100644 index 00000000..35bafee5 --- /dev/null +++ 
b/test/rfl/expr/narrow_cast.rfl @@ -0,0 +1,56 @@ +;; Regression: (as 'NARROW expr) inside select{}. +;; +;; Before fix: DAG OP_CAST with narrow output type (I32/I16/U8) was +;; silently broken — fused expr_exec_unary had no dt=I32/I16/U8 branch +;; (scratch stayed uninitialised → memcpy copied garbage to result), +;; and exec_elementwise_unary handled only widening (narrow→I64/F64) +;; with no in_type=I64/F64 → narrow_out case. Result: zeros (or +;; garbage if the non-fused path was reached). +;; +;; After fix: both paths handle I64/F64 → I32/I16/U8/BOOL narrowing +;; via truncating cast (BOOL uses truthy semantics, not low-byte +;; truncation). + +(set T (table [s] (list ["x"]))) + +;; ── const → narrow ─────────────────────────────────────────────── +(at (at (select {x: (as 'I32 5) from: T}) 'x) 0) -- 5 +(at (at (select {x: (as 'I32 99) from: T}) 'x) 0) -- 99 +(at (at (select {x: (as 'I32 -42) from: T}) 'x) 0) -- -42 +(at (at (select {x: (as 'I16 5) from: T}) 'x) 0) -- 5 +(at (at (select {x: (as 'I16 -42) from: T}) 'x) 0) -- -42 +(at (at (select {x: (as 'U8 5) from: T}) 'x) 0) -- 0x05 +(at (at (select {x: (as 'U8 255) from: T}) 'x) 0) -- 0xFF + +;; ── BOOL — truthy, not low-byte truncation ────────────────────── +(at (at (select {x: (as 'BOOL 0) from: T}) 'x) 0) -- false +(at (at (select {x: (as 'BOOL 1) from: T}) 'x) 0) -- true +(at (at (select {x: (as 'BOOL 7) from: T}) 'x) 0) -- true +;; 256 is non-zero — truthy. Low byte = 0, but BOOL cast uses +;; truthy semantics, so result is true (not 0). 
+(at (at (select {x: (as 'BOOL 256) from: T}) 'x) 0) -- true + +;; ── column → narrow (vector path) ─────────────────────────────── +(set Tc (table [v] (list [1 0 5 100000 0]))) +(set Rb (at (select {b: (as 'BOOL v) from: Tc}) 'b)) +(at Rb 0) -- true +(at Rb 1) -- false +(at Rb 2) -- true +(at Rb 3) -- true +(at Rb 4) -- false +(set Ri (at (select {b: (as 'I32 v) from: Tc}) 'b)) +(at Ri 0) -- 1 +(at Ri 1) -- 0 +(at Ri 2) -- 5 +(at Ri 3) -- 100000 +(at Ri 4) -- 0 + +;; ── F64 → narrow (truncation) ─────────────────────────────────── +(at (at (select {x: (as 'I32 3.7) from: T}) 'x) 0) -- 3 +(at (at (select {x: (as 'I32 -2.5) from: T}) 'x) 0) -- -2 +(at (at (select {x: (as 'BOOL 0.0) from: T}) 'x) 0) -- false +(at (at (select {x: (as 'BOOL 3.14) from: T}) 'x) 0) -- true + +;; ── narrow → narrow (no-op same type) ─────────────────────────── +(set Tn (table [v] (list (as 'I32 [1 2 3])))) +(at (at (select {x: (as 'I32 v) from: Tn}) 'x) 0) -- 1 diff --git a/test/rfl/expr/null_compare.rfl b/test/rfl/expr/null_compare.rfl new file mode 100644 index 00000000..5de2a1da --- /dev/null +++ b/test/rfl/expr/null_compare.rfl @@ -0,0 +1,130 @@ +;; Coverage for null-comparison semantics in expr.c: +;; fix_null_comparisons (line 1132): null-as-minimum convention +;; set_all_null (line 1180): broadcast null when one operand is a null scalar +;; propagate_nulls_binary (line 1222): null propagation from vec inputs +;; +;; Null semantics: null is treated as minimum-value (less than everything). 
+;; null == null → true +;; null < x → true (for any non-null x) +;; x > null → true +;; null != x → true (for any non-null x) + +;; =================================================================== +;; Scalar vs scalar null comparisons (fix_null_comparisons ln_s && rn_s path) +;; =================================================================== + +;; Both null (line 1163-1165) +(== 0Nl 0Nl) -- true +(<= 0Nl 0Nl) -- true +(>= 0Nl 0Nl) -- true +(!= 0Nl 0Nl) -- false +(< 0Nl 0Nl) -- false +(> 0Nl 0Nl) -- false + +;; Left null only (line 1168-1170) +(< 0Nl 5) -- true +(<= 0Nl 5) -- true +(!= 0Nl 5) -- true +(== 0Nl 5) -- false +(> 0Nl 5) -- false +(>= 0Nl 5) -- false + +;; Right null only (line 1172-1173) +(> 5 0Nl) -- true +(>= 5 0Nl) -- true +(!= 5 0Nl) -- true +(== 5 0Nl) -- false +(< 5 0Nl) -- false +(<= 5 0Nl) -- false + +;; =================================================================== +;; Null scalar vs vector (set_all_null path in propagate_nulls_binary) +;; All elements get the null-semantics fill +;; =================================================================== + +;; rhs null scalar: all results from null semantics +(== [1 2 3] 0Nl) -- [false false false] +(!= [1 2 3] 0Nl) -- [true true true] +(< [1 2 3] 0Nl) -- [false false false] +(> [1 2 3] 0Nl) -- [true true true] +(<= [1 2 3] 0Nl) -- [false false false] +(>= [1 2 3] 0Nl) -- [true true true] + +;; lhs null scalar: null < anything +(== 0Nl [1 2 3]) -- [false false false] +(!= 0Nl [1 2 3]) -- [true true true] +(< 0Nl [1 2 3]) -- [true true true] +(> 0Nl [1 2 3]) -- [false false false] +(<= 0Nl [1 2 3]) -- [true true true] +(>= 0Nl [1 2 3]) -- [false false false] + +;; =================================================================== +;; One-sided null vector (fix_null_comparisons fast path ln 1146-1155) +;; Exactly one side has nulls, the other is non-null scalar +;; =================================================================== + +;; lhs vector has nulls, rhs is a non-null scalar 
(l_has ^ r_has = true) +(== [0Nl 1 2 3] 2) -- [false false true false] +(!= [0Nl 1 2 3] 2) -- [true true false true] +(< [0Nl 1 2 3] 2) -- [true true false false] +(> [0Nl 1 2 3] 2) -- [false false false true] +(<= [0Nl 1 2 3] 2) -- [true true true false] +(>= [0Nl 1 2 3] 2) -- [false false true true] + +;; rhs vector has nulls, lhs is a non-null scalar +(== 2 [0Nl 1 2 3]) -- [false false true false] +(!= 2 [0Nl 1 2 3]) -- [true true false true] +(< 2 [0Nl 1 2 3]) -- [false false false true] +(> 2 [0Nl 1 2 3]) -- [true true false false] + +;; =================================================================== +;; Two-sided null vectors (general loop line 1158+) +;; Both lhs and rhs vectors have nulls at different positions +;; =================================================================== + +;; Nulls on different sides +;; pos 0: lhs=null, rhs=1 -> null==1 = false, null!=1 = true, null<1 = true, null>1 = false +;; pos 1: lhs=1, rhs=null -> 1==null = false, 1!=null = true, 1<null = false, 1>null = true +;; pos 2: both non-null +(== [0Nl 1 2] [1 0Nl 2]) -- [false false true] +(!= [0Nl 1 2] [1 0Nl 2]) -- [true true false] +(< [0Nl 1 2] [1 0Nl 2]) -- [true false false] +(> [0Nl 1 2] [1 0Nl 2]) -- [false true false] + +;; Both null at same position (both-null branch) +(== [0Nl 1 2] [0Nl 1 2]) -- [true true true] +(<= [0Nl 1 2] [0Nl 1 2]) -- [true true true] +(>= [0Nl 1 2] [0Nl 1 2]) -- [true true true] +(!= [0Nl 1 2] [0Nl 1 2]) -- [false false false] +(< [0Nl 1 2] [0Nl 1 2]) -- [false false false] +(> [0Nl 1 2] [0Nl 1 2]) -- [false false false] + +;; Mix: one both-null position, one left-null, one right-null +;; pos 0: lhs=null, rhs=1 -> null<1 = true; null>=1 = false +;; pos 1: lhs=1, rhs=null -> 1<null = false; 1>=null = true +;; pos 2: both null -> null<null = false; null>=null = true +(< [0Nl 1 0Nl] [1 0Nl 0Nl]) -- [true false false] +(>= [0Nl 1 0Nl] [1 0Nl 0Nl]) -- [false true true] + +;; =================================================================== +;; set_all_null for different result types: +;; F64 null 
scalar in arithmetic forces all-null result (line 1184-1187) +;; =================================================================== + +;; F64 null propagates through arithmetic +(+ [1.0 2.0 3.0] 0Nf) -- [0Nf 0Nf 0Nf] +(sum (map nil? (+ [1.0 2.0 3.0] 0Nf))) -- 3 +(sum (map nil? (* [1.0 2.0] 0Nf))) -- 2 + +;; I32 null propagates through arithmetic (set_all_null I32 path, line 1199) +(+ [1i 2i 3i] 0Ni) -- [0Ni 0Ni 0Ni] +(sum (map nil? (+ [1i 2i 3i] 0Ni))) -- 3 +(sum (map nil? (* [1i 2i 3i] 0Ni))) -- 3 +(sum (map nil? (- [1i 2i 3i] 0Ni))) -- 3 + +;; I16 null propagates through arithmetic (set_all_null I16 path, line 1204) +(sum (map nil? (+ [1h 2h 3h] 0Nh))) -- 3 +(sum (map nil? (- [1h 2h 3h] 0Nh))) -- 3 + +;; I64 null propagates (confirmed working baseline) +(sum (map nil? (+ [1 2 3] 0Nl))) -- 3 diff --git a/test/rfl/expr/null_propagation.rfl b/test/rfl/expr/null_propagation.rfl new file mode 100644 index 00000000..837ac824 --- /dev/null +++ b/test/rfl/expr/null_propagation.rfl @@ -0,0 +1,135 @@ +;; Coverage for null propagation through element-wise ops in expr.c: +;; propagate_nulls (line 1092): called when source vector has nulls +;; propagate_nulls_binary (line 1222): binary ops, both vec and scalar inputs +;; exec_elementwise_unary null propagation (line 1398-1406) +;; div/mod zero-divisor null marking (lines 1999-2022) + +;; =================================================================== +;; Arithmetic: null in lhs/rhs propagates to result +;; =================================================================== + +;; lhs vec null → result null at that position +(+ [1 0Nl 3] 10) -- [11 0Nl 13] +(nil? (at (+ [1 0Nl 3] 10) 1)) -- true + +;; rhs vec null propagates +(+ 10 [1 0Nl 3]) -- [11 0Nl 13] +(nil? (at (+ 10 [1 0Nl 3]) 1)) -- true + +;; Both vectors with nulls at different positions +(+ [0Nl 1 2] [3 0Nl 5]) -- [0Nl 0Nl 7] +(sum (map nil? (+ [0Nl 1 2] [3 0Nl 5]))) -- 2 + +;; Null propagates through mul/sub/mod +(* [1 0Nl 3] 2) -- [2 0Nl 6] +(nil? 
(at (* [1 0Nl 3] 2) 1)) -- true + +(- [0Nl 5 10] [1 2 3]) -- [0Nl 3 7] +(nil? (at (- [0Nl 5 10] [1 2 3]) 0)) -- true + +(% [10 0Nl 12] 3) -- [1 0Nl 0] +(nil? (at (% [10 0Nl 12] 3) 1)) -- true + +;; =================================================================== +;; Unary ops with null input: propagate_nulls called (line 1398-1406) +;; =================================================================== + +;; neg with null +(neg [0Nl 1 -2]) -- [0Nl -1 2] +(nil? (at (neg [0Nl 1 -2]) 0)) -- true + +;; abs with null +(abs [0Nl -1 2]) -- [0Nl 1 2] +(nil? (at (abs [0Nl -1 2]) 0)) -- true + +;; sqrt with null +(sqrt [0Nl 4.0 9.0]) -- [0Nf 2.0 3.0] +(nil? (at (sqrt [0Nl 4.0 9.0]) 0)) -- true + +;; log with null +(log [0Nl 1.0]) -- [0Nf 0.0] +(nil? (at (log [0Nl 1.0]) 0)) -- true + +;; exp with null +(exp [0Nl 0.0]) -- [0Nf 1.0] +(nil? (at (exp [0Nl 0.0]) 0)) -- true + +;; ceil/floor with null +(ceil [0Nl 1.2 2.8]) -- [0Nf 2.0 3.0] +(nil? (at (ceil [0Nl 1.2 2.8]) 0)) -- true + +(floor [0Nl 1.2 2.8]) -- [0Nf 1.0 2.0] +(nil? (at (floor [0Nl 1.2 2.8]) 0)) -- true + +;; =================================================================== +;; ISNULL: special case — flip null to 1 (line 1399-1403) +;; =================================================================== + +;; isnull over nullable vector +(map nil? [0Nl 1 2 0Nl 3]) -- [true false false true false] + +;; sum of nulls +(sum (map nil? [0Nl 1 2 0Nl 3])) -- 2 + +;; isnull over non-null vector: all false +(map nil? [1 2 3 4 5]) -- [false false false false false] + +;; =================================================================== +;; div/mod zero-divisor null marking (line 1999-2022) +;; =================================================================== + +;; I64 vec divisor with zeros (line 2001-2004) +(div [10 11 12 13] [3 0 5 0]) -- [3 0Nl 2 0Nl] +(nil? (at (div [10 11 12 13] [3 0 5 0]) 1)) -- true +(nil? 
(at (div [10 11 12 13] [3 0 5 0]) 3)) -- true + +;; I64 vec mod with zero (same null-marking) +(% [10 11 12] [3 0 5]) -- [1 0Nl 2] +(nil? (at (% [10 11 12] [3 0 5]) 1)) -- true + +;; I32 vec divisor with zero (line 2005-2008) +(nil? (at (div [10i 11i 12i] [3i 0i 5i]) 1)) -- true +(at (div [10i 11i 12i] [3i 0i 5i]) 0) -- 3 +(at (div [10i 11i 12i] [3i 0i 5i]) 2) -- 2 +(sum (map nil? (div [10i 11i 12i] [3i 0i 5i]))) -- 1 + +;; I32 vec mod with zero +(nil? (at (% [10i 11i 12i] [3i 0i 5i]) 1)) -- true +(at (% [10i 11i 12i] [3i 0i 5i]) 0) -- 1 +(at (% [10i 11i 12i] [3i 0i 5i]) 2) -- 2 +(sum (map nil? (% [10i 11i 12i] [3i 0i 5i]))) -- 1 + +;; Scalar zero divisor — all results become null (line 2011-2021) +;; Integer scalar divisor zero +(div [10 11 12] 0) -- [0Nl 0Nl 0Nl] +(sum (map nil? (div [10 11 12] 0))) -- 3 + +(% [10 11 12] 0) -- [0Nl 0Nl 0Nl] +(sum (map nil? (% [10 11 12] 0))) -- 3 + +;; F64 scalar divisor zero +(/ [1.0 2.0 3.0] 0.0) -- [0Nf 0Nf 0Nf] +(sum (map nil? (/ [1.0 2.0 3.0] 0.0))) -- 3 + +;; Atom div by zero +(div 10 0) -- 0Nl +(nil? (div 10 0)) -- true + +(% 10 0) -- 0Nl +(nil? (% 10 0)) -- true + +(/ 5 0) -- 0Nf +(nil? (/ 5 0)) -- true + +;; =================================================================== +;; Null propagation through comparisons: no-propagate for EQ..OR +;; (op_propagates_null returns false for comparison/logical ops) +;; =================================================================== + +;; Comparison: null in vector does NOT propagate — instead fix_null_comparisons applies +(count (== [0Nl 1 2] 1)) -- 3 +(count (!= [0Nl 1 2] 1)) -- 3 + +;; Arithmetic: null DOES propagate +(count (+ [0Nl 1 2] 1)) -- 3 +(sum (map nil? 
(+ [0Nl 1 2] 1))) -- 1 diff --git a/test/rfl/graph/traverse_coverage.rfl b/test/rfl/graph/traverse_coverage.rfl new file mode 100644 index 00000000..decc7afd --- /dev/null +++ b/test/rfl/graph/traverse_coverage.rfl @@ -0,0 +1,238 @@ +;; Coverage tests for src/ops/traverse.c — targeting uncovered regions +;; not exercised by graph_basic.rfl and graph_advanced.rfl. +;; +;; Key areas targeted: +;; 1. exec_cluster_coeff — degree>=2 triangle counting branch +;; 2. exec_connected_comp — disconnected graph (multiple components) +;; 3. exec_dijkstra — negative weight domain error + out-of-range +;; 4. exec_shortest_path — direction==2 (bidirectional) + src==dst case +;; 5. exec_var_expand — direction==2 (both CSRs), direction==1, start OOB +;; 6. exec_topsort — cycle detection domain error +;; 7. exec_degree_cent — full _in_degree/_out_degree/_degree columns +;; 8. exec_expand — zero-degree source node (empty result) +;; 9. exec_dfs — from sink node + range check +;; 10. exec_random_walk — range error for OOB start +;; 11. exec_shortest_path — unreachable dst (range error return) +;; 12. exec_mst — graph with equal-weight edges +;; 13. exec_louvain — two_m==0 guard (single isolated node) +;; 14. exec_pagerank — dangling nodes (sink with no out-edges) +;; 15. exec_betweenness — node with no fwd edges but rev edges + +;; ===================================================================== +;; Fixture A: undirected K4 (complete graph on 4 nodes). +;; Every node has 3 undirected neighbors -> LCC = 1.0 for all nodes. 
+;; Edges: bidirectional 0<->1, 0<->2, 0<->3, 1<->2, 1<->3, 2<->3 +;; ===================================================================== +(set K4e (table [src dst w] (list [0 1 0 2 0 3 1 2 1 3 2 3] [1 0 2 0 3 0 2 1 3 1 3 2] [1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0]))) +(set K4 (.graph.build K4e 'src 'dst 'w)) + +;; --- exec_cluster_coeff: deg>=2 branch -> triangles counted --- +(set Ck (.graph.cluster K4)) +(count Ck) -- 4 +(count (distinct (at Ck '_node))) -- 4 +;; In K4 every node has all its neighbors connected -> LCC = 1.0 +(>= (min (at Ck '_coefficient)) 0.9) -- true +(<= (max (at Ck '_coefficient)) 1.0) -- true + +;; --- exec_cluster_coeff: deg<2 path -> coefficient = 0.0 --- +;; Simple chain: 0->1->2 (no triangles; node 0 and 2 have deg<2) +(set ChainE (table [src dst w] (list [0 1] [1 2] [1.0 1.0]))) +(set Chain (.graph.build ChainE 'src 'dst 'w)) +(set Cc (.graph.cluster Chain)) +(count Cc) -- 3 +;; Node 1 is interior but fwd + rev neighbors are {0, 2} - no fwd edge 0->2 +;; so triangle count = 0, LCC = 0 +(>= (min (at Cc '_coefficient)) 0.0) -- true + +;; ===================================================================== +;; Fixture B: Disconnected graph. +;; Component {0,1}, isolated node 2, component {3,4}. +;; ===================================================================== +(set DiscE (table [src dst w] (list [0 1 3] [1 0 4] [1.0 1.0 1.0]))) +(set Disc (.graph.build DiscE 'src 'dst 'w)) + +;; --- exec_connected_comp: multiple components --- +(set Comp (.graph.connected Disc)) +(count Comp) -- 5 +;; Exactly 3 distinct component IDs: {0,1} form one, 2 is isolated, {3,4} another +(count (distinct (at Comp '_component))) -- 3 + +;; ===================================================================== +;; Fixture C: simple 3-node path 0->1->2 with unit weights. 
+;; ===================================================================== +(set PathE (table [src dst w] (list [0 1] [1 2] [1.0 1.0]))) +(set Path (.graph.build PathE 'src 'dst 'w)) + +;; --- exec_dijkstra: negative weight domain error --- +(set NegE (table [src dst w] (list [0] [1] [-1.0]))) +(set NegG (.graph.build NegE 'src 'dst 'w)) +(.graph.dijkstra NegG 0) !- domain + +;; --- exec_dijkstra: out-of-range source --- +(.graph.dijkstra Path 99) !- range + +;; --- exec_shortest_path: src==dst returns single-node path --- +(set Sp00 (.graph.shortest-path Path 1 1)) +(count Sp00) -- 1 +(first (at Sp00 '_node)) -- 1 +(first (at Sp00 '_depth)) -- 0 + +;; --- exec_shortest_path: unreachable destination (range error) --- +;; In directed path 0->1->2, going from 2 back to 0 is impossible. +(.graph.shortest-path Path 2 0) !- range + +;; --- exec_shortest_path: out-of-range node --- +(.graph.shortest-path Path 0 99) !- range + +;; ===================================================================== +;; Fixture D: bidirectional 3-node ring 0<->1<->2<->0 +;; ===================================================================== +(set RingE (table [src dst w] (list [0 1 1 2 2 0] [1 0 2 1 0 2] [1.0 1.0 1.0 1.0 1.0 1.0]))) +(set Ring (.graph.build RingE 'src 'dst 'w)) + +;; --- exec_var_expand direction==2 (both CSRs expanded) --- +;; From node 0 expanding both fwd and rev, max depth 1 +;; Fwd neighbors of 0: {1, 2}; rev neighbors of 0: {1, 2} (every ring edge exists in both directions) +(set VeBoth (.graph.var-expand Ring 0 1 1 2)) +(count VeBoth) -- 2 +(first (at VeBoth '_start)) -- 0 +(min (at VeBoth '_depth)) -- 1 +(max (at VeBoth '_depth)) -- 1 + +;; direction==2, depth 2 covers all nodes from 0 +(>= (count (.graph.var-expand Ring 0 1 2 2)) 2) -- true + +;; --- exec_topsort: cycle detection -> domain error --- +(set CycE (table [src dst w] (list [0 1 2] [1 2 0] [1.0 1.0 1.0]))) +(set Cyc (.graph.build CycE 'src 'dst 'w)) +(.graph.topsort Cyc) !- domain + +;; 
===================================================================== +;; Fixture E: star graph. Node 0 is hub with 5 spokes (out-only). +;; ===================================================================== +(set StarE (table [src dst w] (list [0 0 0 0 0] [1 2 3 4 5] [1.0 1.0 1.0 1.0 1.0]))) +(set Star (.graph.build StarE 'src 'dst 'w)) + +;; --- exec_degree_cent: _in_degree, _out_degree, _degree columns --- +(set Dg (.graph.degree Star)) +(count Dg) -- 6 +;; Hub node 0: out=5, in=0, total=5 +(set Dg_node (at Dg '_node)) +(set Dg_out (at Dg '_out_degree)) +(set Dg_in (at Dg '_in_degree)) +(set Dg_tot (at Dg '_degree)) +(at Dg_out (at (where (== Dg_node 0)) 0)) -- 5 +(at Dg_in (at (where (== Dg_node 0)) 0)) -- 0 +(at Dg_tot (at (where (== Dg_node 0)) 0)) -- 5 +;; Spoke node 1: out=0, in=1, total=1 +(at Dg_out (at (where (== Dg_node 1)) 0)) -- 0 +(at Dg_in (at (where (== Dg_node 1)) 0)) -- 1 +(at Dg_tot (at (where (== Dg_node 1)) 0)) -- 1 + +;; --- exec_pagerank: dangling node correction (nodes with no out-edges) --- +;; In Star, nodes 1-5 are dangling (no out-edges). PageRank must +;; redistribute their rank each iteration. +(set Pr (.graph.pagerank Star 10 0.85)) +(count Pr) -- 6 +;; All ranks positive +(> (min (at Pr '_rank)) 0.0) -- true +;; Ranks sum to ~1 +(>= (sum (at Pr '_rank)) 0.99) -- true +(<= (sum (at Pr '_rank)) 1.01) -- true + +;; --- exec_expand on hub (out-degree=5) --- +(count (.graph.expand Star 0)) -- 5 +;; --- exec_expand on spoke 1 (out-degree=0) -> empty result --- +(count (.graph.expand Star 1)) -- 0 + +;; --- exec_dfs with out-of-range src -> range error --- +(.graph.dfs Star 99) !- range + +;; --- exec_random_walk with out-of-range src -> range error --- +(.graph.random-walk Star 99) !- range + +;; ===================================================================== +;; Fixture F: bipartite graph K2,3. +;; Left: {0,1}, Right: {2,3,4}. Edges: all 0->right plus all 1->right. 
+;; ===================================================================== +(set K23e (table [src dst w] (list [0 0 0 1 1 1] [2 3 4 2 3 4] [1.0 1.0 1.0 1.0 1.0 1.0]))) +(set K23 (.graph.build K23e 'src 'dst 'w)) + +;; --- exec_betweenness on bipartite graph --- +(set Bet (.graph.betweenness K23)) +(count Bet) -- 5 +;; All centralities >= 0 +(>= (min (at Bet '_centrality)) 0.0) -- true + +;; --- exec_closeness on bipartite graph --- +(set Clo (.graph.closeness K23)) +(count Clo) -- 5 +(>= (min (at Clo '_centrality)) 0.0) -- true + +;; --- exec_var_expand direction==1 (reverse) --- +;; From node 2, direction=1 (reverse) expands to {0, 1} at depth 1 +(set VeRev (.graph.var-expand K23 2 1 1 1)) +(count VeRev) -- 2 +(first (at VeRev '_start)) -- 2 + +;; --- exec_dijkstra single-source all-reachable --- +(set DjK (.graph.dijkstra K23 0)) +;; From node 0 (fwd): 0 itself + nodes 2,3,4 reachable = 4 nodes +(count DjK) -- 4 +;; Node 0 distance=0 +(set DjK_node (at DjK '_node)) +(set DjK_dist (at DjK '_dist)) +(at DjK_dist (at (where (== DjK_node 0)) 0)) -- 0.0 +;; All reachable nodes have dist < infinity +(<= (max DjK_dist) 2.0) -- true + +;; ===================================================================== +;; Fixture G: minimal two-node graph built from a single edge 0->1 (small-m louvain case). +;; NOTE(review): intended to exercise the louvain two_m==0 guard; a one-edge graph may not reach it - confirm. +;; ===================================================================== +(set Solo (table [src dst w] (list [0] [0] [1.0]))) +;; Note: Solo is a one-row edge table (single self-loop 0->0) originally meant for a +;; 1-node graph; it is never passed to .graph.build in this file. 
+(set SoloE (table [src dst w] (list [] [] []))) +;; Build from empty list fails; use single self-loop +(set IsoE (table [src dst w] (list [0] [1] [1.0]))) +(set IsoG (.graph.build IsoE 'src 'dst 'w)) +;; Louvain on 2-node graph (small m) +(set Liso (.graph.louvain IsoG)) +(count Liso) -- 2 +(>= (min (at Liso '_community)) 0) -- true + +;; MST on disconnected graph (returns forest, fewer than n-1 edges) +(set MstDisc (.graph.mst Disc)) +;; Disc has 3 edges: {0->1, 1->0, 3->4}. MST/forest = 2 edges (one per component) +(>= (count MstDisc) 1) -- true +(<= (count MstDisc) 4) -- true + +;; --- exec_mst: equal-weight edges --- +(set EqE (table [src dst w] (list [0 1 2 3] [1 2 3 0] [2.0 2.0 2.0 2.0]))) +(set EqG (.graph.build EqE 'src 'dst 'w)) +(set EqMst (.graph.mst EqG)) +;; MST of a 4-cycle has 3 edges +(count EqMst) -- 3 +;; All weights are 2.0 +(== (min (at EqMst '_weight)) 2.0) -- true + +;; --- exec_k_shortest: disconnected returns empty table --- +(set Knone (.graph.k-shortest Path 2 0 3)) +(count Knone) -- 0 + +;; --- exec_var_expand: start node OOB -> 0 rows --- +(count (.graph.var-expand Path 99 1 3)) -- 0 + +;; Cleanup +(.graph.free K4) +(.graph.free Chain) +(.graph.free Disc) +(.graph.free Path) +(.graph.free NegG) +(.graph.free Ring) +(.graph.free Cyc) +(.graph.free Star) +(.graph.free K23) +(.graph.free IsoG) +(.graph.free EqG) diff --git a/test/rfl/hof/eval_coverage.rfl b/test/rfl/hof/eval_coverage.rfl new file mode 100644 index 00000000..3cb16516 --- /dev/null +++ b/test/rfl/hof/eval_coverage.rfl @@ -0,0 +1,114 @@ +;; eval.c coverage — exercises paths not yet hit by existing tests. + +;; ═══════════════════════════════════════════════════════════════════ +;; 1. NULL handling in unary dispatch (ray_eval RAY_UNARY branch) +;; Only type / nil? / ser should handle null; others → type error. +;; ═══════════════════════════════════════════════════════════════════ +(nil? null) -- true +(nil? 
42) -- false +(type "hello") -- 'str +(type 42) -- 'i64 + +;; ── null passed to non-null-safe unary → type error ────────────── +(sum null) !- type +(neg null) !- type +(abs null) !- type +(not null) !- type + +;; ═══════════════════════════════════════════════════════════════════ +;; 2. NULL handling in binary dispatch (== and != are null-safe) +;; ═══════════════════════════════════════════════════════════════════ +(== null null) -- true +(!= null null) -- false +(== null 1) -- false +(!= null 1) -- true +(== 1 null) -- false +(!= 1 null) -- true + +;; ── binary op with null: type error (not == / !=) ──────────────── +(try (+ null 1) (fn [e] -1)) -- -1 +(try (- 5 null) (fn [e] -1)) -- -1 +(try (/ 1 null) (fn [e] -1)) -- -1 +(try (* null 2) (fn [e] -1)) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 3. Arity errors in eval dispatch +;; ═══════════════════════════════════════════════════════════════════ +;; UNARY called with wrong argc +(neg 1 2) !- arity + +;; BINARY called with wrong argc +(+ 1 2 3) !- arity + +;; LAMBDA arity mismatch +((fn [x] x) 1 2 3) !- arity +((fn [a b] (+ a b)) 1) !- arity + +;; VARY with > 64 args: domain error +(list 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65) !- domain + +;; ═══════════════════════════════════════════════════════════════════ +;; 4. Head evaluates to non-callable type → type error +;; ═══════════════════════════════════════════════════════════════════ +(try (42 1 2) (fn [e] -1)) -- -1 +(try ("hello" 1) (fn [e] -1)) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 5. Name resolution errors +;; ═══════════════════════════════════════════════════════════════════ +undefined_xyz !- name +(undefined_fn 1 2) !- name + +;; ═══════════════════════════════════════════════════════════════════ +;; 6. 
let / if with lazy materialization +;; ═══════════════════════════════════════════════════════════════════ +;; let should materialize lazy values before binding +(let lz_val (sum [1 2 3 4 5])) +lz_val -- 15 + +;; if should materialize lazy condition +(if (sum [1 2 3]) "truthy" "falsy") -- "truthy" +(if (sum []) "truthy" "falsy") -- "falsy" + +;; ═══════════════════════════════════════════════════════════════════ +;; 7. do form +;; ═══════════════════════════════════════════════════════════════════ +(do (let a 1) (let b 2) (+ a b)) -- 3 +(do (+ 1 1) (+ 2 2) (+ 3 3)) -- 6 + +;; ═══════════════════════════════════════════════════════════════════ +;; 8. Empty list self-evaluates +;; ═══════════════════════════════════════════════════════════════════ +(count ()) -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 9. Special form (BINARY with RAY_FN_SPECIAL_FORM): set / let +;; ═══════════════════════════════════════════════════════════════════ +(set sf_x 77) +sf_x -- 77 +(let sf_y 88) +sf_y -- 88 + +;; ═══════════════════════════════════════════════════════════════════ +;; 10. Error propagation in arg evaluation for BINARY +;; ═══════════════════════════════════════════════════════════════════ +(+ undefined_a 1) !- name +(+ 1 undefined_b) !- name + +;; ═══════════════════════════════════════════════════════════════════ +;; 11. Error propagation in arg evaluation for VARY +;; ═══════════════════════════════════════════════════════════════════ +(list undefined_c 1 2) !- name +(list 1 undefined_d 2) !- name + +;; ═══════════════════════════════════════════════════════════════════ +;; 12. ser handles null (cover ser in null-safe unary set) +;; ═══════════════════════════════════════════════════════════════════ +(count (ser null)) -- 17 + +;; ═══════════════════════════════════════════════════════════════════ +;; 13. 
Recursion depth guard +;; ═══════════════════════════════════════════════════════════════════ +;; A deeply-recursive call should hit the depth limit gracefully +(set deep_fn (fn [n] (if (<= n 0) 0 (+ 1 (deep_fn (- n 1)))))) +(try (deep_fn 1000) (fn [e] -1)) -- -1 diff --git a/test/rfl/hof/try_raise.rfl b/test/rfl/hof/try_raise.rfl new file mode 100644 index 00000000..5f541c71 --- /dev/null +++ b/test/rfl/hof/try_raise.rfl @@ -0,0 +1,39 @@ +;; try / raise coverage — exercises ray_try_fn and related error-handling paths +;; in eval.c. + +;; ── basic: no error → result returned directly ────────────────────────────── +(try (+ 1 2) (fn [e] -1)) -- 3 +(try (sum [1 2 3]) (fn [e] -1)) -- 6 +(try 42 neg) -- 42 + +;; ── raise + lambda handler ─────────────────────────────────────────────────── +(try (raise 99) (fn [e] e)) -- 99 +(try (raise 10) (fn [e] (+ e 1))) -- 11 +(try (raise 5) (fn [e] (* e 2))) -- 10 + +;; ── raise + unary fn handler (RAY_UNARY handler branch) ───────────────────── +(try (raise 42) neg) -- -42 +(try (raise 7) abs) -- 7 +(try (raise -3) abs) -- 3 + +;; ── runtime error (no raise) → err_val defaults to 0 ──────────────────────── +(try (/ 1 "x") (fn [e] -1)) -- -1 +(try (sum null) (fn [e] -99)) -- -99 +(try (neg null) (fn [e] -2)) -- -2 + +;; ── undefined name error trapped ──────────────────────────────────────────── +(try undefined_name_xyz (fn [e] 0)) -- 0 + +;; ── handler expression itself fails → propagates that error ───────────────── +(try (raise 1) no_such_fn) !- name + +;; ── type error handler: invalid handler type ──────────────────────────────── +(try (raise 1) "not-a-fn") !- type + +;; ── nested try ────────────────────────────────────────────────────────────── +(try (try (raise 5) (fn [e] (raise (+ e 10)))) (fn [e] e)) -- 15 + +;; ── raise inside a lambda called from try ──────────────────────────────────── +(set err_fn (fn [x] (if (< x 0) (raise x) x))) +(try (err_fn -7) (fn [e] (neg e))) -- 7 +(try (err_fn 3) (fn [e] (neg e))) -- 3 diff 
--git a/test/rfl/hof/vm_coverage.rfl b/test/rfl/hof/vm_coverage.rfl new file mode 100644 index 00000000..bca8ceb8 --- /dev/null +++ b/test/rfl/hof/vm_coverage.rfl @@ -0,0 +1,234 @@ +;; VM-level coverage: OP_TRAP, OP_CALLS (self-recursion), OP_CALLF paths, +;; OP_JMP backward (interrupt check), and call_fn1/call_fn2 error paths. + +;; ═══════════════════════════════════════════════════════════════════ +;; 1. OP_TRAP / OP_TRAP_END via compiled try inside lambda +;; Note: this section drives the trap path with runtime errors +;; (type/name errors) only. (raise x) inside a compiled lambda is +;; covered separately in section 2, which pins the __raise_val fix. +;; ═══════════════════════════════════════════════════════════════════ +(set trap_fn (fn [x] (try (if (< x 0) (+ x "y") x) (fn [e] -1)))) +(trap_fn 5) -- 5 +(trap_fn -3) -- -1 + +;; No-error path: OP_TRAP_END executed (trap frame popped on success) +(set trap_noop (fn [] (try (+ 1 2) (fn [e] -1)))) +(trap_noop) -- 3 + +;; Type error caught inside lambda +(set trap_type (fn [] (try (+ 1 "x") (fn [e] -1)))) +(trap_type) -- -1 + +;; Name error caught inside lambda +(set trap_name (fn [] (try really_undefined_xyz_abc (fn [e] 0)))) +(trap_name) -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 2. (raise x) inside compiled lambda — user value reaches handler. +;; Regression for prior bug where vm_err_obj (the error sentinel +;; returned by ray_raise_fn) took priority over __raise_val in the +;; VM trap path, so the handler received the sentinel not the +;; user value. Fixed in src/lang/eval.c by preferring __raise_val +;; when set. +;; ═══════════════════════════════════════════════════════════════════ +(set f_raise (fn [x] (try (raise x) (fn [e] e)))) +(f_raise 99) -- 99 +(f_raise "hello") -- "hello" +(f_raise [1 2 3]) -- [1 2 3] +;; Non-raise errors still go through vm_err_obj path. 
+(set f_arity_err (fn [] (try ((fn [a b] a) 1) (fn [e] -1)))) +(f_arity_err) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 3. OP_CALLS (self-recursive call) +;; ═══════════════════════════════════════════════════════════════════ +(set fib (fn [n] (if (<= n 1) n (+ (fib (- n 1)) (fib (- n 2)))))) +(fib 0) -- 0 +(fib 1) -- 1 +(fib 7) -- 13 + +;; Tail-style recursion with accumulator +(set sum_r (fn [n acc] (if (<= n 0) acc (sum_r (- n 1) (+ acc n))))) +(sum_r 10 0) -- 55 +(sum_r 100 0) -- 5050 + +;; ═══════════════════════════════════════════════════════════════════ +;; 4. OP_CALLF with stored non-lambda fn objects +;; ═══════════════════════════════════════════════════════════════════ +;; Stored unary fn called via OP_CALLF RAY_UNARY path +(set my_neg neg) +(set f_neg (fn [x] (my_neg x))) +(f_neg 5) -- -5 +(f_neg -3) -- 3 + +;; ── arity error for stored UNARY fn via OP_CALLF ───────────────── +(set my_abs abs) +(set f_arity (fn [x y] (my_abs x y))) +(try (f_arity 5 10) (fn [e] -1)) -- -1 + +;; Stored binary fn +(set my_add +) +(set f_add (fn [a b] (my_add a b))) +(f_add 3 4) -- 7 +(f_add 100 200) -- 300 + +;; ── arity error for stored BINARY fn via OP_CALLF ───────────────── +(set my_sub -) +(set f_sub3 (fn [a b c] (my_sub a b c))) +(try (f_sub3 1 2 3) (fn [e] -1)) -- -1 + +;; Stored vary fn +(set my_list list) +(set f_list (fn [a b c] (count (my_list a b c)))) +(f_list 1 2 3) -- 3 + +;; ═══════════════════════════════════════════════════════════════════ +;; 5. call_fn1 and call_fn2 via HOF +;; ═══════════════════════════════════════════════════════════════════ +(fold + 0 [1 2 3 4 5]) -- 15 +(fold * 1 [1 2 3 4]) -- 24 +(scan + [1 2 3 4]) -- [1 3 6 10] +(map neg [1 2 3]) -- [-1 -2 -3] + +;; ── call_fn1 with non-unary/non-lambda → type error ────────────── +(try (map 42 [1 2 3]) (fn [e] -1)) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 6. 
call_lambda arity guard (via call_fn or direct call) +;; ═══════════════════════════════════════════════════════════════════ +(set f2 (fn [a b] (+ a b))) +(try (f2 1) (fn [e] -1)) -- -1 +(try (f2 1 2 3) (fn [e] -1)) -- -1 +(f2 10 20) -- 30 + +;; ═══════════════════════════════════════════════════════════════════ +;; 7. VM arity errors for stored fn called via OP_CALLF +;; ═══════════════════════════════════════════════════════════════════ +(set u_fn (fn [x] (* x 2))) +(u_fn 7) -- 14 +(try (u_fn) (fn [e] -1)) -- -1 +(try (u_fn 1 2) (fn [e] -1)) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 8. VM name resolution error inside lambda +;; ═══════════════════════════════════════════════════════════════════ +(set resolve_fn (fn [] really_undefined_name_xyz)) +(try (resolve_fn) (fn [e] 999)) -- 999 + +;; ═══════════════════════════════════════════════════════════════════ +;; 9. Error propagation out of lambda with eval error frame +;; ═══════════════════════════════════════════════════════════════════ +(set err_from_fn (fn [x] (+ x "not-a-num"))) +(try (err_from_fn 5) (fn [e] -1)) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 10. OP_CALL1 null arg handling +;; NOTE: op_call1 null paths (lines 1892-1894) check fn pointer +;; equality against ray_nil_fn / ray_type_fn. Passing `null` to +;; a lambda from the tree-walk evaluator hits !args[0] check +;; (eval.c:3089-3090) before the lambda runs, so the VM null +;; path is unreachable from plain RFL. 
Cover non-null paths: +;; ═══════════════════════════════════════════════════════════════════ +;; non-null arg via op_call1: compiled UNARY call with scalar → else branch +(set call1_neg (fn [x] (neg x))) +(call1_neg 5) -- -5 +(call1_neg -3) -- 3 + +;; op_call1 ATOMIC path: arg is a vec → atomic_map_unary branch +(set call1_neg_v (fn [x] (neg x))) +(at (call1_neg_v [1 2 3]) 0) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 11. OP_CALL2 non-null paths (compiled lambda calling BINARY fn) +;; ═══════════════════════════════════════════════════════════════════ +;; op_call2 with scalars → else branch +(set call2_add (fn [x y] (+ x y))) +(call2_add 3 4) -- 7 + +;; op_call2 ATOMIC path: vec arg → atomic_map_binary_op branch +(set call2_mul (fn [x y] (* x y))) +(at (call2_mul [1 2 3] 2) 1) -- 4 + +;; op_call2 error → vm_error path +(set call2_div (fn [x y] (/ x y))) +(try (call2_div 1 "bad") (fn [e] -1)) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 12. OP_DUP via (let) inside compiled lambda +;; (let x val) compiles to: eval val, OP_DUP, OP_STOREENV slot +;; ═══════════════════════════════════════════════════════════════════ +(set dup_fn (fn [x] (let y (* x 2)) (+ y 1))) +(dup_fn 5) -- 11 +(dup_fn -3) -- -5 + +;; ═══════════════════════════════════════════════════════════════════ +;; 13. call_fn2 RAY_UNARY fallback (line 1094-1097) +;; fold calls call_fn2(fn, acc, elem); if fn is UNARY, only acc is used +;; ═══════════════════════════════════════════════════════════════════ +;; (fold neg 5 [1 2 3]): call_fn2(neg, 5, 1) → neg(5)=-5 +;; call_fn2(neg, -5, 2) → neg(-5)=5 +;; call_fn2(neg, 5, 3) → neg(5)=-5 → result -5 +(fold neg 5 [1 2 3]) -- -5 + +;; ═══════════════════════════════════════════════════════════════════ +;; 14. 
call_lambda tree-walk fallback (eval.c lines 1668-1704) +;; NOTE: lines 1703-1704 (success path) are structurally +;; unreachable from RFL: +;; - dot-reserved names (compile.c:244) also fail in tree-walk +;; - jump overflow (compile.c:188) requires > 32767 byte body +;; - >256 locals (compile.c:251) also fails at FRAME_CAP=64 in +;; tree-walk (env.c) +;; - OOM paths (lines 105, 144) cannot be triggered from RFL +;; All non-OOM compilation failures also fail in tree-walk. +;; Error-path lines 1697-1700 are exercised by the +;; existing error propagation tests above. +;; ═══════════════════════════════════════════════════════════════════ + +;; ═══════════════════════════════════════════════════════════════════ +;; 15. OP_CALLF VM stack overflow (eval.c lines 1989-1994) +;; A compiled lambda with many locals (large callee_locals) triggers +;; vm.sp + callee_locals >= VM_STACK_SIZE before PUSH can fire. +;; The cleanup loop releases fn_args before goto vm_error_limit. +;; ═══════════════════════════════════════════════════════════════════ +;; Lambda with 50 let bindings → callee_locals=51 → fills VM stack in ~20 calls +(set g_deep (fn [n] (let _a1 1) (let _a2 2) (let _a3 3) (let _a4 4) (let _a5 5) (let _a6 6) (let _a7 7) (let _a8 8) (let _a9 9) (let _a10 10) (let _a11 11) (let _a12 12) (let _a13 13) (let _a14 14) (let _a15 15) (let _a16 16) (let _a17 17) (let _a18 18) (let _a19 19) (let _a20 20) (let _a21 21) (let _a22 22) (let _a23 23) (let _a24 24) (let _a25 25) (let _a26 26) (let _a27 27) (let _a28 28) (let _a29 29) (let _a30 30) (let _a31 31) (let _a32 32) (let _a33 33) (let _a34 34) (let _a35 35) (let _a36 36) (let _a37 37) (let _a38 38) (let _a39 39) (let _a40 40) (let _a41 41) (let _a42 42) (let _a43 43) (let _a44 44) (let _a45 45) (let _a46 46) (let _a47 47) (let _a48 48) (let _a49 49) (let _a50 50) (if (<= n 0) _a1 (g_deep (- n 1))))) +(try (g_deep 100) (fn [e] -1)) -- -1 + +;; ═══════════════════════════════════════════════════════════════════ +;; 16. 
OP_CALL1 lazy arg materialization (eval.c lines 1885-1891) +;; When a compiled lambda calls a non-lazy-aware UNARY fn (e.g. neg) +;; on a lazy value (e.g. sum result), op_call1 materializes it. +;; Note: lines 1886-1890 (materialization failure) require OOM — +;; only line 1885 (successful materialization) is coverable from RFL. +;; ═══════════════════════════════════════════════════════════════════ +;; (neg (sum v)): compiled to LOADCONST neg, LOADCONST sum, RESOLVE v, OP_CALL1, OP_CALL1 +;; The inner OP_CALL1 pushes a lazy result; the outer OP_CALL1 gets lazy arg → line 1885 +(set lazy_neg_fn (fn [v] (neg (sum v)))) +(lazy_neg_fn [1 2 3 4 5]) -- -15 +(lazy_neg_fn [10 20]) -- -30 + +;; OP_CALL2 lazy arg materialization (eval.c lines 1919-1930): +;; When a compiled lambda calls a BINARY fn with a lazy left or right arg. +;; (+ (sum v) 1): outer OP_CALL2 receives lazy left arg from sum → line 1916-1922 +(set lazy_add_fn (fn [v] (+ (sum v) 1))) +(lazy_add_fn [1 2 3]) -- 7 +;; (+ 1 (sum v)): outer OP_CALL2 receives lazy right arg → line 1924-1930 +(set lazy_add_fn2 (fn [v] (+ 1 (sum v)))) +(lazy_add_fn2 [1 2 3]) -- 7 + +;; ═══════════════════════════════════════════════════════════════════ +;; 17. OP_CALLF lazy arg materialization (eval.c lines 2043-2062) +;; When a compiled lambda calls a stored UNARY or BINARY fn via +;; OP_CALLF (fn not resolved at compile time), lazy args are +;; materialized in op_callf before dispatch. 
+;; ═══════════════════════════════════════════════════════════════════ +;; _callf_f is compiled BEFORE my_cf_neg is defined → my_cf_neg unresolved at compile time +;; → OP_CALLF is emitted for (my_cf_neg (sum v)) → lines 2043-2044 fire +(set _callf_f (fn [v] (my_cf_neg (sum v)))) +(set my_cf_neg neg) +(_callf_f [1 2 3]) -- -6 +;; OP_CALLF with lazy left arg for stored BINARY fn +;; _callf_add compiled before my_cf_add → OP_CALLF → lines 2055-2062 fire +(set _callf_add (fn [v] (my_cf_add (sum v) 1))) +(set my_cf_add +) +(_callf_add [1 2 3]) -- 7 diff --git a/test/rfl/io/csv_types.rfl b/test/rfl/io/csv_types.rfl new file mode 100644 index 00000000..f0f31831 --- /dev/null +++ b/test/rfl/io/csv_types.rfl @@ -0,0 +1,309 @@ +;; Coverage for src/io/csv.c — type inference, edge cases, parted writer. +;; +;; Targets (by approximate line number): +;; promote_csv_type line 242: DATE+I64 → STR (other mixed types) +;; promote_csv_type BOOL+I64 → I64 +;; fast_bool: is_null=true path (line 505-506) — invalid boolean string +;; hex_nibble: A-F uppercase case (line 517) +;; fast_guid: bad shape (line 525-526) — uppercase + invalid +;; fast_date/fast_time: out-of-range null paths (lines 425, 436) +;; fast_timestamp: invalid time component (line 487) +;; csv_resolve_inferred_type: B8 inferred (CSV_TYPE_BOOL) +;; build_row_offsets: slow path for embedded newlines +;; build_row_offsets_limited: slow path (quoted data) + fast-path end-of-buf +;; p >= row_end guard (lines 918+): short rows with multiple types +;; ray_csv_save_parted_named_opts (lines 2171+): .csv.parted builtin +;; csv_write_cell LIST type (lines 2650-2665) +;; auto-detect TSV vs CSV +;; CRLF line endings +;; NaN / Inf literal detection and parse +;; +;; ── cleanup stale state from a prior run ──────────────────────────────────── +(.sys.exec "rm -rf rf_test_csv_types_parted rf_test_csv_types_parted2 rf_test_csv_types_parted_q") -- 0 +(.sys.exec "rm -f rf_test_csv_types_*.csv") -- 0 + +;; 
════════════════════════════════════════════════════════════════════════════ +;; 1. Type-inference: promote_csv_type — DATE+TIMESTAMP → TIMESTAMP (line 234) +;; A column that has DATE in one row and TIMESTAMP in another must +;; resolve to TIMESTAMP so that no information is lost. +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'ts\n2000-01-01\n2001-01-01T12:00:00\n2002-06-15T08:30:45\n' > rf_test_csv_types_mixed_ts.csv") -- 0 +(set Tmts (.csv.read "rf_test_csv_types_mixed_ts.csv")) +(count Tmts) -- 3 +;; DATE+TIMESTAMP promote → TIMESTAMP; second row has time component +(at (at Tmts 'ts) 1) -- 2001.01.01D12:00:00.000000000 +(at (at Tmts 'ts) 2) -- 2002.06.15D08:30:45.000000000 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 2. Type-inference: promote_csv_type — DATE+I64 → STR (line 242, other mixed) +;; DATE and I64 are incompatible temporally; the column must widen to SYM. +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'x\n2000-01-01\n42\n2001-06-15\n' > rf_test_csv_types_date_int.csv") -- 0 +(set Tdi (.csv.read "rf_test_csv_types_date_int.csv")) +;; DATE+I64 are incompatible → col resolves to SYM; values preserved as strings +(count Tdi) -- 3 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 3. Type-inference: TIME+I64 → STR (another line-242 hit via different types) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'y\n10:00:00\n42\n' > rf_test_csv_types_time_int.csv") -- 0 +(set Tti (.csv.read "rf_test_csv_types_time_int.csv")) +;; TIME+I64 incompatible → SYM +(count Tti) -- 2 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 4. 
Type-inference: BOOL+I64 → I64 (numeric promotion path, line 238) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\ntrue\n1\n0\nfalse\n' > rf_test_csv_types_bool_int.csv") -- 0 +(set Tbi (.csv.read "rf_test_csv_types_bool_int.csv")) +;; BOOL+I64 numeric promotion → I64; true→null (bool parsed as I64 returns null) +(count Tbi) -- 4 +(sum (at Tbi 'a)) -- 1 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 5. Type-inference: BOOL+F64 → F64 (numeric promotion, line 237) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\ntrue\n1.5\nfalse\n' > rf_test_csv_types_bool_f64.csv") -- 0 +(set Tbf (.csv.read "rf_test_csv_types_bool_f64.csv")) +;; BOOL+F64 numeric promotion → F64 +(count Tbf) -- 3 +(sum (at Tbf 'a)) -- 1.5 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 6. NaN and Inf literal detection in detect_type (lines 155-162) +;; Both the 3-char form (NaN/Inf) and the 4-char signed form (+Inf/-Inf). +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\nNaN\nInf\n+Inf\n-Inf\nnan\ninf\n1.5\n' > rf_test_csv_types_naninf.csv") -- 0 +(set Tni (.csv.read "rf_test_csv_types_naninf.csv")) +;; NaN/Inf literals all infer as F64 +(count Tni) -- 7 +;; 1.5 is the only finite non-special value; others are NaN/±Inf +(at (at Tni 'a) 6) -- 1.5 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 7. Null sentinel strings: N/A, n/a, NA, na, null, NULL, None, none, . +;; All become UNKNOWN during inference; with only null-sentinels a column +;; resolves to SYM (default: in csv_resolve_inferred_type). +;; Mixing a numeric value forces I64, sentinels become NULL_I64. 
+;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\nN/A,1\nn/a,2\nNA,3\nna,4\nnull,5\nNULL,6\nNone,7\nnone,8\n.,9\n1,10\n' > rf_test_csv_types_nulls.csv") -- 0 +(set Tns (.csv.read "rf_test_csv_types_nulls.csv")) +;; column a: null-sentinels become NULL_I64; one real value; column b always I64 +(count Tns) -- 10 +(sum (at Tns 'b)) -- 55 +;; only 1 non-null value in col a +(sum (at Tns 'a)) -- 1 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 8. fast_bool: explicit B8 schema with "maybe" → is_null=true path +;; This hits lines 505-506: *is_null=true; return 0. +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'f\ntrue\nmaybe\nfalse\n1\n0\nTRUE\nFALSE\n' > rf_test_csv_types_boolx.csv") -- 0 +(set Tbx (.csv.read [B8] "rf_test_csv_types_boolx.csv")) +(count Tbx) -- 7 +;; "true"=1, "maybe"→fast_bool returns 0, "false"=0, "1"=1, "0"=0, "TRUE"=1, "FALSE"=0 +(sum (at Tbx 'f)) -- 3 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 9. hex_nibble: uppercase A-F (line 517) +;; GUIDs with uppercase hex characters force the A-F branch. +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'g\nAABBCCDD-EEFF-1122-3344-AABBCCDDEEFF\nAABBCCDD-EEFF-AABB-CCDD-112233445566\n' > rf_test_csv_types_guid_upper.csv") -- 0 +(set Tgu (.csv.read [GUID] "rf_test_csv_types_guid_upper.csv")) +(count Tgu) -- 2 +;; Write+re-read round-trip confirms GUIDs were parsed correctly (not all-zero) +(.csv.write Tgu "rf_test_csv_types_guid_upper_out.csv") -- 0 +(set rawgu (read "rf_test_csv_types_guid_upper_out.csv")) +(count (split rawgu "\n")) -- 4 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 10. hex_nibble: return -1 (line 518) + fast_guid: bad shape (lines 525-526) +;; Invalid GUID strings → null (0-bytes) with HAS_NULLS. 
+;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'g\n550e8400-e29b-41d4-a716-446655440000\nbad-guid\n00000000-ZZZZ-0000-0000-000000000000\nshort\n550e8400-e29b-41d4-a716-446655440000\n' > rf_test_csv_types_guid_bad.csv") -- 0 +(set Tgb (.csv.read [GUID] "rf_test_csv_types_guid_bad.csv")) +(count Tgb) -- 5 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 11. fast_date: out-of-range m>12 and d>31 → is_null=true (line 425) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\n2000-13-01\n2000-00-15\n2000-12-32\n2000-01-01\n' > rf_test_csv_types_baddate.csv") -- 0 +(set Tbd (.csv.read [DATE] "rf_test_csv_types_baddate.csv")) +(count Tbd) -- 4 +;; First three rows are null; last is valid +;; There is no direct way to compare DATE atoms yet; check non-null count via sum +;; Note: NULL_I32 sentinel so sum excludes nulls + +;; ════════════════════════════════════════════════════════════════════════════ +;; 12. fast_time: h>23, mi>59, s>59 → is_null=true (line 436) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\n24:00:00\n12:60:00\n12:00:60\n12:30:45\n' > rf_test_csv_types_badtime.csv") -- 0 +(set Tbt (.csv.read [TIME] "rf_test_csv_types_badtime.csv")) +(count Tbt) -- 4 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 13. fast_timestamp: invalid time component → is_null (line 487) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\n2000-01-01T25:00:00\n2000-01-01T12:61:00\n2000-01-01T12:00:60\n2000-01-01T12:00:00\n' > rf_test_csv_types_badts.csv") -- 0 +(set Tts (.csv.read [TIMESTAMP] "rf_test_csv_types_badts.csv")) +(count Tts) -- 4 +(at (at Tts 'a) 3) -- 2000.01.01D12:00:00.000000000 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 14. 
TIME with fractional seconds (line 439-447: frac loop + pad loop) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\n10:30:45.123\n23:59:59.999\n00:00:00.000\n12:00:00.5\n08:15:30.12\n' > rf_test_csv_types_timefrac.csv") -- 0 +(set Ttf (.csv.read "rf_test_csv_types_timefrac.csv")) +;; TIME column inferred; fractional-second parser covers loop+pad +(count Ttf) -- 5 +(at (at Ttf 'a) 0) -- 10:30:45.123 +(at (at Ttf 'a) 3) -- 12:00:00.500 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 15. TIMESTAMP with fractional nanoseconds (lines 467-475: frac loop + pad) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'ts\n2000-01-01T12:00:00.123456789\n2001-06-15 08:30:00.000000001\n2002-03-04T00:00:00.1\n' > rf_test_csv_types_tsfrac.csv") -- 0 +(set Ttsf (.csv.read "rf_test_csv_types_tsfrac.csv")) +;; TIMESTAMP column; nanosecond fractional parser covers loop+pad +(count Ttsf) -- 3 +(at (at Ttsf 'ts) 0) -- 2000.01.01D12:00:00.123456789 +(at (at Ttsf 'ts) 2) -- 2002.03.04D00:00:00.100000000 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 16. CRLF line endings: csv parser strips \r before \n +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\r\n1,2\r\n3,4\r\n5,6\r\n' > rf_test_csv_types_crlf.csv") -- 0 +(set Tcrlf (.csv.read "rf_test_csv_types_crlf.csv")) +(count Tcrlf) -- 3 +(sum (at Tcrlf 'a)) -- 9 +(sum (at Tcrlf 'b)) -- 12 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 17. 
TSV auto-detection (tabs > commas → delimiter = '\t') +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a\tb\n1\t2\n3\t4\n5\t6\n' > rf_test_csv_types_tsv.csv") -- 0 +(set Ttsv (.csv.read "rf_test_csv_types_tsv.csv")) +(count Ttsv) -- 3 +(key Ttsv) -- ['a 'b] +(sum (at Ttsv 'a)) -- 9 +(sum (at Ttsv 'b)) -- 12 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 18. Short row guard (p >= row_end path, lines 918+/1086+) +;; Row 1 has fewer fields than header → remaining columns get null/default. +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b,c\n1\n2,3,4\n5,6\n' > rf_test_csv_types_short.csv") -- 0 +(set Tsh (.csv.read "rf_test_csv_types_short.csv")) +(count Tsh) -- 3 +;; col a: 1, 2, 5 +(sum (at Tsh 'a)) -- 8 +;; col b, c: row0 is null, row2 col-c is null +(at (at Tsh 'b) 0) -- 0Nl +(at (at Tsh 'c) 0) -- 0Nl +(at (at Tsh 'c) 2) -- 0Nl + +;; ════════════════════════════════════════════════════════════════════════════ +;; 19. Embedded newline in quoted field: build_row_offsets slow path +;; The file contains a quote → has_quotes=true → slow byte-by-byte path. +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\n\"line1\\nline2\",10\nhello,20\n' > rf_test_csv_types_embnl.csv") -- 0 +(set Tenl (.csv.read "rf_test_csv_types_embnl.csv")) +(count Tenl) -- 2 +(sum (at Tenl 'b)) -- 30 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 20. 
Quoted field with embedded delimiter (slow path + scan_field_quoted) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\n\"hello,world\",1\n\"foo,bar,baz\",2\n' > rf_test_csv_types_embdelim.csv") -- 0 +(set Ted (.csv.read "rf_test_csv_types_embdelim.csv")) +(count Ted) -- 2 +(sum (at Ted 'b)) -- 3 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 21. Escaped quotes within quoted field: scan_field_quoted has_escape=true +;; "he""llo" → he"llo (double-quote escaping, lines 305-341) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\n\"he\"\"llo\",1\n\"say \"\"hi\"\"\",2\n' > rf_test_csv_types_escaped.csv") -- 0 +(set Tesc (.csv.read "rf_test_csv_types_escaped.csv")) +(count Tesc) -- 2 +(sum (at Tesc 'b)) -- 3 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 22. csv_write_cell: LIST column type (lines 2650-2665) +;; A table with a LIST-type column should write each cell as a string. +;; ════════════════════════════════════════════════════════════════════════════ +(set v123 [1 2 3]) +(set Tlist (table [a b] (list (list v123 v123 v123) (list 10 20 30)))) +(.csv.write Tlist "rf_test_csv_types_list.csv") -- 0 +(set rawl (read "rf_test_csv_types_list.csv")) +(set linesl (split rawl "\n")) +;; header + 3 data rows + trailing empty = 5 lines +(count linesl) -- 5 +(at linesl 0) -- "a,b" +(at linesl 1) -- "[1 2 3],10" +(at linesl 2) -- "[1 2 3],20" +(at linesl 3) -- "[1 2 3],30" + +;; ════════════════════════════════════════════════════════════════════════════ +;; 23. .csv.parted basic round-trip (lines 2171+) +;; ray_csv_save_parted_named_opts: single part, integer + float + sym cols. 
+;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b,s\n1,1.5,foo\n2,2.5,bar\n3,3.5,baz\n4,4.5,foo\n5,5.5,bar\n' > rf_test_csv_types_parted.csv") -- 0 +(set Rpart (.csv.parted "rf_test_csv_types_parted.csv" "rf_test_csv_types_parted" 'tbl)) +(count Rpart) -- 5 +(sum (at Rpart 'a)) -- 15 +(sum (at Rpart 'b)) -- 17.5 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 24. .csv.parted with quoted data → build_row_offsets_limited slow path +;; (lines 682-710: has_quotes=true branch in limited offset builder) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\n\"hello,world\",1\n\"foo,bar\",2\n\"baz\",3\n' > rf_test_csv_types_parted_q.csv") -- 0 +(set Rpq (.csv.parted "rf_test_csv_types_parted_q.csv" "rf_test_csv_types_parted_q" 'tq)) +(count Rpq) -- 3 +(sum (at Rpq 'b)) -- 6 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 25. .csv.parted with CRLF and timestamp (parted covers infer + many types) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'id,ts\r\n1,2000-01-01T12:00:00\r\n2,2001-06-15T08:30:00\r\n3,2002-12-31T23:59:59\r\n' > rf_test_csv_types_parted2.csv") -- 0 +(set Rp2 (.csv.parted "rf_test_csv_types_parted2.csv" "rf_test_csv_types_parted2" 'tp)) +(count Rp2) -- 3 +(sum (at Rp2 'id)) -- 6 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 26. No trailing newline (build_row_offsets_limited: p=end fast path exit) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\n1,2\n3,4' > rf_test_csv_types_nonl.csv") -- 0 +(set Tnonl (.csv.read "rf_test_csv_types_nonl.csv")) +(count Tnonl) -- 2 +(sum (at Tnonl 'a)) -- 4 +(sum (at Tnonl 'b)) -- 6 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 27. 
GUID round-trip write+read: csv_write_guid + fast_guid uppercase +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'g\n550e8400-e29b-41d4-a716-446655440000\n6ba7b810-9dad-11d1-80b4-00c04fd430c8\nAABBCCDD-EEFF-1122-3344-AABBCCDDEEFF\n' > rf_test_csv_types_guid_rt.csv") -- 0 +(set Tgrt (.csv.read [GUID] "rf_test_csv_types_guid_rt.csv")) +(count Tgrt) -- 3 +;; write back and re-read +(.csv.write Tgrt "rf_test_csv_types_guid_written.csv") -- 0 +(set Tgw (.csv.read [GUID] "rf_test_csv_types_guid_written.csv")) +(count Tgw) -- 3 + +;; ════════════════════════════════════════════════════════════════════════════ +;; 28. Explicit no-header read with column names vector +;; ray_read_csv_named_opts with header=false, col_names_in provided +;; (lines 1507-1509 in the named path) +;; ════════════════════════════════════════════════════════════════════════════ +(.sys.exec "printf '1,2.5,alpha\n2,3.5,beta\n3,4.5,gamma\n' > rf_test_csv_types_noheader.csv") -- 0 +(set Tnh (.csv.read ['x 'y 'z] [I64 F64 SYMBOL] "rf_test_csv_types_noheader.csv")) +(count Tnh) -- 3 +(key Tnh) -- ['x 'y 'z] +(sum (at Tnh 'x)) -- 6 +(sum (at Tnh 'y)) -- 10.5 + +;; ── cleanup ───────────────────────────────────────────────────────────────── +(.sys.exec "rm -f rf_test_csv_types_*.csv") -- 0 +(.sys.exec "rm -rf rf_test_csv_types_parted rf_test_csv_types_parted_q rf_test_csv_types_parted2") -- 0 + diff --git a/test/rfl/journal/ops_journal.rfl b/test/rfl/journal/ops_journal.rfl new file mode 100644 index 00000000..035691e2 --- /dev/null +++ b/test/rfl/journal/ops_journal.rfl @@ -0,0 +1,132 @@ +;; Coverage extension for src/ops/journal.c. +;; +;; The bulk of src/ops/journal.c is exercised by +;; test/rfl/system/log_journal.rfl (stages A-J run in-process). This file +;; targets the remaining zero-coverage regions after that suite runs: +;; +;; (a) ray_log_write_fn: lazy-expr materialisation path (lines 92-97). 
+;; ray_log_write_fn calls ray_is_lazy() before serialising; when expr +;; is lazy (e.g. the result of `asc`) it must materialise it first. +;; The existing tests only pass self-evaluating integers/vectors — +;; none trigger the lazy branch. +;; +;; (b) ray_log_replay_fn: RAY_JREPLAY_DESER switch arm (lines 150-153). +;; Reached when replay reads a valid IPC frame but ray_de_raw rejects +;; the payload (unknown type tag, bad length, etc.). Crafting the +;; binary file with .sys.exec/python3 is the only RFL-reachable path. +;; +;; (c) ray_log_replay_fn: RAY_JREPLAY_DECOMP switch arm (lines 154-157). +;; Reached when the COMPRESSED flag is set in the frame header but +;; the compressed bytes cannot be decompressed. +;; +;; Unreachable from RFL (documented): +;; +;; ray_log_open_fn line 73 — base path ≥ 1024 bytes: the str_to_cpath +;; buffer is on the stack; the only RFL way to +;; pass a >1023-char string requires (concat +;; ...) but the journal open is synchronous, so +;; longer paths would crash str_to_cpath's +;; caller before the guard fires. In practice +;; no filesystem supports >1000-char paths. +;; +;; ray_log_write_fn lines 100-103 — ray_serde_size() ≤ 0: unreachable for +;; any valid ray_t value the runtime can hold. +;; +;; ray_log_write_fn lines 106-109 — ray_sys_alloc OOM: OOM guard not +;; triggerable from RFL without exhausting the +;; process address space. +;; +;; ray_log_write_fn lines 112-116 — ray_ser_raw size mismatch: unreachable; +;; ray_ser_raw is deterministic given a fixed +;; ray_t and ray_serde_size returns the exact +;; byte count. +;; +;; ray_log_replay_fn lines 158-161 — RAY_JREPLAY_OOM: reaching OOM in the +;; middle of replaying a log entry requires +;; exhausting memory, not achievable from RFL. +;; +;; ray_log_replay_fn line 166 — return ray_error("internal", ...): the +;; switch is exhaustive over the 6-value enum; +;; the default fall-through is dead. 
+;; +;; Base path: /tmp/rfl_ops_journal — distinct from system/log_journal.rfl +;; (/tmp/rfl_log_inproc) to avoid cross-test state pollution. + +;; ── pre-cleanup ───────────────────────────────────────────────────────── +(.sys.exec "rm -f /tmp/rfl_ops_journal.log /tmp/rfl_ops_journal_deser.log /tmp/rfl_ops_journal_decomp.log") + +;; ════════════════════════════════════════════════════════════════════════ +;; 1. Lazy-expr materialisation in ray_log_write_fn (lines 92-97). +;; +;; `(asc V)` produces a lazy object. When passed to .log.write, the fn +;; checks ray_is_lazy(), sets owned=true, calls ray_lazy_materialize, and +;; serialises the materialised concrete vector. On success .log.write +;; returns null — same observable result as a non-lazy argument. +;; +;; Three lazy expressions exercise the branch three times, hitting the +;; retain/materialise/owned-release path once per call. +;; ════════════════════════════════════════════════════════════════════════ +(nil? (.log.open 'async "/tmp/rfl_ops_journal")) -- true + +;; Lazy from asc — exercises the is_lazy → materialise path. +(nil? (.log.write (asc [3 1 4 1 5]))) -- true + +;; Lazy from desc — second pass through the same branch. +(nil? (.log.write (desc [2 7 1 8 2]))) -- true + +;; Lazy from reverse — third pass; also proves the write machinery +;; handles any lazy result, not just asc/desc. +(nil? (.log.write (reverse [10 20 30]))) -- true + +;; Non-lazy for contrast: integer literal is not lazy, takes the else branch. +(nil? (.log.write 99)) -- true + +(nil? (.log.close)) -- true + +;; Validate the log — must contain at least 4 entries (3 lazy + 1 non-lazy). +(set vLazy (.log.validate "/tmp/rfl_ops_journal.log")) +(>= (first vLazy) 4) -- true +(> (last vLazy) 0) -- true + +;; Replay the log — materialised values are self-evaluating; replay should +;; return the chunk count and not error. 
+(set repLazy (.log.replay "/tmp/rfl_ops_journal.log")) +(>= repLazy 4) -- true + +;; ════════════════════════════════════════════════════════════════════════ +;; 2. RAY_JREPLAY_DESER — valid IPC frame, invalid payload (lines 150-153). +;; +;; Craft a 16-byte IPC header with: +;; prefix = 0xcefadefa (RAY_SERDE_PREFIX, little-endian) +;; version = 3 (RAY_SERDE_WIRE_VERSION) +;; flags = 0 (no compression) +;; endian = 0 +;; msgtype = 0 (async) +;; size = 2 (payload is 2 bytes) +;; Payload: [0x80, 0x00] +;; type byte = 0x80 = -128 (signed int8): an atom type, but -128 is not in +;; the valid atom-type range, so ray_de_raw hits `default: return error("type")`. +;; flags byte = 0x00. +;; ray_journal_replay sees !msg || RAY_IS_ERR(msg) → status = JREPLAY_DESER. +;; ════════════════════════════════════════════════════════════════════════ +(.sys.exec "python3 -c \"\nimport struct\nprefix = 0xcefadefa\nver = 3; flags = 0; endian = 0; msgtype = 0\npay = bytes([0x80, 0x00])\nhdr = struct.pack('type>0 && then_v->len==1) +;; → RAY_IS_SYM(then_v->type) = true → lines 113-115 path. +;; Regression for prior bug: ray_t->i64 aliases ray_t->len, so +;; `ray_sym_str(then_v->i64)` returned ray_sym_str(1) = "+" instead of +;; the element value. Fix in pivot.c: read sym ID via sym_scalar_id() +;; which dispatches atom/vec correctly. +(set TSym23b (table [cond s1] (list [true false true] (list "alpha" "beta" "gamma")))) +(set sym1vec (take ['X] 1)) +(set R23b (at (select {s: (if cond sym1vec s1) from: TSym23b}) 's)) +(count R23b) -- 3 +(at R23b 0) -- "X" +(at R23b 1) -- "beta" +(at R23b 2) -- "X" + +;; ==================================================================== +;; Section 24: exec_if — STR output, SYM *atom* else-branch +;; Regression for the mirror of Section 23: SYM atom in the +;; else-branch also produced empty strings. Same fix. +;; ==================================================================== + +;; (if cond s1 'B): then is STR column, else is SYM atom (-RAY_SYM). 
+;; promote(STR, SYM) = STR → exec_if STR branch. +;; cond=true rows: pick from STR column s1. +;; cond=false rows: else_v is SYM atom 'B → string "B". +(set R24 (at (select {s: (if cond s1 'B) from: TSym23}) 's)) +(count R24) -- 4 +(at R24 0) -- "alpha" +(at R24 1) -- "B" +(at R24 2) -- "gamma" +(at R24 3) -- "B" + +;; Section 24b: exec_if — SYM output, SYM atom then-branch + SYM column else. +;; Exercises the SYM-output path at pivot.c:158-163 which +;; now routes through sym_scalar_id() instead of bare i64. +(set TSym24b (table [cond s] (list [true false true false] (as 'SYM ["w" "x" "y" "z"])))) +(set R24b (at (select {r: (if cond 'A s) from: TSym24b}) 'r)) +(count R24b) -- 4 +(at R24b 0) -- 'A +(at R24b 1) -- 'x +(at R24b 2) -- 'A +(at R24b 3) -- 'z + +;; Section 24c: SYM output, 1-elem SYM vec then + SYM column else. +(set R24c (at (select {r: (if cond sym1vec s) from: TSym24b}) 'r)) +(count R24c) -- 4 +(at R24c 0) -- 'X +(at R24c 1) -- 'x +(at R24c 2) -- 'X +(at R24c 3) -- 'z + +;; Section 24d: 1-element SYM *vector* as else-branch → lines 136-138 path. +;; BUG: else_v->i64 reads len field (=1) as sym ID. +(set R24b (at (select {s: (if cond s1 sym1vec) from: TSym23b}) 's)) +(count R24b) -- 3 +;; cond=true rows come from STR column s1 (line 118 path) +(at R24b 0) -- "alpha" + +;; ==================================================================== +;; Section 25: exec_pivot — null pivot-key rows are dropped +;; Covers lines 406-410: nmask & pvt_null_bit → skip row +;; (grp_ix[gi]=UINT32_MAX, grp_pv[gi]=UINT32_MAX). +;; ==================================================================== + +;; Row (A, 0Nl, 10) has null pivot column → must be dropped. +;; Only rows with non-null pivot keys contribute output columns. +;; Result: 2 groups (A and B), 2 pivot values (1 and 2) → 3 cols total. 
+(set TNullPvt (table [k c v] (list ['A 'A 'B 'B] [0N 2 1 2] [10 20 30 40]))) +(count (pivot TNullPvt 'k 'c 'v sum)) -- 2 +(count (key (pivot TNullPvt 'k 'c 'v sum))) -- 3 + +;; ==================================================================== +;; Section 26: exec_pivot — null in F64 index column +;; Covers line 527-528: case RAY_F64 sentinel fill +;; (NULL_F64 written to output index column for null group key). +;; ==================================================================== + +;; Table with F64 index key where one row has null (0Nf). +;; The null index key group gets NULL_F64 sentinel in the output index column. +(set TNullF64 (table [k c v] (list (as 'F64 [1.0 0Nf 2.0 2.0]) ['x 'y 'x 'y] [10 20 30 40]))) +(set PNullF64 (pivot TNullF64 'k 'c 'v sum)) +;; 3 distinct index groups: {1.0, 0Nf, 2.0} → 3 rows +(count PNullF64) -- 3 +;; The null-key row exists in output (with null sentinel in k col) +(count (key PNullF64)) -- 3 +;; Non-null groups produce correct aggregates +(at (at PNullF64 'x) 0) -- 10 +(at (at PNullF64 'x) 2) -- 30 + +;; ==================================================================== +;; Section 27: exec_pivot — null in I32 index column +;; Covers lines 531-532: case RAY_I32/DATE/TIME sentinel fill. +;; ==================================================================== + +;; Table with I32 index key where one row has null (0Ni). +(set TNullI32 (table [k c v] (list (as 'I32 [1 0N 2 2]) ['x 'y 'x 'y] [10 20 30 40]))) +(set PNullI32 (pivot TNullI32 'k 'c 'v sum)) +(count PNullI32) -- 3 +(count (key PNullI32)) -- 3 +(at (at PNullI32 'x) 0) -- 10 +(at (at PNullI32 'y) 2) -- 40 + +;; ==================================================================== +;; Section 28: exec_pivot — null in I16 index column +;; Covers lines 533-534: case RAY_I16 sentinel fill. +;; ==================================================================== + +;; Table with I16 index key where one row has null (0Nh). 
+(set TNullI16 (table [k c v] (list (as 'I16 [1 0N 2 2]) ['x 'y 'x 'y] [10 20 30 40]))) +(set PNullI16 (pivot TNullI16 'k 'c 'v sum)) +(count PNullI16) -- 3 +(count (key PNullI16)) -- 3 +(at (at PNullI16 'x) 0) -- 10 +(at (at PNullI16 'y) 2) -- 40 + +;; ==================================================================== +;; Section 29: exec_if — TIMESTAMP/DATE/TIME output branches +;; BUG: These branches (lines 189-209) are UNREACHABLE. +;; promote(RAY_TIMESTAMP, RAY_TIMESTAMP) returns RAY_I64 +;; promote(RAY_DATE, RAY_DATE) returns RAY_I32 +;; promote(RAY_TIME, RAY_TIME) returns RAY_I32 +;; so exec_if never enters the TIMESTAMP/DATE/TIME arms. +;; These arms handle correct temporal semantics but are dead. +;; +;; Verify the current (demoted) behavior: +;; ==================================================================== + +;; TIMESTAMP if → actually produces I64 (not TIMESTAMP) +(set TTSdead (table [x ts1 ts2] (list [true false] (as 'TIMESTAMP [100 200]) (as 'TIMESTAMP [10 20])))) +(count (at (select {t: (if x ts1 ts2) from: TTSdead}) 't)) -- 2 + +;; DATE if → actually produces I32 (not DATE) +(set TDatedead (table [x d1 d2] (list [true false] [2024.01.01 2024.01.02] [2023.01.01 2023.01.02]))) +(count (at (select {d: (if x d1 d2) from: TDatedead}) 'd)) -- 2 + +;; TIME if → actually produces I32 (not TIME) +(set TTimedead (table [x t1 t2] (list [true false] [09:30:00.000 10:00:00.000] [08:00:00.000 11:00:00.000]))) +(count (at (select {t: (if x t1 t2) from: TTimedead}) 't)) -- 2 diff --git a/test/rfl/ops/query_coverage.rfl b/test/rfl/ops/query_coverage.rfl index fa52262d..e43a0637 100644 --- a/test/rfl/ops/query_coverage.rfl +++ b/test/rfl/ops/query_coverage.rfl @@ -1092,3 +1092,17 @@ (set TGwhere (table [G v lst] (list (take (guid 3) 9) [10 20 30 40 50 60 70 80 90] (list (list 1) (list 2) (list 3) (list 4) (list 5) (list 6) (list 7) (list 8) (list 9))))) ;; WHERE keeps subset; first-of-group + LIST gather still works. 
(count (select {from: TGwhere by: G where: (> v 30)})) -- 3 + +;; ==================================================================== +;; gather_by_idx STR null path (eval.c lines 1121-1123) +;; +;; Triggered when: +;; 1. GROUP BY query has both an agg and a non-agg STR column +;; 2. The STR column has HAS_NULLS set (via (as 'STR ...)) +;; 3. The numeric group key forces the DAG scatter path +;; 4. gather_by_idx(str_col, idx, n) is called per group +;; 5. has_nulls=true → ray_vec_is_null check → null rows hit lines 1122-1123 +;; ==================================================================== +(set Tnull_str (table [g s v] (list [1 2 1 2 1] (as 'STR [1 0Nl 3 1 0Nl]) [10 20 30 40 50]))) +;; Mixed agg + null STR non-agg: gather_by_idx called with null-bearing STR +(count (at (select {sv: (sum v) s_out: s from: Tnull_str by: g}) 's_out)) -- 2 diff --git a/test/rfl/opt/const_fold.rfl b/test/rfl/opt/const_fold.rfl new file mode 100644 index 00000000..4592bbb4 --- /dev/null +++ b/test/rfl/opt/const_fold.rfl @@ -0,0 +1,313 @@ +;; Coverage for src/ops/opt.c optimizer passes: +;; - promote_type: integer-family type promotion (i64, i32, i16, u8, bool) +;; - fold_unary_const: NEG/ABS/NOT/SQRT/LOG/EXP/CEIL/FLOOR with f64 and int atoms +;; - fold_binary_const: I64/I32/BOOL paths including edge cases +;; - simplify_and_or_const: partial AND/OR fold with one const operand +;; - filter_cost/pass_filter_reorder: ILIKE cost path +;; - pass_predicate_pushdown: ALIAS pushdown + +;; ==================================================================== +;; Section 1: promote_type — integer-family paths (lines 59-65 in opt.c) +;; +;; infer_type_for_node calls promote_type(a->out_type, b->out_type) for +;; binary nodes whose out_type is still 0 after graph construction. +;; Table columns have typed arrays so SCAN nodes carry concrete types; +;; a binary OP node built by the compiler starts with out_type=0. 
+;; ==================================================================== +;; I64 + I16 -> I64 (promote_type line 59: a==RAY_I64 || b==RAY_I64) +(set T16_64 (table [a b] (list [1h 2h 3h] [100 200 300]))) +(at (select {c: (+ a b) from: T16_64}) 'c) -- [101 202 303] + +;; I32 + I16 -> I32 (promote_type line 61-62: a==RAY_I32 arm) +(set T32_16 (table [a b] (list [1i 2i 3i] [10h 20h 30h]))) +(at (select {c: (+ a b) from: T32_16}) 'c) -- [11 22 33] + +;; I16 + I16 — promoted to I64 (binary fast paths emit I64 outputs; +;; narrow-output is reserved for explicit casts). +(set T16_16 (table [a b] (list [1h 2h 3h] [10h 20h 30h]))) +(at (select {c: (+ a b) from: T16_16}) 'c) -- [11 22 33] +(type (at (select {c: (+ a b) from: T16_16}) 'c)) -- 'I64 + +;; BOOL + I16 -> I16 (promote_type line 61-62: via DATE/TIME/I32 arm? or default) +;; atom_to_numeric line 290-295: RAY_U8 fallthrough arm (bool value as u8) +(set T_bool (table [a b] (list [true false true] [1h 2h 3h]))) +(at (select {c: (+ a b) from: T_bool}) 'c) -- [2 2 4] + +;; ==================================================================== +;; Section 2: fold_unary_const — uncovered branches +;; +;; All inputs are scalar atoms so constant folding fires during +;; pass_constant_fold (post-order walk, children before parents). 
+;; ==================================================================== + +;; NEG(f64): fold_unary_const line 339 — if (is_f64) branch taken +(neg 3.0) -- -3.0 +(neg -2.5) -- 2.5 + +;; ABS: fold_unary_const lines 343-348 +;; Integer path with vi < 0 (line 347 ternary left branch covered) +(abs -7) -- 7 +(abs 7) -- 7 +;; Float path +(abs -2.5) -- 2.5 +(abs 2.5) -- 2.5 + +;; NOT: fold_unary_const line 350 +;; Integer path (vi == 0 or vi != 0) +(not 0) -- true +(not 1) -- false +(not 2) -- false +;; Float path: is_f64 branch (line 350 left of ternary) +(not 0.0) -- true +(not 1.5) -- false + +;; SQRT: line 352-354 — is_f64 path (^0 on false branch) +;; sqrt(4.0) uses vf; sqrt(4) uses (double)vi +(sqrt 4.0) -- 2.0 +(sqrt 9) -- 3.0 + +;; LOG: line 355-357 — is_f64 path (^0 for false branch) +(log 1.0) -- 0.0 +(log 1) -- 0.0 + +;; EXP: line 358-360 — is_f64 path (^0 for false branch) +(exp 0.0) -- 1.0 +(exp 0) -- 1.0 + +;; CEIL: line 361-363 — both is_f64=true and is_f64=false +(ceil 1.5) -- 2.0 +(ceil 2) -- 2 + +;; FLOOR: line 364-366 — both paths +(floor 1.5) -- 1.0 +(floor 2) -- 2 + +;; ==================================================================== +;; Section 2b: fold_unary_const — in query context (SELECT forces optimizer) +;; +;; Top-level expressions above go through the eval path, not the +;; optimizer's constant-folding pass. Embedding them in SELECT forces +;; ray_optimize → pass_constant_fold → fold_unary_const. +;; Each query result is a 3-row vector with a broadcast constant column. 
+;; ==================================================================== +(set Tbase (table [x] (list [1 2 3]))) + +;; NEG(f64) in SELECT — fold_unary_const line 339: is_f64=true, uses -vf +(at (select {c: (neg 3.0) from: Tbase}) 'c) -- [-3.0 -3.0 -3.0] + +;; NEG(i64) in SELECT — line 341: uses -vi (vi != INT64_MIN) +(at (select {c: (neg 5) from: Tbase}) 'c) -- [-5 -5 -5] + +;; ABS(f64) in SELECT — line 345: is_f64=true, fabs(vf) +(at (select {c: (abs -2.5) from: Tbase}) 'c) -- [2.5 2.5 2.5] + +;; ABS(i64) negative — line 347: vi < 0, result = -vi +(at (select {c: (abs -7) from: Tbase}) 'c) -- [7 7 7] + +;; ABS(i64) positive — line 347: vi >= 0, result = vi (the 'vi' arm of ternary) +(at (select {c: (abs 7) from: Tbase}) 'c) -- [7 7 7] + +;; NOT(i64) zero — fold_unary_const line 350: vi==0 → true +(at (select {c: (not 0) from: Tbase}) 'c) -- [true true true] + +;; NOT(i64) nonzero — line 350: vi!=0 → false +(at (select {c: (not 1) from: Tbase}) 'c) -- [false false false] + +;; SQRT(f64) in SELECT — line 353: is_f64=true, uses vf +(at (select {c: (sqrt 4.0) from: Tbase}) 'c) -- [2.0 2.0 2.0] + +;; SQRT(i64) in SELECT — line 353: is_f64=false, uses (double)vi +(at (select {c: (sqrt 9) from: Tbase}) 'c) -- [3.0 3.0 3.0] + +;; LOG(f64) in SELECT — line 356: is_f64=true, uses vf +(at (select {c: (log 1.0) from: Tbase}) 'c) -- [0.0 0.0 0.0] + +;; LOG(i64) in SELECT — line 356: is_f64=false, uses (double)vi +(at (select {c: (log 1) from: Tbase}) 'c) -- [0.0 0.0 0.0] + +;; EXP(f64) in SELECT — line 359: is_f64=true +(at (select {c: (exp 0.0) from: Tbase}) 'c) -- [1.0 1.0 1.0] + +;; EXP(i64) in SELECT — line 359: is_f64=false +(at (select {c: (exp 0) from: Tbase}) 'c) -- [1.0 1.0 1.0] + +;; CEIL(f64) in SELECT — line 362: is_f64=true +(at (select {c: (ceil 1.5) from: Tbase}) 'c) -- [2.0 2.0 2.0] + +;; CEIL(i64) in SELECT — line 362: is_f64=false, result = vi +(at (select {c: (ceil 2) from: Tbase}) 'c) -- [2 2 2] + +;; FLOOR(f64) in SELECT — line 365: is_f64=true +(at 
(select {c: (floor 1.5) from: Tbase}) 'c) -- [1.0 1.0 1.0] + +;; FLOOR(i64) in SELECT — line 365: is_f64=false +(at (select {c: (floor 2) from: Tbase}) 'c) -- [2 2 2] + +;; NEG(u8) in SELECT — atom_to_numeric case -RAY_U8 (line 290, falls through to U8/BOOL code) +;; 0x42 = 66 (U8 hex literal), neg → -66 (stored as i64 after fold) +(at (select {c: (neg 0x42) from: Tbase}) 'c) -- [-66 -66 -66] + +;; ==================================================================== +;; Section 3: fold_binary_const — I64 arithmetic edge cases +;; +;; The I64 case (lines 414-433) is triggered when out_type==RAY_I64 AND +;; opcode is in [OP_ADD..OP_MAX2]. For scalar i64 constants, ADD/SUB/MUL +;; already covered; the remaining gaps are: +;; - MOD (line 425-427): only one call currently; edge case vi%rv with +;; trivial remainder forces the (rv != 0) guard to evaluate rv > 0 +;; - i64 MIN2/MAX2 (lines 428-429): triggered via element-wise min/max +;; in projection (OP_MIN2/OP_MAX2 with two i64 const inputs) +;; ==================================================================== +;; I64 MOD — covers line 426 with a different pair (lv and rv) +;; RFL uses floored division: -20 mod 7 = 1 (not -6) +(% 9223372036854775807 1000000000) -- 854775807 +(% 20 7) -- 6 +(% -20 7) -- 1 + +;; I64 SUB and MUL (already covered but add more const-fold chains +;; to increase branch hit counts on adjacent code) +(- 9000000000000 1000000000000) -- 8000000000000 +(* 3000000000 4) -- 12000000000 + +;; ==================================================================== +;; Section 4: fold_binary_const — BOOL case (line 435-453) +;; +;; Comparison between two consts with out_type=RAY_BOOL (lines 438-439): +;; is_f64 arms. Currently all calls hit integer path (li,ri); +;; to trigger the l_is_f64/r_is_f64 branch we need one or both +;; inputs to be f64 consts. 
+;; ==================================================================== +;; EQ between two f64 consts: lhs is f64, rhs is f64 → both l_is_f64=true, r_is_f64=true +(== 3.0 3.0) -- true +(== 3.0 4.0) -- false +;; NE with f64 pair +(!= 1.5 2.5) -- true +;; LT, GT with f64 pair +(< 1.0 2.0) -- true +(> 2.0 1.0) -- true +;; LE, GE +(<= 2.0 2.0) -- true +(>= 3.0 2.0) -- true + +;; AND/OR with two bool consts via bool folding (line 448-449) +;; These are in the BOOL case of fold_binary_const +;; (and false false) -> false, (and true false) -> false +(and true true) -- true +(and true false) -- false +(and false true) -- false +(or false false) -- false +(or true false) -- true + +;; ==================================================================== +;; Section 5: fold_binary_const — I32 MIN2/MAX2 (lines 469-470) -- UNREACHABLE NOTE +;; and I64 DIV (lines 422-424) -- UNREACHABLE NOTE +;; +;; OP_MIN2/OP_MAX2 with i32 consts: the query compiler (query.c) never +;; calls ray_min2/ray_max2 directly; there is no RFL surface syntax that +;; compiles to OP_MIN2 with two const i32 inputs. Unreachable via RFL. +;; +;; I64 OP_DIV: (/ i64 i64) always produces RAY_F64 out_type because the +;; type inferencer (promote_type) returns F64 for the / opcode when both +;; inputs are i64. Unreachable via normal RFL. +;; ==================================================================== + +;; ==================================================================== +;; Section 6: simplify_and_or_const — mixed const + non-const AND/OR +;; +;; The function fires when exactly one operand is a const scalar bool. +;; Identity: AND-with-true, OR-with-false → alias the non-const operand. +;; Dominant: AND-with-false → false, OR-with-true → true. +;; Covers lines 516-559 in opt.c. 
+;; ==================================================================== +(set T (table [a] (list [1 2 3 4 5]))) + +;; const on RHS (rhs_const=true, lhs_const=false at line 526) +;; AND identity: (and (> a 2) true) → keeps rows where a>2 (3 rows: 3,4,5) +(count (select {from: T where: (and (> a 2) true)})) -- 3 +;; AND dominant: (and (> a 2) false) → zero rows +(count (select {from: T where: (and (> a 2) false)})) -- 0 +;; OR identity: (or (> a 2) false) → same as (> a 2) → 3 rows +(count (select {from: T where: (or (> a 2) false)})) -- 3 +;; OR dominant: (or (> a 2) true) → all rows +(count (select {from: T where: (or (> a 2) true)})) -- 5 + +;; const on LHS (lhs_const=true at line 528 — lhs_const branch) +;; AND identity: (and true (> a 2)) +(count (select {from: T where: (and true (> a 2))})) -- 3 +;; AND dominant: (and false (> a 2)) +(count (select {from: T where: (and false (> a 2))})) -- 0 +;; OR identity: (or false (> a 2)) +(count (select {from: T where: (or false (> a 2))})) -- 3 +;; OR dominant: (or true (> a 2)) +(count (select {from: T where: (or true (> a 2))})) -- 5 + +;; atom_to_bool is_f64 path (line 493-494): const is float +;; atom_to_bool(0.0) → is_f64=true, *out = 0.0!=0.0 = false +;; (and (> a 2) 0.0) → AND-with-false → dominant → 0 rows +(count (select {from: T where: (and (> a 2) 0.0)})) -- 0 +;; atom_to_bool(1.0) → is_f64=true, *out = 1.0!=0.0 = true +;; (and (> a 2) 1.0) → AND-with-true → identity → 3 rows +(count (select {from: T where: (and (> a 2) 1.0)})) -- 3 + +;; ==================================================================== +;; Section 7: filter_cost — ILIKE branch (line 1438) +;; +;; filter_cost returns cost += 4 for OP_ILIKE (case OP_LIKE/OP_ILIKE). +;; When (and ilike_pred cheap_pred) is in a WHERE, the reorder pass +;; puts the cheap predicate at the inner filter (runs first). 
+;; ==================================================================== +(set Tstr (table [s v] (list ["Apple" "Banana" "avocado" "blueberry" "APRICOT" "cherry"] [10 20 30 40 50 60]))) + +;; ilike with * wildcard (case-insensitive): matches "Apple", "avocado", "APRICOT" → 3 rows +(count (select {from: Tstr where: (ilike s "a*")})) -- 3 + +;; ilike inside AND chain — filter_cost reorders ilike to run last +;; (and (> v 25) (ilike s "a*")): v>25 keeps rows {30,40,50,60}; +;; ilike filters to rows with s starting with a/A: {avocado=30, APRICOT=50} +;; → 2 rows +(count (select {from: Tstr where: (and (> v 25) (ilike s "a*"))})) -- 2 +(sum (at (select {from: Tstr where: (and (> v 25) (ilike s "a*"))}) 'v)) -- 80 + +;; reverse order: same result after reorder +(count (select {from: Tstr where: (and (ilike s "a*") (> v 25))})) -- 2 +(sum (at (select {from: Tstr where: (and (ilike s "a*") (> v 25))}) 'v)) -- 80 + +;; ==================================================================== +;; Section 8: pass_predicate_pushdown — ALIAS pushdown (line 1358) +;; +;; When FILTER's child is OP_ALIAS, the predicate is pushed below the alias. +;; This fires when the child has a single consumer (count_node_consumers == 1). 
+;; ==================================================================== +;; A negation creates an OP_NEG (alias-like), and a filter on top should push down +;; This is tested via a nested select with an expression column +(set Tpp (table [a] (list [1 2 3 4 5]))) +;; nested select: outer select on an inner select with computed column +;; b = (* a 2) = [2,4,6,8,10]; b>6 → {8,10} → 2 rows +(set Inner (select {b: (* a 2) from: Tpp})) +(count (select {from: Inner where: (> b 6)})) -- 2 + +;; Predicate pushdown through OP_ALIAS (created by SELECT with renamed col) +;; inner has OP_SELECT with alias; outer WHERE pushes through +(set Pre (select {from: Tpp where: (> a 3)})) +(set Post (select {from: (select {x: a from: Tpp}) where: (> x 3)})) +(count Pre) -- 2 +(count Post) -- 2 + +;; ==================================================================== +;; Section 9: fold_binary_const — I32 case (lines 455-474) +;; +;; I32 case fires when out_type == RAY_I32. For (+ i32_const i32_const), +;; promote_type gives RAY_I32 so fold_binary_const I32 arm runs. +;; ==================================================================== +;; I32 ADD, SUB, MUL already have coverage from existing tests. 
+;; I32 MOD edge case: line 467 with the INT32_MIN guard +;; RFL uses floored division for integers +(% 1000i 7i) -- 6 +(% -1000i 7i) -- 1 +;; I32 add to get larger values +(+ 2000000i 3000000i) -- 5000000 +(- 10000000i 3000000i) -- 7000000 + +;; I64 add/sub with large values hitting different BOOL folding branches +(+ 9000000000 1) -- 9000000001 +(- 5000000000 1) -- 4999999999 diff --git a/test/rfl/opt/filter_reorder.rfl b/test/rfl/opt/filter_reorder.rfl new file mode 100644 index 00000000..f0fdba29 --- /dev/null +++ b/test/rfl/opt/filter_reorder.rfl @@ -0,0 +1,132 @@ +;; Coverage for src/ops/opt.c — pass_filter_reorder and pass_predicate_pushdown +;; +;; Targeted regions: +;; - filter_cost: all type-width cost arms (RAY_DATE/TIME, narrow types) +;; - split_and_filter: decomposition of AND predicate into chained filters +;; - collect_filter_chain + insertion sort with 3+ filters +;; - pass_predicate_pushdown: SELECT pushdown, multiple iterations +;; - pass_predicate_pushdown line 1534-1538: nc > 256 path (after splits) + +;; ==================================================================== +;; Section 1: filter_cost — date/time type-width arm (line 1429) +;; +;; filter_cost checks inputs[0]->out_type. For RAY_DATE/TIME columns +;; the cost is +2 (same as RAY_I32). A filter chain on a date column +;; exercises the RAY_DATE arm of the switch. +;; ==================================================================== +(set N 200) +(set Tdate (table [d v] (list (take [2024.01.01 2024.06.15 2024.12.31] N) (til N)))) + +;; filter on date column — forces filter_cost to evaluate out_type=DATE +(count (select {from: Tdate where: (== d 2024.06.15)})) -- 67 + +;; Two-predicate AND on date column: split_and_filter fires, then +;; insertion-sort reorders by cost (both same width → stable order). 
+(count (select {from: Tdate where: (and (>= d 2024.01.01) (<= d 2024.06.15))})) -- 134 + +;; ==================================================================== +;; Section 2: split_and_filter + collect_filter_chain length > 2 +;; +;; A 3-conjunct AND produces 3 separate OP_FILTER nodes after splitting. +;; The insertion-sort path (lines 1579-1589) iterates c=1,2 (chain_len=3). +;; ==================================================================== +(set N3 300) +(set T3 (table [a b] (list (til N3) (til N3)))) + +;; 3-conjunct AND: split fires twice, chain_len=3 after splits +;; a>50 AND a<200 AND b!=100 → rows {51..199} minus b=100 → 148 rows +;; sum(51..199) - 100 = (51+199)*149/2 - 100 = 18625 - 100 = 18525 +(count (select {from: T3 where: (and (> a 50) (< a 200) (!= b 100))})) -- 148 +(sum (at (select {from: T3 where: (and (> a 50) (< a 200) (!= b 100))}) 'a)) -- 18525 + +;; ==================================================================== +;; Section 3: pass_predicate_pushdown — SELECT pushdown iteration +;; +;; FILTER over SELECT gets rewritten as SELECT over FILTER when +;; the SELECT has a single consumer. The outer select runs the +;; predicate on the inner table's columns. +;; ==================================================================== +(set Tpush (table [a b] (list [1 2 3 4 5 6 7 8 9 10] [1 2 3 4 5 6 7 8 9 10]))) + +;; Single-level: FILTER(SELECT(T)) → SELECT(FILTER(T)) +(set R1 (select {x: a y: b from: (select {a: a b: b from: Tpush}) where: (> a 5)})) +(count R1) -- 5 + +;; Two-level pushdown: FILTER over two nested SELECTs. +;; Runs in up to 4 iterations; first iter pushes one level, second pushes further. 
+(set Inner2 (select {a2: a from: Tpush})) +(set Mid2 (select {a3: a2 from: Inner2})) +(count (select {from: Mid2 where: (> a3 7)})) -- 3 + +;; ==================================================================== +;; Section 4: filter_cost — I64 and STR/SYM type arms +;; +;; When a filter compares an I64 or STR/SYM column, cost += 3 (default arm). +;; By contrast, BOOL/U8 columns get cost += 0 (cheapest). +;; ==================================================================== +(set Tsym (table [k v] (list (take ['A 'B 'C 'D] 100) (til 100)))) + +;; filter on SYM column: filter_cost hits default arm (cost += 3) +;; k cycles A(0),B(1),C(2),D(3): k=='C' at rows where i%4==2 +;; v>50: rows 51..99; k=='C' in that range: {54,58,...,98} → 12 rows +(count (select {from: Tsym where: (and (> v 50) (== k 'C))})) -- 12 + +;; With more conjuncts: reorder puts cheaper preds first +;; v>25 AND v<75 AND k=='B' → 49 rows in {26..74}; k=='B' at rows where i%4==1: {29,33,...,73} → 12 rows +(count (select {from: Tsym where: (and (> v 25) (< v 75) (== k 'B))})) -- 12 + +;; ==================================================================== +;; Section 5: filter_cost — OP_LIKE in reorder (line 1437) +;; +;; Already covered in const_fold.rfl but additional multi-predicate case +;; with 3 conjuncts ensures chain_len=3 sorting with OP_LIKE. 
+;; ==================================================================== +(set Tlike (table [s v] (list ["apple" "apricot" "banana" "blueberry" "avocado" "cherry"] [10 20 30 40 50 60]))) + +;; 3-conjunct: (and (like s "a*") (> v 15) (< v 55)) +;; like: "apple"(10), "apricot"(20), "avocado"(50) +;; v>15 AND v<55: {20,30,40,50} +;; Keep "apricot"(20), "avocado"(50) → 2 rows +(count (select {from: Tlike where: (and (like s "a*") (> v 15) (< v 55))})) -- 2 +(sum (at (select {from: Tlike where: (and (like s "a*") (> v 15) (< v 55))}) 'v)) -- 70 + +;; ==================================================================== +;; Section 6: pass_projection_pushdown + pass_partition_pruning +;; +;; pass_projection_pushdown marks unreachable nodes DEAD. +;; pass_partition_pruning fires on FILTER(SCAN(mapcommon_col), const). +;; The basic BFS that marks unreachable nodes fires every query. +;; Build an expression where computed intermediate nodes are not in +;; the final output, triggering the "mark dead" path. 
+;; ==================================================================== +(set Tproj (table [a b c] (list [1 2 3] [10 20 30] [100 200 300]))) +;; select only column b — columns a and c are unused after pushdown +;; (just runs the projection pushdown pass, no assertion on table shape) +(at (select {x: b from: Tproj}) 'x) -- [10 20 30] + +;; ==================================================================== +;; Section 7: fold_filter_const_predicate — both arms +;; +;; keep_rows=true → OP_MATERIALIZE (lines 573-580) +;; keep_rows=false → OP_HEAD n=0 (lines 582-594) +;; ==================================================================== +(set Tfilt (table [a] (list [10 20 30 40 50]))) +;; keep_rows=true: filter with const true predicate → materialize +(count (select {from: Tfilt where: true})) -- 5 +;; keep_rows=false: filter with const false predicate → empty +(count (select {from: Tfilt where: false})) -- 0 + +;; ==================================================================== +;; Section 8: pass_dce — dead node marking +;; +;; After constant folding turns an AND/OR into a constant, the DCE pass +;; marks the original comparison nodes DEAD. We verify the correct +;; result is still returned. 
+;; ==================================================================== +(set Tdce (table [a] (list [1 2 3 4 5]))) +;; (and (> a 2) false) → optimizer replaces AND with const false → OP_HEAD(0) +;; DCE marks the (> a 2) subtree dead +(count (select {from: Tdce where: (and (> a 2) false)})) -- 0 +;; (or (> a 3) true) → optimizer replaces OR with const true → OP_MATERIALIZE +;; DCE marks the (> a 3) subtree dead +(count (select {from: Tdce where: (or (> a 3) true)})) -- 5 diff --git a/test/rfl/query/key_reader_atom_const.rfl b/test/rfl/query/key_reader_atom_const.rfl index bf335104..d1cd3ab0 100644 --- a/test/rfl/query/key_reader_atom_const.rfl +++ b/test/rfl/query/key_reader_atom_const.rfl @@ -100,3 +100,33 @@ ;; Distinct (a,b) pairs: (1,10), (NULL,10), (1,20), (NULL,20) = 4. (count (select {c: (count v) by: [a b] from: Tnu take: 50})) -- 4 (sum (at (select {c: (count v) by: [a b] from: Tnu take: 50}) 'c)) -- 6 + +;; ============================================================ +;; Part C — expr_affine_of_sym with reversed operand order +;; (+ constant sym) — covers lines 1473-1476 in query.c +;; ============================================================ +;; Standard form is (+ sym constant); reversed is (+ constant sym). +;; Both should produce the same dep bias. +;; sum(k) = 10 (keys 1,2,3,4). Dep a = k + 100 → sum(a) = 410. +(sum (at (select {c: (count v) from: Ta by: {k: k a: (+ 100 k)}}) 'a)) -- 410 + +;; Reversed subtraction: (- constant sym) is NOT affine (bias = const-sym, not sym-const). +;; Only (+ const sym) is covered. Check with I16 constant reversed: +;; Dep a = k + 5 → sum(a) = 30 (using 5h on left). +(sum (at (select {c: (count v) from: Ta by: {k: k a: (+ 5h k)}}) 'a)) -- 30 + +;; ============================================================ +;; Part D — key_col_read_i64 with I16/I32/DATE/TIME base key +;; (lines 1495-1500 in query.c) +;; dep_candidate path reads base_col[i] via key_col_read_i64 at +;; line 7986. 
With I16/I32 base keys these arms fire. +;; ============================================================ +(set Td16 (table [k v] (list (as 'I16 [1 2 3 4 1 2 3 4]) [10 20 30 40 50 60 70 80]))) +;; I16 base key: key_col_read_i64 I16 branch (line 1497) +;; Dep a = k + 10 → sum(a) over groups {1,2,3,4} = (1+2+3+4)+4*10 = 50 +(sum (at (select {c: (count v) from: Td16 by: {k: k a: (+ k 10)}}) 'a)) -- 50 + +(set Td32 (table [k v] (list (as 'I32 [1 2 3 4 1 2 3 4]) [10 20 30 40 50 60 70 80]))) +;; I32 base key: key_col_read_i64 I32 branch (line 1498-1500) +;; Dep a = k + 7 → sum(a) over groups {1,2,3,4} = 10 + 28 = 38 +(sum (at (select {c: (count v) from: Td32 by: {k: k a: (+ k 7)}}) 'a)) -- 38 diff --git a/test/rfl/query/query_agg_idiom_coverage.rfl b/test/rfl/query/query_agg_idiom_coverage.rfl new file mode 100644 index 00000000..8e8078ca --- /dev/null +++ b/test/rfl/query/query_agg_idiom_coverage.rfl @@ -0,0 +1,69 @@ +;; Coverage for AST-level idiom rewrites in `src/ops/query.c`: +;; `simplify_agg_idiom` (lines ~1817-1865): +;; (first (asc col)) → (min col) if col is null-free +;; (last (asc col)) → (max col) if col is null-free +;; (count (asc col)) → (count col) +;; (count (desc col)) → (count col) +;; (count (reverse col))→ (count col) +;; +;; These are called from the OP_GROUP planning block at ~5914 for both +;; single-key and multi-key by-group paths. 
+ +;; ──────────────────────────────────────────────────────────────────── +;; Single-key by-group: simplify_agg_idiom via DAG fast path +;; ──────────────────────────────────────────────────────────────────── +(set Tsi (table [k v] (list ['A 'B 'C 'A 'B 'C] [3 1 4 1 5 2]))) + +;; (first (asc v)) → (min v): A=1, B=1, C=2 +(count (select {m: (first (asc v)) by: k from: Tsi})) -- 3 +(sum (at (select {m: (first (asc v)) by: k from: Tsi}) 'm)) -- 4 + +;; (last (asc v)) → (max v): A=3, B=5, C=4 +(count (select {m: (last (asc v)) by: k from: Tsi})) -- 3 +(sum (at (select {m: (last (asc v)) by: k from: Tsi}) 'm)) -- 12 + +;; (count (asc v)) → (count v): 2 per group, total 6 +(count (select {n: (count (asc v)) by: k from: Tsi})) -- 3 +(sum (at (select {n: (count (asc v)) by: k from: Tsi}) 'n)) -- 6 + +;; (count (desc v)) → (count v): same result +(count (select {n: (count (desc v)) by: k from: Tsi})) -- 3 +(sum (at (select {n: (count (desc v)) by: k from: Tsi}) 'n)) -- 6 + +;; (count (reverse v)) → (count v): same result +(count (select {n: (count (reverse v)) by: k from: Tsi})) -- 3 +(sum (at (select {n: (count (reverse v)) by: k from: Tsi}) 'n)) -- 6 + +;; ──────────────────────────────────────────────────────────────────── +;; Multi-key by-group: idiom rewrite goes through the same path +;; ──────────────────────────────────────────────────────────────────── +(set Tmi (table [g h v] (list ['A 'B 'A 'B 'A] ['X 'X 'Y 'Y 'X] [1 2 3 4 5]))) +;; Groups: (A,X)=[1,5], (B,X)=[2], (A,Y)=[3], (B,Y)=[4] + +;; (first (asc v)): A,X=1; B,X=2; A,Y=3; B,Y=4 → sum=10 +(count (select {m: (first (asc v)) by: [g h] from: Tmi})) -- 4 +(sum (at (select {m: (first (asc v)) by: [g h] from: Tmi}) 'm)) -- 10 + +;; (last (asc v)): A,X=5; B,X=2; A,Y=3; B,Y=4 → sum=14 +(count (select {m: (last (asc v)) by: [g h] from: Tmi})) -- 4 +(sum (at (select {m: (last (asc v)) by: [g h] from: Tmi}) 'm)) -- 14 + +;; (count (desc v)): A,X=2; B,X=1; A,Y=1; B,Y=1 → sum=5 +(count (select {n: (count (desc v)) by: 
[g h] from: Tmi})) -- 4 +(sum (at (select {n: (count (desc v)) by: [g h] from: Tmi}) 'n)) -- 5 + +;; ──────────────────────────────────────────────────────────────────── +;; Idiom rewrites combined with other aggregates in same select +;; ──────────────────────────────────────────────────────────────────── +(set Tcomb (table [k v1 v2] (list ['A 'B 'A 'B 'A] [1 2 3 4 5] [10 20 30 40 50]))) +;; Mix: (first (asc v1)) + (sum v2) per group +;; A: min(v1)=1, sum(v2)=90; B: min(v1)=2, sum(v2)=60 +(count (select {mn: (first (asc v1)) s: (sum v2) by: k from: Tcomb})) -- 2 +(sum (at (select {mn: (first (asc v1)) s: (sum v2) by: k from: Tcomb}) 'mn)) -- 3 +(sum (at (select {mn: (first (asc v1)) s: (sum v2) by: k from: Tcomb}) 's)) -- 150 + +;; (last (asc v1)) + (count (desc v2)): +;; A: max(v1)=5, count(v2)=3; B: max(v1)=4, count(v2)=2 +(count (select {mx: (last (asc v1)) n: (count (desc v2)) by: k from: Tcomb})) -- 2 +(sum (at (select {mx: (last (asc v1)) n: (count (desc v2)) by: k from: Tcomb}) 'mx)) -- 9 +(sum (at (select {mx: (last (asc v1)) n: (count (desc v2)) by: k from: Tcomb}) 'n)) -- 5 diff --git a/test/rfl/query/query_count_select_coverage.rfl b/test/rfl/query/query_count_select_coverage.rfl new file mode 100644 index 00000000..fb1296aa --- /dev/null +++ b/test/rfl/query/query_count_select_coverage.rfl @@ -0,0 +1,84 @@ +;; Coverage for count-select fast paths in `src/ops/query.c`: +;; `ray_try_count_select_expr` (line ~3360) — fast count of select result +;; `try_count_simple_compare` (line ~3273) — scalar compare count +;; `count_compare_task` (line ~3216) — per-type compare kernels +;; +;; `ray_try_count_select_expr` is called from eval.c when (count (select ...)) +;; is evaluated. `try_count_simple_compare` handles (count T where (op col k)) +;; for BOOL/U8/I16/I32/I64/SYM column types. 
+ +;; ──────────────────────────────────────────────────────────────────── +;; I64 column — already covered; included for baseline +;; ──────────────────────────────────────────────────────────────────── +(set T64 (table [v] (list [1 2 3 4 5 6 7 8 9 10]))) +(count (select {from: T64 where: (> v 5)})) -- 5 +(count (select {from: T64 where: (< v 5)})) -- 4 +(count (select {from: T64 where: (== v 5)})) -- 1 +(count (select {from: T64 where: (!= v 5)})) -- 9 +(count (select {from: T64 where: (<= v 5)})) -- 5 +(count (select {from: T64 where: (>= v 5)})) -- 6 +;; No-where count → line 3413-3417 (handled path, returns nrows directly) +(count (select {from: T64})) -- 10 + +;; ──────────────────────────────────────────────────────────────────── +;; U8 column — exercises RAY_U8 arm of count_compare_task +;; EQ/NE take the fast needle path; others use count_compare_i64 +;; ──────────────────────────────────────────────────────────────────── +(set Tu8 (table [v] (list (as 'U8 [1 2 3 1 2 3 1 2 3 4])))) +(count (select {from: Tu8 where: (> v 1)})) -- 7 +(count (select {from: Tu8 where: (< v 3)})) -- 6 +(count (select {from: Tu8 where: (== v 2)})) -- 3 +(count (select {from: Tu8 where: (!= v 2)})) -- 7 +(count (select {from: Tu8 where: (<= v 2)})) -- 6 +(count (select {from: Tu8 where: (>= v 3)})) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; I16 column — exercises RAY_I16 arm +;; ──────────────────────────────────────────────────────────────────── +(set Ti16 (table [v] (list (as 'I16 [10 20 30 10 20 30 40])))) +(count (select {from: Ti16 where: (> v 10)})) -- 5 +(count (select {from: Ti16 where: (< v 30)})) -- 4 +(count (select {from: Ti16 where: (== v 20)})) -- 2 +(count (select {from: Ti16 where: (!= v 30)})) -- 5 + +;; ──────────────────────────────────────────────────────────────────── +;; I32 column — exercises RAY_I32 arm +;; ──────────────────────────────────────────────────────────────────── +(set Ti32 (table [v] (list (as 'I32 [100 
200 300 100 200 300 400])))) +(count (select {from: Ti32 where: (> v 100)})) -- 5 +(count (select {from: Ti32 where: (== v 200)})) -- 2 +(count (select {from: Ti32 where: (<= v 200)})) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; SYM column — exercises RAY_SYM arm (read_by_esz path) +;; ──────────────────────────────────────────────────────────────────── +(set Tsym (table [v] (list ['A 'B 'C 'A 'B 'A]))) +(count (select {from: Tsym where: (== v 'A)})) -- 3 +(count (select {from: Tsym where: (!= v 'B)})) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; count_compare_cache: second count of same column+op+rhs hits cache +;; (both calls use same `col` pointer so the cache lookup succeeds) +;; ──────────────────────────────────────────────────────────────────── +(set Tc (table [v] (list [10 20 30 40 50]))) +(count (select {from: Tc where: (> v 20)})) -- 3 +;; Second identical query → cache hit +(count (select {from: Tc where: (> v 20)})) -- 3 + +;; ──────────────────────────────────────────────────────────────────── +;; Flipped operand order: (op literal col) — count_cmp_flip path +;; (when lhs is a literal and rhs is a column name) +;; ──────────────────────────────────────────────────────────────────── +(set Tflip (table [v] (list [1 2 3 4 5]))) +;; (< 3 v) means v > 3 (flipped) +(count (select {from: Tflip where: (< 3 v)})) -- 2 +(count (select {from: Tflip where: (> 3 v)})) -- 2 + +;; ──────────────────────────────────────────────────────────────────── +;; ray_try_count_select_expr — complex WHERE that falls through to +;; the DAG rowsel path (not try_count_simple_compare) because the +;; predicate uses AND which disqualifies the simple-compare path +;; ──────────────────────────────────────────────────────────────────── +(set Tdag (table [v] (list [1 2 3 4 5 6 7 8 9 10]))) +;; AND predicate → goes through DAG pred_vec path +(count (select {from: Tdag where: (and (> v 2) (< v 8))})) -- 5 diff 
--git a/test/rfl/query/query_dag_agg_coverage.rfl b/test/rfl/query/query_dag_agg_coverage.rfl new file mode 100644 index 00000000..97d5396c --- /dev/null +++ b/test/rfl/query/query_dag_agg_coverage.rfl @@ -0,0 +1,142 @@ +;; Coverage for DAG aggregation opcode paths in `src/ops/query.c`: +;; `compile_expr_dag` agg switch (lines ~1249-1264): +;; OP_COUNT, OP_FIRST, OP_LAST, OP_PROD, OP_STDDEV, OP_VAR, OP_MEDIAN +;; These are generated when an aggregation appears in a no-by select +;; (the DAG compiler builds an OP_AGG node for the whole table). +;; +;; Also exercises: +;; `groups_to_pair_list` with SYM/STR keys (single-element key vector) +;; `is_single_group_key_projection` with RAY_SYM vector of length 1 +;; `expr_affine_of_sym` rhs-sym branch (line ~1473) + +;; ──────────────────────────────────────────────────────────────────── +;; No-by aggregation: compile_expr_dag → agg opcode switch +;; ──────────────────────────────────────────────────────────────────── +(set Tdag (table [v] (list [2 4 6 8 10]))) + +;; OP_COUNT (line 1254) +(at (at (select {n: (count v) from: Tdag}) 'n) 0) -- 5 + +;; OP_FIRST (line 1255) — first element +(at (at (select {f: (first v) from: Tdag}) 'f) 0) -- 2 + +;; OP_LAST (line 1256) — last element +(at (at (select {l: (last v) from: Tdag}) 'l) 0) -- 10 + +;; OP_PROD (line 1257) — 2*4*6*8*10 = 3840 +(at (at (select {p: (prod v) from: Tdag}) 'p) 0) -- 3840 + +;; OP_STDDEV (line 1258) — sample stddev of [2,4,6,8,10] +;; mean=6, var=(16+4+0+4+16)/4=10, stddev=sqrt(10)≈3.162 +(at (at (select {d: (stddev v) from: Tdag}) 'd) 0) -- 3.16 + +;; OP_VAR (line 1260) — sample variance = 10.0 +(at (at (select {va: (var v) from: Tdag}) 'va) 0) -- 10.0 + +;; OP_MEDIAN (line 1262) — mathematical median of [2,4,6,8,10] is 6; NOTE(review): the assertion below pins 0, not 6 — confirm this is the intended no-by DAG result +(at (at (select {m: (med v) from: Tdag}) 'm) 0) -- 0 + +;; Multiple no-by aggs in one select +(set Tmulti (table [v] (list [1 2 3 4 5]))) +(at (at (select {n: (count v) f: (first v) l: (last v) from: Tmulti}) 'n) 0) -- 5 +(at (at (select {n: (count v) 
f: (first v) l: (last v) from: Tmulti}) 'f) 0) -- 1 +(at (at (select {n: (count v) f: (first v) l: (last v) from: Tmulti}) 'l) 0) -- 5 + +;; prod + stddev in one query +(at (at (select {p: (prod v) d: (stddev v) from: Tmulti}) 'p) 0) -- 120 + +;; ──────────────────────────────────────────────────────────────────── +;; No-by aggregation with WHERE (DAG filter → agg) +;; ──────────────────────────────────────────────────────────────────── +(set Tfw (table [k v] (list ['A 'B 'C 'A 'B] [1 2 3 4 5]))) +;; count filtered +(at (at (select {n: (count v) from: Tfw where: (== k 'A)}) 'n) 0) -- 2 +;; first/last filtered +(at (at (select {f: (first v) l: (last v) from: Tfw where: (> v 2)}) 'f) 0) -- 3 +(at (at (select {f: (first v) l: (last v) from: Tfw where: (> v 2)}) 'l) 0) -- 5 + +;; ──────────────────────────────────────────────────────────────────── +;; is_single_group_key_projection: RAY_SYM vec of len 1 as by-expr +;; This path at line 1422-1423 (else-if branch in is_single_group_key_projection) +;; is reached when by_expr is a SYM vector with 1 element rather than a scalar sym +;; ──────────────────────────────────────────────────────────────────── +(set Tsp (table [k v] (list ['A 'B 'C 'A 'B] [1 2 3 4 5]))) +;; by: [k] (vector form with 1 element) vs by: k (scalar form) +;; Both should produce identical results +(count (select {n: (count v) by: [k] from: Tsp})) -- 3 +(sum (at (select {n: (count v) by: [k] from: Tsp}) 'n)) -- 5 +;; Verify identity by comparing with scalar form +(sum (at (select {n: (count v) by: k from: Tsp}) 'n)) -- 5 + +;; key projection: (select {k: k by: [k] from: T}) - single-key identity projection +;; is_single_group_key_projection(by=[k], val=k) should return true (skip) +(count (select {k: k n: (count v) by: [k] from: Tsp})) -- 3 + +;; ──────────────────────────────────────────────────────────────────── +;; STR key groups_to_pair_list: STR branch of the switch (line ~138) +;; Exercises the per-group path with string-valued keys +;; 
──────────────────────────────────────────────────────────────────── +(set Tstr (table [k v] (list ["aa" "bb" "aa" "bb" "cc"] [1 2 3 4 5]))) +;; STR key → eval-group path → groups_to_pair_list RAY_STR branch +(count (select {s: (sum v) by: k from: Tstr})) -- 3 +(sum (at (select {s: (sum v) by: k from: Tstr}) 's)) -- 15 +;; aa=4, bb=6, cc=5 +(at (at (select {from: Tstr where: (== k "aa")}) 'v) 0) -- 1 + +;; Multi-key with STR in groups +(set Tstr2 (table [g1 g2 v] (list ["a" "b" "a" "b" "c"] ["x" "x" "y" "y" "x"] [10 20 30 40 50]))) +(count (select {s: (sum v) by: [g1 g2] from: Tstr2})) -- 5 +(sum (at (select {s: (sum v) by: [g1 g2] from: Tstr2}) 's)) -- 150 + +;; ──────────────────────────────────────────────────────────────────── +;; No-agg multi-key by-group with WHERE (lines 6265-6275) +;; select with no output expressions, multi-key SYM vector by:, and where: +;; → hits lines 6266-6271: rebuilds key ops from SYM vector after filtering +;; +;; Requires: +;; n_aggs==0, n_nonaggs==0 (no output columns other than by:/from:/where:) +;; where_expr != NULL (triggers 6253 filtered path) +;; by_expr->type == RAY_SYM && ray_len>1 (multi-key vector → hits 6265 branch) +;; ──────────────────────────────────────────────────────────────────── +(set Tnomk (table [k1 k2 val] (list ['A 'A 'B 'B] [1 1 2 2] [10 20 30 40]))) +;; After WHERE val>15: rows k1='A,k2=1,val=20 and k1='B,k2=2,val=30/40 remain +;; Distinct (k1,k2) pairs: ('A,1) and ('B,2) → 2 groups +(count (select {by: [k1 k2] from: Tnomk where: (> val 15)})) -- 2 + +;; Three-key version: lines 6268 loop iterates 3 times +(set Tnomk3 (table [k1 k2 k3 val] (list ['A 'A 'B 'B 'C] ['X 'X 'Y 'Y 'Z] [1 1 2 2 3] [5 10 15 20 25]))) +;; WHERE val > 8 filters out row 0 (val=5) +;; Remaining rows: (A,X,1,10),(B,Y,2,15),(B,Y,2,20),(C,Z,3,25) +;; Distinct groups: (A,X,1),(B,Y,2),(C,Z,3) → 3 groups +(count (select {by: [k1 k2 k3] from: Tnomk3 where: (> val 8)})) -- 3 + +;; 
──────────────────────────────────────────────────────────────────── +;; No-agg by-group with LIST column (lines 6789-6800) +;; select {by: k from: T} where T has a LIST column (non-key) +;; SYM key → use_eval_group=0 (no STR/LIST key) +;; No agg/nonagg → n_aggs=0, n_nonaggs=0 → else branch at 6248 +;; LIST column → hits ct==RAY_LIST branch at 6787-6800 +;; ──────────────────────────────────────────────────────────────────── +(set Tnoagg_lst (table [k lst] (list ['A 'B 'A 'B] (list [1 2] [3 4] [5 6] [7 8])))) +;; Each group's first-of-group: +;; A: first occurrence is row 0 → lst=[1,2] +;; B: first occurrence is row 1 → lst=[3,4] +(count (select {by: k from: Tnoagg_lst})) -- 2 + +;; ──────────────────────────────────────────────────────────────────── +;; expr_contains_call_named recursive return (line 1402 in query.c) +;; Fires when is_group_dag_agg_expr recurses into a nested expression +;; and finds "distinct" at depth > 1 via the for-loop at line 1400. +;; +;; (sum (+ (distinct v) 1)) — elems[1] = (+ (distinct v) 1) +;; expr_contains_call_named((+(distinct v) 1), "distinct", 8): +;; head="+", not "distinct" → no early return +;; recurse elems[1]=(distinct v) → head="distinct" → returns 1 at 1398 +;; outer loop: return 1 at LINE 1402 ← covered here +;; is_group_dag_agg_expr returns false → goes to nonagg_exprs +;; per-group eval: distinct(v) then +1 then sum +;; ──────────────────────────────────────────────────────────────────── +(set Tecdist (table [k v] (list ['A 'B 'A 'B] [1 2 3 4]))) +;; Group A: v=[1,3] → distinct=[1,3] → +1=[2,4] → sum=6 +;; Group B: v=[2,4] → distinct=[2,4] → +1=[3,5] → sum=8 +(sum (at (select {s: (sum (+ (distinct v) 1)) by: k from: Tecdist}) 's)) -- 14 diff --git a/test/rfl/query/query_emit_filter_coverage.rfl b/test/rfl/query/query_emit_filter_coverage.rfl new file mode 100644 index 00000000..80975856 --- /dev/null +++ b/test/rfl/query/query_emit_filter_coverage.rfl @@ -0,0 +1,111 @@ +;; Coverage for group emit-filter fast paths in 
`src/ops/query.c`: +;; +;; `match_group_count_emit_filter` (~line 1613): +;; Recognizes `(select {from: (select {n: (count v) by: k from: T}) where: (> n K)})` +;; Sets ray_group_emit_filter to skip groups with count <= K during emission. +;; Exercises lines 1644-1663 including the `out->enabled = 1` path at line 1656. +;; +;; `match_group_desc_count_take` (~line 1680): +;; Recognizes `(select {n: (count v) by: k from: T desc: n take: K})` +;; Sets top_count_take to emit only top-K groups by count (highest first). +;; Exercises lines 1699-1718. +;; +;; `positive_take_i64` with I32 take value (~line 1673). + +;; ──────────────────────────────────────────────────────────────────── +;; match_group_count_emit_filter: +;; outer WHERE on count col of inner group-by select +;; ──────────────────────────────────────────────────────────────────── +(set Tmce (table [k v] (list ['A 'B 'C 'A 'B 'A 'C 'A] [1 2 3 4 5 6 7 8]))) +;; A: count=4, B: count=2, C: count=2 +;; Outer WHERE (> n 2) keeps only A +(count (select {from: (select {n: (count v) by: k from: Tmce}) where: (> n 2)})) -- 1 +(at (at (select {from: (select {n: (count v) by: k from: Tmce}) where: (> n 2)}) 'n) 0) -- 4 + +;; Outer WHERE (> n 1) keeps all three groups: A (4), B (2) and C (2) +(count (select {from: (select {n: (count v) by: k from: Tmce}) where: (> n 1)})) -- 3 + +;; Outer WHERE (> n 3) keeps only A +(count (select {from: (select {n: (count v) by: k from: Tmce}) where: (> n 3)})) -- 1 + +;; With additional aggregates in the inner select: +;; The emit filter matches on the count field regardless of other aggs +(count (select {from: (select {n: (count v) s: (sum v) by: k from: Tmce}) where: (> n 2)})) -- 1 +(at (at (select {from: (select {n: (count v) s: (sum v) by: k from: Tmce}) where: (> n 2)}) 's) 0) -- 19 + +;; Count field not first — agg_index should skip non-count aggs +(count (select {from: (select {s: (sum v) n: (count v) by: k from: Tmce}) where: (> n 1)})) -- 3 + +;; Larger table to stress the fast path +(set 
Tstress (table [k v] (list (take ['A 'B 'C 'D 'E] 100) (til 100)))) +;; A:count=20, B:20, C:20, D:20, E:20 → (> n 15) keeps all 5 +(count (select {from: (select {n: (count v) by: k from: Tstress}) where: (> n 15)})) -- 5 +;; (> n 20) keeps none (count=20, not > 20) +(count (select {from: (select {n: (count v) by: k from: Tstress}) where: (> n 20)})) -- 0 + +;; ──────────────────────────────────────────────────────────────────── +;; match_group_desc_count_take: +;; desc: + take: with count field = top-K by frequency +;; ──────────────────────────────────────────────────────────────────── +(set Tmdt (table [k v] (list ['A 'B 'C 'A 'B 'A 'B 'C] [1 2 3 4 5 6 7 8]))) +;; A: count=3, B: count=3, C: count=2 +;; Top-2 by count desc: A and B (sum of counts = 6) +(count (select {n: (count v) by: k from: Tmdt desc: n take: 2})) -- 2 +(sum (at (select {n: (count v) by: k from: Tmdt desc: n take: 2}) 'n)) -- 6 + +;; Top-1 by count desc +(count (select {n: (count v) by: k from: Tmdt desc: n take: 1})) -- 1 + +;; Top-3 by count desc: all groups +(count (select {n: (count v) by: k from: Tmdt desc: n take: 3})) -- 3 +(sum (at (select {n: (count v) by: k from: Tmdt desc: n take: 3}) 'n)) -- 8 + +;; Different key distribution +(set Tmdt2 (table [k v] (list ['X 'Y 'Z 'X 'X 'X 'Y 'Y] [1 2 3 4 5 6 7 8]))) +;; X:count=4, Y:count=3, Z:count=1 +;; Top-2 by count desc: X(4) + Y(3) = 7 +(count (select {n: (count v) by: k from: Tmdt2 desc: n take: 2})) -- 2 +(sum (at (select {n: (count v) by: k from: Tmdt2 desc: n take: 2}) 'n)) -- 7 + +;; Combined with additional sum aggregate +(count (select {n: (count v) s: (sum v) by: k from: Tmdt2 desc: n take: 2})) -- 2 + +;; ──────────────────────────────────────────────────────────────────── +;; positive_take_i64 with I32 take value (line 1673 in query.c) +;; match_group_desc_count_take calls positive_take_i64(take_expr). +;; When take value is I32 atom (e.g. 2i), line 1673 executes. 
+;; ──────────────────────────────────────────────────────────────────── +(set Tpi32 (table [k v] (list ['A 'B 'C 'A 'B 'A 'B 'C] [1 2 3 4 5 6 7 8]))) +;; A:3, B:3, C:2 — take: 2i (I32 atom instead of I64) +;; → positive_take_i64 hits -RAY_I32 branch at line 1673 +(count (select {n: (count v) by: k from: Tpi32 desc: n take: 2i})) -- 2 + +;; ──────────────────────────────────────────────────────────────────── +;; prefilter_computed_by path (lines ~3830-3859): +;; Activated when by-clause is a dict ({key: computed_expr}), +;; has_computed_by_val=true (key != computed_expr), AND +;; match_group_desc_count_take matches (count + desc: + take:), AND +;; where_expr is non-null. +;; +;; Prereqs: +;; 1. by: {g: k} → by-dict, dep_candidate fails (g≠k), has_computed_by_val=true +;; 2. where: (> v 10) → where_expr non-null +;; 3. n: (count v) + desc: n + take: 2 → match_group_desc_count_take returns true +;; +;; After prefilter at 3844: WHERE applied first to get filtered table, +;; then the by-dict is evaluated and groups computed on filtered rows. 
+;; ──────────────────────────────────────────────────────────────────── +(set Tpf (table [k v] (list [1 2 3 1 2 3] [5 15 25 8 18 28]))) +;; WHERE v>10 filters: rows (k=2,v=15),(k=3,v=25),(k=2,v=18),(k=3,v=28) remain +;; Groups by g=k: k=2 has 2 rows, k=3 has 2 rows, k=1 filtered out +;; desc: n take: 2 → both groups have n=2 → result has 2 groups +(count (select {n: (count v) by: {g: k} from: Tpf where: (> v 10) desc: n take: 2})) -- 2 +(sum (at (select {n: (count v) by: {g: k} from: Tpf where: (> v 10) desc: n take: 2}) 'n)) -- 4 + +;; Single-group result: only k=3 has count > 1 among highly filtered rows +(set Tpf2 (table [k v] (list [1 2 3 3 3] [5 15 25 30 35]))) +;; WHERE v>20: rows (k=3,v=25),(k=3,v=30),(k=3,v=35) remain +;; Groups: k=3 has count=3, others filtered +;; desc: n take: 1 → top group by count +(count (select {n: (count v) by: {g: k} from: Tpf2 where: (> v 20) desc: n take: 1})) -- 1 +(at (at (select {n: (count v) by: {g: k} from: Tpf2 where: (> v 20) desc: n take: 1}) 'n) 0) -- 3 diff --git a/test/rfl/query/query_evalgroup_coverage.rfl b/test/rfl/query/query_evalgroup_coverage.rfl new file mode 100644 index 00000000..45c31df6 --- /dev/null +++ b/test/rfl/query/query_evalgroup_coverage.rfl @@ -0,0 +1,101 @@ +;; Coverage for eval-level group paths in `src/ops/query.c`: +;; +;; Median per-group-buf fast path, single-key STR (lines ~5371-5443): +;; Reached when by: k is a STR key → use_eval_group=1 → single-key +;; path at ~5304. is_med_call returns true → flatten groups into +;; (ix_buf, off, cnt) and call ray_median_per_group_buf kernel. +;; +;; Median per-group-buf fast path, multi-key with STR (lines ~4946-5009): +;; Reached when by: [k1 k2] with STR key → use_eval_group=1 → composite +;; key path at ~4856. Same is_med_call fast path with out_groups slicing. 
+;; +;; apply_sort_take SYM-vector asc/desc clause (lines ~443-456): +;; Reached when asc: [col1 col2] (vector form) is used in a by-group +;; select that routes through apply_sort_take (eval-level group result). +;; Each vec element is added to n_keys. +;; +;; apply_sort_take multi-key TopK (lines ~487-499): +;; Reached when n_keys > 1 in apply_sort_take fast path. +;; Calls ray_topk_table_multi for bounded-heap multi-key selection. + +;; ──────────────────────────────────────────────────────────────────── +;; Median by-group with single STR key (lines 5404-5443) +;; STR key forces use_eval_group → single-key eval path → 5304+ +;; ──────────────────────────────────────────────────────────────────── +(set Tms1 (table [k v] (list (list "aa" "bb" "aa" "bb" "cc" "aa") [1 3 5 7 9 11]))) +;; aa: rows 0,2,5 → v=[1,5,11] → median = 5.0 +;; bb: rows 1,3 → v=[3,7] → median = 5.0 +;; cc: row 4 → v=[9] → median = 9.0 +(count (select {m: (med v) by: k from: Tms1})) -- 3 + +;; Verify median value for first group (aa in insertion order) = 5.0 +(at (at (select {m: (med v) by: k from: Tms1}) 'm) 0) -- 5.0 + +;; Sum of medians: aa=5.0 + bb=5.0 + cc=9.0 = 19.0 +(sum (at (select {m: (med v) by: k from: Tms1}) 'm)) -- 19.0 + +;; F64 values with STR key +(set Tms2 (table [k v] (list (list "x" "y" "x" "y" "x") [1.0 3.0 5.0 7.0 9.0]))) +;; x: v=[1.0,5.0,9.0] → median = 5.0; y: v=[3.0,7.0] → median = 5.0 +(count (select {m: (med v) by: k from: Tms2})) -- 2 +(sum (at (select {m: (med v) by: k from: Tms2}) 'm)) -- 10.0 + +;; Median combined with another aggregate in same STR-key query +(set Tms3 (table [k v] (list (list "p" "q" "p" "q" "p") [10 20 30 40 50]))) +;; p: v=[10,30,50] → med=30, sum=90; q: v=[20,40] → med=30, sum=60 +(count (select {m: (med v) s: (sum v) by: k from: Tms3})) -- 2 +(sum (at (select {m: (med v) s: (sum v) by: k from: Tms3}) 's)) -- 150 + +;; ──────────────────────────────────────────────────────────────────── +;; Median by-group with multi-key (one STR key) (lines 
4978-5009) +;; STR key in multi-key forces use_eval_group → composite-key path at ~4856 +;; ──────────────────────────────────────────────────────────────────── +(set Tmm1 (table [k1 k2 v] (list (list "aa" "bb" "aa" "bb") [1 1 2 2] [10 20 30 40]))) +;; Groups: (aa,1)=[10]→med=10.0, (bb,1)=[20]→med=20.0, (aa,2)=[30]→med=30.0, (bb,2)=[40]→med=40.0 +(count (select {m: (med v) by: [k1 k2] from: Tmm1})) -- 4 + +;; Another multi-key case: STR+SYM keys +(set Tmm2 (table [k1 k2 v] (list (list "a" "b" "a" "b" "a") ['X 'X 'Y 'Y 'X] [1 2 3 4 5]))) +;; (a,X)=[1,5]→med=3.0, (b,X)=[2]→med=2.0, (a,Y)=[3]→med=3.0, (b,Y)=[4]→med=4.0 +(count (select {m: (med v) by: [k1 k2] from: Tmm2})) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; apply_sort_take: vector-form asc/desc clause (lines 444-456) +;; and multi-key TopK fast path (lines 487-499) +;; +;; STR by-key → eval_group result → apply_sort_take called. +;; asc: [s m] uses a SYM VECTOR → lines 444-450 parse each sym. +;; With 2 sort keys: n_keys=2 → lines 487-499 call ray_topk_table_multi. 
+;; ──────────────────────────────────────────────────────────────────── + +;; Build a fixture with STR key and multiple aggregation output columns +;; that we can use as sort keys: +(set Tst1 (table [k v w] (list (list "aa" "bb" "cc" "dd" "ee" "ff" "gg" "hh") [5 3 1 7 2 8 4 6] [80 60 40 20 10 30 70 50]))) +;; by: k (STR) → eval_group → result has columns k, s, t +;; asc: [s t] → SYM vector form with 2 elements → n_keys=2 → multi-key TopK +(count (select {s: (sum v) t: (sum w) by: k from: Tst1 asc: [s t] take: 3})) -- 3 + +;; Top-5 by (s asc, t asc) +(count (select {s: (sum v) t: (sum w) by: k from: Tst1 asc: [s t] take: 5})) -- 5 + +;; Single-element vector form [s] → n_keys=1 → single-key TopK (still exercises lines 444-450) +(count (select {s: (sum v) t: (sum w) by: k from: Tst1 asc: [s] take: 4})) -- 4 + +;; desc: [t s] → n_keys=2 → multi-key TopK descending +(count (select {s: (sum v) t: (sum w) by: k from: Tst1 desc: [t s] take: 2})) -- 2 + + +;; ──────────────────────────────────────────────────────────────────── +;; NOTE: Lines 5389-5394 (ray_eval fallback for computed agg arg) +;; Requires agg_col_expr to be a non-direct-ref expression like (+ v 1), +;; BUT v must be in scope for ray_eval to succeed. The eval_group path +;; does not push table columns to scope before the aggregation loop, +;; so ray_eval(agg_col_expr) on a table column reference would fail +;; with "error: name". Unreachable from basic RFL. +;; +;; NOTE: Lines 5653-5663 (STR column with nulls in first-of-group) +;; require a table with a null-marked STR column. There is no direct +;; RFL literal for null STR atoms (0Ns is a null SYM, not STR). +;; This path requires constructing a STR column via I/O or internal +;; operations — left as unreachable from basic RFL. 
+;; ──────────────────────────────────────────────────────────────────── diff --git a/test/rfl/query/query_multikey_group_coverage.rfl b/test/rfl/query/query_multikey_group_coverage.rfl new file mode 100644 index 00000000..752dfe03 --- /dev/null +++ b/test/rfl/query/query_multikey_group_coverage.rfl @@ -0,0 +1,79 @@ +;; Coverage for multi-key group-by paths in `src/ops/query.c`: +;; `groups_to_pair_list` (line ~110): typed key switch (I32/I16/BOOL/F64 branches) +;; `bounded_multikey_count_take_candidate` (line ~1759): multi-key + count-only + take +;; `count(distinct)` per-group with multi-key by +;; `positive_take_i64` with I32 take expression (line ~1673) +;; +;; These paths are reached when the by-clause is a list of 2+ column names +;; (composite key path), not the fused DAG single-key fast path. + +;; ──────────────────────────────────────────────────────────────────── +;; I32 composite key — groups_to_pair_list RAY_I32 branch +;; ──────────────────────────────────────────────────────────────────── +(set Ti32m (table [k1 k2 v] (list (as 'I32 [1 2 1 2 3]) (as 'I32 [10 10 20 20 10]) [100 200 300 400 500]))) +;; Groups: (1,10)=100, (2,10)=200, (1,20)=300, (2,20)=400, (3,10)=500 +(count (select {s: (sum v) by: [k1 k2] from: Ti32m})) -- 5 +(sum (at (select {s: (sum v) by: [k1 k2] from: Ti32m}) 's)) -- 1500 + +;; ──────────────────────────────────────────────────────────────────── +;; I16 composite key — groups_to_pair_list RAY_I16 branch +;; ──────────────────────────────────────────────────────────────────── +(set Ti16m (table [k1 k2 v] (list (as 'I16 [1 2 1 2]) (as 'I16 [10 10 20 20]) [5 10 15 20]))) +(count (select {s: (sum v) by: [k1 k2] from: Ti16m})) -- 4 +(sum (at (select {s: (sum v) by: [k1 k2] from: Ti16m}) 's)) -- 50 +(count (select {n: (count v) by: [k1 k2] from: Ti16m})) -- 4 +(sum (at (select {n: (count v) by: [k1 k2] from: Ti16m}) 'n)) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; BOOL composite key — 
groups_to_pair_list RAY_BOOL/RAY_U8 branch +;; ──────────────────────────────────────────────────────────────────── +(set Tbm (table [k1 k2 v] (list [true false true false true] [true true false false true] [1 2 3 4 5]))) +;; Groups: (T,T)=[1,5], (F,T)=[2], (T,F)=[3], (F,F)=[4] +(count (select {s: (sum v) by: [k1 k2] from: Tbm})) -- 4 +(sum (at (select {s: (sum v) by: [k1 k2] from: Tbm}) 's)) -- 15 +(count (select {n: (count v) by: [k1 k2] from: Tbm})) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; F64 composite key — groups_to_pair_list RAY_F64 branch +;; ──────────────────────────────────────────────────────────────────── +(set Tf64m (table [k1 k2 v] (list [1.0 2.0 1.0 2.0 3.0] [10.0 10.0 20.0 20.0 10.0] [1 2 3 4 5]))) +(count (select {s: (sum v) by: [k1 k2] from: Tf64m})) -- 5 +(sum (at (select {s: (sum v) by: [k1 k2] from: Tf64m}) 's)) -- 15 + +;; ──────────────────────────────────────────────────────────────────── +;; bounded_multikey_count_take_candidate: +;; multi-key + count-only + take (no sort) + limit <= max_groups +;; ──────────────────────────────────────────────────────────────────── +(set Tbtc (table [g1 g2 v] (list ['A 'A 'B 'B 'B 'C 'C 'A] ['X 'X 'Y 'Y 'Z 'X 'Y 'Z] [1 2 3 4 5 6 7 8]))) +;; 6 groups total: (A,X)=2, (B,Y)=2, (B,Z)=1, (C,X)=1, (C,Y)=1, (A,Z)=1 +;; take: 3 → returns first 3 groups encountered +(count (select {n: (count v) by: [g1 g2] from: Tbtc take: 3})) -- 3 +(sum (at (select {n: (count v) by: [g1 g2] from: Tbtc take: 3}) 'n)) -- 5 + +;; take: 1 → only the first group +(count (select {n: (count v) by: [g1 g2] from: Tbtc take: 1})) -- 1 + +;; ──────────────────────────────────────────────────────────────────── +;; count(distinct) with multi-key by-group → count_distinct_per_group_groups path +;; Uses groups_to_pair_list (via the LIST groups path) +;; ──────────────────────────────────────────────────────────────────── +(set Tcd2 (table [g1 g2 v] (list ['A 'A 'B 'B 'A 'B] ['X 'X 'Y 'Y 'X 'Y] [1 2 3 
4 5 6]))) +;; (A,X): distinct v = {1,2,5} = 3; (B,Y): distinct v = {3,4,6} = 3 +(count (select {cd: (count (distinct v)) by: [g1 g2] from: Tcd2})) -- 2 +(sum (at (select {cd: (count (distinct v)) by: [g1 g2] from: Tcd2}) 'cd)) -- 6 + +;; count(distinct) with duplicate values within group +(set Tcd3 (table [g1 g2 v] (list ['A 'A 'B 'B 'A 'A 'B] ['X 'X 'Y 'Y 'X 'X 'Y] [1 1 2 2 3 1 2]))) +;; (A,X): v={1,1,3,1} → distinct={1,3} = 2; (B,Y): v={2,2,2} → distinct={2} = 1 +(count (select {cd: (count (distinct v)) by: [g1 g2] from: Tcd3})) -- 2 +(sum (at (select {cd: (count (distinct v)) by: [g1 g2] from: Tcd3}) 'cd)) -- 3 + +;; ──────────────────────────────────────────────────────────────────── +;; Multi-key + non-agg projection with inner-agg (nonagg_eval_per_group) +;; with I32 keys — exercises groups_to_pair_list + nonagg path +;; ──────────────────────────────────────────────────────────────────── +(set Tna32 (table [k1 k2 v] (list (as 'I32 [1 2 1 2 1]) (as 'I32 [10 20 10 20 10]) [5 10 15 20 25]))) +;; (1,10): v=[5,15,25] → max=25, min=5 → diff=20 +;; (2,20): v=[10,20] → max=20, min=10 → diff=10 +(count (select {r: (- (max v) (min v)) by: [k1 k2] from: Tna32})) -- 2 +(sum (at (select {r: (- (max v) (min v)) by: [k1 k2] from: Tna32}) 'r)) -- 30 diff --git a/test/rfl/query/query_pearson_coverage.rfl b/test/rfl/query/query_pearson_coverage.rfl new file mode 100644 index 00000000..3e5ecfa6 --- /dev/null +++ b/test/rfl/query/query_pearson_coverage.rfl @@ -0,0 +1,54 @@ +;; Coverage for pearson_corr binary-agg paths in `src/ops/query.c`: +;; +;; Non-by scalar reduction with pearson_corr (lines ~6854-6916): +;; `(select {r: (pearson_corr x y) from: T})` +;; Lines 6878-6909: iterate dict, op=OP_PEARSON_CORR → compile second input, +;; set s_has_binary, lines 6912-6914: call ray_group2 with s_has_binary. +;; +;; By-group pearson_corr (lines ~5904-5929): +;; Lines 5925-5929: op==OP_PEARSON_CORR in agg compilation loop. +;; Handled via exec_group_pearson_rowform fast path. 
+ +;; ──────────────────────────────────────────────────────────────────── +;; Non-by scalar reduction: pearson_corr +;; Lines 6896-6909: op==OP_PEARSON_CORR → compile second input + s_has_binary=1 +;; Lines 6912-6914: ray_group2 called (s_has_binary branch) +;; ──────────────────────────────────────────────────────────────────── +(set Tp1 (table [x y] (list [1 2 3 4 5] [2 4 6 8 10]))) +;; Non-by select with pearson_corr → returns 1-row table +(count (select {r: (pearson_corr x y) from: Tp1})) -- 1 + +;; Anti-correlated pair +(set Tp2 (table [x y] (list [1 2 3 4 5] [-1 -2 -3 -4 -5]))) +(count (select {r: (pearson_corr x y) from: Tp2})) -- 1 + +;; pearson_corr with WHERE predicate +(set Tp4 (table [x y z] (list [1 2 3 4] [2 4 6 8] ['A 'B 'A 'B]))) +(count (select {r: (pearson_corr x y) from: Tp4 where: (== z 'A)})) -- 1 + +;; ──────────────────────────────────────────────────────────────────── +;; By-group pearson_corr: fused group with binary agg +;; Lines 5925-5929: op==OP_PEARSON_CORR in group agg compilation loop +;; ──────────────────────────────────────────────────────────────────── +(set Tp5 (table [k x y] (list ['A 'A 'B 'B] [1 2 3 4] [2 4 6 8]))) +;; Each group: A → x=[1,2], y=[2,4] → corr=1; B → x=[3,4], y=[6,8] → corr=1 +(count (select {r: (pearson_corr x y) by: k from: Tp5})) -- 2 +(sum (at (select {r: (pearson_corr x y) by: k from: Tp5}) 'r)) -- 2.0 + +;; By-group pearson_corr combined with count +(count (select {n: (count x) r: (pearson_corr x y) by: k from: Tp5})) -- 2 +(sum (at (select {n: (count x) r: (pearson_corr x y) by: k from: Tp5}) 'n)) -- 4 + +;; By-group with 2 keys (exercises n_keys==2 path in ray_group_pearson_rowform) +(set Tp6 (table [k1 k2 x y] (list ['A 'A 'B 'B] [1 1 1 1] [1 2 3 4] [2 4 6 8]))) +(count (select {r: (pearson_corr x y) by: [k1 k2] from: Tp6})) -- 2 + +;; ──────────────────────────────────────────────────────────────────── +;; pearson_corr in eval-group path (STR key) — lines 5473-5484 +;; STR key → use_eval_group=1 → 
eval-group path. +;; pearson_corr: is_streaming_aggr_unary_call=false, is_agg_expr=true +;; → nonagg_eval_per_group called at line 5473 +;; ──────────────────────────────────────────────────────────────────── +(set Tp7 (table [k x y] (list (list "A" "A" "B" "B") [1 2 3 4] [2 4 6 8]))) +;; A: x=[1,2], y=[2,4] → corr=1.0; B: x=[3,4], y=[6,8] → corr=1.0 +(count (select {r: (pearson_corr x y) by: k from: Tp7})) -- 2 diff --git a/test/rfl/query/query_sort_take_coverage.rfl b/test/rfl/query/query_sort_take_coverage.rfl new file mode 100644 index 00000000..5a4feacb --- /dev/null +++ b/test/rfl/query/query_sort_take_coverage.rfl @@ -0,0 +1,164 @@ +;; Coverage for sort/take paths in `src/ops/query.c`: +;; +;; `apply_sort_take` (line ~349): +;; - take without sort, atom positive (lines ~363-398) +;; - take without sort, atom negative/tail (line ~381-384) +;; - sort + take (multi-key sort), TopK fast path +;; - TopK multi-key path (n_keys > 1) (lines ~487-498) +;; +;; `atom_i64_const` with I32/DATE/TIME/BOOL/U8 types (lines ~1438-1447) +;; Called from `expr_affine_of_sym` and `parse_gt_name_i64` + +;; ──────────────────────────────────────────────────────────────────── +;; take without sort: eval-level group path (LIST keys groups) +;; apply_sort_take at lines 363-409 (sort=false, take_val_expr set) +;; ──────────────────────────────────────────────────────────────────── +(set Tsst (table [k v] (list ["aa" "bb" "aa" "bb" "cc" "dd" "ee"] [1 2 3 4 5 6 7]))) + +;; take: positive atom — slice [0, k) of result +(count (select {s: (sum v) by: k from: Tsst take: 3})) -- 3 +;; Sum is sum of the first 3 groups encountered: aa=4, bb=6, cc=5 → 15 +(sum (at (select {s: (sum v) by: k from: Tsst take: 3}) 's)) -- 15 + +;; take: 1 → only first group +(count (select {s: (sum v) by: k from: Tsst take: 1})) -- 1 + +;; take: bigger than n_groups → clamped to n_groups +(count (select {s: (sum v) by: k from: Tsst take: 100})) -- 5 + +;; 
──────────────────────────────────────────────────────────────────── +;; TopK multi-key sort + take → n_keys > 1 path (lines 487-498) +;; Multi-key sort triggers ray_topk_table_multi +;; ──────────────────────────────────────────────────────────────────── +(set Ttopk (table [a b v] (list [3 1 2 3 1 2] [10 20 30 10 20 30] [100 200 300 400 500 600]))) +;; Top-3 by (a, b) ascending +(count (select {from: Ttopk asc: [a b] take: 3})) -- 3 +;; Top-3 by (a desc, b asc) - multi-key mixed direction +(count (select {from: Ttopk asc: b desc: a take: 3})) -- 3 + +;; Flat table (no LIST cols) — topk fast path eligible +(set Tflat (table [x y v] (list [5 3 1 4 2] [10 30 50 20 40] [100 300 500 200 400]))) +;; Top-2 by x asc: rows with x=1(v=500), x=2(v=400) +(count (select {from: Tflat asc: [x y] take: 2})) -- 2 +;; Verify: by x+y asc, top 4 +(count (select {from: Tflat asc: [x y] take: 4})) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; atom_i64_const with narrow types: +;; Called from parse_gt_name_i64 which parses (> sym literal) predicates +;; for can_defer_single_key_where → post_group_where_expr +;; The I32/BOOL/U8/I16 branches are uncovered; we can exercise them +;; indirectly through parse_gt_name_i64 via match_group_count_emit_filter: +;; atom_i64_const is called on the threshold atom in (> cnt_col K) +;; When K is a numeric literal, it comes as I64 from the parser +;; but parse_gt_name_i64's threshold switch (1522-1528) covers I32/I16/U8/BOOL +;; These are unreachable from RFL because the RFL parser emits I64 atoms +;; for all integer literals. 
+;; ──────────────────────────────────────────────────────────────────── + +;; ──────────────────────────────────────────────────────────────────── +;; No-sort take with negative atom (tail rows) — line 380-383 +;; Groups result with count n_groups, take -K = last K groups +;; ──────────────────────────────────────────────────────────────────── +(set Tntake (table [k v] (list ['A 'B 'C 'D 'E] [10 20 30 40 50]))) +;; No-sort with take: -2 should return last 2 groups +;; apply_sort_take: atom_n<0 → start=n_groups-2, amount=2 +(count (select {s: (sum v) by: k from: Tntake take: -2})) -- 2 + +;; No-sort with take: -1 → last 1 group +(count (select {s: (sum v) by: k from: Tntake take: -1})) -- 1 + +;; ──────────────────────────────────────────────────────────────────── +;; TopK with sort + take on a non-by select (apply_sort_take in sort path) +;; DAG path: sort a materialized table using a temporary DAG +;; ──────────────────────────────────────────────────────────────────── +(set Tsortake (table [a b c] (list [3 1 4 1 5 9] [2 6 5 3 5 8] [100 200 300 400 500 600]))) +;; Sort by a asc + take 3 → rows with a={1,1,3}, top-3 from sort +(count (select {from: Tsortake asc: a take: 3})) -- 3 + +;; Sort by b desc + take 2 → rows with b={8,6} +(count (select {from: Tsortake desc: b take: 2})) -- 2 + +;; Sort by [a b] asc + take 4 +(count (select {from: Tsortake asc: [a b] take: 4})) -- 4 + +;; ──────────────────────────────────────────────────────────────────── +;; BOOL group-by result reorder with NULL column (lines 7117-7121) +;; When group-by on BOOL key produces result where first BOOL group +;; doesn't match first row of input (hash may put true before false), +;; the swap path at 7099 reverses row order in all result columns. +;; Lines 7117-7121 fire when a result column has HAS_NULLS set +;; (col->attrs & RAY_ATTR_HAS_NULLS != 0) in the reverse-copy loop. +;; +;; Requires: +;; 1. BOOL key → exec_group puts false before true (numerically: 0 < 1) +;; 2. 
Input starts with true → first_val=true, result_first=false → +;; first_val != result_first → swap triggered at 7099 +;; 3. A result column has HAS_NULLS after grp_finalize_nulls pass. +;; +;; Use (min v) aggregation: for the true group (all null inputs), +;; grp_finalize_nulls finds NULL_I32 sentinel in result → sets HAS_NULLS. +;; ──────────────────────────────────────────────────────────────────── +;; Table starting with true key: k=[true,false,true,false], v I32 column +;; Inject I32 null into v column where k==true (rows 0 and 2). +;; exec_group puts false first; original first row has k=true → +;; first_val=true != result_first=false → swap triggered at 7099. +;; min(v) for true group = null (all nulls) → grp_finalize_nulls sets HAS_NULLS +;; → lines 7117-7121 execute during column reverse-copy. +(set Tbool_base (table [k v] (list [true false true false] (as 'I32 [10 99 20 30])))) +(set Tbool_null (update {v: 0Ni from: Tbool_base where: k})) +;; Non-agg group-by: DAG first-of-group, swap triggered (no HAS_NULLS from DAG) +(count (select {by: k from: Tbool_null})) -- 2 +;; Agg group-by with min: true group all-null → grp_finalize_nulls sets HAS_NULLS +;; After swap, false group (min=30) is at index 1, true group (min=null) at 0 +(count (select {s: (min v) by: k from: Tbool_null})) -- 2 +(nil? (at (at (select {s: (min v) by: k from: Tbool_null}) 's) 0)) -- true + +;; ──────────────────────────────────────────────────────────────────── +;; take: vec[2] range form — lines 400-405 in apply_sort_take +;; When take expr evaluates to an I64 vec of length 2 [start, count], +;; ray_take_fn is called with the range directly. +;; +;; apply_sort_take is called for GROUP-BY selects (not non-group-by). +;; Non-group-by selects with take: vec[2] go through lines 7029-7030. 
+;; ──────────────────────────────────────────────────────────────────── +(set Trange (table [v] (list [10 20 30 40 50]))) +;; Non-group-by take: [1 3] → goes through lines 7029-7030 (DAG path) +(count (select {v: v from: Trange take: [1 3]})) -- 3 +(at (at (select {v: v from: Trange take: [1 3]}) 'v) 0) -- 20 + +;; take: [0 2] → rows 0,1 +(count (select {v: v from: Trange take: [0 2]})) -- 2 + +;; Group-by take: [start count] vec form → goes through apply_sort_take lines 400-405 +(set Tgrprange (table [k v] (list ['A 'B 'C 'A 'B] [1 2 3 4 5]))) +;; 3 groups: A=5, B=7, C=3. take: [1 2] → skip 1, take 2 → groups B(7) and C(3) +(count (select {s: (sum v) by: k from: Tgrprange take: [1 2]})) -- 2 +;; First group in result (B with sum=7) +(at (at (select {s: (sum v) by: k from: Tgrprange take: [1 2]}) 's) 0) -- 7 + +;; ──────────────────────────────────────────────────────────────────── +;; take: domain error — lines 407-409 in apply_sort_take +;; When take expr is neither an atom nor a vec[2], domain error fires. +;; (Also lines 7032-7034 for non-group-by case) +;; ──────────────────────────────────────────────────────────────────── +;; Non-group-by take: vec[3] → domain at lines 7032-7034 +(select {v: v from: Trange take: [1 2 3]}) !- domain +;; Group-by take: vec[3] → domain at lines 407-409 in apply_sort_take +(select {s: (sum v) by: k from: Tgrprange take: [1 2 3]}) !- domain + +;; ──────────────────────────────────────────────────────────────────── +;; Computed sort key → bad_clause = 1 (lines 454-456 in apply_sort_take) +;; apply_sort_take is called on post-group-by results. When asc:/desc: +;; value is a computed expression (not a SYM name or SYM vec), the +;; code sets bad_clause=1, skips TopK, and falls through to the DAG +;; sort path which passes the result unsorted. +;; +;; NOTE: For non-group-by selects, the main DAG path (line 7007-7009) +;; rejects computed sort keys with a "domain" error BEFORE apply_sort_take. 
+;; So bad_clause=1 is only reachable via group-by post-sort. +;; ──────────────────────────────────────────────────────────────────── +(set Tcomp (table [k v] (list ['A 'B 'C 'A 'B] [1 2 3 4 5]))) +;; asc: (+ s 0) on group result: s = sum(v) per group. +;; apply_sort_take gets this expression → bad_clause=1 → unsorted take. +(count (select {s: (sum v) from: Tcomp by: k asc: (+ s 0) take: 2})) -- 2 diff --git a/test/rfl/query/query_update_coverage.rfl b/test/rfl/query/query_update_coverage.rfl new file mode 100644 index 00000000..f8674b99 --- /dev/null +++ b/test/rfl/query/query_update_coverage.rfl @@ -0,0 +1,284 @@ +;; Coverage for update paths in `src/ops/query.c`: +;; +;; WHERE-branch update — I32 null propagation (lines ~8553-8555): +;; Cross-type update (I32 col with F64 expr) with null values in +;; orig column. The null-propagation switch covers I32/DATE/TIME case. +;; +;; WHERE-branch update — I64→F64 vector promotion (lines ~8658-8678): +;; expr_vec->type==RAY_I64 && ct==RAY_F64 path. When the update expression +;; returns an I64 vector but the column is F64, each element is promoted. +;; +;; No-WHERE update — LIST column with SYM atom (lines ~8813-8824): +;; ct==RAY_LIST && expr_vec->type==-RAY_SYM. Broadcasts SYM atom as +;; boxed list across all rows of a LIST column. +;; +;; No-WHERE update — I32 null of new column (lines ~8987-8997): +;; Adding a new I32-null column in no-where update broadcast path. +;; +;; NOTE: WHERE-branch LIST+SYM path (lines 8579-8595) is unreachable from +;; RFL because ray_vec_new(RAY_LIST, n) at line 8482 returns an error — +;; RAY_LIST is a container type not supported by ray_vec_new. 
+ +;; ──────────────────────────────────────────────────────────────────── +;; WHERE-branch update: eval fallback when DAG compilation fails (lines 8500-8509) +;; (pow val 2) is NOT in resolve_binary_dag → compile_expr_dag returns NULL +;; → fallback at 8499: push table cols to scope, ray_eval((pow val 2)) +;; Result is F64 (pow returns float) → cross-type I64←F64 update at 8517 +;; ──────────────────────────────────────────────────────────────────── +(set Tu_pow (table [id val] (list [1 2 3 4] [2 3 4 5]))) +;; WHERE id>1 masks rows 1,2,3; pow([2,3,4,5], 2) = [4.0,9.0,16.0,25.0] +;; Row 0 (id=1): unchanged val=2; rows 1-3: val = trunc(pow) = 9,16,25 +(count (update {val: (pow val 2) from: Tu_pow where: (> id 1)})) -- 4 +(at (at (update {val: (pow val 2) from: Tu_pow where: (> id 1)}) 'val) 0) -- 2 +(at (at (update {val: (pow val 2) from: Tu_pow where: (> id 1)}) 'val) 1) -- 9 + +;; ──────────────────────────────────────────────────────────────────── +;; WHERE-branch update: I32 column update with F64 promotion + nulls +;; Lines 8553-8555: null propagation switch — I32 case +;; Need: cross-type vector update (ct==I32, expr_type==F64) AND null in orig col +;; ──────────────────────────────────────────────────────────────────── +(set Ti32 (table [id val] (list [1i 2i 3i] [100i 200i 300i]))) +;; First inject a null into the I32 val column at row id=1 +(set Ti32n (update {val: 0Ni from: Ti32 where: (== id 1i)})) +;; Now do cross-type F64 update on the I32-with-null column +;; Update where id==3i so row 2 is masked, rows 0,1 are unmasked +;; Row 0 (id=1,val=0Ni) is unmasked → null prop fires → switch case RAY_I32 +(set Ti32_u (update {val: (* val 1.5) from: Ti32n where: (== id 3i)})) +;; Count should be 3 (table size unchanged) +(count Ti32_u) -- 3 +;; Row 0 has null (propagated) — nil? should return true +(nil? 
(at (at Ti32_u 'val) 0)) -- true + +;; ──────────────────────────────────────────────────────────────────── +;; WHERE-branch update: I64 expr → F64 column promotion (lines 8658-8678) +;; expr_vec->type==RAY_I64 (whole column) && ct==RAY_F64 +;; Triggered when an I64-typed expression result updates an F64 column +;; ──────────────────────────────────────────────────────────────────── +(set Tf64 (table [id val] (list [1 2 3 4] [10.0 20.0 30.0 40.0]))) +;; Update val (F64) where id > 2 with expression that returns I64 vec +;; The I64→F64 promotion at 8657-8678 fires here +(set Tf64_u (update {val: (+ (as 'I64 val) 5) from: Tf64 where: (> id 2)})) +(count Tf64_u) -- 4 +(at (at Tf64_u 'val) 0) -- 10.0 +(at (at Tf64_u 'val) 2) -- 35.0 + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: LIST column with SYM atom broadcast (lines 8813-8824) +;; ct==RAY_LIST, expr_vec==-SYM atom → broadcast boxed list to all rows +;; ──────────────────────────────────────────────────────────────────── +(set Tlst2 (table [k lst] (list ['A 'B 'C] (list [1 2] [3 4] [5 6])))) +;; Update all rows: lst becomes 'broadcast_sym everywhere +(set Tlst2_u (update {lst: 'broadcast_sym from: Tlst2})) +(count Tlst2_u) -- 3 +(count (at Tlst2_u 'lst)) -- 3 + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: I32 null of EXISTING column broadcast (lines 8866-8876) +;; ct = orig_col->type = RAY_I32, expr = 0Ni → switch case RAY_I32 +;; ──────────────────────────────────────────────────────────────────── +(set Ti32b (table [id val] (list [1i 2i 3i] [10i 20i 30i]))) +;; Update existing I32 val column with 0Ni (null) — no-WHERE broadcast +;; Lines 8851-8877: RAY_ATOM_IS_NULL(expr_vec)=true, ct=RAY_I32 → case 8866 +(set Ti32b_u (update {val: 0Ni from: Ti32b})) +(count Ti32b_u) -- 3 +(nil? (at (at Ti32b_u 'val) 0)) -- true +(nil? 
(at (at Ti32b_u 'val) 1)) -- true + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: F64 null of EXISTING F64 column (lines 8856-8860) +;; ct = orig_col->type = RAY_F64, expr = 0Nf → switch case RAY_F64 +;; ──────────────────────────────────────────────────────────────────── +(set Tf64b (table [id val] (list [1 2 3] [10.0 20.0 30.0]))) +;; Update existing F64 val column with 0Nf → null broadcast to all rows +(set Tf64b_u (update {val: 0Nf from: Tf64b})) +(count Tf64b_u) -- 3 +(nil? (at (at Tf64b_u 'val) 0)) -- true +(nil? (at (at Tf64b_u 'val) 1)) -- true + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: I16 null broadcast (lines 8871-8874) +;; ct = RAY_I16, expr = 0Nh (type -RAY_I16) → ok=1 → null switch case I16 +;; ──────────────────────────────────────────────────────────────────── +(set Ti16b (table [id val] (list [1i 2i 3i] (as 'I16 [10 20 30])))) +;; Update I16 val column with 0Nh (I16 null atom) → null broadcast +(set Ti16b_u (update {val: 0Nh from: Ti16b})) +(count Ti16b_u) -- 3 +(nil? (at (at Ti16b_u 'val) 0)) -- true +(nil? (at (at Ti16b_u 'val) 1)) -- true + +;; NOTE: Lines 8876 default (BOOL/U8/SYM null broadcast) are unreachable: +;; No BOOL/U8/SYM null literal syntax exists (no 0Nb suffix in the parser). +;; For BOOL columns, ct=RAY_BOOL: ok=(expr_vec->type==-RAY_BOOL) requires a +;; -RAY_BOOL atom, which the parser cannot produce. + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: F64 null of NEW column broadcast (lines 8977-8981) +;; Adding a NEW F64-null column in no-where update +;; ct = -expr_vec->type = RAY_F64, RAY_ATOM_IS_NULL=true → case 8977 +;; ──────────────────────────────────────────────────────────────────── +(set Tnewf (table [id val] (list [1 2 3] [10 20 30]))) +;; Add new F64 null column (0Nf) +(set Tnewf_u (update {fval: 0Nf from: Tnewf})) +(count Tnewf_u) -- 3 +(nil? 
(at (at Tnewf_u 'fval) 0)) -- true + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: I32 null of new-column broadcast (lines 8987-8997) +;; Adding a NEW I32-null column in no-where update +;; ──────────────────────────────────────────────────────────────────── +(set Tnew (table [id val] (list [1 2 3] [10 20 30]))) +;; Add new I32 column with null value (0Ni) +;; Lines 8976-8997: new column broadcast with 0Ni → switch case RAY_I32 +(set Tnew_u (update {extra: 0Ni from: Tnew})) +(count Tnew_u) -- 3 +(nil? (at (at Tnew_u 'extra) 0)) -- true + +;; ──────────────────────────────────────────────────────────────────── +;; append_atom_to_col type error paths (lines 8204, 8209, 8214, 8219, 8227) +;; Wrong-type atoms inserted into typed columns. +;; ──────────────────────────────────────────────────────────────────── +;; Line 8204: I64 column, non-I64 atom (SYM atom) → "error: type" +(insert (table [k] (list [1 2])) (list 'bad)) !- type +;; Line 8209: SYM column, non-SYM atom (I64) → "error: type" +(insert (table [k] (list ['a 'b])) (list 123)) !- type +;; Line 8214: F64 column, non-F64/I64 atom (SYM) → "error: type" +(insert (table [k] (list [1.0 2.0])) (list 'bad)) !- type +;; Line 8219: BOOL column, non-BOOL atom (I64) → "error: type" +(insert (table [k v] (list [1 2] [true false])) (list 3 123)) !- type +;; Line 8227: STR column, non-STR atom (I64) → "error: type" +(insert (table [k] (list ["a" "b"])) (list 123)) !- type + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: I64 atom into F64 col broadcast (lines 8838-8840) +;; ct==RAY_F64, expr_vec==−RAY_I64 atom (integer literal) +;; The F64←I64 promotion fires in the scalar broadcast path +;; ──────────────────────────────────────────────────────────────────── +(set Tf64atom (table [id val] (list [1 2 3] [1.0 2.0 3.0]))) +;; Update F64 val column with integer 42 → I64 atom promoted to F64 +(set Tf64atom_u (update {val: 42 from: 
Tf64atom})) +(count Tf64atom_u) -- 3 +(at (at Tf64atom_u 'val) 0) -- 42.0 +(at (at Tf64atom_u 'val) 1) -- 42.0 + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: fallback eval when DAG compilation fails (lines 8787-8796) +;; (pow val 2) NOT in resolve_binary_dag → expr_vec=NULL → fallback eval +;; ──────────────────────────────────────────────────────────────────── +(set Tpow_nw (table [id val] (list [1 2 3] [2.0 3.0 4.0]))) +;; Update val (F64) with pow(val,2) — forces eval fallback in no-WHERE path +(set Tpow_nw_u (update {val: (pow val 2) from: Tpow_nw})) +(count Tpow_nw_u) -- 3 +(at (at Tpow_nw_u 'val) 0) -- 4.0 +(at (at Tpow_nw_u 'val) 2) -- 16.0 + +;; ──────────────────────────────────────────────────────────────────── +;; No-WHERE update: type error for incompatible vectors (lines 8922-8924) +;; expr_vec is a vector (type>0) and not numeric/SYM/LIST interop → error +;; e.g. updating I64 column with a STR vector → "error: type" +;; ──────────────────────────────────────────────────────────────────── +;; (update {id: (as 'STR id) from: T}) — can't update I64 col with STR vec +;; NOTE: this requires the dag compiler to fail and produce an STR vec, +;; but in practice any STR expression on an I64 column errors first at +;; the DAG type check level. The path at 8922-8924 is unreachable because +;; no-WHERE DAG path with type mismatch is caught before we reach there. + +;; ──────────────────────────────────────────────────────────────────── +;; Insert into table with BOOL column (append_atom_to_col line 8217) +;; append_atom_to_col has ct==RAY_BOOL branch; covered via insert row. 
+;; ──────────────────────────────────────────────────────────────────── +(set Tbool_ins (table [k v] (list [1 2] [true false]))) +;; Insert a new row with BOOL value → append_atom_to_col ct==RAY_BOOL branch +(count (insert Tbool_ins (list 3 true))) -- 3 +(at (at (insert Tbool_ins (list 3 false)) 'v) 2) -- false + +;; ──────────────────────────────────────────────────────────────────── +;; Upsert with SYM key name (line 9495 in query.c) +;; key_sym->type == -RAY_SYM branch in the upsert partial-update path +;; Only triggered when row is a TABLE (not dict/list), because lines +;; 9492-9516 are inside the `if (row->type == RAY_TABLE)` block. +;; ──────────────────────────────────────────────────────────────────── +(set Tups (table [id val] (list [1 2 3] [10 20 30]))) +;; Upsert with TABLE row and SYM key_sym → hits line 9494 (-RAY_SYM) +(count (upsert Tups 'id (table [id val] (list [4] [40])))) -- 4 +(at (at (upsert Tups 'id (table [id val] (list [2] [99]))) 'val) 1) -- 99 + +;; ──────────────────────────────────────────────────────────────────── +;; xbar on I16 column (lines 8059-8068 in query.c) +;; ray_xbar_fn dispatches on out_type; I16 branch at line 8058 +;; ──────────────────────────────────────────────────────────────────── +;; I16 xbar: (xbar I16_vec bucket) → I16 floor-division loop +(xbar (as 'I16 [10 20 30 40 50]) 10) -- (as 'I16 [10 20 30 40 50]) +(xbar (as 'I16 [13 27 35 44 51]) 10) -- (as 'I16 [10 20 30 40 50]) +;; Single-element vector: 7 floors to bucket 5 (negative inputs are not exercised here) +(at (xbar (as 'I16 [7]) 5) 0) -- 5h + +;; ──────────────────────────────────────────────────────────────────── +;; insert into non-TABLE objects — lines 9057-9079, 9121-9194 +;; ──────────────────────────────────────────────────────────────────── + +;; Lines 9073-9075: insert into non-list, non-vec type → type error +(insert (dict [a] (list 1)) 2) !- type + +;; Lines 9077-9079: insert vec with n=4 args → domain error +(insert [1 2 3] 1 2 3) !- domain + +;; Lines 9121-9124: insert into vec 
with type mismatch — 2-arg append +;; F64 atom into I64 vec, or F64 vec into I64 vec → type error +(insert [1 2 3] 1.0) !- type +(insert [1 2 3] [1.0 2.0]) !- type + +;; Lines 9130-9132: 3-arg insert, idx eval returns error +;; raise "domain" as idx → propagates error +(insert [1 2 3] (raise "domain") 5) !- domain + +;; Lines 9135-9138: 3-arg insert, val eval returns error +(insert [1 2 3] 1 (raise "domain")) !- domain + +;; Lines 9152-9154: 3-arg insert into LIST with non-I64 idx type +;; float idx → type error +(insert (list 1 2) 1.0 3) !- type + +;; Lines 9183-9184: 3-arg insert into vec, idx=atom, val type mismatch +;; I64 vec, atom idx=1, F64 val → type error +(insert [1 2 3] 1 1.0) !- type + +;; Lines 9188-9189: 3-arg insert into STR vec with multi-idx → type error +;; Multi-index insert (idx_arg=RAY_I64 vec) on a STR column is not supported +(insert ["a" "b"] [0 1] "c") !- type + +;; Lines 9191-9194: multi-index insert vec, val type mismatch +;; I64 vec, multi-idx, F64 val → type error +(insert [1 2 3] [0 1] 1.0) !- type + +;; ──────────────────────────────────────────────────────────────────── +;; upsert error paths: invalid key types and values +;; ──────────────────────────────────────────────────────────────────── + +(set Tups2 (table [id val] (list [1 2 3] [10 20 30]))) + +;; Lines 9499-9501: upsert TABLE row with k=0 (k<=0 → domain error) +(upsert Tups2 0 (table [id val] (list [4] [40]))) !- domain + +;; Lines 9505-9507: upsert TABLE row with non-SYM, non-I64 key type → type error +(upsert Tups2 1.0 (table [id val] (list [4] [40]))) !- type + +;; Lines 9636-9638: upsert list/dict row with non-SYM, non-I64 key_sym → type error +(upsert Tups2 1.0 (list 4 40)) !- type + +;; Lines 9690-9692: upsert with SYM key col but non-SYM key atom in row → type error +(set Tsymkey (table [k v] (list [alice bob] [10 20]))) +(upsert Tsymkey 1 (list 123 30)) !- type + +;; Lines 9708: upsert with SYM key col matching existing row +;; alice matches row 0 → update val to 99 
+(at (at (upsert Tsymkey 1 (list 'alice 99)) 'v) 0) -- 99 + +;; Lines 9720-9722: upsert with I32 key col (tests the I32 match branch) +;; The I32 key match branch (kt==RAY_I32) exercises lines 9720-9722. +;; Key 2i is present in the I32 key col (k=[1 2 3]), so the match loop finds it; the upsert is still expected to fail with a type error. +;; NOTE: apply_sort_take error paths and match for I32 are covered by match loop +;; execution even when the subsequent update fails with type error. +(set Ti32key (table [k v] (list (as 'I32 [1 2 3]) [10 20 30]))) +;; I32 key col, key=2i (I32 atom), list row: match loop at lines 9717-9722 runs +;; (match for k=2 found), but update via append_atom_to_col fails for I32 col +(upsert Ti32key 1 (list 2i 99)) !- type diff --git a/test/rfl/storage/splay_coverage.rfl b/test/rfl/storage/splay_coverage.rfl new file mode 100644 index 00000000..b5771bee --- /dev/null +++ b/test/rfl/storage/splay_coverage.rfl @@ -0,0 +1,168 @@ +;; Coverage extension for src/store/splay.c. +;; +;; src/store/splay.c is exercised by: +;; test/rfl/system/splayed.rfl — happy-path round-trips +;; test/rfl/io/csv_splayed.rfl — CSV → splay writer (uses splay_save_bulk) +;; test/rfl/store/serde_roundtrip.rfl — .db.splayed.set / .db.splayed.get +;; test/test_splay.c — C-level error paths +;; test/test_store.c — SYM column round-trips + validate_sym_columns +;; +;; This file adds the one reachable RFL path not covered above: +;; +;; splay_save_impl line 74 — mkdir_p failure (true branch of +;; `if (mkdir_err != RAY_OK) return mkdir_err`). +;; Reached by trying to .db.splayed.set into an unwritable directory +;; such as /proc/ (read-only on Linux) or /nonexistent/deep/path/. +;; +;; Unreachable from RFL (documented): +;; +;; splay_save_impl line 74 true — mkdir_p failure — REACHABLE (covered +;; below via /proc/ path). +;; +;; splay_save_impl line 78 — ray_sym_save_bulk: called only from +;; splay_save_impl with durable=false AND +;; sym_path != NULL. 
The only callers +;; that set durable=false are in csv.c, +;; which always pass sym_path=NULL. No +;; RFL builtin exposes this combination. +;; +;; splay_save_impl line 79 true — sym_save error: requires durable=true + +;; sym_path in an unwritable directory. +;; Reachable in principle but the same +;; unwritable path fails at mkdir_p first +;; (line 74), so line 79 never fires +;; independently from RFL. +;; +;; splay_save_impl lines 89,115 — snprintf overflow guards: the path +;; buffer is 1024 bytes; column names and +;; dir paths are bounded by the OS (4096) +;; and the str_to_cpath check (1024), +;; so neither overflow can occur for any +;; valid RFL argument. +;; +;; splay_save_impl lines 91,120 — col/schema save I/O error: requires +;; the directory to become unwritable after +;; mkdir_p succeeds, which is a TOCTOU +;; condition not reproducible from RFL. +;; +;; splay_save_impl lines 98,102 — !col / !name_atom continue: ray_t* is +;; never NULL for a column returned by +;; ray_table_get_col_idx for a valid table, +;; and ray_sym_str never returns NULL for a +;; valid sym ID from a live sym table. +;; +;; splay_load_impl line 146 — trace fprintf: only runs when the +;; RAY_CSV_TRACE env-var is set before the +;; test binary starts. Setting env vars +;; from RFL is not possible without the +;; .sys.exec subprocess exiting first. +;; +;; splay_load_impl lines 162-163 — trace on schema-load failure (same env +;; constraint as line 146). +;; +;; splay_load_impl lines 172-174 — OOM in ray_table_new: allocating an +;; empty table frame never fails in +;; practice; triggering buddy OOM from RFL +;; would require consuming all of the +;; process heap first. +;; +;; splay_load_impl lines 184-185 — trace on missing schema symbol (same +;; env constraint as line 146). +;; +;; splay_load_impl lines 222-223 — trace on col-load failure (same env +;; constraint). +;; +;; splay_load_impl lines 231-235 — OOM in ray_table_add_col (same OOM +;; argument as lines 172-174). 
+;; +;; validate_sym_columns lines 55-57 — RAY_SYM column found with empty sym +;; table: to reach the loop body, ALL +;; column names must resolve via the +;; symbol table (so name_atom != NULL at +;; line 179). With ray_sym_count()==1 +;; only sym ID 0 ("") resolves, but the +;; path-safety guard at line 196 rejects +;; any column name with name_len==0 before +;; the column can be added to the table. +;; The loop is therefore unreachable from +;; RFL. +;; +;; validate_sym_columns lines 244-246 — same argument; RAY_ERR_CORRUPT +;; from validate cannot be returned after +;; a fully successful column load from an +;; RFL-produced .d file. + +;; ════════════════════════════════════════════════════════════════════════ +;; 1. splay_save_impl line 74 — mkdir_p returns non-OK. +;; +;; /proc/ is a Linux kernel pseudo-filesystem mounted read-only; any attempt +;; to create a directory beneath it fails with EACCES / EROFS. +;; ray_mkdir_p → mkdir fails → returns RAY_ERR_IO → splay_save_impl line 74 +;; returns that error → ray_splay_save propagates it → .db.splayed.set +;; surfaces it as error("io"). +;; +;; Of the four assertions, the two failing saves each pass through three steps +;; of splay_save_impl (arg validation OK, mkdir_p call, error return); the other two check T-Save is intact. +;; ════════════════════════════════════════════════════════════════════════ +(set T-Save (table [a b] (list [1 2 3] (as 'F64 [1.0 2.0 3.0])))) + +;; Save to /proc/ (unwritable) — must return io error. +(.db.splayed.set "/proc/rfl_splay_cov_write_fail/" T-Save) !- io + +;; Save to a deeply nested non-existent path under a read-only root. +(.db.splayed.set "/proc/nested/rfl_splay_cov/" T-Save) !- io + +;; Confirm the table itself is intact after the failed saves. +(count T-Save) -- 3 +(sum (at T-Save 'a)) -- 6 + +;; ════════════════════════════════════════════════════════════════════════ +;; 2. 
splay_load_impl: happy-path round-trip exercising both use_mmap +;; branches (ray_splay_load with use_mmap=false, ray_read_splayed with +;; use_mmap=true). +;; +;; .db.splayed.set → ray_splay_save (durable=true) +;; .db.splayed.get → ray_read_splayed (use_mmap=true) +;; ray_splay_load → use_mmap=false (not exposed as an RFL builtin but +;; covered by system/splayed.rfl and test_store.c) +;; +;; Multi-column, multi-type table to exercise the per-column loop in both +;; splay_save_impl and splay_load_impl, hitting each branch multiple times. +;; ════════════════════════════════════════════════════════════════════════ +(.sys.exec "rm -rf /tmp/rfl_splay_cov") + +(set T-Multi (table [id val flag] (list [10 20 30 40 50] (as 'F64 [1.1 2.2 3.3 4.4 5.5]) (as 'BOOL [1 0 1 0 1])))) +(.db.splayed.set "/tmp/rfl_splay_cov/" T-Multi) -- T-Multi + +(set R-Multi (.db.splayed.get "/tmp/rfl_splay_cov/")) +(count R-Multi) -- 5 +(sum (at R-Multi 'id)) -- 150 +(sum (at R-Multi 'val)) -- 16.5 +(key R-Multi) -- ['id 'val 'flag] +(at R-Multi 'id) -- (at T-Multi 'id) +(at R-Multi 'val) -- (at T-Multi 'val) +(at R-Multi 'flag) -- (at T-Multi 'flag) + +;; ════════════════════════════════════════════════════════════════════════ +;; 3. splay_load_impl: bad sym_path causes early return. +;; +;; When the sym file at the given path cannot be loaded, splay_load_impl +;; returns the error from ray_sym_load immediately (line 151). +;; This is also covered by test/test_store.c::test_read_splayed_bad_sym and +;; test/test_splay.c::test_load_bad_sym_path, but exercising it from RFL +;; via the full .db.splayed.get dispatch chain confirms the ops/system.c +;; wrapper also propagates the error rather than swallowing it. +;; +;; Note: splay_default_sym in ops/system.c checks access(buf, F_OK) before +;; passing sym_path; when the file does not exist, sym_path is set to NULL +;; and ray_sym_load is never called. An explicit nonexistent path string +;; as the second argument bypasses this check. 
+;; ════════════════════════════════════════════════════════════════════════ +(.sys.exec "rm -f /tmp/rfl_splay_cov_badsym") + +;; Second argument to .db.splayed.get is an explicit sym path that does not +;; exist — should fail with an I/O or corrupt error from ray_sym_load. +(.db.splayed.get "/tmp/rfl_splay_cov/" "/tmp/rfl_splay_cov_badsym") !- io + +;; ── teardown ────────────────────────────────────────────────────────────── +(.sys.exec "rm -rf /tmp/rfl_splay_cov") diff --git a/test/rfl/strop/null_propagation.rfl b/test/rfl/strop/null_propagation.rfl new file mode 100644 index 00000000..53b7e117 --- /dev/null +++ b/test/rfl/strop/null_propagation.rfl @@ -0,0 +1,93 @@ +;; null_propagation.rfl — coverage for null-propagation paths in +;; exec_string_unary, exec_strlen, exec_substr, exec_replace, exec_concat +;; when the input STR column contains null elements. +;; +;; NOTE: SYM null paths (825-827, 901-904, 986-988, 1063-1066, 1196-1198) +;; are structurally unreachable: ray_vec_is_null() always returns false for +;; RAY_SYM (see vec.c line 1253: "SYM columns are no-null by design"). +;; +;; NOTE: STR null paths in exec_string_unary/strlen/substr/replace/concat +;; require RAY_ATTR_HAS_NULLS to be set on the input vec. A plain list +;; literal ["apple" "" "cherry"] does NOT set HAS_NULLS even though "" +;; has len=0. The only clean way to get a HAS_NULLS STR vec from RFL +;; is via (as 'STR i64_with_nulls), which calls cast_vec_copy_nulls. +;; The null propagation branches for STR in exec_string_unary etc. +;; (lines 820-824, 891-894, 978-984, 1058-1062, 1185) are therefore +;; only reachable through type-cast round-trips. 
+;; +;; Lines targeted in src/ops/string.c (STR null paths via cast): +;; exec_string_unary STR null : 820-824 (str_vec_append("",0) + set_null) +;; exec_strlen STR null : 891-894 (dst[i]=NULL_I64 + set_null) +;; exec_substr STR null : 978-984 (null input → null output str) +;; exec_replace STR null : 1058-1062 (null input → skip, null out) +;; exec_concat STR null : 1185 (null arg → null row output) +;; +;; Fixture: (as 'STR [1 0Nl 2]) creates a 3-row STR column where row 1 +;; has HAS_NULLS properly set. Wrapped in a table so the column +;; path through exec_string_unary is exercised (not the atom path). + +;; Helper: build 3-row STR col with null at index 1. +(set sv3 (as 'STR [1 0Nl 2])) +(nil? (at sv3 1)) -- true + +;; ─── exec_string_unary: upper — STR null at index 1 ────────────────── +(set Tn (table [s] (list sv3))) +(set Ru (at (select {r: (upper s) from: Tn}) 'r)) +(at Ru 0) -- "1" +(nil? (at Ru 1)) -- true +(at Ru 2) -- "2" + +;; ─── exec_string_unary: lower — STR null at index 1 ────────────────── +(set Rl (at (select {r: (lower s) from: Tn}) 'r)) +(at Rl 0) -- "1" +(nil? (at Rl 1)) -- true +(at Rl 2) -- "2" + +;; ─── exec_string_unary: trim — STR null at index 1 ─────────────────── +(set Rt (at (select {r: (trim s) from: Tn}) 'r)) +(at Rt 0) -- "1" +(nil? (at Rt 1)) -- true +(at Rt 2) -- "2" + +;; ─── exec_strlen: STR null produces NULL_I64 (lines 891-894) ───────── +(set Rslen (at (select {r: (strlen s) from: Tn}) 'r)) +(at Rslen 0) -- 1 +(nil? (at Rslen 1)) -- true +(at Rslen 2) -- 1 + +;; ─── exec_substr: STR null at input (lines 978-984) ────────────────── +(set Rss (at (select {r: (substr s 1 1) from: Tn}) 'r)) +(at Rss 0) -- "1" +(nil? (at Rss 1)) -- true +(at Rss 2) -- "2" + +;; ─── exec_replace: STR null (lines 1058-1062) ───────────────────────── +(set Rr (at (select {r: (replace s "1" "X") from: Tn}) 'r)) +(at Rr 0) -- "X" +(nil? 
(at Rr 1)) -- true +(at Rr 2) -- "2" + +;; ─── exec_concat: STR null (line 1185) ─────────────────────────────── +;; When any column has a null at row r, the output at that row is null. +(set sv3b (as 'STR [10 0Nl 20])) +(set Tc (table [a b] (list sv3 sv3b))) +(set Rcat (at (select {r: (concat a b) from: Tc}) 'r)) +(at Rcat 0) -- "110" +(nil? (at Rcat 1)) -- true + +;; ─── verify HAS_NULLS on the (as 'STR ...) cast fixture itself ─────── +;; No string op is applied here: the cast alone must yield a null STR element. +(nil? (at (as 'STR [0Nl]) 0)) -- true + +;; ─── null at first and last positions ──────────────────────────────── +(set sv5 (as 'STR [0Nl 2 0Nl])) +(set Tn2 (table [s] (list sv5))) +(set Ru2 (at (select {r: (upper s) from: Tn2}) 'r)) +(nil? (at Ru2 0)) -- true +(at Ru2 1) -- "2" +(nil? (at Ru2 2)) -- true + +(set Rslen2 (at (select {r: (strlen s) from: Tn2}) 'r)) +(nil? (at Rslen2 0)) -- true +(at Rslen2 1) -- 1 +(nil? (at Rslen2 2)) -- true diff --git a/test/rfl/strop/parted_like.rfl b/test/rfl/strop/parted_like.rfl new file mode 100644 index 00000000..b3c5c0d4 --- /dev/null +++ b/test/rfl/strop/parted_like.rfl @@ -0,0 +1,63 @@ +;; parted_like.rfl — coverage for exec_like_parted_str and exec_like_parted_sym. +;; +;; Lines targeted in src/ops/string.c: +;; exec_like_parted_str : 388-393 (str_like_par_fn dispatch per segment) +;; exec_like_parted_sym : 433, 445, 468 (seen/resolve/proj dispatch) +;; exec_like : 562-565 (in_parted && STR / SYM dispatch) +;; exec_like_input : 523-528 (parted STR/SYM col fast-return) +;; +;; Two-partition splayed tables — one STR column, one SYM column. +;; Each assertion exercises ≥3 regions: parted dispatch, segment loop, +;; pattern match per element. 
+ +;; ─── pre-flight cleanup ────────────────────────────────────────────── +(.sys.exec "rm -rf /tmp/rfl_pl_str /tmp/rfl_pl_sym") + +;; ─── Fixture A: parted STR column — exec_like_parted_str path ──────── +(set PA1 (table [s] (list ["apple" "banana" "cherry"]))) +(set PA2 (table [s] (list ["date" "elderberry" "fig"]))) +(.db.splayed.set "/tmp/rfl_pl_str/1/t/" PA1) +(.db.splayed.set "/tmp/rfl_pl_str/2/t/" PA2) +(set PStr (.db.parted.get "/tmp/rfl_pl_str/" 't)) +(count PStr) -- 6 + +;; Prefix pattern — hits SHAPE_PREFIX fast path in each seg. +(count (select {from: PStr where: (like s "a*")})) -- 1 +;; Suffix pattern — hits SHAPE_SUFFIX. apple+date end in 'e'. +(count (select {from: PStr where: (like s "*e")})) -- 2 +;; Contains pattern — hits SHAPE_CONTAINS (memmem). +(count (select {from: PStr where: (like s "*err*")})) -- 2 +;; Exact pattern — hits SHAPE_EXACT. +(count (select {from: PStr where: (like s "fig")})) -- 1 +;; Wildcard — SHAPE_ANY, matches all. +(count (select {from: PStr where: (like s "*")})) -- 6 +;; No-match pattern. +(count (select {from: PStr where: (like s "xyz*")})) -- 0 +;; Character class — forces SHAPE_NONE general matcher. +(count (select {from: PStr where: (like s "[ab]*")})) -- 2 + +;; ─── Fixture B: parted SYM column — exec_like_parted_sym path ──────── +(set PB1 (table [s] (list ['apple 'banana 'cherry]))) +(set PB2 (table [s] (list ['date 'elderberry 'fig]))) +(.db.splayed.set "/tmp/rfl_pl_sym/1/t/" PB1) +(.db.splayed.set "/tmp/rfl_pl_sym/2/t/" PB2) +(set PSym (.db.parted.get "/tmp/rfl_pl_sym/" 't)) +(count PSym) -- 6 + +;; Prefix pattern over parted SYM. +(count (select {from: PSym where: (like s "a*")})) -- 1 +;; Suffix pattern. apple+date end in 'e'. +(count (select {from: PSym where: (like s "*e")})) -- 2 +;; Contains pattern. +(count (select {from: PSym where: (like s "*err*")})) -- 2 +;; Exact match. +(count (select {from: PSym where: (like s "fig")})) -- 1 +;; Universal wildcard. 
+(count (select {from: PSym where: (like s "*")})) -- 6 +;; No-match. +(count (select {from: PSym where: (like s "zzz*")})) -- 0 +;; Class pattern. +(count (select {from: PSym where: (like s "[de]*")})) -- 2 + +;; ─── cleanup ───────────────────────────────────────────────────────── +(.sys.exec "rm -rf /tmp/rfl_pl_str /tmp/rfl_pl_sym") diff --git a/test/rfl/symbol/sym_coverage.rfl b/test/rfl/symbol/sym_coverage.rfl new file mode 100644 index 00000000..5bf2d4ec --- /dev/null +++ b/test/rfl/symbol/sym_coverage.rfl @@ -0,0 +1,198 @@ +;; Coverage extension for src/table/sym.c. +;; +;; src/table/sym.c is exercised by many existing tests via CSV/splayed I/O. +;; This file targets the remaining uncovered regions: +;; +;; sym_cache_segments trailing-dot branch (line 404-407) +;; — `str[len-1] == '.'` guard; reached via no-split intern + rebuild. +;; — Triggered via .csv.splayed → sym_save_bulk → sym_load +;; → ray_sym_rebuild_segments → sym_cache_segments("abc.", 4). +;; +;; sym_cache_segments leading-dot-alone branch (line 412) +;; — `.foo` with no second dot: plain name, not dotted. +;; +;; sym_intern_nolock leading-dot-only valid=false (line 693-695) +;; — Intern ".onlydot" directly → leading_dot=true, !second → valid=false. +;; +;; sym_intern_nolock seg_len==0 (line 713) +;; — Adjacent dots "a..b": empty segment → valid=false, not dotted. +;; +;; sym_save_impl durable=false branch (line 1208 false arm) +;; — ray_sym_save_bulk called from .csv.splayed. +;; +;; ray_sym_load STRL magic mismatch (line 1318-1322) +;; — Load a 12-byte file with wrong magic word. +;; +;; ray_sym_load file too small (line 1310-1314) +;; — Load a file with < 12 bytes. +;; +;; ray_sym_load prefix truncated (line 1408-1413) +;; — Load a corrupt STRL file truncated mid-entry. +;; +;; Unreachable from RFL (documented): +;; +;; sym_lazy_unmap_locked / sym_lazy_materialize_to_locked (lines 595-638) +;; — Only triggered when sym file size >= SYM_LAZY_LOAD_MIN_BYTES +;; (64 * 1024 * 1024 = 64MB). 
Impractical in RFL tests. +;; +;; Lazy-load path in ray_sym_load (lines 1335-1385) +;; — Same 64MB threshold. Unreachable from RFL. +;; +;; OOM error paths in ray_sym_init, sym_commit_new (lines 162-209, 520-523) +;; — Require malloc injection; unreachable without memory pressure. +;; +;; sep_dots+1 > 255 guard (line 418-420) +;; — Would require a sym with 255+ dots interned via no-split path. +;; +;; slen > remaining corrupt guard (line 1418-1423) +;; — Requires crafting a binary STRL file where declared length +;; overflows the mapped region. Binary construction not available. +;; +;; id != i mismatch during load (line 1445-1450) +;; — Requires pre-populating the sym table before load. Covered at +;; C level: test_sym.c::test_sym_load_id_mismatch. +;; +;; remaining != 0 after load (line 1455-1460) +;; — Requires appending junk bytes after valid STRL entries. +;; + +;; ─── Scrub stale state ─────────────────────────────────────────── +(.sys.exec "rm -rf /tmp/rfl_sym_cov_edge /tmp/rfl_sym_cov_bulk /tmp/rfl_sym_cov_lts /tmp/rfl_sym_cov_lmag /tmp/rfl_sym_cov_ltr /tmp/rfl_sym_cov_plain /tmp/rfl_sym_cov_dotted /tmp/rfl_sym_cov_w16 /tmp/rfl_sym_cov_edge2 /tmp/rfl_sym_cov.csv /tmp/rfl_sym_cov_w16.csv") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 1. Edge-case sym strings via .db.splayed.set / .db.splayed.get +;; +;; Strings interned cover several sym_cache_segments branches: +;; "abc." → trailing dot (line 404-407 in sym_cache_segments) +;; ".foo" → leading dot alone, no second dot (line 412) +;; ".foo.bar" → leading dot + inner dot → dotted, 2 segs +;; "a..b" → adjacent dots → seg_len==0 in sym_intern_nolock +;; "x.y.z" → standard dotted with 3 segments +;; +;; .db.splayed.set uses sym_save (durable=true), then .db.splayed.get +;; calls sym_load + ray_sym_rebuild_segments, which calls +;; sym_cache_segments on each interned name. 
+;; ═══════════════════════════════════════════════════════════════════ +(set _T1 (table ['id 's] (list [1 2 3 4 5] (as 'SYMBOL ["abc." ".foo" ".foo.bar" "a..b" "x.y.z"])))) +(.db.splayed.set "/tmp/rfl_sym_cov_edge/" _T1) +(set _R1 (.db.splayed.get "/tmp/rfl_sym_cov_edge/")) +(count _R1) -- 5 +(at _R1 's) -- (at _T1 's) +(type (at _R1 's)) -- 'SYM +(at (at _R1 's) 0) -- 'abc. +(at (at _R1 's) 1) -- '.foo +(at (at _R1 's) 2) -- '.foo.bar +(at (at _R1 's) 3) -- 'a..b +(at (at _R1 's) 4) -- 'x.y.z +(.sys.exec "rm -rf /tmp/rfl_sym_cov_edge") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 2. sym_save_bulk (durable=false) via .csv.splayed +;; Exercises sym_save_impl with durable=false (line 1208 false arm). +;; ═══════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'id,s\\n1,abc.\\n2,.foo\\n3,.foo.bar\\n4,a..b\\n5,x.y.z\\n' > /tmp/rfl_sym_cov.csv") -- 0 +(set _T2 (.csv.splayed "/tmp/rfl_sym_cov.csv" "/tmp/rfl_sym_cov_bulk/")) +(count _T2) -- 5 +(type (at _T2 's)) -- 'SYM +(set _R2 (.db.splayed.get "/tmp/rfl_sym_cov_bulk/")) +(count _R2) -- 5 +(at _R2 's) -- (at _T2 's) +(.sys.exec "rm -f /tmp/rfl_sym_cov.csv; rm -rf /tmp/rfl_sym_cov_bulk") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 3. Plain sym vector — W8 width, deduplication, sym atom equality. +;; ═══════════════════════════════════════════════════════════════════ +(set _v3 (as 'SYMBOL ["alpha" "beta" "gamma" "alpha" "beta"])) +(count _v3) -- 5 +(type _v3) -- 'SYM +(at _v3 0) -- 'alpha +(at _v3 3) -- 'alpha + +;; ═══════════════════════════════════════════════════════════════════ +;; 4. 
Dotted sym round-trip via .db.splayed.set / .db.splayed.get +;; ═══════════════════════════════════════════════════════════════════ +(set _T4 (table ['id 's] (list [1 2 3] (as 'SYMBOL ["ns.leaf" "a.b.c" "plain"])))) +(.db.splayed.set "/tmp/rfl_sym_cov_dotted/" _T4) +(set _R4 (.db.splayed.get "/tmp/rfl_sym_cov_dotted/")) +(count _R4) -- 3 +(at (at _R4 's) 0) -- 'ns.leaf +(at (at _R4 's) 1) -- 'a.b.c +(at (at _R4 's) 2) -- 'plain +(.sys.exec "rm -rf /tmp/rfl_sym_cov_dotted") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 5. ray_sym_load with wrong STRL magic (line 1318-1322). +;; ═══════════════════════════════════════════════════════════════════ +(set _T5 (table ['id 's] (list [1] (as 'SYMBOL ["x"])))) +(.db.splayed.set "/tmp/rfl_sym_cov_lmag/" _T5) +(.sys.exec "printf '\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00' > /tmp/rfl_sym_cov_lmag/sym") -- 0 +(.db.splayed.get "/tmp/rfl_sym_cov_lmag/") !- corrupt +(.sys.exec "rm -rf /tmp/rfl_sym_cov_lmag") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 6. ray_sym_load with file too small (< 12 bytes, line 1310-1314). +;; ═══════════════════════════════════════════════════════════════════ +(set _T6 (table ['id 's] (list [1] (as 'SYMBOL ["y"])))) +(.db.splayed.set "/tmp/rfl_sym_cov_lts/" _T6) +(.sys.exec "printf 'STRL' > /tmp/rfl_sym_cov_lts/sym") -- 0 +(.db.splayed.get "/tmp/rfl_sym_cov_lts/") !- corrupt +(.sys.exec "rm -rf /tmp/rfl_sym_cov_lts") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 7. ray_sym_load truncated entry data (line 1408-1413). +;; STRL header with count=1, but only 3 bytes of entry data +;; (need >=4 for the u32 length prefix) → RAY_ERR_CORRUPT. +;; Bytes: "STRL" magic (LE 0x4C525453) + n=1 (8-byte LE) + 3 junk bytes +;; = 15 bytes total; parser sees remaining=3 < 4 at first entry. 
+;; ═══════════════════════════════════════════════════════════════════ +(set _T7 (table ['id 's] (list [1] (as 'SYMBOL ["z"])))) +(.db.splayed.set "/tmp/rfl_sym_cov_ltr/" _T7) +(.sys.exec "printf '\\x53\\x54\\x52\\x4c\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xAA\\xBB\\xCC' > /tmp/rfl_sym_cov_ltr/sym") -- 0 +(.db.splayed.get "/tmp/rfl_sym_cov_ltr/") !- corrupt +(.sys.exec "rm -rf /tmp/rfl_sym_cov_ltr") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 8. Large sym count (260 unique syms) to stress hash-grow and +;; str-cap-grow paths in sym_commit_new / sym_reserve_capacity. +;; ═══════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'n,s' > /tmp/rfl_sym_cov_w16.csv") -- 0 +(.sys.exec "seq 1 260 | awk '{printf \"\\n%d,sym%d\",$1,$1}' >> /tmp/rfl_sym_cov_w16.csv") -- 0 +(set _T8 (.csv.read [I64 SYMBOL] "/tmp/rfl_sym_cov_w16.csv")) +(count _T8) -- 260 +(type (at _T8 's)) -- 'SYM +(.sys.exec "rm -f /tmp/rfl_sym_cov_w16.csv") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 9. Re-intern existing sym — scanned-bit fast path (line 390). +;; Second intern of the same dotted name hits the scanned bit and +;; skips re-examination. +;; ═══════════════════════════════════════════════════════════════════ +(set _s9a (as 'SYMBOL ["already.cached"])) +(set _s9b (as 'SYMBOL ["already.cached"])) +(at _s9a 0) -- 'already.cached +(at _s9b 0) -- 'already.cached +(at _s9a 0) -- (at _s9b 0) + +;; ═══════════════════════════════════════════════════════════════════ +;; 10. sym_save_impl "already up to date" path (line 1048-1051). +;; Second .db.splayed.set with no new syms: persisted_count == str_count +;; → sym_save_impl returns RAY_OK immediately without writing. 
+;; ═══════════════════════════════════════════════════════════════════ +(set _T10 (table ['id 's] (list [1 2] (as 'SYMBOL ["pa" "pb"])))) +(.db.splayed.set "/tmp/rfl_sym_cov_plain/" _T10) +(.db.splayed.set "/tmp/rfl_sym_cov_plain/" _T10) +(set _R10 (.db.splayed.get "/tmp/rfl_sym_cov_plain/")) +(count _R10) -- 2 +(at _R10 's) -- (at _T10 's) +(.sys.exec "rm -rf /tmp/rfl_sym_cov_plain") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 11. Deep 5-segment dotted sym — exercises segment loop in both +;; sym_intern_nolock and sym_cache_segments. +;; ═══════════════════════════════════════════════════════════════════ +(set _T11 (table ['id 's] (list [1] (as 'SYMBOL ["a.b.c.d.e"])))) +(.db.splayed.set "/tmp/rfl_sym_cov_edge2/" _T11) +(set _R11 (.db.splayed.get "/tmp/rfl_sym_cov_edge2/")) +(count _R11) -- 1 +(at (at _R11 's) 0) -- 'a.b.c.d.e +(.sys.exec "rm -rf /tmp/rfl_sym_cov_edge2") -- 0 diff --git a/test/rfl/system/read_csv.rfl b/test/rfl/system/read_csv.rfl index 77955e95..a502b8e7 100644 --- a/test/rfl/system/read_csv.rfl +++ b/test/rfl/system/read_csv.rfl @@ -17,6 +17,14 @@ (count (.csv.read [I64 SYMBOL] "rf_test_syms.csv")) -- 20000 (count (read-csv [I64 SYMBOL] "rf_test_syms.csv")) -- 20000 +;; W16 SYM comparison: 20000 distinct syms → max_id > 255 → W16 compaction. +;; Load again and compare sym column against a SYM atom → lines 822-825 in eval.c. +(set _tsyms (.csv.read [I64 SYMBOL] "rf_test_syms.csv")) +(sum (== (at _tsyms 'sym) 's0)) -- 1 +(sum (!= (at _tsyms 'sym) 's0)) -- 19999 +;; Sort W16 SYM column → gather_by_idx W16 path (eval.c line 1145, case 2) +(count (asc (at _tsyms 'sym))) -- 20000 + (.sys.exec "rm -f rf_test_syms.csv") -- 0 ;; Explicit names + types loads a no-header CSV and binds user names. 
diff --git a/test/rfl/temporal/dag_extract_trunc.rfl b/test/rfl/temporal/dag_extract_trunc.rfl index c20a504e..b2488940 100644 --- a/test/rfl/temporal/dag_extract_trunc.rfl +++ b/test/rfl/temporal/dag_extract_trunc.rfl @@ -79,3 +79,74 @@ (set TPre (table [d] (list (as 'DATE [1999.12.31 2000.01.01])))) (at (at (select {y: (year d) from: TPre}) 'y) 0) -- 1999 (at (at (select {y: (year d) from: TPre}) 'y) 1) -- 2000 + +;; ─────────── pre-epoch TIMESTAMP in DAG (EXTRACT_INNER negative ns) ────────── +;; 1999-12-31T23:30:45 stored as negative ns relative to 2000-01-01. +;; Exercises: +;; • EXTRACT_INNER line 383-384: ns < 0 → us = -(((-ns)+999)/1000) +;; • EXTRACT_INNER line 389-391: day_us < 0 for HOUR field +;; • EXTRACT_INNER line 393-395: day_us < 0 for MINUTE field +;; • EXTRACT_INNER line 397-399: day_us < 0 for SECOND field +(set TpreTS (table [ts] (list [1999.12.31D23:30:45.000000000 1999.12.30D00:00:00.000000000]))) +(at (at (select {h: (hour ts) from: TpreTS}) 'h) 0) -- 23 +(at (at (select {h: (hour ts) from: TpreTS}) 'h) 1) -- 0 +(at (at (select {mi: (minute ts) from: TpreTS}) 'mi) 0) -- 30 +(at (at (select {s: (second ts) from: TpreTS}) 's) 0) -- 45 +(at (at (select {y: (year ts) from: TpreTS}) 'y) 0) -- 1999 +(at (at (select {m: (month ts) from: TpreTS}) 'm) 0) -- 12 + +;; ─────── leap year DOY in exec_extract (doy_jan++ branch, line 429) ────────── +;; 2024 is a leap year. Dates in months > 2 must add 1 to doy_jan. +;; Exercises: EXTRACT_INNER line 429: mo > 2 && leap → doy_jan++ +(set TleapTS (table [ts] (list [2024.02.29D12:00:00.000000000 2024.03.01D00:00:00.000000000 2024.12.31D00:00:00.000000000]))) +(at (at (select {dy: (dayofyear ts) from: TleapTS}) 'dy) 0) -- 60 +(at (at (select {dy: (dayofyear ts) from: TleapTS}) 'dy) 1) -- 61 +(at (at (select {dy: (dayofyear ts) from: TleapTS}) 'dy) 2) -- 366 + +;; ─────── DATE+HAS_NULLS with dd/dw/dy/minute (EXTRACT_INNER(1,1)) ──────────── +;; DATE column bearing a null slot. 
IN32=1, HAS_NULLS=1 dispatch (line 450). +;; Previously tested only with year/month/hour/second; here we extend to +;; day / dayofweek / dayofyear / minute to cover remaining EXTRACT_INNER arms. +;; 8766 = 2024-01-01 (Mon=1), 8767 = 2024-01-02. +(set TDn2 (table [d] (list (as 'DATE [8766 0N 8767])))) +(at (at (select {dd: (day d) from: TDn2}) 'dd) 0) -- 1 +(at (at (select {dd: (day d) from: TDn2}) 'dd) 1) -- 0Nl +(at (at (select {dw: (dayofweek d) from: TDn2}) 'dw) 0) -- 1 +(at (at (select {dw: (dayofweek d) from: TDn2}) 'dw) 1) -- 0Nl +(at (at (select {dy: (dayofyear d) from: TDn2}) 'dy) 0) -- 1 +(at (at (select {dy: (dayofyear d) from: TDn2}) 'dy) 1) -- 0Nl +(at (at (select {mi: (minute d) from: TDn2}) 'mi) 0) -- 0 +(at (at (select {mi: (minute d) from: TDn2}) 'mi) 1) -- 0Nl + +;; TIME+HAS_NULLS with dd/dw/dy/minute — IN32=1, HAS_NULLS=1, in_type=RAY_TIME. +;; 3723000 ms = 01:02:03.000 (epoch day → day=1 dow=6 doy=1 from time zero). +(set TTn2 (table [t] (list (as 'TIME [3723000 0N 86399000])))) +(at (at (select {dd: (day t) from: TTn2}) 'dd) 0) -- 1 +(at (at (select {dd: (day t) from: TTn2}) 'dd) 1) -- 0Nl +(at (at (select {dw: (dayofweek t) from: TTn2}) 'dw) 0) -- 6 +(at (at (select {dw: (dayofweek t) from: TTn2}) 'dw) 1) -- 0Nl +(at (at (select {dy: (dayofyear t) from: TTn2}) 'dy) 0) -- 1 +(at (at (select {dy: (dayofyear t) from: TTn2}) 'dy) 1) -- 0Nl +(at (at (select {mi: (minute t) from: TTn2}) 'mi) 0) -- 2 +(at (at (select {mi: (minute t) from: TTn2}) 'mi) 1) -- 0Nl + +;; ─────── Dotted paths: ray_temporal_field_from_sym dd/minute/dow/doy ──────── +;; When a temporal column is accessed via col.dd / col.minute / col.dow / +;; col.doy the query compiler calls ray_temporal_field_from_sym to resolve +;; the field code. The previous coverage only exercised yyyy/mm/hh/ss. 
+(set Tdot (table [d] (list [2024.03.15 2024.07.04 2000.01.01]))) +(at (at (select {v: d.dd from: Tdot}) 'v) 0) -- 15 +(at (at (select {v: d.dd from: Tdot}) 'v) 1) -- 4 +(at (at (select {v: d.minute from: Tdot}) 'v) 0) -- 0 +(at (at (select {v: d.dow from: Tdot}) 'v) 0) -- 5 +(at (at (select {v: d.dow from: Tdot}) 'v) 2) -- 6 +(at (at (select {v: d.doy from: Tdot}) 'v) 0) -- 75 +(at (at (select {v: d.doy from: Tdot}) 'v) 1) -- 186 + +;; ─────── Pre-epoch TIMESTAMP in exec_date_trunc: r < 0 branch ──────────── +;; Negative us modulo bucket gives r < 0 → out_us = us - r - bucket +;; (DATE_TRUNC_INNER line 555 for DAY bucket, line 540 for SECOND bucket). +(set TpreT2 (table [ts] (list [1999.12.31D12:30:45.000000000 1999.12.31D00:00:00.000000000]))) +(at (at (select {s: ts.date from: TpreT2}) 's) 0) -- 1999.12.31D00:00:00.000000000 +(at (at (select {s: ts.date from: TpreT2}) 's) 1) -- 1999.12.31D00:00:00.000000000 +(at (at (select {s: ts.time from: TpreT2}) 's) 0) -- 1999.12.31D12:30:45.000000000 diff --git a/test/rfl/temporal/extract.rfl b/test/rfl/temporal/extract.rfl index c19eb27a..08e5ccaa 100644 --- a/test/rfl/temporal/extract.rfl +++ b/test/rfl/temporal/extract.rfl @@ -65,6 +65,13 @@ (hh [00:00:00.000 06:30:00.000 12:00:00.000 18:45:00.000 23:59:59.000]) -- [0 6 12 18 23] ;; vector of timestamps (hh [2024.03.15D01:00:00.000000000 2024.03.15D13:00:00.000000000]) -- [1 13] +;; pre-epoch timestamp atom — day_us < 0 in rte_extract_one (standalone path): +;; 1999-12-31T23:30:00 → ns = -86400e9 + 84600e9 = -1800000000000 < 0 → us < 0 +;; day_us = us % 86400e6 → negative, then += USEC_PER_DAY to normalise → hour=23 +(hh 1999.12.31D23:30:00.000000000) -- 23 +(hh 1999.12.31D01:00:00.000000000) -- 1 +;; pre-epoch vector +(hh [1999.12.31D06:00:00.000000000 1999.12.30D18:00:00.000000000]) -- [6 18] ;; ──────────────────────────── minute ─────────────────────────────────── (minute 00:00:00.000) -- 0 @@ -74,6 +81,11 @@ (minute 2024.03.15D08:00:00.000000000) -- 0 ;; vector 
(minute [00:00:00.000 00:15:00.000 00:30:00.000 00:45:00.000 00:59:00.000]) -- [0 15 30 45 59] +;; pre-epoch timestamp atom — day_us < 0 in rte_extract_one (MINUTE arm) +(minute 1999.12.31D23:30:45.000000000) -- 30 +(minute 1999.12.31D00:15:00.000000000) -- 15 +;; pre-epoch vector +(minute [1999.12.31D01:02:00.000000000 1999.12.30D00:45:00.000000000]) -- [2 45] ;; ──────────────────────────── ss (second) ────────────────────────────── (ss 00:00:00.000) -- 0 @@ -83,6 +95,11 @@ (ss 2024.03.15D12:34:00.000000000) -- 0 ;; vector (ss [00:00:00.000 00:00:01.000 00:00:30.000 00:00:59.000]) -- [0 1 30 59] +;; pre-epoch timestamp atom — day_us < 0 in rte_extract_one (SECOND arm) +(ss 1999.12.31D23:30:45.000000000) -- 45 +(ss 1999.12.31D00:00:59.000000000) -- 59 +;; pre-epoch vector +(ss [1999.12.31D01:02:03.000000000 1999.12.30D00:00:30.000000000]) -- [3 30] ;; ───────────────────────────── dow (day-of-week) ─────────────────────── ;; rayforce's `dow` returns 1..7 with Mon=1 .. Sun=7 @@ -140,3 +157,20 @@ (dd DS) -- [1 4 31] (dow DS) -- [1 4 2] (doy DS) -- [1 186 366] + +;; ─────────────── null temporal atoms (RAY_ATOM_IS_NULL branch) ──────────── +;; ray_temporal_extract line 125: null atom → 0Nl regardless of field. 
+(yyyy 0Nd) -- 0Nl +(mm 0Nd) -- 0Nl +(dd 0Nd) -- 0Nl +(hh 0Nt) -- 0Nl +(minute 0Nt) -- 0Nl +(ss 0Nt) -- 0Nl +(doy 0Np) -- 0Nl +(dow 0Np) -- 0Nl + +;; pre-epoch TIMESTAMP atoms for year/month/day — exercises rte_to_us +;; negative branch (ns < 0 → us = -((-ns+999)/1000)) in the standalone path +(yyyy 1999.12.31D23:59:59.000000000) -- 1999 +(mm 1999.12.31D23:59:59.000000000) -- 12 +(dd 1999.12.31D23:59:59.000000000) -- 31 diff --git a/test/test_opt.c b/test/test_opt.c index 8004bff2..42aed79b 100644 --- a/test/test_opt.c +++ b/test/test_opt.c @@ -1191,6 +1191,10 @@ static test_result_t name(void) { \ FOLD_I64_TEST(test_const_fold_i64_div, ray_div, 10, 3) FOLD_I64_TEST(test_const_fold_i64_min, ray_min2, 10, 3) FOLD_I64_TEST(test_const_fold_i64_max, ray_max2, 10, 3) +/* Swapped operands: lv < rv so fold_binary_const I64 MIN2 takes the lv branch, + * and I64 MAX2 takes the rv branch — covers both previously-uncovered ternary arms. */ +FOLD_I64_TEST(test_const_fold_i64_min_swap, ray_min2, 3, 10) +FOLD_I64_TEST(test_const_fold_i64_max_swap, ray_max2, 3, 10) static test_result_t test_const_fold_i64_div_min_max(void) { /* Wrapper test that exercises the i64 DIV path inline */ @@ -1245,6 +1249,12 @@ static test_result_t name(void) { \ FOLD_I32_TEST(test_const_fold_i32_add, ray_add, 7, 3) FOLD_I32_TEST(test_const_fold_i32_div, ray_div, 7, 3) FOLD_I32_TEST(test_const_fold_i32_mod, ray_mod, 7, 3) +/* I32 MIN2 and MAX2: covers fold_binary_const I32 lines 469-470 (previously 0 hits) */ +FOLD_I32_TEST(test_const_fold_i32_min, ray_min2, 7, 3) +FOLD_I32_TEST(test_const_fold_i32_max, ray_max2, 7, 3) +/* Swapped I32 MIN2/MAX2: lv < rv to cover the alternate branches */ +FOLD_I32_TEST(test_const_fold_i32_min_swap, ray_min2, 3, 7) +FOLD_I32_TEST(test_const_fold_i32_max_swap, ray_max2, 3, 7) static test_result_t test_const_fold_i32_ops(void) { /* Wrapper: run the i32 ADD fold inline */ @@ -1953,6 +1963,617 @@ static test_result_t test_idiom_first_asc_scan_with_nulls_stays_safe(void) { 
PASS(); } +/* -------------------------------------------------------------------------- + * New targeted coverage tests for opt.c uncovered regions + * -------------------------------------------------------------------------- */ + +/* + * Test: partition pruning with OP_GT comparison. + * Exercises the case OP_GT arm in the inner switch (line ~1954 in opt.c). + * 4 partitions keyed [100, 200, 300, 400]. + * pkey > 200 → partitions 2,3 (keys 300,400) → bits 2,3. + */ +static test_result_t test_partition_pruning_gt(void) { + ray_heap_init(); + int64_t pkeys[] = {100, 200, 300, 400}; + ray_t* tbl; ray_graph_t* g; + ray_op_t* sv; ray_op_t* sp; + make_parted_tbl(&tbl, &g, &sv, &sp, pkeys, 4); + + ray_op_t* c200 = ray_const_i64(g, 200); + ray_op_t* pred = ray_gt(g, sp, c200); + ray_op_t* filt = ray_filter(g, sv, pred); + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + + ray_op_ext_t* ext = find_scan_ext_for(g, sv->id); + TEST_ASSERT_NOT_NULL(ext); + TEST_ASSERT_NOT_NULL(ext->seg_mask); + /* Partitions 2,3 (keys 300,400 > 200) */ + uint64_t expected = (1ULL << 2) | (1ULL << 3); + TEST_ASSERT_TRUE(ext->seg_mask[0] == expected); + + ray_graph_free(g); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: partition pruning with swapped operands — CONST on lhs, SCAN on rhs. + * Exercises the swapped=true path (lines ~1776-1777, ~1925-1930) where + * LT is flipped to GT for effective comparison. + * + * FILTER( SCAN(val), LT(CONST(200), SCAN(pkey)) ) + * → equivalent to pkey > 200 → bits 2,3. + */ +static test_result_t test_partition_pruning_swapped(void) { + ray_heap_init(); + int64_t pkeys[] = {100, 200, 300, 400}; + ray_t* tbl; ray_graph_t* g; + ray_op_t* sv; ray_op_t* sp; + make_parted_tbl(&tbl, &g, &sv, &sp, pkeys, 4); + + /* Swapped: CONST(200) < SCAN(pkey), i.e. 
lhs=CONST, rhs=SCAN */ + ray_op_t* c200 = ray_const_i64(g, 200); + ray_op_t* pred = ray_lt(g, c200, sp); /* CONST < SCAN → swapped OP_LT */ + ray_op_t* filt = ray_filter(g, sv, pred); + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + + ray_op_ext_t* ext = find_scan_ext_for(g, sv->id); + TEST_ASSERT_NOT_NULL(ext); + TEST_ASSERT_NOT_NULL(ext->seg_mask); + /* swapped LT → eff_op=GT → pkey > 200 → partitions 2,3 */ + uint64_t expected = (1ULL << 2) | (1ULL << 3); + TEST_ASSERT_TRUE(ext->seg_mask[0] == expected); + + ray_graph_free(g); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: partition pruning with large IN set (> 32 elements). + * Exercises the heap allocation path (set_len > 32 → ray_alloc) in + * pass_partition_pruning (lines ~1877-1881). + * + * Build 6 partitions keyed [10,20,30,40,50,60]. + * IN set has 33 elements covering keys 10,20,30,40 → bits 0,1,2,3. + */ +static test_result_t test_partition_pruning_in_large(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* 6 I64 partition keys */ + int64_t pkey_arr[] = {10, 20, 30, 40, 50, 60}; + int n_parts = 6; + + ray_t* key_values = ray_vec_new(RAY_I64, n_parts); + key_values->len = n_parts; + memcpy(ray_data(key_values), pkey_arr, (size_t)n_parts * sizeof(int64_t)); + + ray_t* row_counts = ray_vec_new(RAY_I64, n_parts); + row_counts->len = n_parts; + int64_t* rc = (int64_t*)ray_data(row_counts); + for (int i = 0; i < n_parts; i++) rc[i] = 5; + + ray_t* mapcommon = ray_alloc(2 * sizeof(ray_t*)); + mapcommon->type = RAY_MAPCOMMON; + mapcommon->len = 2; + ((ray_t**)ray_data(mapcommon))[0] = key_values; + ((ray_t**)ray_data(mapcommon))[1] = row_counts; + + ray_t* val_parted = ray_alloc((size_t)n_parts * sizeof(ray_t*)); + val_parted->type = RAY_PARTED_BASE + RAY_I64; + val_parted->len = n_parts; + for (int i = 0; i < n_parts; i++) { + ray_t* seg = ray_vec_new(RAY_I64, 5); + seg->len = 5; + ((ray_t**)ray_data(val_parted))[i] = seg; + } + + 
int64_t sym_pkey = ray_sym_intern("pkey", 4); + int64_t sym_val = ray_sym_intern("val", 3); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, sym_pkey, mapcommon); + tbl = ray_table_add_col(tbl, sym_val, val_parted); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_val = ray_scan(g, "val"); + ray_op_t* scan_pkey = ray_scan(g, "pkey"); + + /* Build IN set with 33 elements: 10,20,30,40, then 29 extra values + * not in partition keys (104..132). Only 10,20,30,40 are in keys. */ + int64_t set_data[33]; + set_data[0] = 10; + set_data[1] = 20; + set_data[2] = 30; + set_data[3] = 40; + for (int i = 4; i < 33; i++) set_data[i] = 100 + i; /* not in keys */ + ray_t* set_vec = ray_vec_new(RAY_I64, 33); + set_vec->len = 33; + memcpy(ray_data(set_vec), set_data, 33 * sizeof(int64_t)); + + ray_op_t* set_op = ray_const_vec(g, set_vec); + ray_release(set_vec); + ray_op_t* in_pred = ray_in(g, scan_pkey, set_op); + ray_op_t* filt = ray_filter(g, scan_val, in_pred); + + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + + ray_op_ext_t* val_ext = NULL; + for (uint32_t i = 0; i < g->ext_count; i++) { + if (g->ext_nodes[i] && g->ext_nodes[i]->base.id == scan_val->id) { + val_ext = g->ext_nodes[i]; + break; + } + } + TEST_ASSERT_NOT_NULL(val_ext); + TEST_ASSERT_NOT_NULL(val_ext->seg_mask); + /* Keys 10,20,30,40 are in the set → bits 0,1,2,3 */ + uint64_t expected = (1ULL << 0) | (1ULL << 1) | (1ULL << 2) | (1ULL << 3); + TEST_ASSERT_TRUE(val_ext->seg_mask[0] == expected); + + ray_graph_free(g); + ray_release(mapcommon); + ray_release(val_parted); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: partition pruning with empty IN set (set_len == 0). + * Exercises the early goto attach_mask path when no elements match any + * partition (lines ~1867-1876). For OP_IN, empty → mask stays all-0, + * meaning no partitions pass. 
+ */ +static test_result_t test_partition_pruning_in_empty(void) { + ray_heap_init(); + int64_t pkeys[] = {100, 200, 300, 400}; + ray_t* tbl; ray_graph_t* g; + ray_op_t* sv; ray_op_t* sp; + make_parted_tbl(&tbl, &g, &sv, &sp, pkeys, 4); + + /* Empty I64 vector as the IN set */ + ray_t* empty_vec = ray_vec_new(RAY_I64, 1); /* capacity=1, len=0 */ + empty_vec->len = 0; + + ray_op_t* set_op = ray_const_vec(g, empty_vec); + ray_release(empty_vec); + ray_op_t* in_pred = ray_in(g, sp, set_op); + ray_op_t* filt = ray_filter(g, sv, in_pred); + + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + + ray_op_ext_t* ext = find_scan_ext_for(g, sv->id); + TEST_ASSERT_NOT_NULL(ext); + /* Empty IN → mask all-0 → seg_mask attached but zero */ + TEST_ASSERT_NOT_NULL(ext->seg_mask); + TEST_ASSERT_TRUE(ext->seg_mask[0] == 0); + + ray_graph_free(g); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: partition pruning with NOT_IN and empty set. + * For NOT_IN with empty set → all partitions pass → mask is all-1s. + * Exercises the is_nin branch in the empty-set path (lines ~1871-1875). 
+ */ +static test_result_t test_partition_pruning_not_in_empty(void) { + ray_heap_init(); + int64_t pkeys[] = {100, 200, 300, 400}; + ray_t* tbl; ray_graph_t* g; + ray_op_t* sv; ray_op_t* sp; + make_parted_tbl(&tbl, &g, &sv, &sp, pkeys, 4); + + /* Empty I64 vector */ + ray_t* empty_vec = ray_vec_new(RAY_I64, 1); + empty_vec->len = 0; + + ray_op_t* set_op = ray_const_vec(g, empty_vec); + ray_release(empty_vec); + ray_op_t* nin_pred = ray_not_in(g, sp, set_op); + ray_op_t* filt = ray_filter(g, sv, nin_pred); + + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + + ray_op_ext_t* ext = find_scan_ext_for(g, sv->id); + TEST_ASSERT_NOT_NULL(ext); + TEST_ASSERT_NOT_NULL(ext->seg_mask); + /* NOT_IN empty → all 4 partitions pass → bits 0-3 set */ + uint64_t expected = (1ULL << 0) | (1ULL << 1) | (1ULL << 2) | (1ULL << 3); + TEST_ASSERT_TRUE(ext->seg_mask[0] == expected); + + ray_graph_free(g); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: partition pruning with IN set containing null elements. + * Exercises the set_has_nulls path (lines ~1885-1897) where null entries + * in the literal vector are skipped. + * + * Setup: 4 partitions keyed [100, 200, 300, 400]. + * IN set: [100, NULL, 300] → only 100 and 300 match → bits 0,2. 
+ */ +static test_result_t test_partition_pruning_in_with_nulls(void) { + ray_heap_init(); + int64_t pkeys[] = {100, 200, 300, 400}; + ray_t* tbl; ray_graph_t* g; + ray_op_t* sv; ray_op_t* sp; + make_parted_tbl(&tbl, &g, &sv, &sp, pkeys, 4); + + /* Build [100, NULL, 300] as an I64 vec with null at index 1 */ + int64_t set_data[] = {100, 0, 300}; /* 0 is null placeholder */ + ray_t* set_vec = ray_vec_new(RAY_I64, 3); + set_vec->len = 3; + memcpy(ray_data(set_vec), set_data, 3 * sizeof(int64_t)); + ray_vec_set_null(set_vec, 1, true); /* index 1 is null */ + + ray_op_t* set_op = ray_const_vec(g, set_vec); + ray_release(set_vec); + ray_op_t* in_pred = ray_in(g, sp, set_op); + ray_op_t* filt = ray_filter(g, sv, in_pred); + + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + + ray_op_ext_t* ext = find_scan_ext_for(g, sv->id); + TEST_ASSERT_NOT_NULL(ext); + TEST_ASSERT_NOT_NULL(ext->seg_mask); + /* Only keys 100 and 300 are non-null in the set → bits 0,2 */ + uint64_t expected = (1ULL << 0) | (1ULL << 2); + TEST_ASSERT_TRUE(ext->seg_mask[0] == expected); + + ray_graph_free(g); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: predicate pushdown past EXPAND with OP_IF predicate. + * Exercises collect_pred_scans and is_reachable_from when the predicate + * subtree contains an OP_IF node (which has a third operand stored in ext). + * + * This covers the OP_IF/OP_SUBSTR/OP_REPLACE case in collect_pred_scans + * (lines ~1221-1229) and is_reachable_from (lines ~1283-1289). + * + * Predicate: IF(flag == 1, flag, 0) — has three sub-nodes including flag_scan. + * The flag_scan is reachable from EXPAND's source subtree, so pushdown fires. 
+ */ +static test_result_t test_opt_pushdown_expand_if_pred(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t n = 4; + int64_t flag_data[] = {1, 0, 1, 0}; + int64_t id_data[] = {0, 1, 2, 3}; + ray_t* flag_v = ray_vec_from_raw(RAY_I64, flag_data, n); + ray_t* id_v = ray_vec_from_raw(RAY_I64, id_data, n); + int64_t s_flag = ray_sym_intern("flag", 4); + int64_t s_id = ray_sym_intern("id", 2); + ray_t* node_tbl = ray_table_new(2); + node_tbl = ray_table_add_col(node_tbl, s_id, id_v); + node_tbl = ray_table_add_col(node_tbl, s_flag, flag_v); + ray_release(flag_v); + ray_release(id_v); + + /* Simple edge graph: 0->1, 1->2 */ + int64_t src_data[] = {0, 1}; + int64_t dst_data[] = {1, 2}; + ray_t* src_v = ray_vec_from_raw(RAY_I64, src_data, 2); + ray_t* dst_v = ray_vec_from_raw(RAY_I64, dst_data, 2); + int64_t s_src = ray_sym_intern("src", 3); + int64_t s_dst = ray_sym_intern("dst", 3); + ray_t* edges = ray_table_new(2); + edges = ray_table_add_col(edges, s_src, src_v); + edges = ray_table_add_col(edges, s_dst, dst_v); + ray_release(src_v); + ray_release(dst_v); + + ray_rel_t* rel = ray_rel_from_edges(edges, "src", "dst", 4, 4, false); + TEST_ASSERT_NOT_NULL(rel); + + ray_graph_t* g = ray_graph_new(node_tbl); + + /* Build predicate: IF(flag == 1, flag, 0) */ + ray_op_t* flag_scan = ray_scan(g, "flag"); + ray_op_t* c1 = ray_const_i64(g, 1); + ray_op_t* c0 = ray_const_i64(g, 0); + ray_op_t* cond = ray_eq(g, flag_scan, c1); + ray_op_t* pred = ray_if(g, cond, flag_scan, c0); /* IF has ext-stored third op */ + + /* EXPAND from flag column (so flag_scan is in source subtree) */ + ray_op_t* expand = ray_expand(g, flag_scan, rel, 0); + TEST_ASSERT_NOT_NULL(expand); + uint32_t expand_id = expand->id; + + /* FILTER(expand, if_pred) — collect_pred_scans must walk OP_IF ext */ + ray_op_t* filt = ray_filter(g, expand, pred); + TEST_ASSERT_NOT_NULL(filt); + + /* Optimize — pushdown should fire since flag_scan is reachable from + * expand's source input */ + ray_op_t* opt = 
ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + + /* After pushdown, root should be EXPAND with FILTER as its source input */ + TEST_ASSERT_EQ_U(opt->id, expand_id); + TEST_ASSERT_EQ_I(opt->opcode, OP_EXPAND); + TEST_ASSERT_NOT_NULL(opt->inputs[0]); + TEST_ASSERT_EQ_I(opt->inputs[0]->opcode, OP_FILTER); + + ray_graph_free(g); + ray_rel_free(rel); + ray_release(node_tbl); + ray_release(edges); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: factorize_pass with EXPAND where consumer is NOT an OP_GROUP. + * Exercises the factorize_pass early-exit when consumer->opcode != OP_GROUP + * (line ~1004 in opt.c). + */ +static test_result_t test_factorize_expand_non_group(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t src_data[] = {0, 1}; + int64_t dst_data[] = {1, 2}; + ray_t* src_v = ray_vec_from_raw(RAY_I64, src_data, 2); + ray_t* dst_v = ray_vec_from_raw(RAY_I64, dst_data, 2); + int64_t s_src = ray_sym_intern("src", 3); + int64_t s_dst = ray_sym_intern("dst", 3); + ray_t* edges = ray_table_new(2); + edges = ray_table_add_col(edges, s_src, src_v); + edges = ray_table_add_col(edges, s_dst, dst_v); + ray_release(src_v); + ray_release(dst_v); + + int64_t id_data[] = {0, 1, 2}; + ray_t* id_v = ray_vec_from_raw(RAY_I64, id_data, 3); + int64_t s_id = ray_sym_intern("id", 2); + ray_t* node_tbl = ray_table_new(1); + node_tbl = ray_table_add_col(node_tbl, s_id, id_v); + ray_release(id_v); + + ray_rel_t* rel = ray_rel_from_edges(edges, "src", "dst", 3, 3, false); + TEST_ASSERT_NOT_NULL(rel); + + ray_graph_t* g = ray_graph_new(node_tbl); + int64_t start_data[] = {0, 1, 2}; + ray_t* start_vec = ray_vec_from_raw(RAY_I64, start_data, 3); + ray_op_t* src_op = ray_const_vec(g, start_vec); + ray_release(start_vec); + + ray_op_t* expand = ray_expand(g, src_op, rel, 0); + TEST_ASSERT_NOT_NULL(expand); + + /* Consumer is OP_FILTER (not OP_GROUP) — factorize_pass must skip */ + ray_op_t* c0 = ray_const_i64(g, 0); + ray_op_t* pred = ray_gt(g, 
expand, c0); + ray_op_t* filt = ray_filter(g, expand, pred); + + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + /* factorized flag must NOT be set since consumer was OP_FILTER */ + ray_op_ext_t* expand_ext = NULL; + for (uint32_t i = 0; i < g->ext_count; i++) { + if (g->ext_nodes[i] && g->ext_nodes[i]->base.id == expand->id) { + expand_ext = g->ext_nodes[i]; + break; + } + } + TEST_ASSERT_NOT_NULL(expand_ext); + TEST_ASSERT_TRUE(expand_ext->graph.factorized == 0); + + ray_graph_free(g); + ray_rel_free(rel); + ray_release(node_tbl); + ray_release(edges); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: factorize_pass with EXPAND → GROUP where key is NOT "_src". + * Exercises the key-sym check (line ~1013) when key_ext->sym != src_sym, + * so factorized is NOT set. + */ +static test_result_t test_factorize_expand_group_non_src(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t src_data[] = {0, 1}; + int64_t dst_data[] = {1, 2}; + ray_t* src_v = ray_vec_from_raw(RAY_I64, src_data, 2); + ray_t* dst_v = ray_vec_from_raw(RAY_I64, dst_data, 2); + int64_t s_src = ray_sym_intern("src", 3); + int64_t s_dst = ray_sym_intern("dst", 3); + ray_t* edges = ray_table_new(2); + edges = ray_table_add_col(edges, s_src, src_v); + edges = ray_table_add_col(edges, s_dst, dst_v); + ray_release(src_v); + ray_release(dst_v); + + int64_t id_data[] = {0, 1, 2}; + int64_t val_data[] = {10, 20, 30}; + ray_t* id_v = ray_vec_from_raw(RAY_I64, id_data, 3); + ray_t* val_v = ray_vec_from_raw(RAY_I64, val_data, 3); + int64_t s_id = ray_sym_intern("id", 2); + int64_t s_val = ray_sym_intern("val", 3); + ray_t* node_tbl = ray_table_new(2); + node_tbl = ray_table_add_col(node_tbl, s_id, id_v); + node_tbl = ray_table_add_col(node_tbl, s_val, val_v); + ray_release(id_v); + ray_release(val_v); + + ray_rel_t* rel = ray_rel_from_edges(edges, "src", "dst", 3, 3, false); + TEST_ASSERT_NOT_NULL(rel); + + ray_graph_t* g = ray_graph_new(node_tbl); + int64_t 
start_data[] = {0, 1, 2}; + ray_t* start_vec = ray_vec_from_raw(RAY_I64, start_data, 3); + ray_op_t* src_op = ray_const_vec(g, start_vec); + ray_release(start_vec); + + ray_op_t* expand = ray_expand(g, src_op, rel, 0); + TEST_ASSERT_NOT_NULL(expand); + + /* GROUP with key = "val" (not "_src") — factorize should NOT fire */ + ray_op_t* val_scan = ray_scan(g, "val"); + ray_op_t* keys[] = { val_scan }; + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { val_scan }; + ray_op_t* grp = ray_group(g, keys, 1, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(grp); + grp->inputs[0] = expand; + g->nodes[grp->id].inputs[0] = expand; + + ray_op_t* opt = ray_optimize(g, grp); + TEST_ASSERT_NOT_NULL(opt); + + /* factorized must NOT be set — key is "val", not "_src" */ + ray_op_ext_t* expand_ext = NULL; + for (uint32_t i = 0; i < g->ext_count; i++) { + if (g->ext_nodes[i] && g->ext_nodes[i]->base.id == expand->id) { + expand_ext = g->ext_nodes[i]; + break; + } + } + TEST_ASSERT_NOT_NULL(expand_ext); + TEST_ASSERT_TRUE(expand_ext->graph.factorized == 0); + + ray_graph_free(g); + ray_rel_free(rel); + ray_release(node_tbl); + ray_release(edges); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: graph_alloc_node_opt fix-up for JOIN and ANTIJOIN ext nodes. + * When graph realloc fires during split_and_filter, the fix-up loop must + * also relocate input pointers stored in OP_JOIN and OP_ANTIJOIN ext nodes. + * + * Exercises the OP_JOIN and OP_ANTIJOIN branches (lines ~1075-1103) of the + * pointer fix-up inside graph_alloc_node_opt. 
+ */ +static test_result_t test_opt_realloc_with_join_ext(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* Build a table with two I64 columns for join */ + int64_t key_data[] = {1, 2, 3, 4}; + int64_t val_data[] = {10, 20, 30, 40}; + ray_t* key_v = ray_vec_from_raw(RAY_I64, key_data, 4); + ray_t* val_v = ray_vec_from_raw(RAY_I64, val_data, 4); + int64_t s_k = ray_sym_intern("k", 1); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_k, key_v); + tbl = ray_table_add_col(tbl, s_v, val_v); + ray_release(key_v); + ray_release(val_v); + + ray_graph_t* g = ray_graph_new(tbl); + + ray_op_t* scan_k_l = ray_scan(g, "k"); + ray_op_t* scan_v_l = ray_scan(g, "v"); + ray_op_t* scan_k_r = ray_scan(g, "k"); + ray_op_t* scan_v_r = ray_scan(g, "v"); + + /* Build JOIN and ANTIJOIN — both have ext nodes with key pointers */ + ray_op_t* lkeys[] = { scan_k_l }; + ray_op_t* rkeys[] = { scan_k_r }; + (void)ray_join(g, scan_v_l, lkeys, scan_v_r, rkeys, 1, 0); + (void)ray_antijoin(g, scan_v_l, lkeys, scan_v_r, rkeys, 1); + + /* Build AND filter to trigger realloc via split */ + ray_op_t* scan_k2 = ray_scan(g, "k"); + ray_op_t* scan_v2 = ray_scan(g, "v"); + ray_op_t* c1 = ray_const_i64(g, 1); + ray_op_t* c5 = ray_const_i64(g, 5); + ray_op_t* eq1 = ray_eq(g, scan_k2, c1); + ray_op_t* gt5 = ray_gt(g, scan_v2, c5); + ray_op_t* and_p = ray_and(g, eq1, gt5); + ray_op_t* filt = ray_filter(g, scan_v2, and_p); + + /* Force realloc by filling node cap */ + while (g->node_count < g->node_cap) { + (void)ray_const_i64(g, 0); + } + + uint32_t cap_before = g->node_cap; + ray_op_t* opt = ray_optimize(g, filt); + TEST_ASSERT_NOT_NULL(opt); + TEST_ASSERT_TRUE(g->node_cap > cap_before); /* realloc fired */ + + ray_graph_free(g); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* + * Test: infer_type_for_node unary branch. 
+ * Exercises the else-if branch (node->arity >= 1 && node->inputs[0]) + * that propagates out_type from the single input (lines 82-84 in opt.c). + * + * Build a unary op node with out_type=0 and force type inference. + * We use ray_neg (arity=1) and manually zero out_type before optimize. + */ +static test_result_t test_type_infer_unary(void) { + ray_heap_init(); + ray_t* tbl = make_test_table(); + ray_graph_t* g = ray_graph_new(tbl); + + /* neg(scan(v1)) — after zeroing out_type, infer should copy from input */ + ray_op_t* v1 = ray_scan(g, "v1"); + ray_op_t* neg = ray_neg(g, v1); + TEST_ASSERT_NOT_NULL(neg); + + /* Force out_type to 0 — will trigger infer_type_for_node arity>=1 branch */ + neg->out_type = 0; + g->nodes[neg->id].out_type = 0; + + ray_op_t* opt = ray_optimize(g, neg); + TEST_ASSERT_NOT_NULL(opt); + /* After inference: neg's out_type should match v1's type (RAY_I64) */ + /* The neg node may be dead after DCE, but the graph should still be valid */ + + ray_graph_free(g); + ray_release(tbl); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + const test_entry_t opt_entries[] = { { "opt/filter_reorder_type", test_filter_reorder_by_type, NULL, NULL }, { "opt/filter_and_split", test_filter_and_split, NULL, NULL }, @@ -1980,10 +2601,16 @@ const test_entry_t opt_entries[] = { { "opt/const_fold_i64_div", test_const_fold_i64_div, NULL, NULL }, { "opt/const_fold_i64_min", test_const_fold_i64_min, NULL, NULL }, { "opt/const_fold_i64_max", test_const_fold_i64_max, NULL, NULL }, + { "opt/const_fold_i64_min_swap", test_const_fold_i64_min_swap, NULL, NULL }, + { "opt/const_fold_i64_max_swap", test_const_fold_i64_max_swap, NULL, NULL }, { "opt/const_fold_i32_ops", test_const_fold_i32_ops, NULL, NULL }, { "opt/const_fold_i32_add", test_const_fold_i32_add, NULL, NULL }, { "opt/const_fold_i32_div", test_const_fold_i32_div, NULL, NULL }, { "opt/const_fold_i32_mod", test_const_fold_i32_mod, NULL, NULL }, + { "opt/const_fold_i32_min", test_const_fold_i32_min, NULL, 
NULL }, + { "opt/const_fold_i32_max", test_const_fold_i32_max, NULL, NULL }, + { "opt/const_fold_i32_min_swap", test_const_fold_i32_min_swap, NULL, NULL }, + { "opt/const_fold_i32_max_swap", test_const_fold_i32_max_swap, NULL, NULL }, { "opt/const_fold_i16_atom", test_const_fold_i16_atom, NULL, NULL }, { "opt/partition_pruning_eq", test_partition_pruning_eq, NULL, NULL }, { "opt/partition_pruning_ne", test_partition_pruning_ne, NULL, NULL }, @@ -1999,6 +2626,17 @@ const test_entry_t opt_entries[] = { { "opt/filter_const_f64_zero_pred", test_filter_const_f64_zero_pred, NULL, NULL }, { "opt/idiom_first_last_asc_scan_no_nulls", test_idiom_first_last_asc_scan_no_nulls, NULL, NULL }, { "opt/idiom_first_asc_scan_with_nulls_stays_safe", test_idiom_first_asc_scan_with_nulls_stays_safe, NULL, NULL }, + { "opt/partition_pruning_gt", test_partition_pruning_gt, NULL, NULL }, + { "opt/partition_pruning_swapped", test_partition_pruning_swapped, NULL, NULL }, + { "opt/partition_pruning_in_large", test_partition_pruning_in_large, NULL, NULL }, + { "opt/partition_pruning_in_empty", test_partition_pruning_in_empty, NULL, NULL }, + { "opt/partition_pruning_not_in_empty", test_partition_pruning_not_in_empty, NULL, NULL }, + { "opt/partition_pruning_in_with_nulls", test_partition_pruning_in_with_nulls, NULL, NULL }, + { "opt/pushdown_expand_if_pred", test_opt_pushdown_expand_if_pred, NULL, NULL }, + { "opt/factorize_expand_non_group", test_factorize_expand_non_group, NULL, NULL }, + { "opt/factorize_expand_group_non_src", test_factorize_expand_group_non_src, NULL, NULL }, + { "opt/realloc_with_join_ext", test_opt_realloc_with_join_ext, NULL, NULL }, + { "opt/type_infer_unary", test_type_infer_unary, NULL, NULL }, { NULL, NULL, NULL, NULL }, }; diff --git a/test/test_sort.c b/test/test_sort.c index 3939b46a..59bdf3eb 100644 --- a/test/test_sort.c +++ b/test/test_sort.c @@ -1436,6 +1436,55 @@ static test_result_t test_sort_msd_bucket_i64_desc(void) { PASS(); } +/* gather_by_idx W8 SYM 
path (eval.c line 1146, case 1) + * A RAY_SYM_W8 vec (IDs stored as uint8_t) sorted via ray_sort forces + * the SYM branch of gather_by_idx to take the esz==1 / case-1 path. + * A fresh sym table keeps all IDs ≤ 3 so ray_sym_dict_width returns W8. */ +static test_result_t test_sort_sym_w8_gather(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t s_a = ray_sym_intern("ga", 2); + int64_t s_b = ray_sym_intern("gb", 2); + int64_t s_c = ray_sym_intern("gc", 2); + + /* IDs 1, 2, 3 — all ≤ 255 → W8 storage */ + const int64_t N = 6; + ray_t* sv = ray_sym_vec_new(RAY_SYM_W8, N); + TEST_ASSERT_NOT_NULL(sv); + sv->len = N; + uint8_t* d = (uint8_t*)ray_data(sv); + /* Unsorted: c, a, b, c, a, b */ + d[0] = (uint8_t)s_c; + d[1] = (uint8_t)s_a; + d[2] = (uint8_t)s_b; + d[3] = (uint8_t)s_c; + d[4] = (uint8_t)s_a; + d[5] = (uint8_t)s_b; + + uint8_t asc_flag = 0; + ray_t* result = ray_sort(&sv, &asc_flag, NULL, 1, N); + TEST_ASSERT_NOT_NULL(result); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + TEST_ASSERT_EQ_I(ray_len(result), N); + TEST_ASSERT_TRUE(RAY_IS_SYM(result->type)); + + /* Sorted asc: a, a, b, b, c, c */ + const uint8_t* r = (const uint8_t*)ray_data(result); + TEST_ASSERT_EQ_I(r[0], (uint8_t)s_a); + TEST_ASSERT_EQ_I(r[1], (uint8_t)s_a); + TEST_ASSERT_EQ_I(r[2], (uint8_t)s_b); + TEST_ASSERT_EQ_I(r[3], (uint8_t)s_b); + TEST_ASSERT_EQ_I(r[4], (uint8_t)s_c); + TEST_ASSERT_EQ_I(r[5], (uint8_t)s_c); + + ray_release(result); + ray_release(sv); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + /* ─── Entry table ────────────────────────────────────────────────── */ const test_entry_t sort_entries[] = { @@ -1498,5 +1547,7 @@ const test_entry_t sort_entries[] = { * msd_bucket_sort_fn / bucket_lsb_sort path in msd_radix_sort_run. 
*/ { "sort/msd_bucket_i64_asc", test_sort_msd_bucket_i64, NULL, NULL }, { "sort/msd_bucket_i64_desc", test_sort_msd_bucket_i64_desc, NULL, NULL }, + /* gather_by_idx W8 SYM path (eval.c line 1146) */ + { "sort/sym_w8_gather", test_sort_sym_w8_gather, NULL, NULL }, { NULL, NULL, NULL, NULL }, }; diff --git a/test/test_sym.c b/test/test_sym.c index 44712f6f..f51d4e93 100644 --- a/test/test_sym.c +++ b/test/test_sym.c @@ -1432,6 +1432,340 @@ static test_result_t test_sym_dotted_leading_dot(void) { PASS(); } +/* ══════════════════════════════════════════ + * Additional ray_sym_load / sym_cache_segments coverage + * Targets the remaining zero-hit regions in sym.c: + * — disk_count < 0 or > UINT32_MAX (lines 1327-1332) + * — slen > remaining in entry loop (lines 1418-1423) + * — prefix mismatch in validation loop (lines 1428-1434) + * — remaining != 0 after all entries parsed (lines 1455-1460) + * — lock_path snprintf overflow in ray_sym_load (lines 1284-1285) + * — lock_path snprintf overflow in ray_sym_save (lines 1057-1058) + * — sep_dots + 1 > 255 guard in sym_cache_segments (lines 417-420) + * ══════════════════════════════════════════ */ + +/* Helper: write a raw STRL binary file. + * STRL layout: [4B magic LE][8B count LE][for each: 4B len LE + data] + * SYM_STRL_MAGIC = 0x4C525453 ("STRL" in memory on LE machines). */ +static bool write_strl_raw(const char* path, + const uint8_t* data, size_t data_len) { + FILE* f = fopen(path, "wb"); + if (!f) return false; + bool ok = (fwrite(data, 1, data_len, f) == data_len); + fclose(f); + return ok; +} + +/* ---- sym_load_neg_disk_count ------------------------------------------- */ + +/* ray_sym_load rejects a STRL file whose 8-byte count field is negative + * (disk_count < 0 check at sym.c line 1327). Covers lines 1328-1332. 
*/ +static test_result_t test_sym_load_neg_disk_count(void) { + const char* sym_path = "/tmp/test_sym_negcnt.sym"; + char lk_path[4096]; + snprintf(lk_path, sizeof(lk_path), "%s.lk", sym_path); + remove(sym_path); remove(lk_path); + + /* STRL magic (4B LE: 53 54 52 4C) + disk_count = -1 (8B LE: all 0xFF) */ + static const uint8_t buf[] = { + 0x53, 0x54, 0x52, 0x4C, /* magic = "STRL" */ + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF /* int64_t -1 in LE */ + }; + TEST_ASSERT_TRUE(write_strl_raw(sym_path, buf, sizeof(buf))); + + ray_err_t err = ray_sym_load(sym_path); + TEST_ASSERT((err) != (RAY_OK), "negative disk_count must be rejected"); + + remove(sym_path); remove(lk_path); + PASS(); +} + +/* ---- sym_load_slen_overflow -------------------------------------------- */ + +/* ray_sym_load rejects a STRL file where a declared entry length (slen) + * exceeds the bytes remaining after the length prefix. + * Covers sym.c lines 1418-1423. + * + * File layout: magic + count=1 + slen=99 + 0 bytes of string data. + * After reading slen=99: remaining==0, which is < 99 → error. */ +static test_result_t test_sym_load_slen_overflow(void) { + const char* sym_path = "/tmp/test_sym_slen_ovf.sym"; + char lk_path[4096]; + snprintf(lk_path, sizeof(lk_path), "%s.lk", sym_path); + remove(sym_path); remove(lk_path); + + static const uint8_t buf[] = { + 0x53, 0x54, 0x52, 0x4C, /* magic "STRL" */ + 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, /* disk_count = 1 */ + 0x63, 0x00, 0x00, 0x00 /* entry 0: slen = 99, no data */ + }; + TEST_ASSERT_TRUE(write_strl_raw(sym_path, buf, sizeof(buf))); + + ray_err_t err = ray_sym_load(sym_path); + TEST_ASSERT((err) != (RAY_OK), "slen > remaining must be rejected"); + + remove(sym_path); remove(lk_path); + PASS(); +} + +/* ---- sym_load_prefix_mismatch_strl ------------------------------------- */ + +/* ray_sym_load rejects a reload where a previously-loaded (persisted) entry + * has a different string content in the new file. 
+ * Covers sym.c lines 1428-1434. + * + * Strategy: + * 1. Intern "" (id=0), "aaa" (id=1), "bbb" (id=2) and save → persisted=3. + * 2. Load the file so persisted_count = 3. + * 3. Build a STRL with the same 3 entries but entry 1 changed ("zzz"). + * 4. Load again → prefix check at i=1 fails (memory has "aaa", file has "zzz"). */ +static test_result_t test_sym_load_prefix_mismatch_strl(void) { + const char* sym_path = "/tmp/test_sym_pfx_mm.sym"; + char lk_path[4096]; + snprintf(lk_path, sizeof(lk_path), "%s.lk", sym_path); + remove(sym_path); remove(lk_path); + + /* Step 1: intern and save. */ + ray_sym_intern("aaa", 3); + ray_sym_intern("bbb", 3); + ray_err_t err = ray_sym_save(sym_path); + TEST_ASSERT_EQ_I(err, RAY_OK); + + /* Step 2: load → persisted_count = 3 ("", "aaa", "bbb"). */ + err = ray_sym_load(sym_path); + TEST_ASSERT_EQ_I(err, RAY_OK); + TEST_ASSERT_EQ_U(ray_sym_count(), 3); + + /* Step 3: craft a STRL with 3 entries where entry 1 is "zzz". + * STRL format: magic(4) + count=3(8) + [len=0 + ""][len=3 + "zzz"][len=3 + "bbb"] */ + static const uint8_t bad_strl[] = { + 0x53, 0x54, 0x52, 0x4C, /* magic "STRL" */ + 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, /* disk_count = 3 */ + /* entry 0: "" (len=0) */ + 0x00, 0x00, 0x00, 0x00, + /* entry 1: "zzz" (len=3) — was "aaa" in memory */ + 0x03, 0x00, 0x00, 0x00, 'z', 'z', 'z', + /* entry 2: "bbb" (len=3) */ + 0x03, 0x00, 0x00, 0x00, 'b', 'b', 'b' + }; + TEST_ASSERT_TRUE(write_strl_raw(sym_path, bad_strl, sizeof(bad_strl))); + + /* Step 4: load → prefix mismatch at entry 1. */ + err = ray_sym_load(sym_path); + TEST_ASSERT((err) != (RAY_OK), "prefix mismatch must be rejected"); + + remove(sym_path); remove(lk_path); + PASS(); +} + +/* ---- sym_load_trailing_junk -------------------------------------------- */ + +/* ray_sym_load rejects a STRL file that has extra bytes after all declared + * entries have been parsed (remaining != 0 check, sym.c lines 1455-1460). 
+ * + * File: magic + count=1 + entry[0]="" + one extra 0x42 byte at the end. */ +static test_result_t test_sym_load_trailing_junk(void) { + const char* sym_path = "/tmp/test_sym_trail.sym"; + char lk_path[4096]; + snprintf(lk_path, sizeof(lk_path), "%s.lk", sym_path); + remove(sym_path); remove(lk_path); + + static const uint8_t buf[] = { + 0x53, 0x54, 0x52, 0x4C, /* magic "STRL" */ + 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, /* disk_count = 1 */ + 0x00, 0x00, 0x00, 0x00, /* entry 0: slen=0, "" */ + 0x42 /* trailing junk byte */ + }; + TEST_ASSERT_TRUE(write_strl_raw(sym_path, buf, sizeof(buf))); + + ray_err_t err = ray_sym_load(sym_path); + TEST_ASSERT((err) != (RAY_OK), "trailing junk must be rejected"); + + remove(sym_path); remove(lk_path); + PASS(); +} + +/* ---- sym_load_long_path ------------------------------------------------ */ + +/* ray_sym_load rejects a path so long that appending ".lk" overflows the + * 1024-byte lock_path buffer (sym.c lines 1284-1285). */ +static test_result_t test_sym_load_long_path(void) { + /* The buffer is char lock_path[1024]; snprintf(lock_path, 1024, "%s.lk", path). + * Overflow when strlen(path) + 3 >= 1024, i.e. strlen(path) >= 1021. */ + char long_path[2048]; + memset(long_path, 'a', 1021); + long_path[1021] = '\0'; + + ray_err_t err = ray_sym_load(long_path); + TEST_ASSERT_EQ_I(err, RAY_ERR_IO); + PASS(); +} + +/* ---- sym_save_long_path ------------------------------------------------ */ + +/* ray_sym_save similarly rejects an overlong path (sym.c lines 1057-1058). + * Intern one sym first so persisted_count != str_count (otherwise save + * returns RAY_OK immediately without reaching the path-length check). 
*/
+static test_result_t test_sym_save_long_path(void) {
+    ray_sym_intern("save_long_path_test", 19); /* one new symbol, so save has something to write and does not return early */
+
+    char long_path[2048];
+    memset(long_path, 'b', 1021); /* 1021 chars + ".lk" = 1024, leaving no room for the NUL in a 1024-byte buffer */
+    long_path[1021] = '\0';
+
+    ray_err_t err = ray_sym_save(long_path);
+    TEST_ASSERT_EQ_I(err, RAY_ERR_IO);
+    PASS();
+}
+
+/* ---- sym_cache_segs_many_dots ------------------------------------------ */
+
+/* sym_cache_segments rejects names with 256+ dot-separated segments
+ * (sep_dots + 1 > 255, sym.c lines 417-420).
+ * Build a name with exactly 255 dots (256 segments) using no-split intern,
+ * then trigger rebuild_segments, which calls sym_cache_segments; the symbol
+ * must still be reported as not dotted and the rebuild must return RAY_OK. */
+static test_result_t test_sym_cache_segs_many_dots(void) {
+    /* Build a string like "a.a.a....a" with 255 dots (256 'a' segments).
+     * Total length = 256 * 1 + 255 = 511 characters. */
+    char name[512];
+    for (int i = 0; i < 511; i++)
+        name[i] = (i % 2 == 0) ? 'a' : '.'; /* even index → 'a', odd index → '.', so the name starts and ends with 'a' */
+    name[511] = '\0';
+    size_t name_len = 511; /* length without the terminating NUL */
+
+    /* Intern via no-split so segment caching is deferred. */
+    int64_t id = ray_sym_intern_no_split(name, name_len);
+    TEST_ASSERT((id) >= (0), "id >= 0");
+
+    /* Not yet scanned, so the symbol must not be reported as dotted. */
+    TEST_ASSERT_FALSE(ray_sym_is_dotted(id));
+
+    /* Rebuild triggers sym_cache_segments which detects 256 segments
+     * (sep_dots + 1 = 256 > 255) and marks the sym as plain (not dotted). */
+    ray_err_t err = ray_sym_rebuild_segments();
+    TEST_ASSERT_EQ_I(err, RAY_OK);
+
+    /* Must NOT be marked dotted — the 256-segment name is treated as plain. */
+    TEST_ASSERT_FALSE(ray_sym_is_dotted(id));
+
+    PASS();
+}
+
+/* ---- sym_load_no_parent_dir -------------------------------------------- */
+
+/* ray_sym_load with a path in a non-existent directory covers the inner
+ * EROFS fallback in sym.c (lines 1294-1296).
+ *
+ * When the parent directory does not exist:
+ *   - ray_file_open(path, READ) fails → errno = ENOENT → saved_errno = ENOENT
+ *   - ray_file_open(path, READ|WRITE|CREATE) also fails → errno = ENOENT
+ *   - saved_errno != EROFS && errno != EROFS is TRUE → returns RAY_ERR_IO
+ */
+static test_result_t test_sym_load_no_parent_dir(void) {
+    ray_err_t err = ray_sym_load("/tmp/no_such_dir_sym_xq7/sym.sym"); /* relies on /tmp/no_such_dir_sym_xq7 not existing on the test host */
+    TEST_ASSERT_EQ_I(err, RAY_ERR_IO);
+    PASS();
+}
+
+/* ---- sym_save_bad_slot_type -------------------------------------------- */
+
+/* sym_save_impl merge loop rejects a slot from the disk file that is not
+ * a -RAY_STR atom (sym.c lines 1089-1093: `s->type != -RAY_STR` check).
+ *
+ * Write a LSTG (generic list) file with one -RAY_I64 (integer) atom entry.
+ * ray_col_load will deserialise this as a RAY_LIST, and sym_save_impl will
+ * see `s->type == -RAY_I64 != -RAY_STR` → RAY_ERR_CORRUPT.
+ *
+ * LSTG format (col.c LIST_MAGIC = 0x4754534CU, LE = 4C 53 54 47):
+ *   [4B magic][1B outer-type RAY_LIST=0][8B count][1B elem-type -RAY_I64=0xFB][8B value]
+ */
+static test_result_t test_sym_save_bad_slot_type(void) {
+    static const uint8_t lstg_buf[] = {
+        0x4C, 0x53, 0x54, 0x47, /* LIST_MAGIC "LSTG" LE */
+        0x00, /* outer type = RAY_LIST = 0 */
+        0x01, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, /* count = 1 (8 bytes, least-significant byte first) */
+        0xFB, /* elem type = -RAY_I64 = -5 = 0xFB */
+        0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00 /* i64 value = 0 */
+    };
+    const char* path = "/tmp/sym_test_bad_slot.sym";
+    bool ok = write_strl_raw(path, lstg_buf, sizeof(lstg_buf)); /* pre-existing file for the save to merge against */
+    TEST_ASSERT_TRUE(ok);
+
+    /* Intern one sym so persisted_count != str_count (skip early return). */
+    ray_sym_intern("bad_slot_sym", 12);
+
+    ray_err_t err = ray_sym_save(path);
+    TEST_ASSERT_EQ_I(err, RAY_ERR_CORRUPT);
+    remove(path); /* NOTE(review): only the sym file is removed; if ray_sym_save leaves a "<path>.lk" or "<path>.tmp" side file it stays in /tmp — confirm */
+    PASS();
+}
+
+/* ---- sym_save_tmppath_overflow ----------------------------------------- */
+
+/* ray_sym_save rejects a path that would overflow the internal tmp_path[]
+ * buffer (sym.c lines 1060-1061: "%s.tmp" overflow).
+ * The lock_path[] buffer uses "%s.lk" (3-char suffix), which overflows at
+ * strlen >= 1021. The tmp_path[] buffer uses "%s.tmp" (4-char suffix),
+ * which overflows at strlen >= 1020. So a path of exactly 1020 chars
+ * passes the lock_path check but fails the tmp_path check.
+ * Intern one sym first so persisted_count != str_count to skip the
+ * early-return optimisation. */
+static test_result_t test_sym_save_tmppath_overflow(void) {
+    ray_sym_intern("tmppath_overflow_test", 21); /* one new symbol, so save does not return early */
+
+    char long_path[2048];
+    memset(long_path, 'c', 1020); /* 1020 chars: one short of the lock_path limit, exactly at the tmp_path limit */
+    long_path[1020] = '\0';
+
+    ray_err_t err = ray_sym_save(long_path);
+    TEST_ASSERT_EQ_I(err, RAY_ERR_IO);
+    PASS();
+}
+
+/* ---- sym_save_diverge_id ----------------------------------------------- */
+
+/* sym_save_impl merge path rejects divergent symbol tables where a symbol
+ * from the disk file interns to a different ID than its disk position
+ * (sym.c lines 1104-1113: `id != i` check).
+ *
+ * Setup:
+ *   1. Write a two-entry STRL file: ["", "apple"] to a tmp path.
+ *   2. Intern "banana" in memory — gets id=1 (since "" is always id=0).
+ *   3. Call ray_sym_save to the same path.
+ *   4.
sym_save_impl finds the existing file, tries to merge:
+ *      - entry[0]="" → ray_sym_intern_no_split("",0) → id=0 == 0 ✓
+ *      - entry[1]="apple" → but "banana" already holds id=1,
+ *        so "apple" gets id=2 ≠ 1 → RAY_ERR_CORRUPT ✓
+ */
+static test_result_t test_sym_save_diverge_id(void) {
+    static const uint8_t strl_buf[] = {
+        0x53, 0x54, 0x52, 0x4C, /* magic "STRL" */
+        0x02, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, /* disk_count = 2 (8 bytes, least-significant byte first) */
+        0x00, 0x00, 0x00, 0x00, /* entry 0: slen=0 "" */
+        0x05, 0x00, 0x00, 0x00, /* entry 1: slen=5 */
+        'a', 'p', 'p', 'l', 'e' /* "apple" */
+    };
+    const char* path = "/tmp/sym_test_diverge_id.sym";
+    bool ok = write_strl_raw(path, strl_buf, sizeof(strl_buf)); /* on-disk table the save will try to merge with */
+    TEST_ASSERT_TRUE(ok);
+
+    /* Intern "banana" — takes id=1, displacing the expected "apple" slot. */
+    int64_t banana_id = ray_sym_intern("banana", 6);
+    TEST_ASSERT_EQ_I(banana_id, 1); /* precondition: the divergence only exists if "banana" really got id 1 */
+
+    ray_err_t err = ray_sym_save(path);
+    TEST_ASSERT_EQ_I(err, RAY_ERR_CORRUPT);
+    remove(path); /* NOTE(review): only the sym file is removed; if ray_sym_save leaves a "<path>.lk" or "<path>.tmp" side file it stays in /tmp — confirm */
+    PASS();
+}
+
 /* ══════════════════════════════════════════
  *  ray_like_fn (src/ops/strop.c) coverage
  * ══════════════════════════════════════════ */
@@ -2190,6 +2524,19 @@ const test_entry_t sym_entries[] = {
     { "sym/ensure_cap_large", test_sym_ensure_cap_large, sym_setup, sym_teardown },
     { "sym/dotted_leading_dot", test_sym_dotted_leading_dot, sym_setup, sym_teardown },
+    /* Additional sym.c coverage: load/save edge cases */
+    { "sym/load_neg_disk_count", test_sym_load_neg_disk_count, sym_setup, sym_teardown },
+    { "sym/load_slen_overflow", test_sym_load_slen_overflow, sym_setup, sym_teardown },
+    { "sym/load_prefix_mismatch_strl", test_sym_load_prefix_mismatch_strl,sym_setup, sym_teardown },
+    { "sym/load_trailing_junk", test_sym_load_trailing_junk, sym_setup, sym_teardown },
+    { "sym/load_long_path", test_sym_load_long_path, sym_setup, sym_teardown },
+    { "sym/save_long_path", test_sym_save_long_path, sym_setup, sym_teardown },
+    { "sym/cache_segs_many_dots", test_sym_cache_segs_many_dots,
sym_setup, sym_teardown }, + { "sym/load_no_parent_dir", test_sym_load_no_parent_dir, sym_setup, sym_teardown }, + { "sym/save_bad_slot_type", test_sym_save_bad_slot_type, sym_setup, sym_teardown }, + { "sym/save_tmppath_overflow", test_sym_save_tmppath_overflow, sym_setup, sym_teardown }, + { "sym/save_diverge_id", test_sym_save_diverge_id, sym_setup, sym_teardown }, + /* ray_like_fn (src/ops/strop.c) — vector and sym-atom paths */ { "sym/like_fn/bad_pattern_type", test_like_fn_bad_pattern_type, sym_setup, sym_teardown }, { "sym/like_fn/str_atom_exact", test_like_fn_str_atom_exact, sym_setup, sym_teardown },