diff --git a/ndc_stdlib/src/sequence.rs b/ndc_stdlib/src/sequence.rs index ea6c7f8d..21879c74 100644 --- a/ndc_stdlib/src/sequence.rs +++ b/ndc_stdlib/src/sequence.rs @@ -560,8 +560,7 @@ mod inner { /// Returns the `k` sized combinations of the given sequence `seq` as a lazy iterator of tuples. #[function(return_type = Iterator)] - pub fn combinations(seq: SeqValue, k: i64) -> anyhow::Result { - let k = k as usize; + pub fn combinations(seq: SeqValue, k: usize) -> anyhow::Result { let iter = CombinationsIter::new(seq, k) .ok_or_else(|| anyhow!("combinations requires a sequence"))?; Ok(Value::iterator(iter.into_shared())) @@ -576,8 +575,7 @@ mod inner { /// Returns the `k` sized permutations of the given sequence `seq` as a list of tuples. #[function(return_type = Vec<_>)] - pub fn permutations(seq: SeqValue, k: i64) -> anyhow::Result { - let k = k as usize; + pub fn permutations(seq: SeqValue, k: usize) -> anyhow::Result { Ok(Value::list( seq.try_into_iter() .ok_or_else(|| anyhow!("permutations requires a sequence"))? @@ -738,8 +736,7 @@ mod inner { /// Return a list that represents the powerset of the elements of `seq` that are exactly `length` long. #[function(name = "subsequences", return_type = Vec<_>)] - pub fn subsequences_len(seq: SeqValue, length: i64) -> anyhow::Result { - let length = length as usize; + pub fn subsequences_len(seq: SeqValue, length: usize) -> anyhow::Result { Ok(Value::list( seq.try_into_iter() .ok_or_else(|| anyhow!("subsequences requires a sequence"))? diff --git a/ndc_vm/src/iterator.rs b/ndc_vm/src/iterator.rs index f4d2c25d..36d17ec0 100644 --- a/ndc_vm/src/iterator.rs +++ b/ndc_vm/src/iterator.rs @@ -398,6 +398,11 @@ pub struct CombinationsIter { buffer: Vec, /// `Some` only for lazy (Shared) sources; set to `None` once exhausted. source: Option, + /// Combination size. The `indices` vector is only allocated once the + /// buffer is confirmed to hold at least `k` elements, so an absurdly + /// large `k` (e.g. `i64::MAX`) over a short source yields nothing + /// instead of overflowing the allocator with `(0..k).collect()`. + k: usize, indices: Vec, first: bool, } @@ -413,7 +418,8 @@ impl CombinationsIter { Some(Self { buffer, source, - indices: (0..k).collect(), + k, + indices: Vec::new(), first: true, }) } @@ -445,7 +451,7 @@ impl CombinationsIter { impl VmIterator for CombinationsIter { fn next(&mut self) -> Option { - let k = self.indices.len(); + let k = self.k; if k == 0 { return if self.first { @@ -457,10 +463,19 @@ impl VmIterator for CombinationsIter { } if self.first { - self.first = false; if !self.ensure_index(k - 1) { + // Source can't supply `k` elements: stay exhausted. `first` + // is left set so a re-poll returns `None` again instead of + // falling into the else branch and indexing the still-empty + // `indices`. return None; } + // The buffer now holds at least `k` elements, so this allocation + // is bounded by data we actually materialised — no capacity + // overflow even when `k` is enormous. Only now is it safe to + // leave the first-call branch. + self.indices = (0..k).collect(); + self.first = false; } else { // For lazy sources: pull one more element before choosing the // pivot. Without this, when `indices[k-1]` reaches the end of @@ -510,6 +525,7 @@ impl VmIterator for CombinationsIter { Some(Rc::new(RefCell::new(Self { buffer: self.buffer.clone(), source: source_copy, + k: self.k, indices: self.indices.clone(), first: self.first, }))) diff --git a/tests/functional/programs/900_bugs/bug0027_combinations_huge_k.ndc b/tests/functional/programs/900_bugs/bug0027_combinations_huge_k.ndc new file mode 100644 index 00000000..44fb00d0 --- /dev/null +++ b/tests/functional/programs/900_bugs/bug0027_combinations_huge_k.ndc @@ -0,0 +1,23 @@ +// `combinations(seq, k)` built its index vector eagerly with +// `(0..k).collect()`. A huge `k` (e.g. `i64::MAX`) over a short source +// asked the allocator for an `i64::MAX`-element `Vec` and aborted +// with "capacity overflow". The index vector is now only allocated once +// the buffer is confirmed to hold at least `k` elements, so an +// over-large `k` simply yields no combinations instead of panicking. +assert_eq(combinations([1, 2, 3], 9223372036854775807).list(), []); + +// `k` larger than the source length yields nothing (boundary just above n). +assert_eq(combinations([1, 2, 3], 4).list(), []); + +// Re-polling an exhausted over-large combination stays empty rather than +// indexing a never-initialised index vector. Covers both eager and lazy +// (enumerate) sources, drained twice. +let eager = combinations([1, 2, 3], 5); +assert_eq(eager.list(), []); +assert_eq(eager.list(), []); +let lazy = combinations([1, 2, 3].enumerate(), 5); +assert_eq(lazy.list(), []); +assert_eq(lazy.list(), []); + +// Valid combinations are still produced correctly. +assert_eq([1, 2, 3, 4].combinations(2).list(), [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]); diff --git a/tests/functional/programs/900_bugs/bug0028_combinations_negative_k.ndc b/tests/functional/programs/900_bugs/bug0028_combinations_negative_k.ndc new file mode 100644 index 00000000..9c134ae6 --- /dev/null +++ b/tests/functional/programs/900_bugs/bug0028_combinations_negative_k.ndc @@ -0,0 +1,7 @@ +// `combinations(seq, k)` took `k` as an `i64` and cast it with `k as usize`, +// turning a negative `k` into `usize::MAX` and aborting with "capacity +// overflow" while building the index vector. `k` is now a `usize` argument, +// so a negative value is rejected by argument conversion with a graceful +// error — matching the sibling `windows`/`chunks`/`take` functions. +// expect-error: expected a non-negative integer, but the value was negative +combinations([1, 2, 3], -1); diff --git a/tests/functional/programs/900_bugs/bug0029_permutations_negative_k.ndc b/tests/functional/programs/900_bugs/bug0029_permutations_negative_k.ndc new file mode 100644 index 00000000..64b08879 --- /dev/null +++ b/tests/functional/programs/900_bugs/bug0029_permutations_negative_k.ndc @@ -0,0 +1,5 @@ +// `permutations(seq, k)` cast `k as usize`, silently turning a negative `k` +// into `usize::MAX` and returning an empty list. `k` is now a `usize` +// argument, so a negative value is rejected with a graceful error instead. +// expect-error: expected a non-negative integer, but the value was negative +permutations([1, 2, 3], -1); diff --git a/tests/functional/programs/900_bugs/bug0030_subsequences_negative_length.ndc b/tests/functional/programs/900_bugs/bug0030_subsequences_negative_length.ndc new file mode 100644 index 00000000..b16c05ce --- /dev/null +++ b/tests/functional/programs/900_bugs/bug0030_subsequences_negative_length.ndc @@ -0,0 +1,6 @@ +// `subsequences(seq, length)` cast `length as usize`, silently turning a +// negative `length` into `usize::MAX` and returning an empty list. `length` +// is now a `usize` argument, so a negative value is rejected with a graceful +// error instead. +// expect-error: expected a non-negative integer, but the value was negative +subsequences([1, 2, 3], -1);