Skip to content

Commit 832cd72

Browse files
committed
New method circular_array_windows().
This is very similar to the existing `circular_tuple_windows`, but imposes the minimum possible bounds on the input iterator: it must have cloneable items because each item is returned N times, and it must be Sized so that it can be stored in a struct. Unlike `circular_tuple_windows`, it doesn't require the input iterator itself to implement extra traits, such as Clone or ExactSizeIterator. Because the return type is an array (as suggested in #1084), we must handle the zero-length case, since a constraint such as `N>0` cannot be expressed. In that situation we still read to the end of the input iterator, discard each item as we read it, and return a zero-length array per item, preserving the invariant that this iterator is the same length as the input one. In this implementation, I've used `Vec` to store items that will be reused, and so the method depends on the `use_alloc` feature.
1 parent 6c2e8f6 commit 832cd72

4 files changed

Lines changed: 359 additions & 0 deletions

File tree

src/array_impl.rs

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
use std::iter::{ExactSizeIterator, FusedIterator};
2+
3+
/// An iterator over all windows, wrapping back to the first elements when the
4+
/// window would otherwise exceed the length of the iterator, producing arrays
5+
/// of a specific size.
6+
///
7+
/// See [`.circular_array_windows()`](crate::Itertools::circular_array_windows)
8+
/// for more information.
9+
#[derive(Debug, Clone)]
10+
pub struct CircularArrayWindows<I, const N: usize>
11+
where
12+
I: Iterator + Sized,
13+
I::Item: Clone,
14+
{
15+
iter: I,
16+
state: CircularArrayWindowsState<I::Item, N>,
17+
}
18+
19+
#[derive(Debug, Clone)]
20+
enum CircularArrayWindowsState<T: Clone, const N: usize> {
21+
// Simple state enumeration. `NotStarted` allows us to be lazy, by
22+
// deferring fetching anything from the input iterator until we're
23+
// called on to return our first window. This could almost just be
24+
// an `Option<CircularArrayWindowsInner>`, except that we need to
25+
// distinguish `Done` from `NotStarted`.
26+
NotStarted,
27+
Running(CircularArrayWindowsInner<T, N>),
28+
Done,
29+
}
30+
31+
/// Buffers and counters used while windows are being produced.
#[derive(Debug, Clone)]
struct CircularArrayWindowsInner<T: Clone, const N: usize> {
    /// Number of items still to be copied into `prefix`; counts down from
    /// N-1 as the first items are read from the input iterator.
    prefix_needed: usize,
    /// Clones of the first (up to) N-1 items of the input. Once the input is
    /// exhausted these are replayed to complete the wrapping windows.
    prefix: Vec<T>,
    /// `None` while the input iterator is still being read. Becomes `Some`
    /// when the input runs out, and from then on holds the next read
    /// position within `prefix`.
    cyclepos: Option<usize>,

    /// The N-1 items of the previous window that are reused by the next one,
    /// physically stored as a ring buffer. A window is these items (in
    /// logical order) followed by one newly fetched item; afterwards the
    /// oldest buffered item is overwritten by the new one.
    ringbuf: Vec<T>,
    /// Index in `ringbuf` of the logically first (oldest) item.
    ringpos: usize,

    /// Items read from the input minus windows emitted so far. The number of
    /// windows must equal the number of input items, and deriving that from
    /// the other fields is fiddly when the input runs out before `prefix` is
    /// full, so it is tracked explicitly here.
    balance: usize,
}

impl<T: Clone, const N: usize> CircularArrayWindowsInner<T, N> {
    /// Build a fresh state with empty buffers sized for N-1 items each.
    fn empty() -> Self {
        let reused = N - 1;
        Self {
            prefix_needed: reused,
            prefix: Vec::with_capacity(reused),
            cyclepos: None,
            ringbuf: Vec::with_capacity(reused),
            ringpos: 0,
            balance: 0,
        }
    }

    /// Fetch the next item of the logical input sequence, i.e. the items of
    /// `iter` followed by items recycled from the beginning.
    ///
    /// Every item actually read from `iter` increments `balance`, and the
    /// first N-1 of them are additionally cloned into `prefix`.
    fn get_item(&mut self, iter: &mut impl Iterator<Item = T>) -> T {
        // The input was exhausted earlier: replay `prefix`, wrapping around
        // to its start whenever the end is reached.
        if let Some(cursor) = self.cyclepos {
            let item = self.prefix[cursor].clone();
            self.cyclepos = Some((cursor + 1) % self.prefix.len());
            return item;
        }

        match iter.next() {
            Some(item) => {
                self.balance += 1;
                if self.prefix_needed > 0 {
                    // `prefix` is not complete yet, so keep a copy.
                    self.prefix.push(item.clone());
                    self.prefix_needed -= 1;
                }
                item
            }
            None => {
                // The input has run out on this very call: switch to
                // recycling, hand out the first prefix item, and leave the
                // cursor on the one after it.
                self.cyclepos = Some(1 % self.prefix.len());
                self.prefix[0].clone()
            }
        }
    }

    /// Assemble the window that ends with `new_item`, then slide the ring
    /// buffer forward by one and record that a window has been emitted.
    fn make_window(&mut self, new_item: T) -> [T; N] {
        let held = self.ringbuf.len();
        let oldest = self.ringpos;
        let ring = &self.ringbuf;

        let window = std::array::from_fn(|slot| {
            if slot + 1 < N {
                // Leading N-1 slots: buffered items in logical order.
                ring[(oldest + slot) % held].clone()
            } else {
                // Final slot: the item that was just fetched.
                new_item.clone()
            }
        });

        // The oldest buffered item is no longer needed; the new item takes
        // its place and the following slot becomes the logical start.
        self.ringbuf[oldest] = new_item;
        self.ringpos = (oldest + 1) % held;

        self.balance -= 1;
        window
    }
}
136+
137+
impl<I, const N: usize> Iterator for CircularArrayWindows<I, N>
138+
where
139+
I: Iterator + Sized,
140+
I::Item: Clone,
141+
{
142+
type Item = [I::Item; N];
143+
144+
fn next(&mut self) -> Option<[I::Item; N]> {
145+
if N < 2 {
146+
// The generic code doesn't work sensibly when N is too
147+
// small, because `ringbuf` and `prefix` have size 0. So
148+
// these cases are handled completely separately by this
149+
// simpler code, which just consumes an item from the
150+
// iterator and returns a window based on just that item.
151+
return match &self.state {
152+
CircularArrayWindowsState::Done => None,
153+
_ => match self.iter.next() {
154+
Some(item) => {
155+
// For N=1 we wrap the item into a singleton
156+
// array. For N=0 we don't even do that – we
157+
// just throw it away and return [].
158+
let mut opt = Some(item);
159+
Some(std::array::from_fn(|_| opt.take().unwrap()))
160+
}
161+
None => {
162+
self.state = CircularArrayWindowsState::Done;
163+
None
164+
}
165+
},
166+
};
167+
}
168+
match &mut self.state {
169+
// Initialisation code, when next() is called for the first time
170+
CircularArrayWindowsState::NotStarted => match self.iter.next() {
171+
None => {
172+
// The input iterator was completely empty
173+
self.state = CircularArrayWindowsState::Done;
174+
None
175+
}
176+
Some(first) => {
177+
// We have at least one item, so we can definitely
178+
// populate `prefix` (even if we have to make N-1
179+
// copies of this element). Set up an `inner`.
180+
let mut inner = CircularArrayWindowsInner::empty();
181+
182+
// Call `get_item` even on the first item, using a
183+
// throwaway iterator that just returns the item
184+
// we're already holding. That way `prefix` and
185+
// `balance` are updated the same as they will be
186+
// for future items.
187+
let first = inner.get_item(&mut Some(first).into_iter());
188+
// Put the new item into the ring buffer.
189+
inner.ringbuf.push(first);
190+
for _ in 2..N {
191+
// Now read N-2 further items and fill up the
192+
// rest of `prefix` and `ringbuf`.
193+
let item = inner.get_item(&mut self.iter);
194+
inner.ringbuf.push(item);
195+
}
196+
197+
// Now we've read N-1 items, and we're ready to
198+
// read the Nth and return a window.
199+
let last = inner.get_item(&mut self.iter);
200+
let window = inner.make_window(last);
201+
202+
self.state = CircularArrayWindowsState::Running(inner);
203+
204+
Some(window)
205+
}
206+
},
207+
CircularArrayWindowsState::Running(inner) => {
208+
if inner.cyclepos.is_some() && inner.balance == 0 {
209+
// The input iterator has run out, and we've
210+
// emitted as many windows as we read items, so
211+
// we've finished.
212+
self.state = CircularArrayWindowsState::Done;
213+
None
214+
} else {
215+
// Normal case. Fetch an item and return a window.
216+
let last = inner.get_item(&mut self.iter);
217+
let window = inner.make_window(last);
218+
Some(window)
219+
}
220+
}
221+
CircularArrayWindowsState::Done => None,
222+
}
223+
}
224+
}
225+
226+
// Because `CircularArrayWindowsState::Done` is an absorbing state,
227+
// this iterator gets fusedness for free.
228+
impl<I, const N: usize> FusedIterator for CircularArrayWindows<I, N>
229+
where
230+
I: Iterator + Sized,
231+
I::Item: Clone,
232+
{
233+
}
234+
235+
// We return exactly one window per input item, so if the input
236+
// iterator knows its length, then so do we.
237+
impl<I, const N: usize> ExactSizeIterator for CircularArrayWindows<I, N>
238+
where
239+
I: Iterator + Sized + ExactSizeIterator,
240+
I::Item: Clone,
241+
{
242+
fn len(&self) -> usize {
243+
self.iter.len()
244+
}
245+
}
246+
247+
pub fn circular_array_windows<I, const N: usize>(iter: I) -> CircularArrayWindows<I, N>
248+
where
249+
I: Iterator + Sized,
250+
I::Item: Clone,
251+
{
252+
CircularArrayWindows {
253+
iter,
254+
state: CircularArrayWindowsState::NotStarted,
255+
}
256+
}

src/lib.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ pub mod structs {
9898
TakeWhileRef, TupleCombinations, Update, WhileSome,
9999
};
100100
#[cfg(feature = "use_alloc")]
101+
pub use crate::array_impl::CircularArrayWindows;
102+
#[cfg(feature = "use_alloc")]
101103
pub use crate::combinations::{ArrayCombinations, Combinations};
102104
#[cfg(feature = "use_alloc")]
103105
pub use crate::combinations_with_replacement::CombinationsWithReplacement;
@@ -174,6 +176,8 @@ pub use crate::unziptuple::{multiunzip, MultiUnzip};
174176
pub use crate::with_position::Position;
175177
pub use crate::ziptuple::multizip;
176178
mod adaptors;
179+
#[cfg(feature = "use_alloc")]
180+
mod array_impl;
177181
mod either_or_both;
178182
pub use crate::either_or_both::EitherOrBoth;
179183
#[doc(hidden)]
@@ -900,6 +904,39 @@ pub trait Itertools: Iterator {
900904
tuple_impl::tuples(self)
901905
}
902906

907+
/// Return an iterator over all windows, wrapping back to the first
908+
/// elements when the window would otherwise exceed the length of the
909+
/// iterator, producing arrays of size `N`.
910+
///
911+
/// `circular_array_windows` clones the iterator elements so that
912+
/// they can be part of successive windows. This makes it most
913+
/// suited for iterators of references and other values that are
914+
/// cheap to copy.
///
/// One window is returned per element of the input iterator. When the
/// input has fewer than `N` elements, its elements are cycled as often as
/// needed to fill each window. For `N == 1` each window is a
/// single-element array, and for `N == 0` each element is consumed and an
/// empty array is returned in its place.
915+
///
916+
/// ```
917+
/// use itertools::Itertools;
918+
/// let mut v = Vec::new();
919+
/// for [a, b] in (1..5).circular_array_windows() {
920+
/// v.push([a, b]);
921+
/// }
922+
/// assert_eq!(v, vec![[1, 2], [2, 3], [3, 4], [4, 1]]);
923+
///
924+
/// let mut it = (1..5).circular_array_windows();
925+
/// assert_eq!(Some([1, 2, 3]), it.next());
926+
/// assert_eq!(Some([2, 3, 4]), it.next());
927+
/// assert_eq!(Some([3, 4, 1]), it.next());
928+
/// assert_eq!(Some([4, 1, 2]), it.next());
929+
/// assert_eq!(None, it.next());
930+
/// ```
931+
#[cfg(feature = "use_alloc")]
932+
fn circular_array_windows<const N: usize>(self) -> CircularArrayWindows<Self, N>
933+
where
934+
Self: Sized,
935+
Self::Item: Clone,
936+
{
937+
array_impl::circular_array_windows(self)
938+
}
939+
903940
/// Split into an iterator pair that both yield all elements from
904941
/// the original iterator.
905942
///

tests/laziness.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ must_use_tests! {
104104
let _ = Panicking.circular_tuple_windows::<(_, _)>();
105105
let _ = Panicking.circular_tuple_windows::<(_, _, _)>();
106106
}
107+
circular_array_windows {
// Build the adaptor on top of `Panicking` for window sizes 0 through 3 and
// discard each one immediately with `let _`, without ever iterating it.
108+
let _ = Panicking.circular_array_windows::<0>();
109+
let _ = Panicking.circular_array_windows::<1>();
110+
let _ = Panicking.circular_array_windows::<2>();
111+
let _ = Panicking.circular_array_windows::<3>();
112+
}
107113
tuples {
108114
let _ = Panicking.tuples::<(_,)>();
109115
let _ = Panicking.tuples::<(_, _)>();

tests/quick.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,6 +1300,66 @@ quickcheck! {
13001300
}
13011301
}
13021302

1303+
// array iterators
1304+
quickcheck! {
    // N = 0: every input element yields one empty window.
    fn equal_circular_array_windows_0(a: Vec<u8>) -> bool {
        let expected = a.iter().map(|_| [&0u8; 0]);
        let actual = a.iter().circular_array_windows::<0>();
        assert_eq!(a.len(), actual.len());
        itertools::assert_equal(expected, actual);
        true
    }

    // N = 1: every input element is wrapped in a singleton window.
    fn equal_circular_array_windows_1(a: Vec<u8>) -> bool {
        let expected = a.iter().map(|elem| [elem]);
        let actual = a.iter().circular_array_windows::<1>();
        assert_eq!(a.len(), actual.len());
        itertools::assert_equal(expected, actual);
        true
    }

    // N = 2: each window is an element and its (wrapping) successor.
    fn equal_circular_array_windows_2(a: Vec<u8>) -> bool {
        let expected = (0..a.len()).map(|first| {
            let second = (first + 1) % a.len();
            [&a[first], &a[second]]
        });
        let actual = a.iter().circular_array_windows::<2>();
        assert_eq!(a.len(), actual.len());
        itertools::assert_equal(expected, actual);
        true
    }

    // N = 3: each window is an element and its two (wrapping) successors.
    fn equal_circular_array_windows_3(a: Vec<u8>) -> bool {
        let expected = (0..a.len()).map(|first| {
            let second = (first + 1) % a.len();
            let third = (first + 2) % a.len();
            [&a[first], &a[second], &a[third]]
        });
        let actual = a.iter().circular_array_windows::<3>();
        assert_eq!(a.len(), actual.len());
        itertools::assert_equal(expected, actual);
        true
    }

    // After yielding one window per input element, the iterators must keep
    // returning `None` on every further call.
    fn fused_circular_array_windows_is_fused(a: Vec<u8>) -> bool {
        let mut w0 = a.iter().circular_array_windows::<0>();
        let mut w1 = a.iter().circular_array_windows::<1>();
        let mut w2 = a.iter().circular_array_windows::<2>();

        // Exactly `a.len()` windows are available from each adaptor.
        for _ in 0..a.len() {
            assert!(w0.next().is_some());
            assert!(w1.next().is_some());
            assert!(w2.next().is_some());
        }

        // Exhausted now, and still exhausted when polled a second time.
        for _ in 0..2 {
            assert!(w0.next().is_none());
            assert!(w1.next().is_none());
            assert!(w2.next().is_none());
        }
        true
    }
}
1362+
13031363
// with_position
13041364
quickcheck! {
13051365
fn with_position_exact_size_1(a: Vec<u8>) -> bool {

0 commit comments

Comments
 (0)