tor_netdoc/util/batching_split_before.rs
1//! Iterator extension for splitting into batches, each introduced by a batch-starting item
2//!
3//! See
4//! [`IteratorExt::batching_split_before_loose`] and
5//! [`IteratorExt::batching_split_before_with_header`].
6//!
7//! # **UNSTABLE**
8//!
9//! This whole module is UNSTABLE and not part of the semver guarantees.
10//! You'll only see it if you ran rustdoc with `--document-private-items`.
11// This is achieved with `#[doc(hidden)]` on the top-level module reexport
12// in `lib.rs`, which is the only place all of this isactually exposed.
13
14use std::iter;
15use std::marker::PhantomData;
16
17use crate::util::PeekableIterator;
18
19/// Iterator for the header, transformable into a [`Batches`] yielding subsequent batches
20///
21/// Returned by
22/// [`.batching_split_before_with_header()`](IteratorExt::batching_split_before_with_header).
23///
24/// This type is both:
25/// * An [`Iterator`], which returns the items in the header
26/// (before the first batch-starting item);
27/// * Transformable using [.subsequent()](BatchesWithHeader::subsequent)
28/// into a [`Batches`], which yields the remainder of the input,
29/// split into batches.
30///
31/// `II` is the iterator item type. `I` is the input iterator.
32/// `F` is the predicate for testing if an item is batch-starting.
33pub struct BatchesWithHeader<II, I, F> {
34 /// Input
35 input: Input<II, I, F>,
36}
37
38/// Input, shared by our public structs
39struct Input<II, I, F> {
40 /// The input iterator
41 unfiltered: I,
42 /// Callback to test if this is batch-starting
43 batch_starting: F,
44 /// We're like a function that yields II
45 marker: PhantomData<fn() -> II>,
46}
47
48/// An iterator-like object yielding an iterator for each batch.
49///
50/// Each call to [`.next_batch()`](Batches::next_batch)
51/// yields an iterator for one subsequent batch,
52/// which in turn will yield the individual items.
53///
54/// `Batches` is not an [`Iterator`] because
55/// its returned item type (the sub-iterator)
56/// borrows it mutably.
57///
58/// `II` is the iterator item type. `I` is the input iterator.
59/// `F` is the predicate for testing if an item is batch-starting.
60pub struct Batches<II, I, F> {
61 /// Input
62 input: Input<II, I, F>,
63 /// Should we avoid draining the end of the previous batch
64 no_drain: Option<NoDrainToken>,
65 /// Should we yield even (one) batch-starting item
66 yield_one: Option<EvenYieldOneBatchStarting>,
67}
68
69/// Token stored (or not) in the state to indicate not to drain the previous batch
70///
71/// (We use `Option<NoDrainToken>` rather than `bool` because
72/// booleans can be very confusing, and because
73/// `Option` has good ergonomics with [`.take()`](Option::take) and `?`.)
74struct NoDrainToken;
75
76/// Token stored (or not) in the state to indicate to yield even a batch-starting item
77///
78/// (We use `Option<NoDrainToken>` rather than `bool` because
79/// booleans can be very confusing, and because
80/// `Option` has good ergonomics with [`.take()`](Option::take) and `?`.)
81struct EvenYieldOneBatchStarting;
82
83/// Iterator to yield the members of a batch.
84///
85/// This is the iterator returned by
86/// [`.next_batch()`](Batches::next_batch).
87///
88/// `II` is the iterator item type. `I` is the input iterator.
89/// `F` is the predicate for testing if an item is batch-starting.
90pub struct Batch<'p, II, I, F> {
91 /// The parent, with all the actual state etc.
92 ///
93 /// It is less confusing to keep all the state in the parent iterator.
94 parent: &'p mut Batches<II, I, F>,
95}
96
97impl<II, I, F> Input<II, I, F>
98where
99 I: Iterator<Item = II> + PeekableIterator,
100 F: FnMut(&II) -> bool,
101{
102 /// Yield the next item - unless it is batch-starting.
103 fn next_non_starting(&mut self) -> Option<II> {
104 let item = self.unfiltered.peek()?;
105 if (self.batch_starting)(item) {
106 return None;
107 };
108 self.unfiltered.next()
109 }
110}
111
112impl<II, I, F> Iterator for BatchesWithHeader<II, I, F>
113where
114 I: Iterator<Item = II> + PeekableIterator,
115 F: FnMut(&II) -> bool,
116{
117 type Item = II;
118
119 fn next(&mut self) -> Option<II> {
120 self.input.next_non_starting()
121 }
122}
123
124impl<II, I, F> BatchesWithHeader<II, I, F>
125where
126 I: Iterator<Item = II> + PeekableIterator,
127 F: FnMut(&II) -> bool,
128{
129 /// Proceed from the header to the subsequent batches
130 ///
131 /// Any un-yielded items remaining in the header will be discarded.
132 pub fn subsequent(mut self) -> Batches<II, I, F> {
133 // Discard any un-yielded contents of the header
134 let _ = self.by_ref().count();
135
136 Batches {
137 input: self.input,
138 yield_one: None,
139 no_drain: None,
140 }
141 }
142}
143
144impl<II, I, F> Iterator for Batch<'_, II, I, F>
145where
146 I: Iterator<Item = II> + PeekableIterator,
147 F: FnMut(&II) -> bool,
148{
149 type Item = II;
150
151 fn next(&mut self) -> Option<II> {
152 if self.parent.yield_one.take().is_some() {
153 self.parent.input.unfiltered.next()
154 } else {
155 self.parent.input.next_non_starting()
156 }
157 }
158}
159
160impl<II, I: Iterator<Item = II> + PeekableIterator, F: FnMut(&II) -> bool> Batches<II, I, F> {
161 /// Proceed to the next batch
162 ///
163 /// If the input is exhausted (ie, there is no next batch), returns `None`.
164 ///
165 /// Any un-yielded items remaining in the previous batch will be discarded.
166 //
167 // Batches is a LendingIterator - its returned item type borrows from the
168 // iterator itself - so can't impl Iterator.
169 // <https://rust-lang.github.io/generic-associated-types-initiative/design_patterns/iterable.html>
170 pub fn next_batch(&mut self) -> Option<Batch<'_, II, I, F>> {
171 // Drain to the end of the batch
172 if self.no_drain.take().is_none() {
173 let _ = Batch { parent: self }.count();
174 }
175 let _: &II = self.input.unfiltered.peek()?;
176 self.yield_one = Some(EvenYieldOneBatchStarting);
177 Some(Batch { parent: self })
178 }
179
180 /// Map each batch
181 pub fn map<T>(
182 mut self,
183 mut f: impl FnMut(Batch<'_, II, I, F>) -> T,
184 ) -> impl Iterator<Item = T> {
185 iter::from_fn(move || {
186 let batch = self.next_batch()?;
187 Some(f(batch))
188 })
189 }
190}
191
192/// **Extension trait providing `batching_split_before`**
193pub trait IteratorExt: Iterator + Sized {
194 /// Splits the input into a header followed by batches started according to a predicate
195 ///
196 /// The input is divided into:
197 /// * A header, containing no batch-starting items
198 /// * Zero or more subsequent batches, each with precisely one batch-starting item
199 ///
200 /// The returned value from `batching_split_before_with_header` is an iterator,
201 /// which yields the elements in the header - before the first batch-starting item.
202 ///
203 /// After processing the header, call
204 /// [`.subsequent()`](BatchesWithHeader::subsequent)
205 /// which will return a [`Batches`],
206 /// which is a meta-iterator-like-object which yields the subsequent batches.
207 ///
208 /// Each subsequent batch is then returned by calling
209 /// [`.next_batch()`](Batches::next_batch)
210 /// which yields a separate sub-iterator.
211 ///
212 /// A new batch is recognised for each input item for which `batch_starting` returns true.
213 ///
214 /// This method is named **with_header** because it separates out the header,
215 /// using a typestate pattern, which is convenient for processing the header
216 /// separately.
217 ///
218 /// (You will want to iterate the first batch by reference,
219 /// so that the iteration doesn't consume the [`BatchesWithHeader`],
220 /// which is what you will need to call `.subsequent()`.
221 /// The API insists that you process the batches sequentially:
222 /// you can only be processing one batch at a time.)
223 ///
224 /// # **UNSTABLE**
225 ///
226 /// This method is UNSTABLE and not part of the semver guarantees.
227 /// You'll only see it if you ran rustdoc with `--document-private-items`.
228 ///
229 /// # Example
230 ///
231 /// ```
232 /// use itertools::Itertools as _;
233 /// use tor_netdoc::batching_split_before::IteratorExt as _;
234 ///
235 /// let mut batches = (1..10).peekable().batching_split_before_with_header(|v| v % 3 == 0);
236 /// assert_eq!(batches.by_ref().collect_vec(), [ 1, 2 ]);
237 ///
238 /// let mut batches = batches.subsequent();
239 /// assert_eq!(batches.next_batch().unwrap().collect_vec(), [ 3, 4, 5 ]);
240 /// assert_eq!(batches.next_batch().unwrap().collect_vec(), [ 6, 7, 8 ]);
241 /// assert_eq!(batches.next_batch().unwrap().collect_vec(), [ 9 ]);
242 /// assert!(batches.next_batch().is_none());
243 /// ```
244 fn batching_split_before_with_header<F>(
245 self,
246 batch_starting: F,
247 ) -> BatchesWithHeader<Self::Item, Self, F>
248 where
249 F: FnMut(&Self::Item) -> bool,
250 {
251 let input = Input {
252 unfiltered: self,
253 batch_starting,
254 marker: PhantomData,
255 };
256 BatchesWithHeader { input }
257 }
258
259 /// Splits the input into batches, with new batches started according to a predicate
260 ///
261 /// The input is divided into batches, just before each batch-starting item.
262 /// The batch-starting item is included as the first item of every batch,
263 /// except the first batch if the input starts with a non-batch-starting-item.
264 ///
265 /// If the input iterator is empty, there are no batches.
266 ///
267 /// This method is named **loose** because it neither
268 /// insists that the iterator start with a batch-starting item,
269 /// nor returns batches which always start with a batch-starting item.
270 /// It is up to the caller to handle a possible first batch with no batch-starting item.
271 ///
272 /// Each batch is returned by calling
273 /// [`.next_batch()`](Batches::next_batch)
274 /// which yields a separate sub-iterator.
275 ///
276 /// A new batch is recognised for each input item for which `batch_start` returns true.
277 ///
278 /// (The API insists that you process the batches sequentially:
279 /// you can only be processing one batch at a time.)
280 ///
281 /// # **UNSTABLE**
282 ///
283 /// This method is UNSTABLE and not part of the semver guarantees.
284 /// You'll only see it if you ran rustdoc with `--document-private-items`.
285 ///
286 /// # Example
287 ///
288 /// ```
289 /// use itertools::Itertools as _;
290 /// use tor_netdoc::batching_split_before::IteratorExt as _;
291 ///
292 /// let mut batches = (1..10).peekable().batching_split_before_loose(|v| v % 3 == 0);
293 /// assert_eq!(batches.next_batch().unwrap().collect_vec(), [ 1, 2 ]);
294 /// assert_eq!(batches.next_batch().unwrap().collect_vec(), [ 3, 4, 5 ]);
295 /// assert_eq!(batches.next_batch().unwrap().collect_vec(), [ 6, 7, 8 ]);
296 /// assert_eq!(batches.next_batch().unwrap().collect_vec(), [ 9 ]);
297 /// assert!(batches.next_batch().is_none());
298 /// ```
299 fn batching_split_before_loose<F>(self, batch_starting: F) -> Batches<Self::Item, Self, F>
300 where
301 F: FnMut(&Self::Item) -> bool,
302 {
303 let input = Input {
304 unfiltered: self,
305 batch_starting,
306 marker: PhantomData,
307 };
308 Batches {
309 input,
310 no_drain: Some(NoDrainToken),
311 yield_one: None,
312 }
313 }
314}
315impl<I: Iterator> IteratorExt for I {}
316
317#[cfg(test)]
318mod tests {
319 // @@ begin test lint list maintained by maint/add_warning @@
320 #![allow(clippy::bool_assert_comparison)]
321 #![allow(clippy::clone_on_copy)]
322 #![allow(clippy::dbg_macro)]
323 #![allow(clippy::mixed_attributes_style)]
324 #![allow(clippy::print_stderr)]
325 #![allow(clippy::print_stdout)]
326 #![allow(clippy::single_char_pattern)]
327 #![allow(clippy::unwrap_used)]
328 #![allow(clippy::unchecked_duration_subtraction)]
329 #![allow(clippy::useless_vec)]
330 #![allow(clippy::needless_pass_by_value)]
331 //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
332 use super::*;
333 use crate::util::*;
334 use itertools::chain;
335 use std::fmt::Debug;
336 use std::iter;
337
338 struct TrackingPeekable<I: Iterator>(Peekable<I>);
339 impl<I: Iterator> Iterator for TrackingPeekable<I>
340 where
341 I::Item: Debug,
342 {
343 type Item = I::Item;
344 fn next(&mut self) -> Option<I::Item> {
345 let v = self.0.next();
346 eprintln!(" iter yielded {v:?}");
347 v
348 }
349 }
350 impl<I: Iterator> PeekableIterator for TrackingPeekable<I>
351 where
352 I::Item: Debug,
353 {
354 fn peek(&mut self) -> Option<&I::Item> {
355 let v = self.0.peek();
356 eprintln!(" iter peeked {v:?}");
357 v
358 }
359 }
360
361 #[test]
362 fn test_batching_split_before() {
363 fn chk_exp(mut iter: impl Iterator<Item = u32>, exp: &[u32]) {
364 eprintln!(" exp {exp:?}");
365 for exp in exp.iter().cloned() {
366 assert_eq!(iter.next(), Some(exp));
367 }
368 assert_eq!(iter.next(), None);
369 assert_eq!(iter.next(), None);
370 assert_eq!(iter.next(), None);
371 }
372
373 let chk_breakdown = |input: &[u32], iexp: &[u32], sexp: &[&[u32]]| {
374 let chk_batches = |mut subseq: Batches<_, _, _>, sexp: &mut dyn Iterator<Item = _>| {
375 loop {
376 match (subseq.next_batch(), sexp.next()) {
377 (Some(batch), Some(sexp)) => chk_exp(batch, sexp),
378 (None, None) => break,
379 (b, e) => panic!("({:?}, {e:?}", b.map(|_| ())),
380 }
381 }
382 assert!(subseq.next_batch().is_none());
383 assert!(subseq.next_batch().is_none());
384 };
385
386 eprintln!("input {input:?}");
387 let input = || TrackingPeekable(input.iter().cloned().peekable());
388 let is_starting = |v: &u32| *v >= 10;
389
390 {
391 let mut header = input().batching_split_before_with_header(is_starting);
392
393 chk_exp(&mut header, iexp);
394 eprintln!(" subsequent...");
395 let subseq = header.subsequent();
396 let mut sexp = sexp.iter().cloned();
397 chk_batches(subseq, &mut sexp);
398 }
399
400 {
401 let batches = input().batching_split_before_loose(is_starting);
402
403 let mut sexp =
404 chain!(iter::once(iexp), sexp.iter().cloned(),).filter(|s| !s.is_empty());
405
406 chk_batches(batches, &mut sexp);
407 }
408 };
409
410 chk_breakdown(&[], &[], &[]);
411
412 chk_breakdown(&[10], &[], &[&[10]]);
413
414 chk_breakdown(
415 &[1, 2, 30, 4, 5, 60, 7, 8],
416 &[1, 2],
417 &[&[30, 4, 5], &[60, 7, 8]],
418 );
419 }
420}