From bd916b4746822dd839bcf1f57b2fbed068f84534 Mon Sep 17 00:00:00 2001 From: xtqqczze <45661989+xtqqczze@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:43:51 +0000 Subject: [PATCH] shuf: use memchr in split_seps --- Cargo.lock | 1 + src/uu/shuf/Cargo.toml | 5 +++-- src/uu/shuf/src/shuf.rs | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c5bd7b89b36..c55cc64036f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4059,6 +4059,7 @@ dependencies = [ "codspeed-divan-compat", "fluent", "itoa", + "memchr", "rand 0.10.0", "rand_chacha 0.10.0", "rustc-hash", diff --git a/src/uu/shuf/Cargo.toml b/src/uu/shuf/Cargo.toml index d59f022e1af..7eaa497475f 100644 --- a/src/uu/shuf/Cargo.toml +++ b/src/uu/shuf/Cargo.toml @@ -20,13 +20,14 @@ path = "src/shuf.rs" [dependencies] clap = { workspace = true } +fluent = { workspace = true } itoa = { workspace = true } +memchr = { workspace = true } rand = { workspace = true } rand_chacha = { workspace = true } +rustc-hash = { workspace = true } sha3 = { workspace = true } uucore = { workspace = true } -fluent = { workspace = true } -rustc-hash = { workspace = true } [[bin]] name = "shuf" diff --git a/src/uu/shuf/src/shuf.rs b/src/uu/shuf/src/shuf.rs index de725cc4e20..2b6ed70fbcc 100644 --- a/src/uu/shuf/src/shuf.rs +++ b/src/uu/shuf/src/shuf.rs @@ -13,6 +13,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use clap::{Arg, ArgAction, Command, builder::ValueParser}; +use memchr::memchr_iter; use rand::{ RngExt as _, rngs::ThreadRng, @@ -267,14 +268,13 @@ fn read_input_file(filename: &Path) -> UResult<Vec<u8>> { } } -fn split_seps(data: &[u8], sep: u8) -> Vec<&[u8]> { +pub fn split_seps(data: &[u8], sep: u8) -> Vec<&[u8]> { // A single trailing separator is ignored. // If data is empty (and does not even contain a single 'sep' // to indicate the presence of an empty element), then behave // as if the input contained no elements at all.
- const PREDICTED_LINE_LENGTH: usize = 64; - let predicted_capacity = data.len() / PREDICTED_LINE_LENGTH; - let mut elements = Vec::with_capacity(predicted_capacity); + let sep_count = memchr_iter(sep, data).count(); + let mut elements = Vec::with_capacity(sep_count + 1); elements.extend(data.split(|&b| b == sep)); let _ = elements.pop_if(|e| e.is_empty()); elements