From b29c82da90bf3843e2a551c36cd156185794b505 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Tue, 23 Dec 2025 12:34:46 +0100 Subject: Initial commit --- src/main.rs | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 src/main.rs (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..87fd1a4 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,175 @@ +use anyhow::anyhow; +use rand::seq::IndexedRandom; +use regex::{Regex, RegexBuilder, RegexSet}; +use std::collections::HashMap; +use std::io; +use std::ops::Range; +use std::path::PathBuf; + +mod config; + +struct Config { + patterns: Vec, + set: RegexSet, +} + +impl Config { + fn new(patterns: Vec) -> Self { + let set = RegexSet::new(patterns.iter().map(|x| x.to_string())).unwrap(); + Self { patterns, set } + } +} + +struct Match { + line: String, + group: Range, +} + +impl Match { + fn new(line: String, group: Range) -> Self { + Self { line, group } + } + + fn prefix(&self) -> &str { + &self.line[0..self.group.start] + } + + fn infix(&self) -> &str { + &self.line[self.group.clone()] + } + + fn suffix(&self) -> &str { + &self.line[self.group.end..self.line.len()] + } +} + +struct PatternGroup { + matches: Vec, +} + +impl PatternGroup { + fn new(matches: Vec) -> Self { + Self { matches } + } + + fn calculate_groups(&self) -> Vec { + let mut groups = Vec::with_capacity(self.matches.len()); + groups.push(0); + for i in 1..self.matches.len() { + if self.matches[i - 1].prefix() != self.matches[i].prefix() + || self.matches[i - 1].suffix() != self.matches[i].suffix() + { + groups.push(i); + } + } + groups + } +} + +fn load_config(path: &PathBuf) -> anyhow::Result { + if !path.try_exists()? { + return Ok(default_config()); + } + let groups = config::load_groups(path)?; + if let Some(patterns_str) = groups.get("groups") { + let mut patterns_regex = Vec::with_capacity(patterns_str.len()); + for pattern in patterns_str { + let regex = RegexBuilder::new(pattern).size_limit(42_000).build()?; + if regex.captures_len() != 1 { + return Err(anyhow!( + "Invalid pattern {}, capture groups are not one", + pattern + )); + } + patterns_regex.push(regex); + } + return Ok(Config::new(patterns_regex)); + } + Ok(Config::new(vec![])) +} + +fn default_config() -> Config { + Config::new(vec![Regex::new(r"part([0-9]+)").unwrap()]) +} + +fn get_group(config: &Config, input: &str) -> (i64, Range) { + if let Some(index) = config.set.matches(input).iter().next() { + return ( + index as i64, + config.patterns[index] + .captures(input) + .unwrap() + .get(1) + .unwrap() + .range(), + ); + } + (-1, 0..input.len()) +} + +fn print_group(matches: &[Match]) { + for m in matches { + println!("{}", m.line); + } +} + +fn main() -> anyhow::Result<()> { + let xdg_dirs = xdg::BaseDirectories::new(); + let config; + if let Some(config_path) = xdg_dirs.get_config_file("shuf-grp.conf") { + config = load_config(&config_path) + .unwrap_or_else(|err| panic!("Error parsing {}: {}", config_path.display(), err)); + } else { + eprintln!("Using default config, create ~/.config/shuf-grp.conf"); + config = default_config(); + } + + let stdin = io::stdin(); + let mut groups = HashMap::new(); + loop { + let mut buffer = String::new(); + let bytes = stdin.read_line(&mut buffer)?; + if bytes == 0 { + break; + } + buffer.truncate(buffer.trim_end().len()); + let (group, part) = get_group(&config, &buffer); + let m = Match::new(buffer, part); + if let std::collections::hash_map::Entry::Vacant(e) = groups.entry(group) { + e.insert(PatternGroup::new(vec![m])); + } else { + let pattern_group: &mut PatternGroup = groups.get_mut(&group).unwrap(); + match pattern_group.matches.binary_search_by(|x| { + x.prefix().cmp(m.prefix()).then( + x.suffix() + .cmp(m.suffix()) + .then(natord::compare(x.infix(), m.infix())), + ) + }) { + Ok(pos) => pattern_group.matches.insert(pos, m), + Err(pos) => pattern_group.matches.insert(pos, m), + } + } + } + + let mut indexes = Vec::<(i64, Range)>::with_capacity(groups.len()); + + for pattern_group in groups.iter_mut() { + let groups = pattern_group.1.calculate_groups(); + for i in 1..groups.len() { + indexes.push((*pattern_group.0, groups[i - 1]..groups[i])); + } + indexes.push(( + *pattern_group.0, + groups[groups.len() - 1]..pattern_group.1.matches.len(), + )); + } + + let mut rng = rand::rng(); + + for (group, range) in indexes.choose_multiple(&mut rng, indexes.len()) { + print_group(&groups.get(group).unwrap().matches[range.clone()]); + } + + Ok(()) +} -- cgit v1.2.3-70-g09d2