use anyhow::anyhow; use rand::seq::IndexedRandom; use regex::{Regex, RegexBuilder, RegexSet}; use std::collections::HashMap; use std::io; use std::ops::Range; use std::path::PathBuf; mod config; struct Config { patterns: Vec, set: RegexSet, } impl Config { fn new(patterns: Vec) -> Self { let set = RegexSet::new(patterns.iter().map(|x| x.to_string())).unwrap(); Self { patterns, set } } } struct Match { line: String, group: Range, } impl Match { fn new(line: String, group: Range) -> Self { Self { line, group } } fn prefix(&self) -> &str { &self.line[0..self.group.start] } fn infix(&self) -> &str { &self.line[self.group.clone()] } fn suffix(&self) -> &str { &self.line[self.group.end..self.line.len()] } } struct PatternGroup { matches: Vec, } impl PatternGroup { fn new(matches: Vec) -> Self { Self { matches } } fn calculate_groups(&self) -> Vec { let mut groups = Vec::with_capacity(self.matches.len()); groups.push(0); for i in 1..self.matches.len() { if self.matches[i - 1].prefix() != self.matches[i].prefix() || self.matches[i - 1].suffix() != self.matches[i].suffix() { groups.push(i); } } groups } } fn load_config(path: &PathBuf) -> anyhow::Result { if !path.try_exists()? { return Ok(default_config()); } let groups = config::load_groups(path)?; if let Some(patterns_str) = groups.get("groups") { let mut patterns_regex = Vec::with_capacity(patterns_str.len()); for pattern in patterns_str { let regex = RegexBuilder::new(pattern).size_limit(42_000).build()?; if regex.captures_len() != 2 { return Err(anyhow!( "Invalid pattern {}, capture groups are not one", pattern )); } patterns_regex.push(regex); } return Ok(Config::new(patterns_regex)); } Ok(Config::new(vec![])) } fn default_config() -> Config { Config::new(vec![Regex::new(r"part([0-9]+)").unwrap()]) } fn get_group(config: &Config, input: &str) -> (i64, Range) { if let Some(index) = config.set.matches(input).iter().next() { return ( index as i64, config.patterns[index] .captures(input) .unwrap() .get(1) .unwrap() .range(), ); } (-1, 0..0) } fn print_group(matches: &[Match]) { for m in matches { println!("{}", m.line); } } fn main() -> anyhow::Result<()> { let xdg_dirs = xdg::BaseDirectories::new(); let config; if let Some(config_path) = xdg_dirs.get_config_file("shuf-grp.conf") { config = load_config(&config_path) .unwrap_or_else(|err| panic!("Error parsing {}: {}", config_path.display(), err)); } else { eprintln!("Using default config, create ~/.config/shuf-grp.conf"); config = default_config(); } let stdin = io::stdin(); let mut groups = HashMap::new(); loop { let mut buffer = String::new(); let bytes = stdin.read_line(&mut buffer)?; if bytes == 0 { break; } buffer.truncate(buffer.trim_end().len()); let (group, part) = get_group(&config, &buffer); let m = Match::new(buffer, part); if let std::collections::hash_map::Entry::Vacant(e) = groups.entry(group) { e.insert(PatternGroup::new(vec![m])); } else { let pattern_group: &mut PatternGroup = groups.get_mut(&group).unwrap(); match pattern_group.matches.binary_search_by(|x| { x.prefix().cmp(m.prefix()).then( x.suffix() .cmp(m.suffix()) .then(natord::compare(x.infix(), m.infix())), ) }) { Ok(pos) => pattern_group.matches.insert(pos, m), Err(pos) => pattern_group.matches.insert(pos, m), } } } let mut indexes = Vec::<(i64, Range)>::with_capacity(groups.len()); for pattern_group in groups.iter_mut() { let groups = pattern_group.1.calculate_groups(); for i in 1..groups.len() { indexes.push((*pattern_group.0, groups[i - 1]..groups[i])); } indexes.push(( *pattern_group.0, groups[groups.len() - 1]..pattern_group.1.matches.len(), )); } let mut rng = rand::rng(); for (group, range) in indexes.choose_multiple(&mut rng, indexes.len()) { print_group(&groups.get(group).unwrap().matches[range.clone()]); } Ok(()) }