diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2025-12-23 12:34:46 +0100 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2025-12-23 12:34:46 +0100 |
| commit | b29c82da90bf3843e2a551c36cd156185794b505 (patch) | |
| tree | 1eecf8721bcb327c815bb3549247d22579909794 | |
Initial commit
| -rw-r--r-- | .dir-locals.el | 4 | ||||
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | Cargo.lock | 214 | ||||
| -rw-r--r-- | Cargo.toml | 11 | ||||
| -rw-r--r-- | src/config.rs | 37 | ||||
| -rw-r--r-- | src/main.rs | 175 |
6 files changed, 442 insertions, 0 deletions
diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 0000000..853a60f --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,4 @@ +;;; Directory Local Variables -*- no-byte-compile: t; -*- +;;; For more information see (info "(emacs) Directory Variables") + +((rust-mode . ((flycheck-rust-binary-name . "shuf-grp")))) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..3fa7e00 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,214 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "libc" +version = "0.2.178" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] 
+name = "natord" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "shuf-grp" +version = "0.1.0" +dependencies = [ + "anyhow", + "natord", + "rand", + "regex", + "xdg", +] + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "xdg" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fb433233f2df9344722454bc7e96465c9d03bff9d77c248f9e7523fe79585b5" + +[[package]] +name = "zerocopy" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::path::PathBuf;

/// Parses an INI-like file into a map from section name to the lines of that section.
///
/// Rules applied to every line, after trailing whitespace has been removed:
/// * empty lines and lines starting with `#` are skipped;
/// * a line of the form `[name]` starts (or re-opens) the section `name`;
/// * any other line is appended to the current section.
///
/// Lines that appear before the first section header are collected under the
/// empty section name `""` (a `[]` header selects that same section).
///
/// # Errors
///
/// Returns the underlying [`io::Error`] if the file cannot be opened or if
/// reading a line fails (for example because the file is not valid UTF-8).
pub fn load_groups(path: &PathBuf) -> io::Result<HashMap<String, Vec<String>>> {
    let mut groups: HashMap<String, Vec<String>> = HashMap::new();
    let mut group_name = String::new();
    for line in read_lines(path)? {
        // Propagate read errors instead of silently stopping at the first one.
        let mut line = line?;
        line.truncate(line.trim_end().len());
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        // Strip exactly one bracket from each side; this keeps the full name
        // and cannot go out of bounds for a bare `[]`.
        if let Some(name) = line
            .strip_prefix('[')
            .and_then(|rest| rest.strip_suffix(']'))
        {
            group_name = name.to_string();
            // A header alone is enough to make the section exist.
            groups.entry(group_name.clone()).or_default();
            continue;
        }
        // `entry` creates the section on first use and otherwise appends,
        // so earlier lines of the same section are never discarded.
        groups.entry(group_name.clone()).or_default().push(line);
    }
    Ok(groups)
}

/// Opens `filename` and returns a buffered iterator over its lines.
///
/// # Errors
///
/// Returns the [`io::Error`] from opening the file.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    let file = File::open(filename)?;
    Ok(io::BufReader::new(file).lines())
}
rand::seq::IndexedRandom; +use regex::{Regex, RegexBuilder, RegexSet}; +use std::collections::HashMap; +use std::io; +use std::ops::Range; +use std::path::PathBuf; + +mod config; + +struct Config { + patterns: Vec<Regex>, + set: RegexSet, +} + +impl Config { + fn new(patterns: Vec<Regex>) -> Self { + let set = RegexSet::new(patterns.iter().map(|x| x.to_string())).unwrap(); + Self { patterns, set } + } +} + +struct Match { + line: String, + group: Range<usize>, +} + +impl Match { + fn new(line: String, group: Range<usize>) -> Self { + Self { line, group } + } + + fn prefix(&self) -> &str { + &self.line[0..self.group.start] + } + + fn infix(&self) -> &str { + &self.line[self.group.clone()] + } + + fn suffix(&self) -> &str { + &self.line[self.group.end..self.line.len()] + } +} + +struct PatternGroup { + matches: Vec<Match>, +} + +impl PatternGroup { + fn new(matches: Vec<Match>) -> Self { + Self { matches } + } + + fn calculate_groups(&self) -> Vec<usize> { + let mut groups = Vec::with_capacity(self.matches.len()); + groups.push(0); + for i in 1..self.matches.len() { + if self.matches[i - 1].prefix() != self.matches[i].prefix() + || self.matches[i - 1].suffix() != self.matches[i].suffix() + { + groups.push(i); + } + } + groups + } +} + +fn load_config(path: &PathBuf) -> anyhow::Result<Config> { + if !path.try_exists()? 
{ + return Ok(default_config()); + } + let groups = config::load_groups(path)?; + if let Some(patterns_str) = groups.get("groups") { + let mut patterns_regex = Vec::with_capacity(patterns_str.len()); + for pattern in patterns_str { + let regex = RegexBuilder::new(pattern).size_limit(42_000).build()?; + if regex.captures_len() != 1 { + return Err(anyhow!( + "Invalid pattern {}, capture groups are not one", + pattern + )); + } + patterns_regex.push(regex); + } + return Ok(Config::new(patterns_regex)); + } + Ok(Config::new(vec![])) +} + +fn default_config() -> Config { + Config::new(vec![Regex::new(r"part([0-9]+)").unwrap()]) +} + +fn get_group(config: &Config, input: &str) -> (i64, Range<usize>) { + if let Some(index) = config.set.matches(input).iter().next() { + return ( + index as i64, + config.patterns[index] + .captures(input) + .unwrap() + .get(1) + .unwrap() + .range(), + ); + } + (-1, 0..input.len()) +} + +fn print_group(matches: &[Match]) { + for m in matches { + println!("{}", m.line); + } +} + +fn main() -> anyhow::Result<()> { + let xdg_dirs = xdg::BaseDirectories::new(); + let config; + if let Some(config_path) = xdg_dirs.get_config_file("shuf-grp.conf") { + config = load_config(&config_path) + .unwrap_or_else(|err| panic!("Error parsing {}: {}", config_path.display(), err)); + } else { + eprintln!("Using default config, create ~/.config/shuf-grp.conf"); + config = default_config(); + } + + let stdin = io::stdin(); + let mut groups = HashMap::new(); + loop { + let mut buffer = String::new(); + let bytes = stdin.read_line(&mut buffer)?; + if bytes == 0 { + break; + } + buffer.truncate(buffer.trim_end().len()); + let (group, part) = get_group(&config, &buffer); + let m = Match::new(buffer, part); + if let std::collections::hash_map::Entry::Vacant(e) = groups.entry(group) { + e.insert(PatternGroup::new(vec![m])); + } else { + let pattern_group: &mut PatternGroup = groups.get_mut(&group).unwrap(); + match pattern_group.matches.binary_search_by(|x| { + 
x.prefix().cmp(m.prefix()).then( + x.suffix() + .cmp(m.suffix()) + .then(natord::compare(x.infix(), m.infix())), + ) + }) { + Ok(pos) => pattern_group.matches.insert(pos, m), + Err(pos) => pattern_group.matches.insert(pos, m), + } + } + } + + let mut indexes = Vec::<(i64, Range<usize>)>::with_capacity(groups.len()); + + for pattern_group in groups.iter_mut() { + let groups = pattern_group.1.calculate_groups(); + for i in 1..groups.len() { + indexes.push((*pattern_group.0, groups[i - 1]..groups[i])); + } + indexes.push(( + *pattern_group.0, + groups[groups.len() - 1]..pattern_group.1.matches.len(), + )); + } + + let mut rng = rand::rng(); + + for (group, range) in indexes.choose_multiple(&mut rng, indexes.len()) { + print_group(&groups.get(group).unwrap().matches[range.clone()]); + } + + Ok(()) +} |
