diff options
| -rw-r--r-- | server/Cargo.lock | 18 | ||||
| -rw-r--r-- | server/Cargo.toml | 2 | ||||
| -rw-r--r-- | server/api/src/api_model.rs | 38 | ||||
| -rw-r--r-- | server/src/main.rs | 1 | ||||
| -rw-r--r-- | server/src/tests.rs | 197 | ||||
| -rw-r--r-- | server/src/trans.rs | 223 |
6 files changed, 479 insertions, 0 deletions
diff --git a/server/Cargo.lock b/server/Cargo.lock
index 9d1b25e..a68c895 100644
--- a/server/Cargo.lock
+++ b/server/Cargo.lock
@@ -485,6 +485,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "diff"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
+
+[[package]]
 name = "digest"
 version = "0.10.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -583,6 +589,7 @@ dependencies = [
  "futures",
  "ldap3",
  "log",
+ "pretty_assertions",
  "reqwest",
  "rmp-serde",
  "rocket",
@@ -616,6 +623,7 @@ dependencies = [
  "log",
  "md5",
  "pathdiff",
+ "pretty_assertions",
  "serde",
  "testdir",
  "tokio",
@@ -1821,6 +1829,16 @@ dependencies = [
 ]
 
 [[package]]
+name = "pretty_assertions"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
+dependencies = [
+ "diff",
+ "yansi",
+]
+
+[[package]]
 name = "proc-macro2"
 version = "1.0.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/server/Cargo.toml b/server/Cargo.toml
index f62a2db..894009b 100644
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@@ -13,6 +13,7 @@ resolver = "2"
 anyhow = "1.0"
 futures = "0.3.31"
 log = { version = "0.4.25", features = ["release_max_level_warn"] }
+pretty_assertions = "1.4.1"
 rmp-serde = "1.3"
 serde = { version = "1.0", features = ["derive"] }
 testdir = "0.9.3"
@@ -26,6 +27,7 @@ eyeballs-common = { path = "common" }
 futures.workspace = true
 ldap3 = { version = "0.11.5", default-features = false, features = [ "native-tls", "tls", "tls-native", "tokio-native-tls" ] }
 log.workspace = true
+pretty_assertions.workspace = true
 rmp-serde.workspace = true
 rocket = { version = "0.5.1", features = ["json", "secrets"] }
 rocket_db_pools = { version = "0.2.0", features = ["sqlx_mysql"] }
diff --git a/server/api/src/api_model.rs b/server/api/src/api_model.rs
index 7602480..7dd2a20 100644
--- a/server/api/src/api_model.rs
+++ b/server/api/src/api_model.rs
@@ -265,3 +265,41 @@ pub struct UserKeys {
     pub more: bool,
     pub keys: Vec<UserKey>,
 }
+
+#[derive(Debug, Deserialize, PartialEq, Serialize, ToSchema)]
+pub struct LocalizationString {
+    #[schema(example = "IDS_GENERIC_WELCOME")]
+    pub id: String,
+    #[schema(example = "strings/strings.grd")]
+    pub file: String,
+    #[schema(example = "Generic greeting")]
+    pub description: String,
+    #[schema(example = "This should be a positive greeting")]
+    pub meaning: String,
+    #[schema(example = "Hello!")]
+    pub source: String,
+    pub placeholders: Vec<LocalizationPlaceholder>,
+    pub placeholder_offset: Vec<usize>,
+    #[schema(example = "123456")]
+    pub translation_id: i64,
+    pub translations: Vec<TranslationString>,
+}
+
+#[derive(Debug, Deserialize, PartialEq, Serialize, ToSchema)]
+pub struct LocalizationPlaceholder {
+    #[schema(example = "NAME")]
+    pub id: String,
+    #[schema(example = "%1$d")]
+    pub content: String,
+    #[schema(example = "42")]
+    pub example: String,
+}
+
+#[derive(Debug, Deserialize, PartialEq, Serialize, ToSchema)]
+pub struct TranslationString {
+    #[schema(example = "sv")]
+    pub language: String,
+    #[schema(example = "Hej!")]
+    pub translation: String,
+    pub placeholder_offset: Vec<usize>,
+}
diff --git a/server/src/main.rs b/server/src/main.rs
index 973febe..66faec3 100644
--- a/server/src/main.rs
+++ b/server/src/main.rs
@@ -26,6 +26,7 @@ mod auth;
 mod authorized_keys;
 mod db_utils;
 mod git_root;
+mod trans;
 
 use auth::AuthApiAddon;
 
diff --git a/server/src/tests.rs b/server/src/tests.rs
index cc71bf3..5f956fb 100644
--- a/server/src/tests.rs
+++ b/server/src/tests.rs
@@ -1,3 +1,4 @@
+use pretty_assertions::assert_eq;
 use rocket::figment::util::map;
 use rocket::figment::value::{Map, Value};
 use rocket::http::{ContentType, Header, Status};
@@ -5,11 +6,13 @@ use rocket::local::asynchronous::{Client, LocalRequest};
 use sqlx::mysql::{MySql, MySqlConnectOptions, MySqlPoolOptions};
 use sqlx::{Acquire, Executor, Pool};
 use std::fmt::Display;
+use std::path::PathBuf;
 use std::sync::OnceLock;
 use stdext::function_name;
 use testdir::testdir;
 
 use crate::api_model;
+use crate::trans;
 
 struct RealIP(&'static str);
 
@@ -966,3 +969,197 @@ async fn test_user_keys_del() {
     assert_eq!(user_keys.more, false);
     assert_eq!(user_keys.keys.len(), 0);
 }
+
+#[tokio::test]
+async fn test_collect_strings() {
+    let base = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("common/src/testdata/grit");
+    let mut strings = trans::collect_strings(base, vec!["base.grd".to_string()])
+        .await
+        .unwrap();
+
+    // Sort translations before comparison, they come unsorted from collect_strings.
+    for string in &mut strings {
+        string
+            .translations
+            .sort_by(|a, b| a.language.cmp(&b.language));
+    }
+
+    assert_eq!(
+        strings,
+        vec![
+            api_model::LocalizationString {
+                id: "IDS_EXTRA".to_string(),
+                // TODO: Should be extra.grpd
+                file: "base.grd".to_string(),
+                description: "Extra title".to_string(),
+                meaning: "".to_string(),
+                source: "Extra title".to_string(),
+                placeholders: vec![],
+                placeholder_offset: vec![],
+                translation_id: 3567801202192813433,
+                translations: vec![],
+            },
+            api_model::LocalizationString {
+                id: "IDS_BOOKMARKS_FRAGMENT_TITLE".to_string(),
+                file: "base.grd".to_string(),
+                description: "Title which is shown on the main bookmarks view.".to_string(),
+                meaning: "".to_string(),
+                source: "Bookmarks".to_string(),
+                placeholders: vec![],
+                placeholder_offset: vec![],
+                translation_id: 8820817407110198400,
+                translations: vec![
+                    api_model::TranslationString {
+                        language: "en-gb".to_string(),
+                        translation: "Bookmarks".to_string(),
+                        placeholder_offset: vec![],
+                    },
+                    api_model::TranslationString {
+                        language: "my".to_string(),
+                        translation: "ဝက်ဘ်လိပ်စာ မှတ်ထားမှုများ".to_string(),
+                        placeholder_offset: vec![],
+                    },
+                    api_model::TranslationString {
+                        language: "my-zg".to_string(),
+                        translation: "ဝက္ဘ္လိပ္စာ မွတ္ထားမွုမ်ား".to_string(),
+                        placeholder_offset: vec![],
+                    },
+                    api_model::TranslationString {
+                        language: "sv".to_string(),
+                        translation: "Bokmärken".to_string(),
+                        placeholder_offset: vec![],
+                    },
+                ],
+            },
+            api_model::LocalizationString {
+                id: "IDS_GENERIC_WELCOME".to_string(),
+                file: "base.grd".to_string(),
+                description: "Generic welcome string.".to_string(),
+                meaning: "".to_string(),
+                source: "Welcome to ".to_string(),
+                placeholders: vec![
+                    api_model::LocalizationPlaceholder {
+                        id: "STRING".to_string(),
+                        content: "%1$s".to_string(),
+                        example: "Opera".to_string(),
+                    },
+                ],
+                placeholder_offset: vec![11],
+                translation_id: 8443102241046796905,
+                translations: vec![
+                    api_model::TranslationString {
+                        language: "en-gb".to_string(),
+                        translation: "Welcome to ".to_string(),
+                        placeholder_offset: vec![11],
+                    },
+                    api_model::TranslationString {
+                        language: "my".to_string(),
+                        translation: " မှ ကြိုဆိုပါသည်".to_string(),
+                        placeholder_offset: vec![0],
+                    },
+                    api_model::TranslationString {
+                        language: "my-zg".to_string(),
+                        translation: " မွ ႀကိဳဆိုပါသည္".to_string(),
+                        placeholder_offset: vec![0],
+                    },
+                    api_model::TranslationString {
+                        language: "sv".to_string(),
+                        translation: "Välkommen till ".to_string(),
+                        placeholder_offset: vec![16],
+                    },
+                ],
+            },
+            api_model::LocalizationString {
+                id: "IDS_START_TERMS".to_string(),
+                file: "base.grd".to_string(),
+                description: "First startup information about the license and privacy terms.".to_string(),
+                meaning: "".to_string(),
+                source: "By using this application you are agreeing to Opera's Terms of Service. Also, you can learn how Opera handles and protects your data in our Privacy Statement.".to_string(),
+                placeholders: vec![
+                    api_model::LocalizationPlaceholder {
+                        id: "TOS_BEGIN".to_string(),
+                        content: "<tos>".to_string(),
+                        example: "".to_string(),
+                    },
+                    api_model::LocalizationPlaceholder {
+                        id: "TOS_END".to_string(),
+                        content: "</tos>".to_string(),
+                        example: "".to_string(),
+                    },
+                    api_model::LocalizationPlaceholder {
+                        id: "PRIVACY_BEGIN".to_string(),
+                        content: "<privacy>".to_string(),
+                        example: "".to_string(),
+                    },
+                    api_model::LocalizationPlaceholder {
+                        id: "PRIVACY_END".to_string(),
+                        content: "</privacy>".to_string(),
+                        example: "".to_string(),
+                    },
+                ],
+                placeholder_offset: vec![54, 70, 140, 157],
+                translation_id: 2466140279568640908,
+                translations: vec![
+                    api_model::TranslationString {
+                        language: "en-gb".to_string(),
+                        translation: "By using this application you are agreeing to Opera's Terms of Service. Also, you can learn how Opera handles and protects your data in our Privacy Statement.".to_string(),
+                        placeholder_offset: vec![54, 70, 140, 157],
+                    },
+                    api_model::TranslationString {
+                        language: "my".to_string(),
+                        translation: "ဤအပလီကေးရှင်းကို အသုံးပြုခြင်းဖြင့် သင်သည် Opera ၏ ဝန်ဆောင်မှုစည်းမျဉ်းများ ကို သဘောတူရာ ရောက်ပါသည်။ ထို့အပြင် ကျွန်ုပ်တို့၏ကိုယ်ရေးလုံခြုံမှု ထုတ်ပြန်ချက် ထဲတွင် သင့်ဒေတာများကို Opera ၏ ကိုင်တွယ်ပုံနှင့် ကာကွယ်ပုံတို့ကိုလည်း လေ့လာနိုင်သည်။".to_string(),
+                        placeholder_offset: vec![133, 205, 342, 433],
+                    },
+                    api_model::TranslationString {
+                        language: "my-zg".to_string(),
+                        translation: "ဤအပလီေကးရွင္းကို အသုံးျပဳျခင္းျဖင့္ သင္သည္ Opera ၏ ဝန္ေဆာင္မွုစည္းမ်ဥ္းမ်ား ကို သေဘာတူရာ ေရာက္ပါသည္။ ထို႔အျပင္ ကၽြန္ုပ္တို႔၏ကိုယ္ေရးလုံျခဳံမွု ထုတ္ျပန္ခ်က္ ထဲတြင္ သင့္ေဒတာမ်ားကို Opera ၏ ကိုင္တြယ္ပုံႏွင့္ ကာကြယ္ပုံတို႔ကိုလည္း ေလ့လာနိုင္သည္။".to_string(),
+                        placeholder_offset: vec![133, 205, 342, 433],
+                    },
+                    api_model::TranslationString {
+                        language: "sv".to_string(),
+                        translation: "I och med din användning av det här programmet samtycker du till Operas Licensvillkor. Du kan också läsa om hur Opera hanterar och skyddar dina data i vårt Sekretessmeddelande.".to_string(),
+                        placeholder_offset: vec![74, 87, 161, 180],
+                    },
+                ],
+            },
+            api_model::LocalizationString {
+                id: "IDS_BOOKMARKS_FOLDERS_DELETED".to_string(),
+                file: "base.grd".to_string(),
+                description: "Message which is shown when one or more folders have been deleted from the bookmark list.".to_string(),
+                meaning: "".to_string(),
+                source: "{BOOKMARKS, plural,\n one { folder deleted}\n few { folders deleted}\n many { folders deleted}\n other { folders deleted}}".to_string(),
+                placeholders: vec![
+                    api_model::LocalizationPlaceholder {
+                        id: "COUNT".to_string(),
+                        content: "%1$d".to_string(),
+                        example: "1".to_string(),
+                    },
+                    api_model::LocalizationPlaceholder {
+                        id: "COUNT".to_string(),
+                        content: "%1$d".to_string(),
+                        example: "15".to_string(),
+                    },
+                    api_model::LocalizationPlaceholder {
+                        id: "COUNT".to_string(),
+                        content: "%1$d".to_string(),
+                        example: "100".to_string(),
+                    },
+                    api_model::LocalizationPlaceholder {
+                        id: "COUNT".to_string(),
+                        content: "%1$d".to_string(),
+                        example: "42".to_string(),
+                    },
+                ],
+                placeholder_offset: vec![34, 65, 98, 132],
+                translation_id: 7770247413830876286,
+                translations: vec![
+                    api_model::TranslationString {
+                        language: "en-gb".to_string(),
+                        translation: "{BOOKMARKS, plural,\n one { folder deleted}\n few { folders deleted}\n many { folders deleted}\n other { folders deleted}}".to_string(),
+                        placeholder_offset: vec![35, 67, 101, 136],
+                    },
+                ],
+            },
+        ],
+    )
+}
diff --git a/server/src/trans.rs b/server/src/trans.rs
new file mode 100644
index 0000000..dcef078
--- /dev/null
+++ b/server/src/trans.rs
@@ -0,0 +1,223 @@
+//! Collects localizable strings and their translations from GRIT files.
+
+use std::collections::{HashMap, HashSet};
+use std::path::{Path, PathBuf};
+use tokio::task::JoinSet;
+
+use eyeballs_api::api_model;
+use eyeballs_common::grit;
+
+/// Spawns a [`grit::parse_xlf`] task for every file in `files` not yet recorded in `known`.
+///
+/// Conditional (`If`) entries are descended into recursively, and file paths are resolved
+/// relative to `path`.
+fn schedule_translations(
+    tasks: &mut JoinSet<anyhow::Result<grit::TranslationFile>>,
+    known: &mut HashSet<String>,
+    path: &Path,
+    files: &[grit::IfFile],
+) {
+    for file in files {
+        match file {
+            grit::IfFile::File(file) => {
+                if known.insert(file.path.to_string()) {
+                    tasks.spawn(grit::parse_xlf(path.join(file.path.as_str())));
+                }
+            }
+            grit::IfFile::If { expr: _, file } => {
+                schedule_translations(tasks, known, path, file);
+            }
+        }
+    }
+}
+
+/// Appends one [`api_model::LocalizationString`] to `strings` for every message in `messages`.
+///
+/// The text parts of a message are concatenated into its `source`, and each placeholder is
+/// recorded together with the byte offset in `source` at which it occurs. Conditional (`If`)
+/// groups are flattened recursively. `file` is stored as the originating file of each string.
+///
+/// # Panics
+///
+/// Panics if `messages` contains an `IfMessagePart::Part`.
+fn push_strings(
+    strings: &mut Vec<api_model::LocalizationString>,
+    file: &str,
+    messages: Vec<grit::IfMessagePart>,
+) {
+    for message in messages {
+        match message {
+            grit::IfMessagePart::Message(message) => {
+                let translation_id = grit::get_message_id(&message);
+
+                let mut source = String::new();
+                let mut placeholders = Vec::new();
+                let mut placeholder_offset = Vec::new();
+
+                for text in message.content {
+                    match text {
+                        grit::TextPlaceholder::Text(text) => source.push_str(text.as_str()),
+                        grit::TextPlaceholder::Placeholder {
+                            name,
+                            content,
+                            example,
+                        } => {
+                            placeholders.push(api_model::LocalizationPlaceholder {
+                                id: name,
+                                content,
+                                example: example.unwrap_or_default(),
+                            });
+                            // The placeholder sits where the text collected so far ends.
+                            placeholder_offset.push(source.len());
+                        }
+                    }
+                }
+
+                strings.push(api_model::LocalizationString {
+                    id: message.name,
+                    file: file.to_string(),
+                    description: message.desc,
+                    meaning: message.meaning.unwrap_or_default(),
+                    source,
+                    placeholders,
+                    placeholder_offset,
+                    translation_id,
+                    translations: Vec::new(),
+                });
+            }
+            grit::IfMessagePart::If { expr: _, message } => {
+                push_strings(strings, file, message);
+            }
+            grit::IfMessagePart::Part(_) => {
+                unreachable!("there should be no parts as we use parse_grit_with_parts");
+            }
+        }
+    }
+}
+
+/// Appends the translation `unit` for `language` to `string.translations`.
+///
+/// The text parts of the unit's target are concatenated, and the byte offset of each
+/// placeholder is stored at the index of the matching entry in `string.placeholders`.
+/// Placeholders are matched by name; repeated names map to successive entries. Entries
+/// without a match in the translation keep offset 0, and unknown names are ignored.
+fn push_translation(
+    string: &mut api_model::LocalizationString,
+    language: &str,
+    unit: grit::TranslationUnit,
+) {
+    let mut translation = String::new();
+    // Pre-filled with zeros: placeholders are not guaranteed to come in the same order as in
+    // the source, so offsets are written by index rather than pushed.
+    let mut placeholder_offset = vec![0; string.placeholders.len()];
+
+    // There can be multiple placeholders with the same name, so when doing name lookup,
+    // skip the previous hits.
+    let mut placeholder_last = HashMap::<String, usize>::new();
+
+    for text in unit.target {
+        match text {
+            grit::TextPlaceholder::Text(text) => translation.push_str(text.as_str()),
+            grit::TextPlaceholder::Placeholder { name, .. } => {
+                let start = placeholder_last.get(name.as_str()).map_or(0, |last| last + 1);
+                let found = string
+                    .placeholders
+                    .iter()
+                    .skip(start)
+                    .position(|placeholder| placeholder.id == name);
+                if let Some(found) = found {
+                    let index = start + found;
+                    placeholder_offset[index] = translation.len();
+                    placeholder_last.insert(name, index);
+                }
+            }
+        }
+    }
+
+    string.translations.push(api_model::TranslationString {
+        language: language.to_string(),
+        translation,
+        placeholder_offset,
+    });
+}
+
+/// Parses the given GRIT files and returns their messages together with their translations.
+///
+/// Every name in `grits` is resolved relative to `base` and parsed concurrently. The returned
+/// strings are grouped per file, in the order the names were given. The translations of a
+/// string are appended in the order their translation files finish parsing, so callers that
+/// need a stable order have to sort them.
+///
+/// # Errors
+///
+/// Returns an error if a parsing task panics or is cancelled, if a GRIT file or one of its
+/// translation files fails to parse, or if a GRIT path has no parent directory.
+pub async fn collect_strings(
+    base: impl AsRef<Path>,
+    grits: impl IntoIterator<Item = String>,
+) -> anyhow::Result<Vec<api_model::LocalizationString>> {
+    let mut grit_tasks = JoinSet::new();
+    for (order, grit_name) in grits.into_iter().enumerate() {
+        let grit_path = base.as_ref().join(grit_name.as_str());
+        grit_tasks.spawn(async move {
+            let parsed = grit::parse_grit_with_parts(grit_path.as_path()).await;
+            (order, grit_path, grit_name, parsed)
+        });
+    }
+
+    let mut parsed_grits =
+        Vec::<(usize, PathBuf, String, anyhow::Result<grit::Grit>)>::with_capacity(grit_tasks.len());
+    while let Some(res) = grit_tasks.join_next().await {
+        parsed_grits.push(res?);
+    }
+    // Tasks finish in arbitrary order; restore the order in which the names were given.
+    parsed_grits.sort_unstable_by_key(|(order, ..)| *order);
+
+    let mut strings = Vec::<api_model::LocalizationString>::new();
+    let mut translation_tasks = JoinSet::new();
+
+    for (_, grit_path, grit_name, maybe_grit) in parsed_grits {
+        let grit = maybe_grit?;
+        let grit_dir = grit_path.parent().ok_or_else(|| {
+            anyhow::anyhow!("{} has no parent directory", grit_path.display())
+        })?;
+
+        // Translation files are deduplicated per GRIT file only. The tasks are drained before
+        // the next GRIT file is handled, so a file skipped because an earlier GRIT file had
+        // already parsed it would leave this file's strings without translations.
+        let mut known_translations = HashSet::<String>::new();
+        schedule_translations(
+            &mut translation_tasks,
+            &mut known_translations,
+            grit_dir,
+            &grit.translations.file,
+        );
+
+        let first_index = strings.len();
+        push_strings(&mut strings, &grit_name, grit.release.messages.messages);
+
+        // Maps the message id to the index in `strings`, for the strings of this GRIT file.
+        let id_to_string: HashMap<i64, usize> = strings
+            .iter()
+            .enumerate()
+            .skip(first_index)
+            .map(|(index, string)| (string.translation_id, index))
+            .collect();
+
+        while let Some(res) = translation_tasks.join_next().await {
+            // Outer error: the task panicked or was cancelled; inner error: parsing failed.
+            let translation_file = res??;
+            for unit in translation_file.units {
+                if let Some(&index) = id_to_string.get(&unit.id) {
+                    push_translation(
+                        &mut strings[index],
+                        &translation_file.target_language,
+                        unit,
+                    );
+                }
+            }
+        }
+    }
+
+    Ok(strings)
+}
