summaryrefslogtreecommitdiff
path: root/server/common/src/grit.rs
diff options
context:
space:
mode:
author Joel Klinghed <the_jk@spawned.biz> 2025-06-07 00:38:22 +0200
committer Joel Klinghed <the_jk@spawned.biz> 2025-06-07 00:39:28 +0200
commit a60c0f58470f78545544d01d525eae481511abec (patch)
tree f06d8ace50fcf7f773db6e03d142081c3a9ac62b /server/common/src/grit.rs
parent ad893a4fb44244132d710d7f94fc99a7d83f1b87 (diff)
grit: Add basic parsing of xlf
This doesn't read the full XLIFF format; it only reads the id and the translation target string of each translation unit.
Diffstat (limited to 'server/common/src/grit.rs')
-rw-r--r--server/common/src/grit.rs377
1 files changed, 377 insertions, 0 deletions
diff --git a/server/common/src/grit.rs b/server/common/src/grit.rs
index a510724..c0351c4 100644
--- a/server/common/src/grit.rs
+++ b/server/common/src/grit.rs
@@ -1,6 +1,7 @@
#![allow(dead_code)]
use anyhow::Error;
+use std::collections::VecDeque;
use std::fs;
use std::io::{BufReader, Read};
use std::path::Path;
@@ -123,6 +124,20 @@ pub struct GritPart {
pub messages: Vec<IfMessage>,
}
+/// Parsed contents of an XLIFF translation document.
+///
+/// Only the target language and the translation units are kept; nothing else
+/// from the document is stored here.
+#[derive(Debug, PartialEq)]
+pub struct TranslationFile {
+ /// Value of the `target-language` attribute of the `<file>` element.
+ pub target_language: String,
+
+ /// Translation units, in the order they were read from the document.
+ pub units: Vec<TranslationUnit>,
+}
+
+/// A single `<trans-unit>` entry: its numeric id and its translated content.
+#[derive(Debug, PartialEq)]
+pub struct TranslationUnit {
+ /// The `id` attribute of the `<trans-unit>`, parsed as a signed 64-bit integer.
+ pub id: i64,
+
+ /// Content of the `<target>` child as a sequence of text and placeholder parts.
+ pub target: Vec<TextPlaceholder>,
+}
+
fn get_opt_attribute<'a>(attributes: &'a Vec<OwnedAttribute>, name: &str) -> Option<&'a str> {
for attribute in attributes {
if attribute.name.local_name == name {
@@ -1189,3 +1204,365 @@ pub fn get_message_id(message: &Message) -> i64 {
// Avoid returning negative ids
message_id & 0x7fffffffffffffff
}
+
+/// Parses the children of a `<target>` element into text and placeholder parts.
+///
+/// `reader` must be positioned directly after the `<target>` start tag. On
+/// success everything up to and including the matching end tag has been
+/// consumed.
+///
+/// Whitespace is stripped from the start of the leading text and from the end
+/// of a trailing text part. Text that is empty after stripping is not emitted,
+/// so indentation between `<target>` and a first `<ph>` does not produce an
+/// empty text part. `<ph>` children become placeholder parts.
+///
+/// # Errors
+///
+/// Fails if the XML reader reports an error, if a child element other than
+/// `<ph>` is encountered, or if a `<ph>` element cannot be parsed.
+fn parse_translation_unit_target_element<R: Read>(
+    _attributes: &Vec<OwnedAttribute>,
+    reader: &mut EventReader<R>,
+) -> anyhow::Result<Vec<TextPlaceholder>> {
+    let mut content = Vec::<TextPlaceholder>::new();
+
+    loop {
+        match reader.next()? {
+            XmlEvent::StartElement {
+                name,
+                attributes,
+                namespace: _,
+            } => match name.local_name.as_str() {
+                "ph" => {
+                    content.push(parse_translation_placeholder_element(&attributes, reader)?);
+                }
+                _ => {
+                    return Err(Error::msg(format!(
+                        "Unexpected {0} in target",
+                        name.local_name
+                    )));
+                }
+            },
+            XmlEvent::EndElement { name } => {
+                // Every child start tag is either fully consumed by a
+                // sub-parser or rejected above, so this must be our own end tag.
+                assert!(name.local_name == "target");
+                break;
+            }
+            XmlEvent::Characters(data) => {
+                if content.is_empty() {
+                    // Nothing has been emitted yet, so this is still leading
+                    // text: strip it and drop it entirely if nothing is left.
+                    let trimmed = data.trim_start();
+                    if !trimmed.is_empty() {
+                        content.push(TextPlaceholder::Text(trimmed.to_string()));
+                    }
+                } else {
+                    content.push(TextPlaceholder::Text(data));
+                }
+            }
+            XmlEvent::EndDocument => {
+                return Err(Error::msg("Unexpected end of document in target"));
+            }
+            XmlEvent::StartDocument { .. }
+            | XmlEvent::ProcessingInstruction { .. }
+            | XmlEvent::CData(_)
+            | XmlEvent::Comment(_)
+            | XmlEvent::Whitespace(_) => (),
+        }
+    }
+
+    // Strip trailing whitespace from a final text part, dropping the part if
+    // that leaves it empty. A trailing placeholder is left untouched.
+    if let Some(TextPlaceholder::Text(data)) = content.last_mut() {
+        data.truncate(data.trim_end().len());
+        if data.is_empty() {
+            content.pop();
+        }
+    }
+
+    Ok(content)
+}
+
+/// Reads a `<ph>` element and turns it into a placeholder part.
+///
+/// The placeholder is named after the element's `id` attribute; its content is
+/// left empty and it carries no example. Text inside the element is ignored.
+/// The reader is consumed up to and including the `</ph>` end tag.
+///
+/// # Errors
+///
+/// Fails if the `id` attribute cannot be obtained, if the XML reader reports an
+/// error, or if the element contains a child element.
+fn parse_translation_placeholder_element<R: Read>(
+    attributes: &Vec<OwnedAttribute>,
+    reader: &mut EventReader<R>,
+) -> anyhow::Result<TextPlaceholder> {
+    let placeholder_name = get_attribute(attributes, "id")?.to_string();
+
+    loop {
+        match reader.next()? {
+            XmlEvent::EndElement { name } => {
+                assert!(name.local_name == "ph");
+                return Ok(TextPlaceholder::Placeholder {
+                    name: placeholder_name,
+                    content: String::new(),
+                    example: None,
+                });
+            }
+            XmlEvent::StartElement { name, .. } => {
+                let message = format!("Unexpected {0} in ph", name.local_name);
+                return Err(Error::msg(message));
+            }
+            XmlEvent::EndDocument => panic!("Unexpected EOD"),
+            // Everything else carries nothing a placeholder needs.
+            XmlEvent::Characters(_)
+            | XmlEvent::StartDocument { .. }
+            | XmlEvent::ProcessingInstruction { .. }
+            | XmlEvent::CData(_)
+            | XmlEvent::Comment(_)
+            | XmlEvent::Whitespace(_) => {}
+        }
+    }
+}
+
+/// Parses a `<trans-unit>` element into a [`TranslationUnit`].
+///
+/// The unit id comes from the element's `id` attribute and must parse as an
+/// `i64`. Only the `<target>` child is interpreted; every other child element
+/// is skipped with `reader.skip()`. The reader is consumed up to and including
+/// the `</trans-unit>` end tag.
+///
+/// # Errors
+///
+/// Fails if the `id` attribute cannot be obtained or is not a valid `i64`, if
+/// the XML reader reports an error, if there is more than one `<target>`, or if
+/// there is no `<target>` at all. The last case is reported as an error rather
+/// than a panic because it depends purely on the input file.
+fn parse_translation_unit_element<R: Read>(
+    attributes: &Vec<OwnedAttribute>,
+    reader: &mut EventReader<R>,
+) -> anyhow::Result<TranslationUnit> {
+    let id = get_attribute(attributes, "id")?.parse::<i64>()?;
+
+    let mut target: Option<Vec<TextPlaceholder>> = None;
+
+    loop {
+        match reader.next()? {
+            XmlEvent::StartElement {
+                name,
+                attributes,
+                namespace: _,
+            } => match name.local_name.as_str() {
+                "target" => {
+                    if target.is_some() {
+                        return Err(Error::msg("Two target in trans-unit"));
+                    }
+                    target = Some(parse_translation_unit_target_element(&attributes, reader)?);
+                }
+                _ => {
+                    reader.skip()?;
+                }
+            },
+            XmlEvent::EndElement { name } => {
+                // Child elements are fully consumed above, so this is our own end tag.
+                assert!(name.local_name == "trans-unit");
+                break;
+            }
+            XmlEvent::EndDocument => {
+                return Err(Error::msg("Unexpected end of document in trans-unit"));
+            }
+            XmlEvent::Characters(_)
+            | XmlEvent::StartDocument { .. }
+            | XmlEvent::ProcessingInstruction { .. }
+            | XmlEvent::CData(_)
+            | XmlEvent::Comment(_)
+            | XmlEvent::Whitespace(_) => (),
+        }
+    }
+
+    let target = target
+        .ok_or_else(|| Error::msg(format!("No target in trans-unit {0}", id)))?;
+
+    Ok(TranslationUnit { id, target })
+}
+
+/// Collects the `<trans-unit>` children of a `<body>` element.
+///
+/// Units are returned in the order they are read. Child elements with any other
+/// name are skipped with `reader.skip()`. The reader is consumed up to and
+/// including the `</body>` end tag.
+///
+/// # Errors
+///
+/// Fails if the XML reader reports an error or if a `<trans-unit>` cannot be
+/// parsed.
+fn parse_translation_body_element<R: Read>(
+    _attributes: &Vec<OwnedAttribute>,
+    reader: &mut EventReader<R>,
+) -> anyhow::Result<Vec<TranslationUnit>> {
+    let mut collected: Vec<TranslationUnit> = Vec::new();
+
+    loop {
+        match reader.next()? {
+            XmlEvent::EndElement { name } => {
+                assert!(name.local_name == "body");
+                return Ok(collected);
+            }
+            XmlEvent::StartElement {
+                name, attributes, ..
+            } => {
+                if name.local_name == "trans-unit" {
+                    let unit = parse_translation_unit_element(&attributes, reader)?;
+                    collected.push(unit);
+                } else {
+                    reader.skip()?;
+                }
+            }
+            XmlEvent::EndDocument => panic!("Unexpected EOD"),
+            // Nothing else inside <body> is of interest.
+            XmlEvent::Characters(_)
+            | XmlEvent::StartDocument { .. }
+            | XmlEvent::ProcessingInstruction { .. }
+            | XmlEvent::CData(_)
+            | XmlEvent::Comment(_)
+            | XmlEvent::Whitespace(_) => {}
+        }
+    }
+}
+
+/// Parses a `<file>` element into a [`TranslationFile`].
+///
+/// The target language is taken from the element's `target-language`
+/// attribute and the units from its single `<body>` child. Other child
+/// elements are skipped with `reader.skip()`. The reader is consumed up to and
+/// including the `</file>` end tag.
+///
+/// # Errors
+///
+/// Fails if the `target-language` attribute cannot be obtained, if the XML
+/// reader reports an error, if there is more than one `<body>`, or if there is
+/// no `<body>` at all. The last case is reported as an error rather than a
+/// panic because it depends purely on the input file.
+fn parse_translation_file_element<R: Read>(
+    attributes: &Vec<OwnedAttribute>,
+    reader: &mut EventReader<R>,
+) -> anyhow::Result<TranslationFile> {
+    let target_language = get_attribute(attributes, "target-language")?;
+
+    let mut units: Option<Vec<TranslationUnit>> = None;
+
+    loop {
+        match reader.next()? {
+            XmlEvent::StartElement {
+                name,
+                attributes,
+                namespace: _,
+            } => match name.local_name.as_str() {
+                "body" => {
+                    if units.is_some() {
+                        return Err(Error::msg("More than one body in file"));
+                    }
+                    units = Some(parse_translation_body_element(&attributes, reader)?);
+                }
+                _ => {
+                    reader.skip()?;
+                }
+            },
+            XmlEvent::EndElement { name } => {
+                // Child elements are fully consumed above, so this is our own end tag.
+                assert!(name.local_name == "file");
+                break;
+            }
+            XmlEvent::EndDocument => {
+                return Err(Error::msg("Unexpected end of document in file"));
+            }
+            XmlEvent::Characters(_)
+            | XmlEvent::StartDocument { .. }
+            | XmlEvent::ProcessingInstruction { .. }
+            | XmlEvent::CData(_)
+            | XmlEvent::Comment(_)
+            | XmlEvent::Whitespace(_) => (),
+        }
+    }
+
+    let units = units.ok_or_else(|| Error::msg("No body in file"))?;
+
+    Ok(TranslationFile {
+        target_language: target_language.to_string(),
+        units,
+    })
+}
+
+/// Parses the `<xliff>` root element into a single [`TranslationFile`].
+///
+/// Every `<file>` child is parsed. When there are several, they must all share
+/// the same target language; their units are then concatenated in document
+/// order. Child elements with any other name are skipped with
+/// `reader.skip()`, so their end tags are never mistaken for `</xliff>`.
+/// The reader is consumed up to and including the `</xliff>` end tag.
+///
+/// # Errors
+///
+/// Fails if the XML reader reports an error, if a `<file>` cannot be parsed,
+/// if there is no `<file>` at all, or if the `<file>` elements have differing
+/// target languages.
+fn parse_xliff_element<R: Read>(
+    _attributes: &Vec<OwnedAttribute>,
+    reader: &mut EventReader<R>,
+) -> anyhow::Result<TranslationFile> {
+    let mut files = VecDeque::<TranslationFile>::new();
+
+    loop {
+        match reader.next()? {
+            XmlEvent::StartElement {
+                name,
+                attributes,
+                namespace: _,
+            } => match name.local_name.as_str() {
+                "file" => {
+                    files.push_back(parse_translation_file_element(&attributes, reader)?);
+                }
+                _ => {
+                    // Consume the whole unknown subtree; otherwise its end tag
+                    // would reach the assertion below.
+                    reader.skip()?;
+                }
+            },
+            XmlEvent::EndElement { name } => {
+                assert!(name.local_name == "xliff");
+                break;
+            }
+            XmlEvent::EndDocument => {
+                return Err(Error::msg("Unexpected end of document in xliff"));
+            }
+            XmlEvent::Characters(_)
+            | XmlEvent::StartDocument { .. }
+            | XmlEvent::ProcessingInstruction { .. }
+            | XmlEvent::CData(_)
+            | XmlEvent::Comment(_)
+            | XmlEvent::Whitespace(_) => (),
+        }
+    }
+
+    // The first file determines the language; the rest are merged into it.
+    let mut merged = files
+        .pop_front()
+        .ok_or_else(|| Error::msg("No file in xliff"))?;
+
+    for other in files {
+        if other.target_language != merged.target_language {
+            return Err(Error::msg(
+                "Multiple translations in the same file, not supported yet",
+            ));
+        }
+        merged.units.extend(other.units);
+    }
+
+    Ok(merged)
+}
+
+/// Reads and parses the XLIFF file at `path`.
+///
+/// The file is opened and parsed inside `spawn_blocking`, so the blocking file
+/// I/O does not run on the async caller's thread. Comments are ignored, and
+/// whitespace and CDATA are delivered to the element parsers as ordinary
+/// character data.
+///
+/// # Errors
+///
+/// Fails if the file cannot be opened, if the XML reader reports an error, if
+/// the document root is not `<xliff>`, or if the document content is rejected
+/// by the element parsers. A failure of the blocking task itself (for example
+/// a panic inside it) is also returned as an error instead of being re-raised
+/// as a panic in the caller.
+// NOTE(review): `.await?` assumes `spawn_blocking` is tokio's, whose join
+// error converts into `anyhow::Error` — confirm against the file's imports.
+pub async fn parse_xlf(path: impl AsRef<Path>) -> anyhow::Result<TranslationFile> {
+    let path = path.as_ref().to_path_buf();
+    spawn_blocking(move || {
+        let file = fs::File::open(path)?;
+        let reader = BufReader::new(file);
+        let mut ereader = ParserConfig::new()
+            .ignore_comments(true)
+            .whitespace_to_characters(true)
+            .cdata_to_characters(true)
+            .create_reader(reader);
+        let mut ret: Option<TranslationFile> = None;
+        loop {
+            match ereader.next()? {
+                XmlEvent::StartElement {
+                    name,
+                    attributes,
+                    namespace: _,
+                } => {
+                    if name.local_name == "xliff" {
+                        ret = Some(parse_xliff_element(&attributes, &mut ereader)?);
+                    } else {
+                        return Err(Error::msg("Document root != xliff"));
+                    }
+                }
+                XmlEvent::EndDocument => break,
+                XmlEvent::EndElement { name } => {
+                    // The root's end tag is consumed by `parse_xliff_element`,
+                    // so an end tag at this level means the input is broken.
+                    return Err(Error::msg(format!(
+                        "Unexpected end of element {0}",
+                        name.local_name
+                    )));
+                }
+                XmlEvent::StartDocument { .. }
+                | XmlEvent::Characters(_)
+                | XmlEvent::ProcessingInstruction { .. }
+                | XmlEvent::CData(_)
+                | XmlEvent::Comment(_)
+                | XmlEvent::Whitespace(_) => (),
+            }
+        }
+        ret.ok_or_else(|| Error::msg("No xliff element in document"))
+    })
+    .await?
+}