From 7dd49c6293172b494c78918507242cdb55d35137 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Sun, 21 Jan 2024 12:31:30 +0100 Subject: WIP --- sax/src/utils.cc | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'sax/src/utils.cc') diff --git a/sax/src/utils.cc b/sax/src/utils.cc index f0366d5..e3a53b1 100644 --- a/sax/src/utils.cc +++ b/sax/src/utils.cc @@ -9,7 +9,7 @@ namespace sax { namespace { -std::string cleanup_encoding(std::string const& str) { +std::string cleanup_encoding(std::string_view str) { std::string ret; ret.reserve(str.size()); for (auto c : str) { @@ -29,29 +29,29 @@ std::string cleanup_encoding(std::string const& str) { // Names inspired by: // https://www.iana.org/assignments/character-sets/character-sets.xhtml std::unique_ptr pick_decoder_for_encoding( - std::string const& encoding, DecoderFactory* factory) { + std::string_view encoding, DecoderFactory* factory) { auto clean_enc = cleanup_encoding(encoding); - if (clean_enc == "utf-8" || clean_enc == "utf8") { + if (clean_enc == "utf-8" || clean_enc == "utf8") return create_utf8_decoder(); - } - if (clean_enc == "utf-16" || clean_enc == "utf16") { + + if (clean_enc == "utf-16" || clean_enc == "utf16") return create_utf16_decoder(); - } - if (clean_enc == "utf-16be" || clean_enc == "utf16be") { + + if (clean_enc == "utf-16be" || clean_enc == "utf16be") return create_utf16be_decoder(); - } - if (clean_enc == "utf-16le" || clean_enc == "utf16le") { + + if (clean_enc == "utf-16le" || clean_enc == "utf16le") return create_utf16le_decoder(); - } - if (clean_enc == "utf-32" || clean_enc == "utf32") { + + if (clean_enc == "utf-32" || clean_enc == "utf32") return create_utf32_decoder(); - } - if (clean_enc == "utf-32be" || clean_enc == "utf32be") { + + if (clean_enc == "utf-32be" || clean_enc == "utf32be") return create_utf32be_decoder(); - } - if (clean_enc == "utf-32le" || clean_enc == "utf32le") { + + if (clean_enc == "utf-32le" || clean_enc == "utf32le") return create_utf32le_decoder(); - } + if (clean_enc == "ascii" || clean_enc == "us-ascii" || clean_enc == "usascii" || clean_enc == "iso-ir-6" || clean_enc == "ansi-x3-4-1968" || clean_enc == "ansi-x3-4-1986" || @@ -59,9 +59,10 @@ std::unique_ptr pick_decoder_for_encoding( clean_enc == "us" || clean_enc == "ibm367" || clean_enc == "cp367") { return create_ascii_decoder(); } - if (factory) { + + if (factory) return factory->create(encoding); - } + return nullptr; } -- cgit v1.2.3-70-g09d2