diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2023-06-13 10:07:16 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2023-06-13 10:07:16 +0200 |
| commit | fc4547b412e28164af1bf8981234c6af959ccc0b (patch) | |
| tree | 061253e7a4f6abaca282223b36d10f0bed8cad23 /sax/src/utils.cc | |
WIP
Diffstat (limited to 'sax/src/utils.cc')
| -rw-r--r-- | sax/src/utils.cc | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/sax/src/utils.cc b/sax/src/utils.cc new file mode 100644 index 0000000..f0366d5 --- /dev/null +++ b/sax/src/utils.cc @@ -0,0 +1,70 @@ +#include "utils.hh" + +#include "decoder.hh" +#include "sax_decoder.hh" +#include "sax_decoder_factory.hh" + +namespace modxml { +namespace sax { + +namespace { + +std::string cleanup_encoding(std::string const& str) { + std::string ret; + ret.reserve(str.size()); + for (auto c : str) { + if (c >= 'A' && c <= 'Z') { + ret.push_back(c | 0x20); + } else if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { + ret.push_back(c); + } else if (c == '.' || c == '_' || c == '-') { + ret.push_back('-'); + } + } + return ret; +} + +} // namespace + +// Names inspired by: +// https://www.iana.org/assignments/character-sets/character-sets.xhtml +std::unique_ptr<Decoder> pick_decoder_for_encoding( + std::string const& encoding, DecoderFactory* factory) { + auto clean_enc = cleanup_encoding(encoding); + if (clean_enc == "utf-8" || clean_enc == "utf8") { + return create_utf8_decoder(); + } + if (clean_enc == "utf-16" || clean_enc == "utf16") { + return create_utf16_decoder(); + } + if (clean_enc == "utf-16be" || clean_enc == "utf16be") { + return create_utf16be_decoder(); + } + if (clean_enc == "utf-16le" || clean_enc == "utf16le") { + return create_utf16le_decoder(); + } + if (clean_enc == "utf-32" || clean_enc == "utf32") { + return create_utf32_decoder(); + } + if (clean_enc == "utf-32be" || clean_enc == "utf32be") { + return create_utf32be_decoder(); + } + if (clean_enc == "utf-32le" || clean_enc == "utf32le") { + return create_utf32le_decoder(); + } + if (clean_enc == "ascii" || clean_enc == "us-ascii" || + clean_enc == "usascii" || clean_enc == "iso-ir-6" || + clean_enc == "ansi-x3-4-1968" || clean_enc == "ansi-x3-4-1986" || + clean_enc == "iso-646-irv1991" || clean_enc == "iso646-us" || + clean_enc == "us" || clean_enc == "ibm367" || clean_enc == "cp367") { + return create_ascii_decoder(); + } + if (factory) { + return factory->create(encoding); + } + return nullptr; +} + +} // namespace sax + +} // namespace modxml |
