diff options
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/QPDF_String.cc | 57 | ||||
-rw-r--r-- | libqpdf/QUtil.cc | 61 |
2 files changed, 62 insertions, 56 deletions
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
index 633f1699..7cfb6bcc 100644
--- a/libqpdf/QPDF_String.cc
+++ b/libqpdf/QPDF_String.cc
@@ -64,65 +64,10 @@ QPDF_String::~QPDF_String()
 {
 }
 
-enum encoding_e { e_utf16 };
-
-static
-std::string
-transcode_utf8(std::string const& utf8_val, encoding_e encoding)
-{
-    std::string result = "\xfe\xff";
-    size_t len = utf8_val.length();
-    for (size_t i = 0; i < len; ++i)
-    {
-        unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
-        if (ch < 128)
-        {
-            result += QUtil::toUTF16(ch);
-        }
-        else
-        {
-            size_t bytes_needed = 0;
-            unsigned bit_check = 0x40;
-            unsigned char to_clear = 0x80;
-            while (ch & bit_check)
-            {
-                ++bytes_needed;
-                to_clear |= bit_check;
-                bit_check >>= 1;
-            }
-
-            if (((bytes_needed > 5) || (bytes_needed < 1)) ||
-                ((i + bytes_needed) >= len))
-            {
-                result += "\xff\xfd";
-            }
-            else
-            {
-                unsigned long codepoint = (ch & ~to_clear);
-                while (bytes_needed > 0)
-                {
-                    --bytes_needed;
-                    ch = utf8_val.at(++i);
-                    if ((ch & 0xc0) != 0x80)
-                    {
-                        --i;
-                        codepoint = 0xfffd;
-                        break;
-                    }
-                    codepoint <<= 6;
-                    codepoint += (ch & 0x3f);
-                }
-                result += QUtil::toUTF16(codepoint);
-            }
-        }
-    }
-    return result;
-}
-
 QPDF_String*
 QPDF_String::new_utf16(std::string const& utf8_val)
 {
-    return new QPDF_String(transcode_utf8(utf8_val, e_utf16));
+    return new QPDF_String(QUtil::utf8_to_utf16(utf8_val));
 }
 
 std::string
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 7c2d9bc9..ba4aea2c 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -893,6 +893,67 @@ QUtil::parse_numrange(char const* range, int max)
     return result;
 }
 
+enum encoding_e { e_utf16 };
+
+static
+std::string
+transcode_utf8(std::string const& utf8_val, encoding_e encoding)
+{
+    std::string result = "\xfe\xff";
+    size_t len = utf8_val.length();
+    for (size_t i = 0; i < len; ++i)
+    {
+        unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
+        if (ch < 128)
+        {
+            result += QUtil::toUTF16(ch);
+        }
+        else
+        {
+            size_t bytes_needed = 0;
+            unsigned bit_check = 0x40;
+            unsigned char to_clear = 0x80;
+            while (ch & bit_check)
+            {
+                ++bytes_needed;
+                to_clear |= bit_check;
+                bit_check >>= 1;
+            }
+
+            if (((bytes_needed > 5) || (bytes_needed < 1)) ||
+                ((i + bytes_needed) >= len))
+            {
+                result += "\xff\xfd";
+            }
+            else
+            {
+                unsigned long codepoint = (ch & ~to_clear);
+                while (bytes_needed > 0)
+                {
+                    --bytes_needed;
+                    ch = utf8_val.at(++i);
+                    if ((ch & 0xc0) != 0x80)
+                    {
+                        --i;
+                        codepoint = 0xfffd;
+                        break;
+                    }
+                    codepoint <<= 6;
+                    codepoint += (ch & 0x3f);
+                }
+                result += QUtil::toUTF16(codepoint);
+            }
+        }
+    }
+    return result;
+}
+
+std::string
+QUtil::utf8_to_utf16(std::string const& utf8)
+{
+    return transcode_utf8(utf8, e_utf16);
+}
+
 std::string
 QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
 {