diff options
Diffstat (limited to 'libqpdf/QPDF_String.cc')
-rw-r--r-- | libqpdf/QPDF_String.cc | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc index 60a3e0df..eb31a808 100644 --- a/libqpdf/QPDF_String.cc +++ b/libqpdf/QPDF_String.cc @@ -64,6 +64,58 @@ QPDF_String::~QPDF_String() { } +QPDF_String* +QPDF_String::new_utf16(std::string const& utf8_val) +{ + std::string result = "\xfe\xff"; + size_t len = utf8_val.length(); + for (size_t i = 0; i < len; ++i) + { + unsigned char ch = static_cast<unsigned char>(utf8_val.at(i)); + if (ch < 128) + { + result += QUtil::toUTF16(ch); + } + else + { + size_t bytes_needed = 0; + unsigned bit_check = 0x40; + unsigned char to_clear = 0x80; + while (ch & bit_check) + { + ++bytes_needed; + to_clear |= bit_check; + bit_check >>= 1; + } + + if (((bytes_needed > 5) || (bytes_needed < 1)) || + ((i + bytes_needed) >= len)) + { + result += "\xff\xfd"; + } + else + { + unsigned long codepoint = (ch & ~to_clear); + while (bytes_needed > 0) + { + --bytes_needed; + ch = utf8_val.at(++i); + if ((ch & 0xc0) != 0x80) + { + --i; + codepoint = 0xfffd; + break; + } + codepoint <<= 6; + codepoint += (ch & 0x3f); + } + result += QUtil::toUTF16(codepoint); + } + } + } + return new QPDF_String(result); +} + std::string QPDF_String::unparse() { |