From 952a665a4ed51400b5925e7cd69f08f0aeb374fe Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 21 Jun 2018 14:03:45 -0400 Subject: Better support for creating Unicode strings --- libqpdf/QPDF_String.cc | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'libqpdf/QPDF_String.cc') diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc index 60a3e0df..eb31a808 100644 --- a/libqpdf/QPDF_String.cc +++ b/libqpdf/QPDF_String.cc @@ -64,6 +64,58 @@ QPDF_String::~QPDF_String() { } +QPDF_String* +QPDF_String::new_utf16(std::string const& utf8_val) +{ + std::string result = "\xfe\xff"; + size_t len = utf8_val.length(); + for (size_t i = 0; i < len; ++i) + { + unsigned char ch = static_cast(utf8_val.at(i)); + if (ch < 128) + { + result += QUtil::toUTF16(ch); + } + else + { + size_t bytes_needed = 0; + unsigned bit_check = 0x40; + unsigned char to_clear = 0x80; + while (ch & bit_check) + { + ++bytes_needed; + to_clear |= bit_check; + bit_check >>= 1; + } + + if (((bytes_needed > 5) || (bytes_needed < 1)) || + ((i + bytes_needed) >= len)) + { + result += "\xff\xfd"; + } + else + { + unsigned long codepoint = (ch & ~to_clear); + while (bytes_needed > 0) + { + --bytes_needed; + ch = utf8_val.at(++i); + if ((ch & 0xc0) != 0x80) + { + --i; + codepoint = 0xfffd; + break; + } + codepoint <<= 6; + codepoint += (ch & 0x3f); + } + result += QUtil::toUTF16(codepoint); + } + } + } + return new QPDF_String(result); +} + std::string QPDF_String::unparse() { -- cgit v1.2.3-70-g09d2