From 952a665a4ed51400b5925e7cd69f08f0aeb374fe Mon Sep 17 00:00:00 2001
From: Jay Berkenbilt
Date: Thu, 21 Jun 2018 14:03:45 -0400
Subject: Better support for creating Unicode strings

---
 libqpdf/QPDFObjectHandle.cc | 20 +++++++++++++++++
 libqpdf/QPDF_String.cc      | 52 +++++++++++++++++++++++++++++++++++++++++++++
 libqpdf/qpdf/QPDF_String.hh |  1 +
 3 files changed, 73 insertions(+)
(limited to 'libqpdf')

diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 5c111cc8..da609cc2 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -1221,6 +1221,20 @@ QPDFObjectHandle::unparseResolved()
     return this->m->obj->unparse();
 }
 
+std::string
+QPDFObjectHandle::unparseBinary()
+{
+    if (this->isString())
+    {
+        return dynamic_cast<QPDF_String*>(
+            this->m->obj.getPointer())->unparse(true);
+    }
+    else
+    {
+        return unparse();
+    }
+}
+
 QPDFObjectHandle
 QPDFObjectHandle::parse(std::string const& object_str,
                         std::string const& object_description)
@@ -1845,6 +1859,12 @@ QPDFObjectHandle::newString(std::string const& str)
     return QPDFObjectHandle(new QPDF_String(str));
 }
 
+QPDFObjectHandle
+QPDFObjectHandle::newUnicodeString(std::string const& utf8_str)
+{
+    return QPDFObjectHandle(QPDF_String::new_utf16(utf8_str));
+}
+
 QPDFObjectHandle
 QPDFObjectHandle::newOperator(std::string const& value)
 {
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
index 60a3e0df..eb31a808 100644
--- a/libqpdf/QPDF_String.cc
+++ b/libqpdf/QPDF_String.cc
@@ -64,6 +64,58 @@ QPDF_String::~QPDF_String()
 {
 }
 
+QPDF_String*
+QPDF_String::new_utf16(std::string const& utf8_val)
+{
+    std::string result = "\xfe\xff";
+    size_t len = utf8_val.length();
+    for (size_t i = 0; i < len; ++i)
+    {
+        unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
+        if (ch < 128)
+        {
+            result += QUtil::toUTF16(ch);
+        }
+        else
+        {
+            size_t bytes_needed = 0;
+            unsigned bit_check = 0x40;
+            unsigned char to_clear = 0x80;
+            while (ch & bit_check)
+            {
+                ++bytes_needed;
+                to_clear |= bit_check;
+                bit_check >>= 1;
+            }
+
+            if (((bytes_needed > 5) || (bytes_needed < 1)) ||
+                ((i + bytes_needed) >= len))
+            {
+                result += "\xff\xfd";
+            }
+            else
+            {
+                unsigned long codepoint = (ch & ~to_clear);
+                while (bytes_needed > 0)
+                {
+                    --bytes_needed;
+                    ch = utf8_val.at(++i);
+                    if ((ch & 0xc0) != 0x80)
+                    {
+                        --i;
+                        codepoint = 0xfffd;
+                        break;
+                    }
+                    codepoint <<= 6;
+                    codepoint += (ch & 0x3f);
+                }
+                result += QUtil::toUTF16(codepoint);
+            }
+        }
+    }
+    return new QPDF_String(result);
+}
+
 std::string
 QPDF_String::unparse()
 {
diff --git a/libqpdf/qpdf/QPDF_String.hh b/libqpdf/qpdf/QPDF_String.hh
index abf8291a..b4858c49 100644
--- a/libqpdf/qpdf/QPDF_String.hh
+++ b/libqpdf/qpdf/QPDF_String.hh
@@ -9,6 +9,7 @@ class QPDF_String: public QPDFObject
 {
   public:
     QPDF_String(std::string const& val);
+    static QPDF_String* new_utf16(std::string const& utf8_val);
     virtual ~QPDF_String();
     virtual std::string unparse();
     virtual QPDFObject::object_type_e getTypeCode() const;
-- 
cgit v1.2.3-54-g00ecf