about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf/QUtil.cc
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2019-01-05 19:00:18 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2019-01-06 04:59:27 +0100
commit: 089ce5902ec155ff3dce7bed7c12006a587d3db0 (patch)
tree: 8fdda222bc40b051ab811f621e8ce5b7661c6df5 /libqpdf/QUtil.cc
parent: ae18bfd142f2f9a19505971d4d197cb37ee86eee (diff)
download: qpdf-089ce5902ec155ff3dce7bed7c12006a587d3db0.tar.zst
Move utf8_to_utf16 into QUtil
Diffstat (limited to 'libqpdf/QUtil.cc')
-rw-r--r-- libqpdf/QUtil.cc 61
1 files changed, 61 insertions, 0 deletions
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 7c2d9bc9..ba4aea2c 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -893,6 +893,67 @@ QUtil::parse_numrange(char const* range, int max)
return result;
}
+// Target encodings understood by transcode_utf8. Only UTF-16 exists
+// at present.
+enum encoding_e { e_utf16 };
+
+// Transcode a UTF-8 byte string to UTF-16.
+//
+// utf8_val: input bytes, interpreted as UTF-8.
+// encoding: requested target encoding. e_utf16 is the only
+//           enumerator, so the value is not consulted yet.
+//
+// Returns a string that starts with the bytes 0xfe 0xff (the UTF-16
+// big-endian byte order mark) followed by the output of
+// QUtil::toUTF16 (defined elsewhere) for every decoded code point.
+//
+// Malformed input never throws; each problem yields U+FFFD instead:
+//  - a stray continuation byte or a lead byte announcing more than
+//    five continuation bytes,
+//  - a sequence truncated by the end of the input,
+//  - a lead byte followed by something that is not a continuation
+//    byte (that byte is then re-examined as the start of a new
+//    sequence),
+//  - an overlong encoding, i.e. a code point spelled with more bytes
+//    than necessary. RFC 3629 requires decoders to reject these.
+static
+std::string
+transcode_utf8(std::string const& utf8_val, encoding_e encoding)
+{
+    // Smallest code point that legitimately needs N continuation
+    // bytes, indexed by N. Anything smaller is an overlong form.
+    static unsigned long const min_codepoint[] = {
+        0x0UL, 0x80UL, 0x800UL, 0x10000UL, 0x200000UL, 0x4000000UL
+    };
+
+    std::string result = "\xfe\xff";
+    size_t len = utf8_val.length();
+    for (size_t i = 0; i < len; ++i)
+    {
+        unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
+        if (ch < 128)
+        {
+            // Plain ASCII maps straight through.
+            result += QUtil::toUTF16(ch);
+            continue;
+        }
+
+        // Each 1 bit following the top bit of the lead byte announces
+        // one continuation byte. to_clear accumulates those marker
+        // bits so they can be masked off the payload below.
+        size_t bytes_needed = 0;
+        unsigned bit_check = 0x40;
+        unsigned char to_clear = 0x80;
+        while (ch & bit_check)
+        {
+            ++bytes_needed;
+            to_clear |= bit_check;
+            bit_check >>= 1;
+        }
+
+        if (((bytes_needed > 5) || (bytes_needed < 1)) ||
+            ((i + bytes_needed) >= len))
+        {
+            // Not a usable lead byte, or the input ends before the
+            // sequence does.
+            result += "\xff\xfd";
+            continue;
+        }
+
+        size_t const continuation_count = bytes_needed;
+        unsigned long codepoint =
+            static_cast<unsigned long>(ch & ~to_clear);
+        bool well_formed = true;
+        while (bytes_needed > 0)
+        {
+            --bytes_needed;
+            ch = static_cast<unsigned char>(utf8_val.at(++i));
+            if ((ch & 0xc0) != 0x80)
+            {
+                // Not a continuation byte: step back so the outer
+                // loop looks at it again as a fresh lead byte.
+                --i;
+                well_formed = false;
+                break;
+            }
+            codepoint <<= 6;
+            codepoint += (ch & 0x3f);
+        }
+
+        if ((! well_formed) ||
+            (codepoint < min_codepoint[continuation_count]))
+        {
+            // Broken sequence or overlong encoding.
+            codepoint = 0xfffd;
+        }
+        result += QUtil::toUTF16(codepoint);
+    }
+    return result;
+}
+
+// Convert a UTF-8 string to UTF-16 by handing it to transcode_utf8
+// with e_utf16 as the target encoding, and return the result.
+std::string
+QUtil::utf8_to_utf16(std::string const& utf8)
+{
+    encoding_e const target = e_utf16;
+    return transcode_utf8(utf8, target);
+}
+
std::string
QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
{