aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org>, 2018-06-21 19:05:48 +0200
committer: Jay Berkenbilt <ejb@ql.org>, 2018-06-21 21:57:13 +0200
commit: e44c395c51518bafbf8f8466ea5a0f4b1f2b2efe (patch)
tree: 95da31b1da0055ee2b605272f7079c7ea230584c
parent: 44674a3e58882ea95d6ee54fa3e16c553c0afb3e (diff)
download: qpdf-e44c395c51518bafbf8f8466ea5a0f4b1f2b2efe.tar.zst
QUtil::toUTF16
-rw-r--r--  include/qpdf/QUtil.hh  6
-rw-r--r--  libqpdf/QUtil.cc  35
-rw-r--r--  libtests/qtest/qutil/qutil.out  8
-rw-r--r--  libtests/qutil.cc  26
4 files changed, 75 insertions, 0 deletions
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index a6be68f8..a81b0a9e 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -140,6 +140,12 @@ namespace QUtil
QPDF_DLL
std::string toUTF8(unsigned long uval);
+ // Return a string containing the byte representation of the
+ // UTF-16 BE encoding for the Unicode value passed in.
+ // Unrepresentable values (0xD800-0xDFFF, > 0x10FFFF) become U+FFFD.
+ QPDF_DLL
+ std::string toUTF16(unsigned long uval);
+
// If secure random number generation is supported on your
// platform and qpdf was not compiled with insecure random number
// generation, this returns a cryptographically secure random
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 39118854..e2bc0bac 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -506,6 +506,41 @@ QUtil::toUTF8(unsigned long uval)
return result;
}
+std::string
+QUtil::toUTF16(unsigned long uval) // Encode one code point as UTF-16 BE bytes (2 or 4 bytes); unencodable input yields the bytes of U+FFFD.
+{
+ std::string result;
+ if ((uval >= 0xd800) && (uval <= 0xdfff)) // this range is what the surrogate-pair branch below emits, so it cannot stand for a code point itself
+ {
+ result = "\xff\xfd"; // U+FFFD, high byte first
+ }
+ else if (uval <= 0xffff) // fits in a single 16-bit unit
+ {
+ char out[2];
+ out[0] = (uval & 0xff00) >> 8; // high byte first (big-endian)
+ out[1] = (uval & 0xff); // then the low byte
+ result = std::string(out, 2); // explicit length so a 0x00 byte is kept (e.g. 0x41 -> 00 41)
+ }
+ else if (uval <= 0x10ffff) // needs two 16-bit units (a surrogate pair)
+ {
+ char out[4];
+ uval -= 0x10000; // what remains is at most 0xfffff, i.e. 20 bits
+ unsigned short high = ((uval & 0xffc00) >> 10) + 0xd800; // upper 10 bits -> first unit
+ unsigned short low = (uval & 0x3ff) + 0xdc00; // lower 10 bits -> second unit
+ out[0] = (high & 0xff00) >> 8; // each unit is written high byte first
+ out[1] = (high & 0xff);
+ out[2] = (low & 0xff00) >> 8;
+ out[3] = (low & 0xff);
+ result = std::string(out, 4); // explicit length: bytes may be 0x00
+ }
+ else // anything above 0x10ffff
+ {
+ result = "\xff\xfd"; // same U+FFFD replacement as the surrogate-range case
+ }
+
+ return result;
+}
+
// Random data support
long
diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out
index c0ca1ce0..8223bf5b 100644
--- a/libtests/qtest/qutil/qutil.out
+++ b/libtests/qtest/qutil/qutil.out
@@ -39,6 +39,14 @@ HAGOOGAMAGOOGLE: 0
0x16059 -> f0 96 81 99
0x7fffffff -> fd bf bf bf bf bf
0x80000000: bounds error in QUtil::toUTF8
+---- utf16
+0x41 -> 00 41
+0xf7 -> 00 f7
+0x3c0 -> 03 c0
+0x16059 -> d8 18 dc 59
+0xdead -> ff fd
+0x7fffffff -> ff fd
+0x80000000 -> ff fd
---- whoami
quack1
quack2
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index ddb5815d..a7479fb5 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -193,6 +193,30 @@ void to_utf8_test()
}
}
+static void print_utf16(unsigned long val) // Print "0x<val in hex> ->" followed by each byte of QUtil::toUTF16(val) in hex, then a newline.
+{
+ std::string result = QUtil::toUTF16(val);
+ std::cout << "0x" << QUtil::int_to_string_base(val, 16) << " ->";
+ for (std::string::iterator iter = result.begin();
+ iter != result.end(); ++iter)
+ {
+ std::cout << " " << QUtil::int_to_string_base( // NOTE(review): the trailing 2 looks like a minimum width (expected output shows "00 41") - confirm against int_to_string_base
+ static_cast<int>(static_cast<unsigned char>(*iter)), 16, 2); // go through unsigned char so bytes >= 0x80 stay in 0..255
+ }
+ std::cout << std::endl;
+}
+
+void to_utf16_test() // Print the UTF-16 encoding of one sample value per branch of QUtil::toUTF16; output is compared with qutil.out.
+{
+ print_utf16(0x41UL); // single 16-bit unit; expected 00 41
+ print_utf16(0xF7UL); // single 16-bit unit; expected 00 f7
+ print_utf16(0x3c0UL); // single 16-bit unit with a non-zero high byte; expected 03 c0
+ print_utf16(0x16059UL); // above 0xffff, so a surrogate pair; expected d8 18 dc 59
+ print_utf16(0xdeadUL); // inside 0xd800-0xdfff, so replaced; expected ff fd
+ print_utf16(0x7fffffffUL); // above 0x10ffff, so replaced; expected ff fd
+ print_utf16(0x80000000UL); // above 0x10ffff, so replaced; expected ff fd
+}
+
void print_whoami(char const* str)
{
PointerHolder<char> dup(true, QUtil::copy_string(str));
@@ -299,6 +323,8 @@ int main(int argc, char* argv[])
getenv_test();
std::cout << "---- utf8" << std::endl;
to_utf8_test();
+ std::cout << "---- utf16" << std::endl;
+ to_utf16_test();
std::cout << "---- whoami" << std::endl;
get_whoami_test();
std::cout << "---- file" << std::endl;