about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf/QUtil.cc
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2022-02-15 21:56:06 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2022-02-15 22:13:12 +0100
commit: a478cbb6dc0e630b919813ad0e7ae1a72510c69d (patch)
tree: d7106d522f0bf2691c16e76eead59f5707ab67c7 /libqpdf/QUtil.cc
parent: fbd3e56da787d18e7a8794580d0e95b7669d1bc4 (diff)
download: qpdf-a478cbb6dc0e630b919813ad0e7ae1a72510c69d.tar.zst
Silently/transparently recognize UTF-16LE as UTF-16 (fixes #649)
The PDF spec only allows UTF-16BE, but most readers seem to accept UTF-16LE as well, so now qpdf does too.
Diffstat (limited to 'libqpdf/QUtil.cc')
-rw-r--r-- libqpdf/QUtil.cc 14
1 files changed, 11 insertions, 3 deletions
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index f01746b6..d0802334 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -2400,7 +2400,8 @@ bool
QUtil::is_utf16(std::string const& val)
{
return ((val.length() >= 2) &&
- (val.at(0) == '\xfe') && (val.at(1) == '\xff'));
+ (((val.at(0) == '\xfe') && (val.at(1) == '\xff')) ||
+ ((val.at(0) == '\xff') && (val.at(1) == '\xfe'))));
}
std::string
@@ -2414,8 +2415,13 @@ QUtil::utf16_to_utf8(std::string const& val)
unsigned long codepoint = 0L;
size_t len = val.length();
size_t start = 0;
+ bool is_le = false;
if (is_utf16(val))
{
+ if (static_cast<unsigned char>(val.at(0)) == 0xff)
+ {
+ is_le = true;
+ }
start += 2;
}
// If the string has an odd number of bytes, the last byte is
@@ -2428,10 +2434,12 @@ QUtil::utf16_to_utf8(std::string const& val)
// codepoint not followed by a low codepoint will be
// discarded, and a low codepoint not preceded by a high
// codepoint will just get its low 10 bits output.
+ auto msb = is_le ? i+1 : i;
+ auto lsb = is_le ? i : i+1;
unsigned short bits =
QIntC::to_ushort(
- (static_cast<unsigned char>(val.at(i)) << 8) +
- static_cast<unsigned char>(val.at(i+1)));
+ (static_cast<unsigned char>(val.at(msb)) << 8) +
+ static_cast<unsigned char>(val.at(lsb)));
if ((bits & 0xFC00) == 0xD800)
{
codepoint = 0x10000U + ((bits & 0x3FFU) << 10U);