diff options
author | Jay Berkenbilt <ejb@ql.org> | 2022-02-22 14:04:11 +0100 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2022-02-22 14:10:05 +0100 |
commit | f7ac5915909c7197acf84265f8d8ad41b95a36a8 (patch) | |
tree | dcd2ef6a564ee70c22c64e305f0dab274a19258a /libqpdf/QPDF_String.cc | |
parent | 07a2bb332d5e352fb6a339f6f44268a41a14a0a2 (diff) | |
download | qpdf-f7ac5915909c7197acf84265f8d8ad41b95a36a8.tar.zst |
Recognize explicit UTF-8 strings (fixes #654)
Diffstat (limited to 'libqpdf/QPDF_String.cc')
-rw-r--r-- | libqpdf/QPDF_String.cc | 9 |
1 file changed, 9 insertions, 0 deletions
```diff
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
index f0153b1c..931ccd61 100644
--- a/libqpdf/QPDF_String.cc
+++ b/libqpdf/QPDF_String.cc
@@ -183,6 +183,15 @@ QPDF_String::getUTF8Val() const
     {
         return QUtil::utf16_to_utf8(this->val);
     }
+    else if ((val.length() >= 3) &&
+             (val[0] == '\xEF') &&
+             (val[1] == '\xBB') &&
+             (val[2] == '\xBF'))
+    {
+        // PDF 2.0 allows UTF-8 strings when explicitly prefixed with
+        // the above bytes, which is just UTF-8 encoding of U+FEFF.
+        return this->val.substr(3);
+    }
     else
     {
         return QUtil::pdf_doc_to_utf8(this->val);
```