aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/QPDF_String.cc
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2022-02-22 14:04:11 +0100
committer Jay Berkenbilt <ejb@ql.org> 2022-02-22 14:10:05 +0100
commit f7ac5915909c7197acf84265f8d8ad41b95a36a8 (patch)
tree dcd2ef6a564ee70c22c64e305f0dab274a19258a /libqpdf/QPDF_String.cc
parent 07a2bb332d5e352fb6a339f6f44268a41a14a0a2 (diff)
download qpdf-f7ac5915909c7197acf84265f8d8ad41b95a36a8.tar.zst
Recognize explicit UTF-8 strings (fixes #654)
Diffstat (limited to 'libqpdf/QPDF_String.cc')
-rw-r--r-- libqpdf/QPDF_String.cc 9
1 files changed, 9 insertions, 0 deletions
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
index f0153b1c..931ccd61 100644
--- a/libqpdf/QPDF_String.cc
+++ b/libqpdf/QPDF_String.cc
@@ -183,6 +183,15 @@ QPDF_String::getUTF8Val() const
{
return QUtil::utf16_to_utf8(this->val);
}
+ else if ((val.length() >= 3) &&
+ (val[0] == '\xEF') &&
+ (val[1] == '\xBB') &&
+ (val[2] == '\xBF'))
+ {
+ // PDF 2.0 allows UTF-8 strings when explicitly prefixed with
+ // the above bytes, which is just UTF-8 encoding of U+FEFF.
+ return this->val.substr(3);
+ }
else
{
return QUtil::pdf_doc_to_utf8(this->val);