aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2018-02-18 00:47:57 +0100
committer Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:06:27 +0100
commit 4bb3046f0b139337a00e9182c9b47d1a3f8f8bb3 (patch)
tree 455bf56b35aeda95a9e4581f7e0c31cf7d07c877 /include
parent 2780a1871d2603e9b273580fb7978d277832c2fc (diff)
download qpdf-4bb3046f0b139337a00e9182c9b47d1a3f8f8bb3.tar.zst
Properly handle strings with PDF Doc Encoding (fixes #179)
The QPDF_String::getUTF8Val() method was not treating strings that weren't explicitly Unicode as PDF Doc Encoded. This only affects characters in the range 0x80 through 0xa0.
Diffstat (limited to 'include')
-rw-r--r-- include/qpdf/QPDFObjectHandle.hh 7
1 files changed, 7 insertions, 0 deletions
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index 53b219ce..a2f54a73 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -442,6 +442,13 @@ class QPDFObjectHandle
// Methods for string objects
QPDF_DLL
std::string getStringValue();
+ // If a string starts with the UTF-16 marker, it is converted from
+ // UTF-16 to UTF-8. Otherwise, it is treated as a string encoded
+ // with PDF Doc Encoding. PDF Doc Encoding is identical to
+ // ISO-8859-1 except in the range from 0200 through 0240 (octal;
+ // 0x80 through 0xa0), where there is a mapping of characters to
+ // Unicode. Earlier QPDF versions erroneously left characters in
+ // that range unmapped.
QPDF_DLL
std::string getUTF8Value();