From 4bb3046f0b139337a00e9182c9b47d1a3f8f8bb3 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 17 Feb 2018 18:47:57 -0500 Subject: Properly handle strings with PDF Doc Encoding (fixes #179) The QPDF_String::getUTF8Val() method was not treating strings that weren't explicitly Unicode as PDF Doc Encoded. This only affects characters in the range 0x80 through 0xa0. --- include/qpdf/QPDFObjectHandle.hh | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 53b219ce..a2f54a73 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -442,6 +442,13 @@ class QPDFObjectHandle // Methods for string objects QPDF_DLL std::string getStringValue(); + // If a string starts with the UTF-16 marker, it is converted from + // UTF-16 to UTF-8. Otherwise, it is treated as a string encoded + // with PDF Doc Encoding. PDF Doc Encoding is identical to + // ISO-8859-1 except in the range from 0200 through 0240, where + // there is a mapping of characters to Unicode. QPDF versions + // prior to version 8.0.0 erroneously left characters in that range + // unmapped. QPDF_DLL std::string getUTF8Value(); -- cgit v1.2.3-54-g00ecf