aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2022-02-16 01:22:35 +0100
committer Jay Berkenbilt <ejb@ql.org> 2022-02-16 01:22:35 +0100
commit e810fe678a5615e3a4bfa16543bfdbdad78cd273 (patch)
tree fe92db87bbbd806ade9b0818126787dbd9d3c2a2
parent e35abe2fb7ee8daac5841178429fc116f0b60597 (diff)
download qpdf-e810fe678a5615e3a4bfa16543bfdbdad78cd273.tar.zst
Fix asymmetry between newUnicodeString and getUTF8Value
-rw-r--r-- ChangeLog 5
-rw-r--r-- libqpdf/QPDF_String.cc 3
-rw-r--r-- qpdf/test_driver.cc 26
3 files changed, 30 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 08cb1b16..02b80264 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2022-02-15 Jay Berkenbilt <ejb@ql.org>
+ * Fix asymmetrical logic between
+ QPDFObjectHandle::newUnicodeString() and
+ QPDFObjectHandle::getUTF8Value(). The asymmetrical logic didn't
+ matter before fixing the PDF Doc transcoding bugs.
+
* When analyzing PDF strings, recognize UTF-16LE as UTF-16. The
PDF spec only allows UTF-16BE, but most readers seem to allow
both. Fixes #649.
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
index 0fcec2d3..f0153b1c 100644
--- a/libqpdf/QPDF_String.cc
+++ b/libqpdf/QPDF_String.cc
@@ -32,8 +32,7 @@ QPDF_String*
QPDF_String::new_utf16(std::string const& utf8_val)
{
std::string result;
- if (! (QUtil::utf8_to_ascii(utf8_val, result, '?') ||
- QUtil::utf8_to_pdf_doc(utf8_val, result, '?')))
+ if (! QUtil::utf8_to_pdf_doc(utf8_val, result, '?'))
{
result = QUtil::utf8_to_utf16(utf8_val);
}
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index dcae002f..e3fc7007 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -3325,12 +3325,34 @@ static void test_85(QPDF& pdf, char const* arg2)
assert(s == "/Test");
}
+static void test_86(QPDF& pdf, char const* arg2)
+{
+ // Test symmetry between newUnicodeString and getUTF8Value for
+ // strings that can't be encoded as PDFDoc but don't contain any
+ // high code points. Neither pdf nor arg2 is used by this test.
+
+ std::string utf8_val("\x1f"); // input under test: the single byte 0x1f
+ std::string utf16_val("\xfe\xff\x00\x1f", 4); // bytes FE FF 00 1F; explicit length because the literal contains an embedded NUL
+ std::string result;
+ assert(QUtil::utf8_to_ascii(utf8_val, result, '?')); // the ASCII conversion reports success for this input...
+ assert(result == "\x1f"); // ...and leaves the byte unchanged
+ assert(! QUtil::utf8_to_pdf_doc(utf8_val, result, '?')); // the PDFDoc conversion reports failure for the same input...
+ assert(result == "?"); // ...and result holds the '?' substitute character that was passed in
+ assert(QUtil::utf8_to_utf16(utf8_val) == utf16_val); // the UTF-16 conversions round-trip in both directions
+ assert(QUtil::utf16_to_utf8(utf16_val) == utf8_val);
+ auto h = QPDFObjectHandle::newUnicodeString("\x1f");
+ assert(h.getStringValue() == std::string("\xfe\xff\x00\x1f", 4)); // the stored string is the UTF-16 form, not the ASCII byte
+ assert(h.getUTF8Value() == "\x1f"); // reading back as UTF-8 returns the original input
+}
+
void runtest(int n, char const* filename1, char const* arg2)
{
// Most tests here are crafted to work on specific files. Look at
// the test suite to see how the test is invoked to find the file
// that the test is supposed to operate on.
+ std::set<int> ignore_filename = {61, 81, 83, 84, 85, 86};
+
if (n == 0)
{
// Throw in some random test cases that don't fit anywhere
@@ -3391,7 +3413,7 @@ void runtest(int n, char const* filename1, char const* arg2)
pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
p, size);
}
- else if ((n == 61) || (n == 81) || (n == 83) || (n == 84) || (n == 85))
+ else if (ignore_filename.count(n))
{
// Ignore filename argument entirely
}
@@ -3439,7 +3461,7 @@ void runtest(int n, char const* filename1, char const* arg2)
{72, test_72}, {73, test_73}, {74, test_74}, {75, test_75},
{76, test_76}, {77, test_77}, {78, test_78}, {79, test_79},
{80, test_80}, {81, test_81}, {82, test_82}, {83, test_83},
- {84, test_84}, {85, test_85},
+ {84, test_84}, {85, test_85}, {86, test_86},
};
auto fn = test_functions.find(n);