Fix asymmetry between newUnicodeString and getUTF8Value

author: Jay Berkenbilt <ejb@ql.org> 2022-02-16 01:22:35 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2022-02-16 01:22:35 +0100
commit: e810fe678a5615e3a4bfa16543bfdbdad78cd273 (patch)
tree: fe92db87bbbd806ade9b0818126787dbd9d3c2a2
parent: e35abe2fb7ee8daac5841178429fc116f0b60597 (diff)
download: qpdf-e810fe678a5615e3a4bfa16543bfdbdad78cd273.tar.zst
3 files changed, 30 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 08cb1b16..02b80264 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2022-02-15  Jay Berkenbilt  <ejb@ql.org>
 
+	* Fix asymmetrical logic between
+	QPDFObjectHandle::newUnicodeString() and
+	QPDFObjectHandle::getUTF8Value(). The asymmetrical logic didn't
+	matter before fixing the PDF Doc transcoding bugs.
+
 	* When analyzing PDF strings, recognize UTF-16LE as UTF-16. The
 	PDF spec only allows UTF-16BE, but most readers seem to allow
 	both. Fixes #649.
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
index 0fcec2d3..f0153b1c 100644
--- a/libqpdf/QPDF_String.cc
+++ b/libqpdf/QPDF_String.cc
@@ -32,8 +32,7 @@ QPDF_String*
 QPDF_String::new_utf16(std::string const& utf8_val)
 {
     std::string result;
-    if (! (QUtil::utf8_to_ascii(utf8_val, result, '?') ||
-           QUtil::utf8_to_pdf_doc(utf8_val, result, '?')))
+    if (! QUtil::utf8_to_pdf_doc(utf8_val, result, '?'))
     {
         result = QUtil::utf8_to_utf16(utf8_val);
     }
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index dcae002f..e3fc7007 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -3325,12 +3325,34 @@ static void test_85(QPDF& pdf, char const* arg2)
     assert(s == "/Test");
 }
 
+static void test_86(QPDF& pdf, char const* arg2)
+{
+    // Check that newUnicodeString and getUTF8Value round-trip a string
+    // that has no high code points yet can't be encoded as PDFDoc; the
+    // asserts below pin each conversion step for the single byte 0x1f.
+
+    std::string utf8_val("\x1f"); // passes as ASCII, rejected as PDFDoc
+    std::string utf16_val("\xfe\xff\x00\x1f", 4); // length given: has NUL
+    std::string result;
+    assert(QUtil::utf8_to_ascii(utf8_val, result, '?'));
+    assert(result == "\x1f");
+    assert(! QUtil::utf8_to_pdf_doc(utf8_val, result, '?'));
+    assert(result == "?"); // unencodable byte became the '?' given above
+    assert(QUtil::utf8_to_utf16(utf8_val) == utf16_val);
+    assert(QUtil::utf16_to_utf8(utf16_val) == utf8_val);
+    auto h = QPDFObjectHandle::newUnicodeString("\x1f");
+    assert(h.getStringValue() == std::string("\xfe\xff\x00\x1f", 4));
+    assert(h.getUTF8Value() == "\x1f"); // decodes back to the input
+}
+
 void runtest(int n, char const* filename1, char const* arg2)
 {
     // Most tests here are crafted to work on specific files.  Look at
     // the test suite to see how the test is invoked to find the file
     // that the test is supposed to operate on.
 
+    std::set<int> ignore_filename = {61, 81, 83, 84, 85, 86};
+
     if (n == 0)
     {
         // Throw in some random test cases that don't fit anywhere
@@ -3391,7 +3413,7 @@ void runtest(int n, char const* filename1, char const* arg2)
         pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
                               p, size);
     }
-    else if ((n == 61) || (n == 81) || (n == 83) || (n == 84) || (n == 85))
+    else if (ignore_filename.count(n))
     {
         // Ignore filename argument entirely
     }
@@ -3439,7 +3461,7 @@ void runtest(int n, char const* filename1, char const* arg2)
         {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75},
         {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79},
         {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83},
-        {84, test_84}, {85, test_85},
+        {84, test_84}, {85, test_85}, {86, test_86},
     };
 
     auto fn = test_functions.find(n);
author	Jay Berkenbilt <ejb@ql.org>	2022-02-16 01:22:35 +0100
committer	Jay Berkenbilt <ejb@ql.org>	2022-02-16 01:22:35 +0100
commit	e810fe678a5615e3a4bfa16543bfdbdad78cd273 (patch)
tree	fe92db87bbbd806ade9b0818126787dbd9d3c2a2
parent	e35abe2fb7ee8daac5841178429fc116f0b60597 (diff)
download	qpdf-e810fe678a5615e3a4bfa16543bfdbdad78cd273.tar.zst