aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--include/qpdf/QPDFObjectHandle.hh26
-rw-r--r--libqpdf/QPDFObjectHandle.cc103
-rw-r--r--qpdf/qpdf.testcov8
-rw-r--r--qpdf/qtest/qpdf.test10
-rw-r--r--qpdf/qtest/qpdf/merge-dict.out36
-rw-r--r--qpdf/qtest/qpdf/merge-dict.pdf148
-rw-r--r--qpdf/test_driver.cc11
8 files changed, 347 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index b7109173..75d4aa48 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2018-12-31 Jay Berkenbilt <ejb@ql.org>
+ * Add method QPDFObjectHandle::mergeDictionary(), which
+ recursively merges dictionaries with semantics designed for
+ merging resource dictionaries. See detailed description in
+ QPDFObjectHandle.hh.
+
* Add QPDFObjectHandle::Matrix, similar to
QPDFObjectHandle::Rectangle, as a convenience class for
six-element arrays that are used as matrices.
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index a4b469c2..030ee11c 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -559,6 +559,28 @@ class QPDFObjectHandle
QPDF_DLL
bool isOrHasName(std::string const&);
+ // Merge dictionaries with the following behavior, where "object"
+ // refers to the object whose method is invoked, and "other"
+ // refers to the argument:
+ // * If either object or other is not a dictionary, do nothing
+ // * Otherwise, for each key in other
+ //   * If key is absent in object, insert other's value
+ //   * If key is present in object
+ //     * If both values are dictionaries, merge other's into
+ //       object's (replaced by a shallow copy first if indirect)
+ //     * If both values are arrays, append to object's array
+ //       each scalar element of other's array that object's
+ //       array lacks; non-scalar elements are not copied
+ //     * Otherwise keep object's value and ignore other's
+ // The primary purpose of this method is to facilitate merging of
+ // resource dictionaries. Conflicts are ignored. If needed, a
+ // future version of qpdf may provide some mechanism for conflict
+ // resolution, such as providing a handler that is invoked with
+ // the path to the conflict. Dictionaries nested more than 100
+ // levels deep are left unmerged.
+ QPDF_DLL
+ void mergeDictionary(QPDFObjectHandle other);
+
// Return the QPDF object that owns an indirect object. Returns
// null for a direct object.
QPDF_DLL
@@ -970,6 +992,10 @@ class QPDFObjectHandle
ParserCallbacks* callbacks);
std::vector<QPDFObjectHandle> arrayOrStreamToStreamArray(
std::string const& description, std::string& all_description);
+ void mergeDictionaryInternal( // recursive worker behind mergeDictionary()
+ QPDFObjectHandle other, // dictionary whose entries are merged into this object
+ std::set<QPDFObjGen>& visiting, // indirect dictionaries whose merge is in progress (loop guard)
+ int depth); // nesting level of the call; nothing is merged beyond 100
static void warn(QPDF*, QPDFExc const&);
class Members
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 51df113c..300027b9 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -825,6 +825,109 @@ QPDFObjectHandle::isOrHasName(std::string const& value)
return false;
}
+// Public entry point for dictionary merging: starts the recursive
+// merge at nesting level 0 with no indirect dictionaries recorded
+// as being in progress.
+void
+QPDFObjectHandle::mergeDictionary(QPDFObjectHandle other)
+{
+    std::set<QPDFObjGen> in_progress;
+    int const top_level = 0;
+    mergeDictionaryInternal(other, in_progress, top_level);
+}
+
+// Recursive implementation of mergeDictionary(): merges the entries
+// of "other" into this object.
+//
+// Parameters:
+//   other    - dictionary whose entries are merged into this one
+//   visiting - object/generation IDs of indirect dictionaries on
+//              this side whose merge is still in progress in an
+//              enclosing call; a value found in this set is not
+//              merged into again
+//   depth    - nesting level of this call; the top-level call
+//              passes 0 and each nested merge adds 1
+//
+// Returns without doing anything if depth exceeds 100 or if either
+// object is not a dictionary.
+void
+QPDFObjectHandle::mergeDictionaryInternal(
+    QPDFObjectHandle other,
+    std::set<QPDFObjGen>& visiting,
+    int depth)
+{
+    if (depth > 100)
+    {
+        // Arbitrarily limit depth to avoid stack overflow
+        return;
+    }
+    if (! (isDictionary() && other.isDictionary()))
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
+        return;
+    }
+    std::set<std::string> other_keys = other.getKeys();
+    for (std::set<std::string>::iterator iter = other_keys.begin();
+         iter != other_keys.end(); ++iter)
+    {
+        std::string const& key = *iter;
+        QPDFObjectHandle other_val = other.getKey(key);
+        if (hasKey(key))
+        {
+            QPDFObjectHandle this_val = getKey(key);
+            if (this_val.isDictionary() && other_val.isDictionary())
+            {
+                if (this_val.isIndirect() && other_val.isIndirect() &&
+                    (this_val.getObjGen() == other_val.getObjGen()))
+                {
+                    // Both values carry the same object/generation
+                    // ID, so there is nothing to merge.
+                    QTC::TC("qpdf", "QPDFObjectHandle merge equal indirect");
+                }
+                else if (this_val.isIndirect() &&
+                         (visiting.count(this_val.getObjGen())))
+                {
+                    // This dictionary is already being merged by an
+                    // enclosing call; skip it to avoid a loop.
+                    QTC::TC("qpdf", "QPDFObjectHandle merge loop");
+                }
+                else
+                {
+                    // loop stays default-constructed unless this_val
+                    // is indirect; the getObj() test after the nested
+                    // merge relies on that to tell whether visiting
+                    // was updated (assumes a default QPDFObjGen
+                    // reports object number 0 -- TODO confirm).
+                    QPDFObjGen loop;
+                    if (this_val.isIndirect())
+                    {
+                        loop = this_val.getObjGen();
+                        visiting.insert(loop);
+                        // Merge into a shallow copy stored under this
+                        // key rather than into the indirect object
+                        // itself.
+                        QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
+                        this_val = this_val.shallowCopy();
+                        replaceKey(key, this_val);
+                    }
+                    QTC::TC("qpdf", "QPDFObjectHandle nested merge");
+                    this_val.mergeDictionaryInternal(
+                        other_val, visiting, 1 + depth);
+                    if (loop.getObj())
+                    {
+                        // The nested merge is finished, so this
+                        // object is no longer in progress.
+                        visiting.erase(loop);
+                    }
+                }
+            }
+            else if (this_val.isArray() && other_val.isArray())
+            {
+                // Scalars are compared by their unparsed text.
+                // Start with the scalars already in this array.
+                std::set<std::string> scalars;
+                int n = this_val.getArrayNItems();
+                for (int i = 0; i < n; ++i)
+                {
+                    QPDFObjectHandle this_item = this_val.getArrayItem(i);
+                    if (this_item.isScalar())
+                    {
+                        scalars.insert(this_item.unparse());
+                    }
+                }
+                n = other_val.getArrayNItems();
+                for (int i = 0; i < n; ++i)
+                {
+                    QPDFObjectHandle other_item = other_val.getArrayItem(i);
+                    if (other_item.isScalar())
+                    {
+                        // insert() reports whether the text was new.
+                        // Recording each appended element here keeps
+                        // a scalar that is repeated within other's
+                        // array from being appended more than once.
+                        if (scalars.insert(other_item.unparse()).second)
+                        {
+                            QTC::TC("qpdf", "QPDFObjectHandle merge array");
+                            this_val.appendItem(other_item);
+                        }
+                        else
+                        {
+                            QTC::TC("qpdf", "QPDFObjectHandle merge array dup");
+                        }
+                    }
+                    // Non-scalar elements of other's array are ignored.
+                }
+            }
+            // Any other combination of value types is a conflict:
+            // this object's value is kept and other's is ignored.
+        }
+        else
+        {
+            // Key exists only in other: adopt other's value as is.
+            QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
+            replaceKey(key, other_val);
+        }
+    }
+}
+
// Indirect object accessors
QPDF*
QPDFObjectHandle::getOwningQPDF()
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index e63fc6cc..330b6e54 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -369,3 +369,11 @@ QPDFOutlineDocumentHelper string named dest 0
QPDFOutlineObjectHelper loop 0
qpdf required parameter 0
qpdf required choices 0
+QPDFObjectHandle merge top type mismatch 0
+QPDFObjectHandle merge shallow copy 0
+QPDFObjectHandle nested merge 0
+QPDFObjectHandle merge array 0
+QPDFObjectHandle merge array dup 0
+QPDFObjectHandle merge copy from other 0
+QPDFObjectHandle merge loop 0
+QPDFObjectHandle merge equal indirect 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 8c976f6d..9b1ae3e0 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -895,6 +895,16 @@ $td->runtest("detect foreign object in write",
show_ntests();
# ----------
+$td->notify("--- Merge Dictionary ---");
+$n_tests += 1; # this section makes exactly one runtest call
+# Exercise QPDFObjectHandle::mergeDictionary through test_driver case 50.
+$td->runtest("merge dictionary",
+ {$td->COMMAND => "test_driver 50 merge-dict.pdf"},
+ {$td->FILE => "merge-dict.out", $td->EXIT_STATUS => 0}, # expected output and exit status
+ $td->NORMALIZE_NEWLINES);
+
+show_ntests();
+# ----------
$td->notify("--- Parsing ---");
$n_tests += 17;
diff --git a/qpdf/qtest/qpdf/merge-dict.out b/qpdf/qtest/qpdf/merge-dict.out
new file mode 100644
index 00000000..d2422142
--- /dev/null
+++ b/qpdf/qtest/qpdf/merge-dict.out
@@ -0,0 +1,36 @@
+{
+ "/k1": "scalar1",
+ "/k2": 16059,
+ "/k3": {
+ "/a": "a",
+ "/b": "conflict: seen",
+ "/c": [
+ 2,
+ 3,
+ 1
+ ],
+ "/d": {
+ "/x": 24,
+ "/y": 25,
+ "/z": 26
+ },
+ "/e": "e"
+ },
+ "/k4": {
+ "/A": 65,
+ "/B": 66,
+ "/C": 67,
+ "/indirect2": "8 0 R",
+ "/recursive": "9 0 R"
+ },
+ "/k5": [
+ "/one",
+ 2,
+ "three",
+ [
+ "/four"
+ ],
+ "two"
+ ]
+}
+test 50 done
diff --git a/qpdf/qtest/qpdf/merge-dict.pdf b/qpdf/qtest/qpdf/merge-dict.pdf
new file mode 100644
index 00000000..1061ef51
--- /dev/null
+++ b/qpdf/qtest/qpdf/merge-dict.pdf
@@ -0,0 +1,148 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+3 0 obj
+<<
+ /Contents 4 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 6 0 R
+ >>
+ /ProcSet 7 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+4 0 obj
+<<
+ /Length 5 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+44
+endobj
+
+6 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+7 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+8 0 obj
+<<
+ /a (a)
+ /b (b)
+ /c [1 2]
+ /d << /x 24 /y (not seen) >>
+>>
+endobj
+
+9 0 obj
+<<
+ /A 65
+ /B 66
+ /indirect2 8 0 R
+ /recursive 9 0 R
+>>
+endobj
+
+xref
+0 10
+0000000000 65535 f
+0000000025 00000 n
+0000000079 00000 n
+0000000161 00000 n
+0000000376 00000 n
+0000000475 00000 n
+0000000494 00000 n
+0000000612 00000 n
+0000000647 00000 n
+0000000729 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 10
+ /ID [<f8c8da17f88e0dccac9f73ad9d0ee411><f8c8da17f88e0dccac9f73ad9d0ee411>]
+ /Dict1 <<
+ /k1 (scalar1)
+ /k3 <<
+ /b (conflict: seen)
+ /c [2 3]
+ /d << /y 25 /z 26 >>
+ /e (e)
+ >>
+ /k4 9 0 R
+ /k5 [
+ /one
+ 2
+ (three)
+ [ /four ]
+ ]
+ >>
+ /Dict2 <<
+ /k1 (other: conflict: not seen)
+ /k2 16059
+ /k3 8 0 R
+ /k4 <<
+ /B (not seen)
+ /C 67
+ /indirect2 8 0 R
+ /recursive 8 0 R
+ >>
+ /k5 [
+ /one
+ (two)
+ << /six 6 >>
+ [ /five ]
+ ]
+ >>
+>>
+startxref
+805
+%%EOF
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index eae25a5c..8fb645f9 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -1754,6 +1754,17 @@ void runtest(int n, char const* filename1, char const* arg2)
}
}
}
+ else if (n == 50)
+ {
+ // Test dictionary merge. This test is crafted to work with
+ // merge-dict.pdf
+ QPDFObjectHandle d1 = pdf.getTrailer().getKey("/Dict1");
+ QPDFObjectHandle d2 = pdf.getTrailer().getKey("/Dict2");
+ d1.mergeDictionary(d2);
+ std::cout << d1.getJSON().unparse() << std::endl;
+ // Top-level type mismatch
+ d1.mergeDictionary(d2.getKey("/k1"));
+ }
else
{
throw std::runtime_error(std::string("invalid test ") +