aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <jberkenbilt@users.noreply.github.com>2022-09-01 21:28:32 +0200
committerGitHub <noreply@github.com>2022-09-01 21:28:32 +0200
commit3d029fb17ef6b8ea9094394741f103608f698bad (patch)
tree91bee4e325142767086a4b8b70c7904af614cb1f
parentf8fd7d60e301b9b1bf4d705ce747e281c320487e (diff)
parent903a86643ad6559ded882fe8d6549c0c715f9fc2 (diff)
downloadqpdf-3d029fb17ef6b8ea9094394741f103608f698bad.tar.zst
Merge pull request #730 from m-holger/allpages
Tidy QPDF::getAllPagesInternal and QPDF::pushInheritedAttributesToPageInternal
-rw-r--r--include/qpdf/QPDF.hh5
-rw-r--r--libqpdf/QPDF_optimization.cc230
-rw-r--r--libqpdf/QPDF_pages.cc53
-rw-r--r--qpdf/qtest/qpdf/no-pages-types-fix.out4
-rw-r--r--qpdf/qtest/qpdf/no-pages-types.out4
5 files changed, 123 insertions, 173 deletions
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index bcd85cd2..81169fbd 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -1240,7 +1240,6 @@ class QPDF
void getAllPagesInternal(
QPDFObjectHandle cur_pages,
- std::vector<QPDFObjectHandle>& result,
std::set<QPDFObjGen>& visited,
std::set<QPDFObjGen>& seen);
void insertPage(QPDFObjectHandle newpage, int pos);
@@ -1627,10 +1626,8 @@ class QPDF
void pushInheritedAttributesToPageInternal(
QPDFObjectHandle,
std::map<std::string, std::vector<QPDFObjectHandle>>&,
- std::vector<QPDFObjectHandle>& all_pages,
bool allow_changes,
- bool warn_skipped_keys,
- std::set<QPDFObjGen>& visited);
+ bool warn_skipped_keys);
void updateObjectMaps(
ObjUser const& ou,
QPDFObjectHandle oh,
diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc
index cafdbe64..91f2c385 100644
--- a/libqpdf/QPDF_optimization.cc
+++ b/libqpdf/QPDF_optimization.cc
@@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
// key_ancestors is a mapping of page attribute keys to a stack of
// Pages nodes that contain values for them.
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
- this->m->all_pages.clear();
- std::set<QPDFObjGen> visited;
pushInheritedAttributesToPageInternal(
this->m->trailer.getKey("/Root").getKey("/Pages"),
key_ancestors,
- this->m->all_pages,
allow_changes,
- warn_skipped_keys,
- visited);
+ warn_skipped_keys);
if (!key_ancestors.empty()) {
throw std::logic_error("key_ancestors not empty after"
" pushing inherited attributes to pages");
@@ -169,154 +165,112 @@ void
QPDF::pushInheritedAttributesToPageInternal(
QPDFObjectHandle cur_pages,
std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
- std::vector<QPDFObjectHandle>& pages,
bool allow_changes,
- bool warn_skipped_keys,
- std::set<QPDFObjGen>& visited)
+ bool warn_skipped_keys)
{
- QPDFObjGen this_og = cur_pages.getObjGen();
- if (visited.count(this_og) > 0) {
- throw QPDFExc(
- qpdf_e_pages,
- this->m->file->getName(),
- this->m->last_object_description,
- 0,
- "Loop detected in /Pages structure (inherited attributes)");
- }
- visited.insert(this_og);
-
- if (!cur_pages.isDictionary()) {
- throw QPDFExc(
- qpdf_e_damaged_pdf,
- this->m->file->getName(),
- this->m->last_object_description,
- this->m->file->getLastOffset(),
- "invalid object in page tree");
- }
-
- // Extract the underlying dictionary object
- std::string type = cur_pages.getKey("/Type").getName();
-
- if (type == "/Pages") {
- // Make a list of inheritable keys. Only the keys /MediaBox,
- // /CropBox, /Resources, and /Rotate are inheritable
- // attributes. Push this object onto the stack of pages nodes
- // that have values for this attribute.
-
- std::set<std::string> inheritable_keys;
- for (auto const& key: cur_pages.getKeys()) {
- if ((key == "/MediaBox") || (key == "/CropBox") ||
- (key == "/Resources") || (key == "/Rotate")) {
- if (!allow_changes) {
- throw QPDFExc(
- qpdf_e_internal,
- this->m->file->getName(),
- this->m->last_object_description,
- this->m->file->getLastOffset(),
- "optimize detected an "
- "inheritable attribute when called "
- "in no-change mode");
- }
+ // Make a list of inheritable keys. Only the keys /MediaBox,
+ // /CropBox, /Resources, and /Rotate are inheritable
+ // attributes. Push this object onto the stack of pages nodes
+ // that have values for this attribute.
+
+ std::set<std::string> inheritable_keys;
+ for (auto const& key: cur_pages.getKeys()) {
+ if ((key == "/MediaBox") || (key == "/CropBox") ||
+ (key == "/Resources") || (key == "/Rotate")) {
+ if (!allow_changes) {
+ throw QPDFExc(
+ qpdf_e_internal,
+ this->m->file->getName(),
+ this->m->last_object_description,
+ this->m->file->getLastOffset(),
+ "optimize detected an "
+ "inheritable attribute when called "
+ "in no-change mode");
+ }
- // This is an inheritable resource
- inheritable_keys.insert(key);
- QPDFObjectHandle oh = cur_pages.getKey(key);
- QTC::TC(
- "qpdf",
- "QPDF opt direct pages resource",
- oh.isIndirect() ? 0 : 1);
- if (!oh.isIndirect()) {
- if (!oh.isScalar()) {
- // Replace shared direct object non-scalar
- // resources with indirect objects to avoid
- // copying large structures around.
- cur_pages.replaceKey(key, makeIndirectObject(oh));
- oh = cur_pages.getKey(key);
- } else {
- // It's okay to copy scalars.
- QTC::TC("qpdf", "QPDF opt inherited scalar");
- }
- }
- key_ancestors[key].push_back(oh);
- if (key_ancestors[key].size() > 1) {
- QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
- }
- // Remove this resource from this node. It will be
- // reattached at the page level.
- cur_pages.removeKey(key);
- } else if (!((key == "/Type") || (key == "/Parent") ||
- (key == "/Kids") || (key == "/Count"))) {
- // Warn when flattening, but not if the key is at the top
- // level (i.e. "/Parent" not set), as we don't change these;
- // but flattening removes intermediate /Pages nodes.
- if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
- QTC::TC("qpdf", "QPDF unknown key not inherited");
- setLastObjectDescription(
- "Pages object", cur_pages.getObjGen());
- warn(
- qpdf_e_pages,
- this->m->last_object_description,
- 0,
- ("Unknown key " + key +
- " in /Pages object"
- " is being discarded as a result of"
- " flattening the /Pages tree"));
+ // This is an inheritable resource
+ inheritable_keys.insert(key);
+ QPDFObjectHandle oh = cur_pages.getKey(key);
+ QTC::TC(
+ "qpdf",
+ "QPDF opt direct pages resource",
+ oh.isIndirect() ? 0 : 1);
+ if (!oh.isIndirect()) {
+ if (!oh.isScalar()) {
+ // Replace shared direct object non-scalar
+ // resources with indirect objects to avoid
+ // copying large structures around.
+ cur_pages.replaceKey(key, makeIndirectObject(oh));
+ oh = cur_pages.getKey(key);
+ } else {
+ // It's okay to copy scalars.
+ QTC::TC("qpdf", "QPDF opt inherited scalar");
}
}
+ key_ancestors[key].push_back(oh);
+ if (key_ancestors[key].size() > 1) {
+ QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
+ }
+ // Remove this resource from this node. It will be
+ // reattached at the page level.
+ cur_pages.removeKey(key);
+ } else if (!((key == "/Type") || (key == "/Parent") ||
+ (key == "/Kids") || (key == "/Count"))) {
+ // Warn when flattening, but not if the key is at the top
+ // level (i.e. "/Parent" not set), as we don't change these;
+ // but flattening removes intermediate /Pages nodes.
+ if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
+ QTC::TC("qpdf", "QPDF unknown key not inherited");
+ setLastObjectDescription("Pages object", cur_pages.getObjGen());
+ warn(
+ qpdf_e_pages,
+ this->m->last_object_description,
+ 0,
+ ("Unknown key " + key +
+ " in /Pages object"
+ " is being discarded as a result of"
+ " flattening the /Pages tree"));
+ }
}
+ }
- // Visit descendant nodes.
- QPDFObjectHandle kids = cur_pages.getKey("/Kids");
- int n = kids.getArrayNItems();
- for (int i = 0; i < n; ++i) {
+ // Process descendant nodes.
+ for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
+ if (kid.isDictionaryOfType("/Pages")) {
pushInheritedAttributesToPageInternal(
- kids.getArrayItem(i),
- key_ancestors,
- pages,
- allow_changes,
- warn_skipped_keys,
- visited);
- }
-
- // For each inheritable key, pop the stack. If the stack
- // becomes empty, remove it from the map. That way, the
- // invariant that the list of keys in key_ancestors is exactly
- // those keys for which inheritable attributes are available.
-
- if (!inheritable_keys.empty()) {
- QTC::TC("qpdf", "QPDF opt inheritable keys");
- for (auto const& key: inheritable_keys) {
- key_ancestors[key].pop_back();
- if (key_ancestors[key].empty()) {
- QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
- key_ancestors.erase(key);
+ kid, key_ancestors, allow_changes, warn_skipped_keys);
+ } else {
+ // Add all available inheritable attributes not present in
+ // this object to this object.
+ for (auto const& iter: key_ancestors) {
+ std::string const& key = iter.first;
+ if (!kid.hasKey(key)) {
+ QTC::TC("qpdf", "QPDF opt resource inherited");
+ kid.replaceKey(key, iter.second.back());
+ } else {
+ QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
}
}
- } else {
- QTC::TC("qpdf", "QPDF opt no inheritable keys");
}
- } else if (type == "/Page") {
- // Add all available inheritable attributes not present in
- // this object to this object.
- for (auto const& iter: key_ancestors) {
- std::string const& key = iter.first;
- if (!cur_pages.hasKey(key)) {
- QTC::TC("qpdf", "QPDF opt resource inherited");
- cur_pages.replaceKey(key, iter.second.back());
- } else {
- QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
+ }
+
+ // For each inheritable key, pop the stack. If the stack
+ // becomes empty, remove it from the map. That way, the
+ // invariant that the list of keys in key_ancestors is exactly
+ // those keys for which inheritable attributes are available.
+
+ if (!inheritable_keys.empty()) {
+ QTC::TC("qpdf", "QPDF opt inheritable keys");
+ for (auto const& key: inheritable_keys) {
+ key_ancestors[key].pop_back();
+ if (key_ancestors[key].empty()) {
+ QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
+ key_ancestors.erase(key);
}
}
- pages.push_back(cur_pages);
} else {
- throw QPDFExc(
- qpdf_e_damaged_pdf,
- this->m->file->getName(),
- this->m->last_object_description,
- this->m->file->getLastOffset(),
- "invalid Type " + type + " in page tree");
+ QTC::TC("qpdf", "QPDF opt no inheritable keys");
}
- visited.erase(this_og);
}
void
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index 8c9bfbaa..80e89b02 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -82,7 +82,10 @@ QPDF::getAllPages()
getRoot().replaceKey("/Pages", pages);
}
seen.clear();
- getAllPagesInternal(pages, this->m->all_pages, visited, seen);
+ if (pages.hasKey("/Kids")) {
+ // Ensure we actually found a /Pages object.
+ getAllPagesInternal(pages, visited, seen);
+ }
}
return this->m->all_pages;
}
@@ -90,12 +93,11 @@ QPDF::getAllPages()
void
QPDF::getAllPagesInternal(
QPDFObjectHandle cur_node,
- std::vector<QPDFObjectHandle>& result,
std::set<QPDFObjGen>& visited,
std::set<QPDFObjGen>& seen)
{
- QPDFObjGen this_og = cur_node.getObjGen();
- if (visited.count(this_og) > 0) {
+ QPDFObjGen cur_node_og = cur_node.getObjGen();
+ if (visited.count(cur_node_og) > 0) {
throw QPDFExc(
qpdf_e_pages,
this->m->file->getName(),
@@ -103,14 +105,19 @@ QPDF::getAllPagesInternal(
0,
"Loop detected in /Pages structure (getAllPages)");
}
- visited.insert(this_og);
- std::string wanted_type;
- if (cur_node.hasKey("/Kids")) {
- wanted_type = "/Pages";
- QPDFObjectHandle kids = cur_node.getKey("/Kids");
- int n = kids.getArrayNItems();
- for (int i = 0; i < n; ++i) {
- QPDFObjectHandle kid = kids.getArrayItem(i);
+ visited.insert(cur_node_og);
+ if (!cur_node.isDictionaryOfType("/Pages")) {
+ cur_node.warnIfPossible(
+ "/Type key should be /Pages but is not; overriding");
+ cur_node.replaceKey("/Type", "/Pages"_qpdf);
+ }
+ auto kids = cur_node.getKey("/Kids");
+ int n = kids.getArrayNItems();
+ for (int i = 0; i < n; ++i) {
+ auto kid = kids.getArrayItem(i);
+ if (kid.hasKey("/Kids")) {
+ getAllPagesInternal(kid, visited, seen);
+ } else {
if (!kid.isIndirect()) {
QTC::TC("qpdf", "QPDF handle direct page object");
cur_node.warnIfPossible(
@@ -129,23 +136,15 @@ QPDF::getAllPagesInternal(
kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
kids.setArrayItem(i, kid);
}
- getAllPagesInternal(kid, result, visited, seen);
+ if (!kid.isDictionaryOfType("/Page")) {
+ kid.warnIfPossible(
+ "/Type key should be /Page but is not; overriding");
+ kid.replaceKey("/Type", "/Page"_qpdf);
+ }
+ seen.insert(kid.getObjGen());
+ m->all_pages.push_back(kid);
}
- } else {
- wanted_type = "/Page";
- seen.insert(this_og);
- result.push_back(cur_node);
- }
-
- if (!cur_node.isDictionaryOfType(wanted_type)) {
- warn(
- qpdf_e_damaged_pdf,
- "page tree node",
- this->m->file->getLastOffset(),
- "/Type key should be " + wanted_type + " but is not; overriding");
- cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
}
- visited.erase(this_og);
}
void
diff --git a/qpdf/qtest/qpdf/no-pages-types-fix.out b/qpdf/qtest/qpdf/no-pages-types-fix.out
index 81e71eeb..b3d39593 100644
--- a/qpdf/qtest/qpdf/no-pages-types-fix.out
+++ b/qpdf/qtest/qpdf/no-pages-types-fix.out
@@ -1,3 +1,3 @@
-WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
-WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
+WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
+WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
qpdf: operation succeeded with warnings; resulting file may have some problems
diff --git a/qpdf/qtest/qpdf/no-pages-types.out b/qpdf/qtest/qpdf/no-pages-types.out
index 1221b68f..4f25c2fc 100644
--- a/qpdf/qtest/qpdf/no-pages-types.out
+++ b/qpdf/qtest/qpdf/no-pages-types.out
@@ -2,6 +2,6 @@ checking no-pages-types.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
-WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
-WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
+WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
+WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
qpdf: operation succeeded with warnings