From 4ccca20db0f507ad5a5d71711d5e03d3a6a2f3a2 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 12 Jul 2022 11:36:33 +0100 Subject: Remove redundant parameter from QPDF::getAllPagesInternal --- libqpdf/QPDF_pages.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index e1a3b2c1..7b801fdc 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -82,7 +82,7 @@ QPDF::getAllPages() getRoot().replaceKey("/Pages", pages); } seen.clear(); - getAllPagesInternal(pages, this->m->all_pages, visited, seen); + getAllPagesInternal(pages, visited, seen); } return this->m->all_pages; } @@ -90,7 +90,6 @@ QPDF::getAllPages() void QPDF::getAllPagesInternal( QPDFObjectHandle cur_node, - std::vector& result, std::set& visited, std::set& seen) { @@ -129,12 +128,12 @@ QPDF::getAllPagesInternal( kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); kids.setArrayItem(i, kid); } - getAllPagesInternal(kid, result, visited, seen); + getAllPagesInternal(kid, visited, seen); } } else { wanted_type = "/Page"; seen.insert(this_og); - result.push_back(cur_node); + m->all_pages.push_back(cur_node); } if (!cur_node.isDictionaryOfType(wanted_type)) { -- cgit v1.2.3-54-g00ecf From 9dea7d308069eeb5af3edd44198efa3d80ac83bf Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 12 Jul 2022 12:35:24 +0100 Subject: Tune QPDF::getAllPagesInternal Avoid calling getAllPagesInternal for each /Page object. --- libqpdf/QPDF_pages.cc | 58 +++++++++++++++++++--------------- qpdf/qtest/qpdf/no-pages-types-fix.out | 2 +- qpdf/qtest/qpdf/no-pages-types.out | 2 +- 3 files changed, 34 insertions(+), 28 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 7b801fdc..90d5a09f 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -82,7 +82,10 @@ QPDF::getAllPages() getRoot().replaceKey("/Pages", pages); } seen.clear(); - getAllPagesInternal(pages, visited, seen); + if (pages.hasKey("/Kids")) { + // Ensure we actually found a /Pages object. + getAllPagesInternal(pages, visited, seen); + } } return this->m->all_pages; } @@ -93,8 +96,8 @@ QPDF::getAllPagesInternal( std::set& visited, std::set& seen) { - QPDFObjGen this_og = cur_node.getObjGen(); - if (visited.count(this_og) > 0) { + QPDFObjGen cur_node_og = cur_node.getObjGen(); + if (visited.count(cur_node_og) > 0) { throw QPDFExc( qpdf_e_pages, this->m->file->getName(), @@ -102,14 +105,22 @@ QPDF::getAllPagesInternal( 0, "Loop detected in /Pages structure (getAllPages)"); } - visited.insert(this_og); - std::string wanted_type; - if (cur_node.hasKey("/Kids")) { - wanted_type = "/Pages"; - QPDFObjectHandle kids = cur_node.getKey("/Kids"); - int n = kids.getArrayNItems(); - for (int i = 0; i < n; ++i) { - QPDFObjectHandle kid = kids.getArrayItem(i); + visited.insert(cur_node_og); + if (!cur_node.isDictionaryOfType("/Pages")) { + warn( + qpdf_e_damaged_pdf, + "page tree node", + m->file->getLastOffset(), + "/Type key should be /Pages but is not; overriding"); + cur_node.replaceKey("/Type", "/Pages"_qpdf); + } + auto kids = cur_node.getKey("/Kids"); + int n = kids.getArrayNItems(); + for (int i = 0; i < n; ++i) { + auto kid = kids.getArrayItem(i); + if (kid.hasKey("/Kids")) { + getAllPagesInternal(kid, visited, seen); + } else { if (!kid.isIndirect()) { QTC::TC("qpdf", "QPDF handle direct page object"); cur_node.warnIfPossible( @@ -128,23 +139,18 @@ QPDF::getAllPagesInternal( kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); kids.setArrayItem(i, kid); } - getAllPagesInternal(kid, visited, seen); + if (!kid.isDictionaryOfType("/Page")) { + warn( + qpdf_e_damaged_pdf, + "page tree node", + this->m->file->getLastOffset(), + "/Type key should be /Page but is not; overriding"); + kid.replaceKey("/Type", "/Page"_qpdf); + } + seen.insert(kid.getObjGen()); + m->all_pages.push_back(kid); } - } else { - wanted_type = "/Page"; - seen.insert(this_og); - m->all_pages.push_back(cur_node); - } - - if (!cur_node.isDictionaryOfType(wanted_type)) { - warn( - qpdf_e_damaged_pdf, - "page tree node", - this->m->file->getLastOffset(), - "/Type key should be " + wanted_type + " but is not; overriding"); - cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); } - visited.erase(this_og); } void diff --git a/qpdf/qtest/qpdf/no-pages-types-fix.out b/qpdf/qtest/qpdf/no-pages-types-fix.out index 81e71eeb..3a1ee1a2 100644 --- a/qpdf/qtest/qpdf/no-pages-types-fix.out +++ b/qpdf/qtest/qpdf/no-pages-types-fix.out @@ -1,3 +1,3 @@ -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/no-pages-types.out b/qpdf/qtest/qpdf/no-pages-types.out index 1221b68f..a6ef5f68 100644 --- a/qpdf/qtest/qpdf/no-pages-types.out +++ b/qpdf/qtest/qpdf/no-pages-types.out @@ -2,6 +2,6 @@ checking no-pages-types.pdf PDF Version: 1.3 File is not encrypted File is not linearized +WARNING: no-pages-types.pdf (page tree node, offset 135): /Type key should be /Pages but is not; overriding WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding qpdf: operation succeeded with warnings -- cgit v1.2.3-54-g00ecf From ff69773b35480de41f86f410a120524c4d57d5c0 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 12 Jul 2022 17:14:31 +0100 Subject: Fix warnings in QPDF::getAllPagesInternal --- libqpdf/QPDF_pages.cc | 10 ++-------- qpdf/qtest/qpdf/no-pages-types-fix.out | 4 ++-- qpdf/qtest/qpdf/no-pages-types.out | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 90d5a09f..a1bede25 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -107,10 +107,7 @@ QPDF::getAllPagesInternal( } visited.insert(cur_node_og); if (!cur_node.isDictionaryOfType("/Pages")) { - warn( - qpdf_e_damaged_pdf, - "page tree node", - m->file->getLastOffset(), + cur_node.warnIfPossible( "/Type key should be /Pages but is not; overriding"); cur_node.replaceKey("/Type", "/Pages"_qpdf); } @@ -140,10 +137,7 @@ QPDF::getAllPagesInternal( kids.setArrayItem(i, kid); } if (!kid.isDictionaryOfType("/Page")) { - warn( - qpdf_e_damaged_pdf, - "page tree node", - this->m->file->getLastOffset(), + kid.warnIfPossible( "/Type key should be /Page but is not; overriding"); kid.replaceKey("/Type", "/Page"_qpdf); } diff --git a/qpdf/qtest/qpdf/no-pages-types-fix.out b/qpdf/qtest/qpdf/no-pages-types-fix.out index 3a1ee1a2..b3d39593 100644 --- a/qpdf/qtest/qpdf/no-pages-types-fix.out +++ b/qpdf/qtest/qpdf/no-pages-types-fix.out @@ -1,3 +1,3 @@ -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/no-pages-types.out b/qpdf/qtest/qpdf/no-pages-types.out index a6ef5f68..4f25c2fc 100644 --- a/qpdf/qtest/qpdf/no-pages-types.out +++ b/qpdf/qtest/qpdf/no-pages-types.out @@ -2,6 +2,6 @@ checking no-pages-types.pdf PDF Version: 1.3 File is not encrypted File is not linearized -WARNING: no-pages-types.pdf (page tree node, offset 135): /Type key should be /Pages but is not; overriding -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding qpdf: operation succeeded with warnings -- cgit v1.2.3-54-g00ecf From 0356bcecc5f58b64d2cf02c68ca1934446cea9cb Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 12 Jul 2022 18:40:30 +0100 Subject: Tidy QPDF::pushInheritedAttributesToPageInternal Remove unnecessary parameters. Remove code that is unnecessary as result of a prior call to QPDF::getAllPages. Avoid clearing and rebuilding of m->all_pages. --- include/qpdf/QPDF.hh | 4 +-- libqpdf/QPDF_optimization.cc | 86 +++++++++++--------------------------------- 2 files changed, 22 insertions(+), 68 deletions(-) (limited to 'libqpdf') diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index d563fe8f..64f31edd 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1616,10 +1616,8 @@ class QPDF void pushInheritedAttributesToPageInternal( QPDFObjectHandle, std::map>&, - std::vector& all_pages, bool allow_changes, - bool warn_skipped_keys, - std::set& visited); + bool warn_skipped_keys); void updateObjectMaps( ObjUser const& ou, QPDFObjectHandle oh, diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index cafdbe64..4797be5e 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) // key_ancestors is a mapping of page attribute keys to a stack of // Pages nodes that contain values for them. std::map> key_ancestors; - this->m->all_pages.clear(); - std::set visited; pushInheritedAttributesToPageInternal( this->m->trailer.getKey("/Root").getKey("/Pages"), key_ancestors, - this->m->all_pages, allow_changes, - warn_skipped_keys, - visited); + warn_skipped_keys); if (!key_ancestors.empty()) { throw std::logic_error("key_ancestors not empty after" " pushing inherited attributes to pages"); @@ -169,35 +165,9 @@ void QPDF::pushInheritedAttributesToPageInternal( QPDFObjectHandle cur_pages, std::map>& key_ancestors, - std::vector& pages, bool allow_changes, - bool warn_skipped_keys, - std::set& visited) + bool warn_skipped_keys) { - QPDFObjGen this_og = cur_pages.getObjGen(); - if (visited.count(this_og) > 0) { - throw QPDFExc( - qpdf_e_pages, - this->m->file->getName(), - this->m->last_object_description, - 0, - "Loop detected in /Pages structure (inherited attributes)"); - } - visited.insert(this_og); - - if (!cur_pages.isDictionary()) { - throw QPDFExc( - qpdf_e_damaged_pdf, - this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "invalid object in page tree"); - } - - // Extract the underlying dictionary object - std::string type = cur_pages.getKey("/Type").getName(); - - if (type == "/Pages") { // Make a list of inheritable keys. Only the keys /MediaBox, // /CropBox, /Resources, and /Rotate are inheritable // attributes. Push this object onto the stack of pages nodes @@ -265,17 +235,25 @@ QPDF::pushInheritedAttributesToPageInternal( } } - // Visit descendant nodes. - QPDFObjectHandle kids = cur_pages.getKey("/Kids"); - int n = kids.getArrayNItems(); - for (int i = 0; i < n; ++i) { - pushInheritedAttributesToPageInternal( - kids.getArrayItem(i), - key_ancestors, - pages, - allow_changes, - warn_skipped_keys, - visited); + // Process descendant nodes. + for (auto& kid: cur_pages.getKey("/Kids").aitems()) { + if (kid.isDictionaryOfType("/Pages")) { + pushInheritedAttributesToPageInternal( + kid, key_ancestors, allow_changes, warn_skipped_keys); + } else { + // Add all available inheritable attributes not present in + // this object to this object. + for (auto const& iter: key_ancestors) { + std::string const& key = iter.first; + if (!kid.hasKey(key)) { + QTC::TC("qpdf", "QPDF opt resource inherited"); + kid.replaceKey(key, iter.second.back()); + } else { + QTC::TC( + "qpdf", "QPDF opt page resource hides ancestor"); + } + } + } } // For each inheritable key, pop the stack. If the stack @@ -295,28 +273,6 @@ QPDF::pushInheritedAttributesToPageInternal( } else { QTC::TC("qpdf", "QPDF opt no inheritable keys"); } - } else if (type == "/Page") { - // Add all available inheritable attributes not present in - // this object to this object. - for (auto const& iter: key_ancestors) { - std::string const& key = iter.first; - if (!cur_pages.hasKey(key)) { - QTC::TC("qpdf", "QPDF opt resource inherited"); - cur_pages.replaceKey(key, iter.second.back()); - } else { - QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); - } - } - pages.push_back(cur_pages); - } else { - throw QPDFExc( - qpdf_e_damaged_pdf, - this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "invalid Type " + type + " in page tree"); - } - visited.erase(this_og); } void -- cgit v1.2.3-54-g00ecf From 903a86643ad6559ded882fe8d6549c0c715f9fc2 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 12 Jul 2022 20:16:00 +0100 Subject: Fix code formatting of QPDF::pushInheritedAttributesToPageInternal --- libqpdf/QPDF_optimization.cc | 186 +++++++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 94 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index 4797be5e..91f2c385 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -168,111 +168,109 @@ QPDF::pushInheritedAttributesToPageInternal( bool allow_changes, bool warn_skipped_keys) { - // Make a list of inheritable keys. Only the keys /MediaBox, - // /CropBox, /Resources, and /Rotate are inheritable - // attributes. Push this object onto the stack of pages nodes - // that have values for this attribute. - - std::set inheritable_keys; - for (auto const& key: cur_pages.getKeys()) { - if ((key == "/MediaBox") || (key == "/CropBox") || - (key == "/Resources") || (key == "/Rotate")) { - if (!allow_changes) { - throw QPDFExc( - qpdf_e_internal, - this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "optimize detected an " - "inheritable attribute when called " - "in no-change mode"); - } + // Make a list of inheritable keys. Only the keys /MediaBox, + // /CropBox, /Resources, and /Rotate are inheritable + // attributes. Push this object onto the stack of pages nodes + // that have values for this attribute. + + std::set inheritable_keys; + for (auto const& key: cur_pages.getKeys()) { + if ((key == "/MediaBox") || (key == "/CropBox") || + (key == "/Resources") || (key == "/Rotate")) { + if (!allow_changes) { + throw QPDFExc( + qpdf_e_internal, + this->m->file->getName(), + this->m->last_object_description, + this->m->file->getLastOffset(), + "optimize detected an " + "inheritable attribute when called " + "in no-change mode"); + } - // This is an inheritable resource - inheritable_keys.insert(key); - QPDFObjectHandle oh = cur_pages.getKey(key); - QTC::TC( - "qpdf", - "QPDF opt direct pages resource", - oh.isIndirect() ? 0 : 1); - if (!oh.isIndirect()) { - if (!oh.isScalar()) { - // Replace shared direct object non-scalar - // resources with indirect objects to avoid - // copying large structures around. - cur_pages.replaceKey(key, makeIndirectObject(oh)); - oh = cur_pages.getKey(key); - } else { - // It's okay to copy scalars. - QTC::TC("qpdf", "QPDF opt inherited scalar"); - } - } - key_ancestors[key].push_back(oh); - if (key_ancestors[key].size() > 1) { - QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); - } - // Remove this resource from this node. It will be - // reattached at the page level. - cur_pages.removeKey(key); - } else if (!((key == "/Type") || (key == "/Parent") || - (key == "/Kids") || (key == "/Count"))) { - // Warn when flattening, but not if the key is at the top - // level (i.e. "/Parent" not set), as we don't change these; - // but flattening removes intermediate /Pages nodes. - if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { - QTC::TC("qpdf", "QPDF unknown key not inherited"); - setLastObjectDescription( - "Pages object", cur_pages.getObjGen()); - warn( - qpdf_e_pages, - this->m->last_object_description, - 0, - ("Unknown key " + key + - " in /Pages object" - " is being discarded as a result of" - " flattening the /Pages tree")); + // This is an inheritable resource + inheritable_keys.insert(key); + QPDFObjectHandle oh = cur_pages.getKey(key); + QTC::TC( + "qpdf", + "QPDF opt direct pages resource", + oh.isIndirect() ? 0 : 1); + if (!oh.isIndirect()) { + if (!oh.isScalar()) { + // Replace shared direct object non-scalar + // resources with indirect objects to avoid + // copying large structures around. + cur_pages.replaceKey(key, makeIndirectObject(oh)); + oh = cur_pages.getKey(key); + } else { + // It's okay to copy scalars. + QTC::TC("qpdf", "QPDF opt inherited scalar"); } } + key_ancestors[key].push_back(oh); + if (key_ancestors[key].size() > 1) { + QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); + } + // Remove this resource from this node. It will be + // reattached at the page level. + cur_pages.removeKey(key); + } else if (!((key == "/Type") || (key == "/Parent") || + (key == "/Kids") || (key == "/Count"))) { + // Warn when flattening, but not if the key is at the top + // level (i.e. "/Parent" not set), as we don't change these; + // but flattening removes intermediate /Pages nodes. + if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { + QTC::TC("qpdf", "QPDF unknown key not inherited"); + setLastObjectDescription("Pages object", cur_pages.getObjGen()); + warn( + qpdf_e_pages, + this->m->last_object_description, + 0, + ("Unknown key " + key + + " in /Pages object" + " is being discarded as a result of" + " flattening the /Pages tree")); + } } + } - // Process descendant nodes. - for (auto& kid: cur_pages.getKey("/Kids").aitems()) { - if (kid.isDictionaryOfType("/Pages")) { - pushInheritedAttributesToPageInternal( - kid, key_ancestors, allow_changes, warn_skipped_keys); - } else { - // Add all available inheritable attributes not present in - // this object to this object. - for (auto const& iter: key_ancestors) { - std::string const& key = iter.first; - if (!kid.hasKey(key)) { - QTC::TC("qpdf", "QPDF opt resource inherited"); - kid.replaceKey(key, iter.second.back()); - } else { - QTC::TC( - "qpdf", "QPDF opt page resource hides ancestor"); - } + // Process descendant nodes. + for (auto& kid: cur_pages.getKey("/Kids").aitems()) { + if (kid.isDictionaryOfType("/Pages")) { + pushInheritedAttributesToPageInternal( + kid, key_ancestors, allow_changes, warn_skipped_keys); + } else { + // Add all available inheritable attributes not present in + // this object to this object. + for (auto const& iter: key_ancestors) { + std::string const& key = iter.first; + if (!kid.hasKey(key)) { + QTC::TC("qpdf", "QPDF opt resource inherited"); + kid.replaceKey(key, iter.second.back()); + } else { + QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); } } } + } - // For each inheritable key, pop the stack. If the stack - // becomes empty, remove it from the map. That way, the - // invariant that the list of keys in key_ancestors is exactly - // those keys for which inheritable attributes are available. - - if (!inheritable_keys.empty()) { - QTC::TC("qpdf", "QPDF opt inheritable keys"); - for (auto const& key: inheritable_keys) { - key_ancestors[key].pop_back(); - if (key_ancestors[key].empty()) { - QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); - key_ancestors.erase(key); - } + // For each inheritable key, pop the stack. If the stack + // becomes empty, remove it from the map. That way, the + // invariant that the list of keys in key_ancestors is exactly + // those keys for which inheritable attributes are available. + + if (!inheritable_keys.empty()) { + QTC::TC("qpdf", "QPDF opt inheritable keys"); + for (auto const& key: inheritable_keys) { + key_ancestors[key].pop_back(); + if (key_ancestors[key].empty()) { + QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); + key_ancestors.erase(key); } - } else { - QTC::TC("qpdf", "QPDF opt no inheritable keys"); } + } else { + QTC::TC("qpdf", "QPDF opt no inheritable keys"); + } } void -- cgit v1.2.3-54-g00ecf