diff options
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/QPDF_optimization.cc | 230 | ||||
-rw-r--r-- | libqpdf/QPDF_pages.cc | 53 |
2 files changed, 118 insertions, 165 deletions
diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index cafdbe64..91f2c385 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) // key_ancestors is a mapping of page attribute keys to a stack of // Pages nodes that contain values for them. std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; - this->m->all_pages.clear(); - std::set<QPDFObjGen> visited; pushInheritedAttributesToPageInternal( this->m->trailer.getKey("/Root").getKey("/Pages"), key_ancestors, - this->m->all_pages, allow_changes, - warn_skipped_keys, - visited); + warn_skipped_keys); if (!key_ancestors.empty()) { throw std::logic_error("key_ancestors not empty after" " pushing inherited attributes to pages"); @@ -169,154 +165,112 @@ void QPDF::pushInheritedAttributesToPageInternal( QPDFObjectHandle cur_pages, std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors, - std::vector<QPDFObjectHandle>& pages, bool allow_changes, - bool warn_skipped_keys, - std::set<QPDFObjGen>& visited) + bool warn_skipped_keys) { - QPDFObjGen this_og = cur_pages.getObjGen(); - if (visited.count(this_og) > 0) { - throw QPDFExc( - qpdf_e_pages, - this->m->file->getName(), - this->m->last_object_description, - 0, - "Loop detected in /Pages structure (inherited attributes)"); - } - visited.insert(this_og); - - if (!cur_pages.isDictionary()) { - throw QPDFExc( - qpdf_e_damaged_pdf, - this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "invalid object in page tree"); - } - - // Extract the underlying dictionary object - std::string type = cur_pages.getKey("/Type").getName(); - - if (type == "/Pages") { - // Make a list of inheritable keys. Only the keys /MediaBox, - // /CropBox, /Resources, and /Rotate are inheritable - // attributes. Push this object onto the stack of pages nodes - // that have values for this attribute. - - std::set<std::string> inheritable_keys; - for (auto const& key: cur_pages.getKeys()) { - if ((key == "/MediaBox") || (key == "/CropBox") || - (key == "/Resources") || (key == "/Rotate")) { - if (!allow_changes) { - throw QPDFExc( - qpdf_e_internal, - this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "optimize detected an " - "inheritable attribute when called " - "in no-change mode"); - } + // Make a list of inheritable keys. Only the keys /MediaBox, + // /CropBox, /Resources, and /Rotate are inheritable + // attributes. Push this object onto the stack of pages nodes + // that have values for this attribute. + + std::set<std::string> inheritable_keys; + for (auto const& key: cur_pages.getKeys()) { + if ((key == "/MediaBox") || (key == "/CropBox") || + (key == "/Resources") || (key == "/Rotate")) { + if (!allow_changes) { + throw QPDFExc( + qpdf_e_internal, + this->m->file->getName(), + this->m->last_object_description, + this->m->file->getLastOffset(), + "optimize detected an " + "inheritable attribute when called " + "in no-change mode"); + } - // This is an inheritable resource - inheritable_keys.insert(key); - QPDFObjectHandle oh = cur_pages.getKey(key); - QTC::TC( - "qpdf", - "QPDF opt direct pages resource", - oh.isIndirect() ? 0 : 1); - if (!oh.isIndirect()) { - if (!oh.isScalar()) { - // Replace shared direct object non-scalar - // resources with indirect objects to avoid - // copying large structures around. - cur_pages.replaceKey(key, makeIndirectObject(oh)); - oh = cur_pages.getKey(key); - } else { - // It's okay to copy scalars. - QTC::TC("qpdf", "QPDF opt inherited scalar"); - } - } - key_ancestors[key].push_back(oh); - if (key_ancestors[key].size() > 1) { - QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); - } - // Remove this resource from this node. It will be - // reattached at the page level. - cur_pages.removeKey(key); - } else if (!((key == "/Type") || (key == "/Parent") || - (key == "/Kids") || (key == "/Count"))) { - // Warn when flattening, but not if the key is at the top - // level (i.e. "/Parent" not set), as we don't change these; - // but flattening removes intermediate /Pages nodes. - if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { - QTC::TC("qpdf", "QPDF unknown key not inherited"); - setLastObjectDescription( - "Pages object", cur_pages.getObjGen()); - warn( - qpdf_e_pages, - this->m->last_object_description, - 0, - ("Unknown key " + key + - " in /Pages object" - " is being discarded as a result of" - " flattening the /Pages tree")); + // This is an inheritable resource + inheritable_keys.insert(key); + QPDFObjectHandle oh = cur_pages.getKey(key); + QTC::TC( + "qpdf", + "QPDF opt direct pages resource", + oh.isIndirect() ? 0 : 1); + if (!oh.isIndirect()) { + if (!oh.isScalar()) { + // Replace shared direct object non-scalar + // resources with indirect objects to avoid + // copying large structures around. + cur_pages.replaceKey(key, makeIndirectObject(oh)); + oh = cur_pages.getKey(key); + } else { + // It's okay to copy scalars. + QTC::TC("qpdf", "QPDF opt inherited scalar"); } } + key_ancestors[key].push_back(oh); + if (key_ancestors[key].size() > 1) { + QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); + } + // Remove this resource from this node. It will be + // reattached at the page level. + cur_pages.removeKey(key); + } else if (!((key == "/Type") || (key == "/Parent") || + (key == "/Kids") || (key == "/Count"))) { + // Warn when flattening, but not if the key is at the top + // level (i.e. "/Parent" not set), as we don't change these; + // but flattening removes intermediate /Pages nodes. + if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { + QTC::TC("qpdf", "QPDF unknown key not inherited"); + setLastObjectDescription("Pages object", cur_pages.getObjGen()); + warn( + qpdf_e_pages, + this->m->last_object_description, + 0, + ("Unknown key " + key + + " in /Pages object" + " is being discarded as a result of" + " flattening the /Pages tree")); + } } + } - // Visit descendant nodes. - QPDFObjectHandle kids = cur_pages.getKey("/Kids"); - int n = kids.getArrayNItems(); - for (int i = 0; i < n; ++i) { + // Process descendant nodes. + for (auto& kid: cur_pages.getKey("/Kids").aitems()) { + if (kid.isDictionaryOfType("/Pages")) { pushInheritedAttributesToPageInternal( - kids.getArrayItem(i), - key_ancestors, - pages, - allow_changes, - warn_skipped_keys, - visited); - } - - // For each inheritable key, pop the stack. If the stack - // becomes empty, remove it from the map. That way, the - // invariant that the list of keys in key_ancestors is exactly - // those keys for which inheritable attributes are available. - - if (!inheritable_keys.empty()) { - QTC::TC("qpdf", "QPDF opt inheritable keys"); - for (auto const& key: inheritable_keys) { - key_ancestors[key].pop_back(); - if (key_ancestors[key].empty()) { - QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); - key_ancestors.erase(key); + kid, key_ancestors, allow_changes, warn_skipped_keys); + } else { + // Add all available inheritable attributes not present in + // this object to this object. + for (auto const& iter: key_ancestors) { + std::string const& key = iter.first; + if (!kid.hasKey(key)) { + QTC::TC("qpdf", "QPDF opt resource inherited"); + kid.replaceKey(key, iter.second.back()); + } else { + QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); } } - } else { - QTC::TC("qpdf", "QPDF opt no inheritable keys"); } - } else if (type == "/Page") { - // Add all available inheritable attributes not present in - // this object to this object. - for (auto const& iter: key_ancestors) { - std::string const& key = iter.first; - if (!cur_pages.hasKey(key)) { - QTC::TC("qpdf", "QPDF opt resource inherited"); - cur_pages.replaceKey(key, iter.second.back()); - } else { - QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); + } + + // For each inheritable key, pop the stack. If the stack + // becomes empty, remove it from the map. That way, the + // invariant that the list of keys in key_ancestors is exactly + // those keys for which inheritable attributes are available. + + if (!inheritable_keys.empty()) { + QTC::TC("qpdf", "QPDF opt inheritable keys"); + for (auto const& key: inheritable_keys) { + key_ancestors[key].pop_back(); + if (key_ancestors[key].empty()) { + QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); + key_ancestors.erase(key); } } - pages.push_back(cur_pages); } else { - throw QPDFExc( - qpdf_e_damaged_pdf, - this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "invalid Type " + type + " in page tree"); + QTC::TC("qpdf", "QPDF opt no inheritable keys"); } - visited.erase(this_og); } void diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 8c9bfbaa..80e89b02 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -82,7 +82,10 @@ QPDF::getAllPages() getRoot().replaceKey("/Pages", pages); } seen.clear(); - getAllPagesInternal(pages, this->m->all_pages, visited, seen); + if (pages.hasKey("/Kids")) { + // Ensure we actually found a /Pages object. + getAllPagesInternal(pages, visited, seen); + } } return this->m->all_pages; } @@ -90,12 +93,11 @@ QPDF::getAllPages() void QPDF::getAllPagesInternal( QPDFObjectHandle cur_node, - std::vector<QPDFObjectHandle>& result, std::set<QPDFObjGen>& visited, std::set<QPDFObjGen>& seen) { - QPDFObjGen this_og = cur_node.getObjGen(); - if (visited.count(this_og) > 0) { + QPDFObjGen cur_node_og = cur_node.getObjGen(); + if (visited.count(cur_node_og) > 0) { throw QPDFExc( qpdf_e_pages, this->m->file->getName(), @@ -103,14 +105,19 @@ QPDF::getAllPagesInternal( 0, "Loop detected in /Pages structure (getAllPages)"); } - visited.insert(this_og); - std::string wanted_type; - if (cur_node.hasKey("/Kids")) { - wanted_type = "/Pages"; - QPDFObjectHandle kids = cur_node.getKey("/Kids"); - int n = kids.getArrayNItems(); - for (int i = 0; i < n; ++i) { - QPDFObjectHandle kid = kids.getArrayItem(i); + visited.insert(cur_node_og); + if (!cur_node.isDictionaryOfType("/Pages")) { + cur_node.warnIfPossible( + "/Type key should be /Pages but is not; overriding"); + cur_node.replaceKey("/Type", "/Pages"_qpdf); + } + auto kids = cur_node.getKey("/Kids"); + int n = kids.getArrayNItems(); + for (int i = 0; i < n; ++i) { + auto kid = kids.getArrayItem(i); + if (kid.hasKey("/Kids")) { + getAllPagesInternal(kid, visited, seen); + } else { if (!kid.isIndirect()) { QTC::TC("qpdf", "QPDF handle direct page object"); cur_node.warnIfPossible( @@ -129,23 +136,15 @@ QPDF::getAllPagesInternal( kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); kids.setArrayItem(i, kid); } - getAllPagesInternal(kid, result, visited, seen); + if (!kid.isDictionaryOfType("/Page")) { + kid.warnIfPossible( + "/Type key should be /Page but is not; overriding"); + kid.replaceKey("/Type", "/Page"_qpdf); + } + seen.insert(kid.getObjGen()); + m->all_pages.push_back(kid); } - } else { - wanted_type = "/Page"; - seen.insert(this_og); - result.push_back(cur_node); - } - - if (!cur_node.isDictionaryOfType(wanted_type)) { - warn( - qpdf_e_damaged_pdf, - "page tree node", - this->m->file->getLastOffset(), - "/Type key should be " + wanted_type + " but is not; overriding"); - cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); } - visited.erase(this_og); } void |