diff options
Diffstat (limited to 'libqpdf/QPDF_optimization.cc')
-rw-r--r-- | libqpdf/QPDF_optimization.cc | 380 |
1 files changed, 178 insertions, 202 deletions
diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index bf799a78..3d262b03 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -2,10 +2,10 @@ #include <qpdf/QPDF.hh> -#include <qpdf/QTC.hh> #include <qpdf/QPDFExc.hh> -#include <qpdf/QPDF_Dictionary.hh> #include <qpdf/QPDF_Array.hh> +#include <qpdf/QPDF_Dictionary.hh> +#include <qpdf/QTC.hh> #include <assert.h> QPDF::ObjUser::ObjUser() : @@ -39,18 +39,12 @@ QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : bool QPDF::ObjUser::operator<(ObjUser const& rhs) const { - if (this->ou_type < rhs.ou_type) - { + if (this->ou_type < rhs.ou_type) { return true; - } - else if (this->ou_type == rhs.ou_type) - { - if (this->pageno < rhs.pageno) - { + } else if (this->ou_type == rhs.ou_type) { + if (this->pageno < rhs.pageno) { return true; - } - else if (this->pageno == rhs.pageno) - { + } else if (this->pageno == rhs.pageno) { return (this->key < rhs.key); } } @@ -59,19 +53,18 @@ QPDF::ObjUser::operator<(ObjUser const& rhs) const } void -QPDF::optimize(std::map<int, int> const& object_stream_data, - bool allow_changes) +QPDF::optimize(std::map<int, int> const& object_stream_data, bool allow_changes) { optimize(object_stream_data, allow_changes, nullptr); } void -QPDF::optimize(std::map<int, int> const& object_stream_data, - bool allow_changes, - std::function<int(QPDFObjectHandle&)> skip_stream_parameters) +QPDF::optimize( + std::map<int, int> const& object_stream_data, + bool allow_changes, + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) { - if (! this->m->obj_user_to_objects.empty()) - { + if (!this->m->obj_user_to_objects.empty()) { // already optimized return; } @@ -80,11 +73,9 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, // be an indirect reference. Force it to be so if it exists and // is direct. (This has been seen in the wild.) QPDFObjectHandle root = getRoot(); - if (root.getKey("/Outlines").isDictionary()) - { + if (root.getKey("/Outlines").isDictionary()) { QPDFObjectHandle outlines = root.getKey("/Outlines"); - if (! outlines.isIndirect()) - { + if (!outlines.isIndirect()) { QTC::TC("qpdf", "QPDF_optimization indirect outlines"); root.replaceKey("/Outlines", makeIndirectObject(outlines)); } @@ -96,35 +87,33 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, // Traverse pages int n = toI(this->m->all_pages.size()); - for (int pageno = 0; pageno < n; ++pageno) - { - updateObjectMaps(ObjUser(ObjUser::ou_page, pageno), - this->m->all_pages.at(toS(pageno)), - skip_stream_parameters); + for (int pageno = 0; pageno < n; ++pageno) { + updateObjectMaps( + ObjUser(ObjUser::ou_page, pageno), + this->m->all_pages.at(toS(pageno)), + skip_stream_parameters); } // Traverse document-level items std::set<std::string> keys = this->m->trailer.getKeys(); for (std::set<std::string>::iterator iter = keys.begin(); - iter != keys.end(); ++iter) - { + iter != keys.end(); + ++iter) { std::string const& key = *iter; - if (key == "/Root") - { + if (key == "/Root") { // handled separately - } - else - { - updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, key), - this->m->trailer.getKey(key), - skip_stream_parameters); + } else { + updateObjectMaps( + ObjUser(ObjUser::ou_trailer_key, key), + this->m->trailer.getKey(key), + skip_stream_parameters); } } keys = root.getKeys(); for (std::set<std::string>::iterator iter = keys.begin(); - iter != keys.end(); ++iter) - { + iter != keys.end(); + ++iter) { // Technically, /I keys from /Thread dictionaries are supposed // to be handled separately, but we are going to disregard // that specification for now. There is loads of evidence @@ -133,9 +122,10 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, // problems. std::string const& key = *iter; - updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), - root.getKey(key), - skip_stream_parameters); + updateObjectMaps( + ObjUser(ObjUser::ou_root_key, key), + root.getKey(key), + skip_stream_parameters); } ObjUser root_ou = ObjUser(ObjUser::ou_root); @@ -162,8 +152,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) // The record of whether we've done this is cleared by // updateAllPagesCache(). If we're warning for skipped keys, // re-traverse unconditionally. - if (this->m->pushed_inherited_attributes_to_pages && (! warn_skipped_keys)) - { + if (this->m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { return; } @@ -172,18 +161,19 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) // key_ancestors is a mapping of page attribute keys to a stack of // Pages nodes that contain values for them. - std::map<std::string, std::vector<QPDFObjectHandle> > key_ancestors; + std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; this->m->all_pages.clear(); std::set<QPDFObjGen> visited; pushInheritedAttributesToPageInternal( this->m->trailer.getKey("/Root").getKey("/Pages"), - key_ancestors, this->m->all_pages, allow_changes, warn_skipped_keys, + key_ancestors, + this->m->all_pages, + allow_changes, + warn_skipped_keys, visited); - if (! key_ancestors.empty()) - { - throw std::logic_error( - "key_ancestors not empty after" - " pushing inherited attributes to pages"); + if (!key_ancestors.empty()) { + throw std::logic_error("key_ancestors not empty after" + " pushing inherited attributes to pages"); } this->m->pushed_inherited_attributes_to_pages = true; } @@ -191,34 +181,36 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) void QPDF::pushInheritedAttributesToPageInternal( QPDFObjectHandle cur_pages, - std::map<std::string, std::vector<QPDFObjectHandle> >& key_ancestors, + std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors, std::vector<QPDFObjectHandle>& pages, - bool allow_changes, bool warn_skipped_keys, + bool allow_changes, + bool warn_skipped_keys, std::set<QPDFObjGen>& visited) { QPDFObjGen this_og = cur_pages.getObjGen(); - if (visited.count(this_og) > 0) - { + if (visited.count(this_og) > 0) { throw QPDFExc( - qpdf_e_pages, this->m->file->getName(), - this->m->last_object_description, 0, + qpdf_e_pages, + this->m->file->getName(), + this->m->last_object_description, + 0, "Loop detected in /Pages structure (inherited attributes)"); } visited.insert(this_og); - if (! cur_pages.isDictionary()) - { - throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "invalid object in page tree"); + if (!cur_pages.isDictionary()) { + throw QPDFExc( + qpdf_e_damaged_pdf, + this->m->file->getName(), + this->m->last_object_description, + this->m->file->getLastOffset(), + "invalid object in page tree"); } // Extract the underlying dictionary object std::string type = cur_pages.getKey("/Type").getName(); - if (type == "/Pages") - { + if (type == "/Pages") { // Make a list of inheritable keys. Only the keys /MediaBox, // /CropBox, /Resources, and /Rotate are inheritable // attributes. Push this object onto the stack of pages nodes @@ -227,69 +219,68 @@ QPDF::pushInheritedAttributesToPageInternal( std::set<std::string> inheritable_keys; std::set<std::string> keys = cur_pages.getKeys(); for (std::set<std::string>::iterator iter = keys.begin(); - iter != keys.end(); ++iter) - { + iter != keys.end(); + ++iter) { std::string const& key = *iter; - if ( (key == "/MediaBox") || (key == "/CropBox") || - (key == "/Resources") || (key == "/Rotate") ) - { - if (! allow_changes) - { - throw QPDFExc(qpdf_e_internal, this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "optimize detected an " - "inheritable attribute when called " - "in no-change mode"); + if ((key == "/MediaBox") || (key == "/CropBox") || + (key == "/Resources") || (key == "/Rotate")) { + if (!allow_changes) { + throw QPDFExc( + qpdf_e_internal, + this->m->file->getName(), + this->m->last_object_description, + this->m->file->getLastOffset(), + "optimize detected an " + "inheritable attribute when called " + "in no-change mode"); } // This is an inheritable resource inheritable_keys.insert(key); QPDFObjectHandle oh = cur_pages.getKey(key); - QTC::TC("qpdf", "QPDF opt direct pages resource", - oh.isIndirect() ? 0 : 1); - if (! oh.isIndirect()) - { - if (! oh.isScalar()) - { + QTC::TC( + "qpdf", + "QPDF opt direct pages resource", + oh.isIndirect() ? 0 : 1); + if (!oh.isIndirect()) { + if (!oh.isScalar()) { // Replace shared direct object non-scalar // resources with indirect objects to avoid // copying large structures around. cur_pages.replaceKey(key, makeIndirectObject(oh)); oh = cur_pages.getKey(key); - } - else - { + } else { // It's okay to copy scalars. QTC::TC("qpdf", "QPDF opt inherited scalar"); } } key_ancestors[key].push_back(oh); - if (key_ancestors[key].size() > 1) - { + if (key_ancestors[key].size() > 1) { QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); } // Remove this resource from this node. It will be // reattached at the page level. cur_pages.removeKey(key); - } - else if (! ((key == "/Type") || (key == "/Parent") || - (key == "/Kids") || (key == "/Count"))) - { + } else if (!((key == "/Type") || (key == "/Parent") || + (key == "/Kids") || (key == "/Count"))) { // Warn when flattening, but not if the key is at the top // level (i.e. "/Parent" not set), as we don't change these; // but flattening removes intermediate /Pages nodes. - if ( (warn_skipped_keys) && (cur_pages.hasKey("/Parent")) ) - { + if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { QTC::TC("qpdf", "QPDF unknown key not inherited"); - setLastObjectDescription("Pages object", - cur_pages.getObjectID(), - cur_pages.getGeneration()); - warn(QPDFExc(qpdf_e_pages, this->m->file->getName(), - this->m->last_object_description, 0, - "Unknown key " + key + " in /Pages object" - " is being discarded as a result of" - " flattening the /Pages tree")); + setLastObjectDescription( + "Pages object", + cur_pages.getObjectID(), + cur_pages.getGeneration()); + warn(QPDFExc( + qpdf_e_pages, + this->m->file->getName(), + this->m->last_object_description, + 0, + "Unknown key " + key + + " in /Pages object" + " is being discarded as a result of" + " flattening the /Pages tree")); } } } @@ -297,11 +288,14 @@ QPDF::pushInheritedAttributesToPageInternal( // Visit descendant nodes. QPDFObjectHandle kids = cur_pages.getKey("/Kids"); int n = kids.getArrayNItems(); - for (int i = 0; i < n; ++i) - { + for (int i = 0; i < n; ++i) { pushInheritedAttributesToPageInternal( - kids.getArrayItem(i), key_ancestors, pages, - allow_changes, warn_skipped_keys, visited); + kids.getArrayItem(i), + key_ancestors, + pages, + allow_changes, + warn_skipped_keys, + visited); } // For each inheritable key, pop the stack. If the stack @@ -309,61 +303,53 @@ QPDF::pushInheritedAttributesToPageInternal( // invariant that the list of keys in key_ancestors is exactly // those keys for which inheritable attributes are available. - if (! inheritable_keys.empty()) - { + if (!inheritable_keys.empty()) { QTC::TC("qpdf", "QPDF opt inheritable keys"); for (std::set<std::string>::iterator iter = inheritable_keys.begin(); - iter != inheritable_keys.end(); ++iter) - { + iter != inheritable_keys.end(); + ++iter) { std::string const& key = (*iter); key_ancestors[key].pop_back(); - if (key_ancestors[key].empty()) - { + if (key_ancestors[key].empty()) { QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); key_ancestors.erase(key); } } - } - else - { + } else { QTC::TC("qpdf", "QPDF opt no inheritable keys"); } - } - else if (type == "/Page") - { + } else if (type == "/Page") { // Add all available inheritable attributes not present in // this object to this object. - for (std::map<std::string, std::vector<QPDFObjectHandle> >::iterator + for (std::map<std::string, std::vector<QPDFObjectHandle>>::iterator iter = key_ancestors.begin(); - iter != key_ancestors.end(); ++iter) - { + iter != key_ancestors.end(); + ++iter) { std::string const& key = (*iter).first; - if (! cur_pages.hasKey(key)) - { + if (!cur_pages.hasKey(key)) { QTC::TC("qpdf", "QPDF opt resource inherited"); cur_pages.replaceKey(key, (*iter).second.back()); - } - else - { + } else { QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); } } pages.push_back(cur_pages); - } - else - { - throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "invalid Type " + type + " in page tree"); + } else { + throw QPDFExc( + qpdf_e_damaged_pdf, + this->m->file->getName(), + this->m->last_object_description, + this->m->file->getLastOffset(), + "invalid Type " + type + " in page tree"); } visited.erase(this_og); } void QPDF::updateObjectMaps( - ObjUser const& ou, QPDFObjectHandle oh, + ObjUser const& ou, + QPDFObjectHandle oh, std::function<int(QPDFObjectHandle&)> skip_stream_parameters) { std::set<QPDFObjGen> visited; @@ -372,9 +358,11 @@ QPDF::updateObjectMaps( void QPDF::updateObjectMapsInternal( - ObjUser const& ou, QPDFObjectHandle oh, + ObjUser const& ou, + QPDFObjectHandle oh, std::function<int(QPDFObjectHandle&)> skip_stream_parameters, - std::set<QPDFObjGen>& visited, bool top, + std::set<QPDFObjGen>& visited, + bool top, int depth) { // Traverse the object tree from this point taking care to avoid @@ -382,20 +370,16 @@ QPDF::updateObjectMapsInternal( bool is_page_node = false; - if (oh.isDictionaryOfType("/Page")) - { + if (oh.isDictionaryOfType("/Page")) { is_page_node = true; - if (! top) - { + if (!top) { return; } } - if (oh.isIndirect()) - { + if (oh.isIndirect()) { QPDFObjGen og(oh.getObjGen()); - if (visited.count(og)) - { + if (visited.count(og)) { QTC::TC("qpdf", "QPDF opt loop detected"); return; } @@ -404,60 +388,59 @@ QPDF::updateObjectMapsInternal( visited.insert(og); } - if (oh.isArray()) - { + if (oh.isArray()) { int n = oh.getArrayNItems(); - for (int i = 0; i < n; ++i) - { + for (int i = 0; i < n; ++i) { updateObjectMapsInternal( - ou, oh.getArrayItem(i), skip_stream_parameters, - visited, false, 1 + depth); + ou, + oh.getArrayItem(i), + skip_stream_parameters, + visited, + false, + 1 + depth); } - } - else if (oh.isDictionary() || oh.isStream()) - { + } else if (oh.isDictionary() || oh.isStream()) { QPDFObjectHandle dict = oh; bool is_stream = oh.isStream(); int ssp = 0; - if (is_stream) - { + if (is_stream) { dict = oh.getDict(); - if (skip_stream_parameters) - { + if (skip_stream_parameters) { ssp = skip_stream_parameters(oh); } } std::set<std::string> keys = dict.getKeys(); for (std::set<std::string>::iterator iter = keys.begin(); - iter != keys.end(); ++iter) - { + iter != keys.end(); + ++iter) { std::string const& key = *iter; - if (is_page_node && (key == "/Thumb")) - { + if (is_page_node && (key == "/Thumb")) { // Traverse page thumbnail dictionaries as a special // case. updateObjectMapsInternal( ObjUser(ObjUser::ou_thumb, ou.pageno), - dict.getKey(key), skip_stream_parameters, - visited, false, 1 + depth); - } - else if (is_page_node && (key == "/Parent")) - { + dict.getKey(key), + skip_stream_parameters, + visited, + false, + 1 + depth); + } else if (is_page_node && (key == "/Parent")) { // Don't traverse back up the page tree - } - else if (((ssp >= 1) && (key == "/Length")) || - ((ssp >= 2) && ((key == "/Filter") || - (key == "/DecodeParms")))) - { + } else if ( + ((ssp >= 1) && (key == "/Length")) || + ((ssp >= 2) && + ((key == "/Filter") || (key == "/DecodeParms")))) { // Don't traverse into stream parameters that we are // not going to write. - } - else - { + } else { updateObjectMapsInternal( - ou, dict.getKey(key), skip_stream_parameters, - visited, false, 1 + depth); + ou, + dict.getKey(key), + skip_stream_parameters, + visited, + false, + 1 + depth); } } } @@ -466,8 +449,7 @@ QPDF::updateObjectMapsInternal( void QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) { - if (object_stream_data.empty()) - { + if (object_stream_data.empty()) { return; } @@ -476,50 +458,44 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) // user of a compressed object, then it is really a user of the // object stream that contains it. - std::map<ObjUser, std::set<QPDFObjGen> > t_obj_user_to_objects; - std::map<QPDFObjGen, std::set<ObjUser> > t_object_to_obj_users; + std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; + std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users; - for (std::map<ObjUser, std::set<QPDFObjGen> >::iterator i1 = + for (std::map<ObjUser, std::set<QPDFObjGen>>::iterator i1 = this->m->obj_user_to_objects.begin(); - i1 != this->m->obj_user_to_objects.end(); ++i1) - { + i1 != this->m->obj_user_to_objects.end(); + ++i1) { ObjUser const& ou = (*i1).first; std::set<QPDFObjGen> const& objects = (*i1).second; for (std::set<QPDFObjGen>::const_iterator i2 = objects.begin(); - i2 != objects.end(); ++i2) - { + i2 != objects.end(); + ++i2) { QPDFObjGen const& og = (*i2); std::map<int, int>::const_iterator i3 = object_stream_data.find(og.getObj()); - if (i3 == object_stream_data.end()) - { + if (i3 == object_stream_data.end()) { t_obj_user_to_objects[ou].insert(og); - } - else - { + } else { t_obj_user_to_objects[ou].insert(QPDFObjGen((*i3).second, 0)); } } } - for (std::map<QPDFObjGen, std::set<ObjUser> >::iterator i1 = + for (std::map<QPDFObjGen, std::set<ObjUser>>::iterator i1 = this->m->object_to_obj_users.begin(); - i1 != this->m->object_to_obj_users.end(); ++i1) - { + i1 != this->m->object_to_obj_users.end(); + ++i1) { QPDFObjGen const& og = (*i1).first; std::set<ObjUser> const& objusers = (*i1).second; for (std::set<ObjUser>::const_iterator i2 = objusers.begin(); - i2 != objusers.end(); ++i2) - { + i2 != objusers.end(); + ++i2) { ObjUser const& ou = (*i2); std::map<int, int>::const_iterator i3 = object_stream_data.find(og.getObj()); - if (i3 == object_stream_data.end()) - { + if (i3 == object_stream_data.end()) { t_object_to_obj_users[og].insert(ou); - } - else - { + } else { t_object_to_obj_users[QPDFObjGen((*i3).second, 0)].insert(ou); } } |