diff options
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/QPDFPageObjectHelper.cc | 33 | ||||
-rw-r--r-- | libqpdf/ResourceFinder.cc | 55 | ||||
-rw-r--r-- | libqpdf/qpdf/ResourceFinder.hh | 13 |
3 files changed, 75 insertions, 26 deletions
diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index 58144a3f..344ff15e 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -684,7 +684,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( ResourceFinder rf; try { - ph.filterContents(&rf); + ph.parseContents(&rf); } catch (std::exception& e) { @@ -711,9 +711,9 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( QPDFObjectHandle resources = ph.getAttribute("/Resources", true); std::vector<QPDFObjectHandle> rdicts; std::set<std::string> known_names; + std::vector<std::string> to_filter = {"/Font", "/XObject"}; if (resources.isDictionary()) { - std::vector<std::string> to_filter = {"/Font", "/XObject"}; for (auto const& iter: to_filter) { QPDFObjectHandle dict = resources.getKey(iter); @@ -729,12 +729,17 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( } std::set<std::string> local_unresolved; - for (auto const& name: rf.getNames()) + auto names_by_rtype = rf.getNamesByResourceType(); + for (auto const& i1: to_filter) { - if (! known_names.count(name)) + for (auto const& n_iter: names_by_rtype[i1]) { - unresolved.insert(name); - local_unresolved.insert(name); + std::string const& name = n_iter.first; + if (! known_names.count(name)) + { + unresolved.insert(name); + local_unresolved.insert(name); + } } } // Older versions of the PDF spec allowed form XObjects to omit @@ -754,11 +759,17 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( if ((! local_unresolved.empty()) && resources.isDictionary()) { - // Don't issue a warning for this case. There are some cases - // of names that aren't XObject references, for example, - // /Artifact in tagged PDF. Until we are certain that we know - // the meaning of every name in a content stream, we don't - // want to give warnings because they will be false positives. + // It's not worth issuing a warning for this case. From qpdf + // 10.3, we are hopefully only looking at names that are + // referencing fonts and XObjects, but until we're certain + // that we know the meaning of every name in a content stream, + // we don't want to give warnings that might be false + // positives. Also, this can happen in legitimate cases with + // older PDFs, and there's nothing to be done about it, so + // there's no good reason to issue a warning. The only sad + // thing is that it was a false positive that alerted me to a + // logic error in the code, and any future such errors would + // now be hidden. QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names"); return false; } diff --git a/libqpdf/ResourceFinder.cc b/libqpdf/ResourceFinder.cc index 74ba671f..6b9929e4 100644 --- a/libqpdf/ResourceFinder.cc +++ b/libqpdf/ResourceFinder.cc @@ -1,28 +1,53 @@ #include <qpdf/ResourceFinder.hh> ResourceFinder::ResourceFinder() : + last_name_offset(0), saw_bad(false) { } void -ResourceFinder::handleToken(QPDFTokenizer::Token const& token) +ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) { - if ((token.getType() == QPDFTokenizer::tt_word) && - (! this->last_name.empty())) + if (obj.isOperator() && (! this->last_name.empty())) { - this->names.insert(this->last_name); + static std::map<std::string, std::string> op_to_rtype = { + {"CS", "/ColorSpace"}, + {"cs", "/ColorSpace"}, + {"gs", "/ExtGState"}, + {"Tf", "/Font"}, + {"SCN", "/Pattern"}, + {"scn", "/Pattern"}, + {"BDC", "/Properties"}, + {"DP", "/Properties"}, + {"sh", "/Shading"}, + {"Do", "/XObject"}, + }; + std::string op = obj.getOperatorValue(); + std::string resource_type; + auto iter = op_to_rtype.find(op); + if (iter != op_to_rtype.end()) + { + resource_type = iter->second; + } + if (! resource_type.empty()) + { + this->names.insert(this->last_name); + this->names_by_resource_type[ + resource_type][this->last_name].insert(this->last_name_offset); + } } - else if (token.getType() == QPDFTokenizer::tt_name) + else if (obj.isName()) { - this->last_name = - QPDFObjectHandle::newName(token.getValue()).getName(); + this->last_name = obj.getName(); + this->last_name_offset = offset; } - else if (token.getType() == QPDFTokenizer::tt_bad) - { - saw_bad = true; - } - writeToken(token); +} + +void +ResourceFinder::handleWarning() +{ + this->saw_bad = true; } std::set<std::string> const& @@ -31,6 +56,12 @@ ResourceFinder::getNames() const return this->names; } +std::map<std::string, std::map<std::string, std::set<size_t>>> const& +ResourceFinder::getNamesByResourceType() const +{ + return this->names_by_resource_type; +} + bool ResourceFinder::sawBad() const { diff --git a/libqpdf/qpdf/ResourceFinder.hh b/libqpdf/qpdf/ResourceFinder.hh index 0ac74eab..ac3d5b4c 100644 --- a/libqpdf/qpdf/ResourceFinder.hh +++ b/libqpdf/qpdf/ResourceFinder.hh @@ -3,19 +3,26 @@ #include <qpdf/QPDFObjectHandle.hh> -class ResourceFinder: public QPDFObjectHandle::TokenFilter +class ResourceFinder: public QPDFObjectHandle::ParserCallbacks { public: ResourceFinder(); virtual ~ResourceFinder() = default; - virtual void handleToken(QPDFTokenizer::Token const&) override; + virtual void handleObject(QPDFObjectHandle, size_t, size_t) override; + virtual void handleWarning() override; std::set<std::string> const& getNames() const; + std::map<std::string, + std::map<std::string, + std::set<size_t>>> const& getNamesByResourceType() const; bool sawBad() const; private: std::string last_name; + size_t last_name_offset; std::set<std::string> names; - std::map<std::string, std::set<std::string>> names_by_resource_type; + std::map<std::string, + std::map<std::string, + std::set<size_t>>> names_by_resource_type; bool saw_bad; }; |