summary refs log tree commit diff stats
path: root/libqpdf
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2021-03-01 22:43:35 +0100
committer Jay Berkenbilt <ejb@ql.org> 2021-03-03 23:05:49 +0100
commit e17585c2d2df9fea296364c0768c2ce5adbc4b91 (patch)
tree 356a5b3c7096175ed1ed08d1535a1e4d50f0484d /libqpdf
parent a15ec6967dd3312223a6ab7d4198655234e1a4bf (diff)
download qpdf-e17585c2d2df9fea296364c0768c2ce5adbc4b91.tar.zst
Remove unreferenced: ignore names that are not Fonts or XObjects
Converted ResourceFinder to ParserCallbacks so we can better detect the name that precedes various operators and use the operators to sort the names into resource types. This enables us to be smarter about detecting unreferenced resources in pages and also sets the stage for reconciling differences in /DR across documents.
Diffstat (limited to 'libqpdf')
-rw-r--r-- libqpdf/QPDFPageObjectHelper.cc 33
-rw-r--r-- libqpdf/ResourceFinder.cc 55
-rw-r--r-- libqpdf/qpdf/ResourceFinder.hh 13
3 files changed, 75 insertions, 26 deletions
diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc
index 58144a3f..344ff15e 100644
--- a/libqpdf/QPDFPageObjectHelper.cc
+++ b/libqpdf/QPDFPageObjectHelper.cc
@@ -684,7 +684,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
ResourceFinder rf;
try
{
- ph.filterContents(&rf);
+ ph.parseContents(&rf);
}
catch (std::exception& e)
{
@@ -711,9 +711,9 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
std::vector<QPDFObjectHandle> rdicts;
std::set<std::string> known_names;
+ std::vector<std::string> to_filter = {"/Font", "/XObject"};
if (resources.isDictionary())
{
- std::vector<std::string> to_filter = {"/Font", "/XObject"};
for (auto const& iter: to_filter)
{
QPDFObjectHandle dict = resources.getKey(iter);
@@ -729,12 +729,17 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
}
std::set<std::string> local_unresolved;
- for (auto const& name: rf.getNames())
+ auto names_by_rtype = rf.getNamesByResourceType();
+ for (auto const& i1: to_filter)
{
- if (! known_names.count(name))
+ for (auto const& n_iter: names_by_rtype[i1])
{
- unresolved.insert(name);
- local_unresolved.insert(name);
+ std::string const& name = n_iter.first;
+ if (! known_names.count(name))
+ {
+ unresolved.insert(name);
+ local_unresolved.insert(name);
+ }
}
}
// Older versions of the PDF spec allowed form XObjects to omit
@@ -754,11 +759,17 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
if ((! local_unresolved.empty()) && resources.isDictionary())
{
- // Don't issue a warning for this case. There are some cases
- // of names that aren't XObject references, for example,
- // /Artifact in tagged PDF. Until we are certain that we know
- // the meaning of every name in a content stream, we don't
- // want to give warnings because they will be false positives.
+ // It's not worth issuing a warning for this case. From qpdf
+ // 10.3, we are hopefully only looking at names that are
+ // referencing fonts and XObjects, but until we're certain
+ // that we know the meaning of every name in a content stream,
+ // we don't want to give warnings that might be false
+ // positives. Also, this can happen in legitimate cases with
+ // older PDFs, and there's nothing to be done about it, so
+ // there's no good reason to issue a warning. The only sad
+ // thing is that it was a false positive that alerted me to a
+ // logic error in the code, and any future such errors would
+ // now be hidden.
QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
return false;
}
diff --git a/libqpdf/ResourceFinder.cc b/libqpdf/ResourceFinder.cc
index 74ba671f..6b9929e4 100644
--- a/libqpdf/ResourceFinder.cc
+++ b/libqpdf/ResourceFinder.cc
@@ -1,28 +1,53 @@
#include <qpdf/ResourceFinder.hh>
ResourceFinder::ResourceFinder() :
+ last_name_offset(0),
saw_bad(false)
{
}
void
-ResourceFinder::handleToken(QPDFTokenizer::Token const& token)
+ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t)
{
- if ((token.getType() == QPDFTokenizer::tt_word) &&
- (! this->last_name.empty()))
+ if (obj.isOperator() && (! this->last_name.empty()))
{
- this->names.insert(this->last_name);
+ static std::map<std::string, std::string> op_to_rtype = {
+ {"CS", "/ColorSpace"},
+ {"cs", "/ColorSpace"},
+ {"gs", "/ExtGState"},
+ {"Tf", "/Font"},
+ {"SCN", "/Pattern"},
+ {"scn", "/Pattern"},
+ {"BDC", "/Properties"},
+ {"DP", "/Properties"},
+ {"sh", "/Shading"},
+ {"Do", "/XObject"},
+ };
+ std::string op = obj.getOperatorValue();
+ std::string resource_type;
+ auto iter = op_to_rtype.find(op);
+ if (iter != op_to_rtype.end())
+ {
+ resource_type = iter->second;
+ }
+ if (! resource_type.empty())
+ {
+ this->names.insert(this->last_name);
+ this->names_by_resource_type[
+ resource_type][this->last_name].insert(this->last_name_offset);
+ }
}
- else if (token.getType() == QPDFTokenizer::tt_name)
+ else if (obj.isName())
{
- this->last_name =
- QPDFObjectHandle::newName(token.getValue()).getName();
+ this->last_name = obj.getName();
+ this->last_name_offset = offset;
}
- else if (token.getType() == QPDFTokenizer::tt_bad)
- {
- saw_bad = true;
- }
- writeToken(token);
+}
+
+void
+ResourceFinder::handleWarning()
+{
+ this->saw_bad = true;
}
std::set<std::string> const&
@@ -31,6 +56,12 @@ ResourceFinder::getNames() const
return this->names;
}
+std::map<std::string, std::map<std::string, std::set<size_t>>> const&
+ResourceFinder::getNamesByResourceType() const
+{
+ return this->names_by_resource_type;
+}
+
bool
ResourceFinder::sawBad() const
{
diff --git a/libqpdf/qpdf/ResourceFinder.hh b/libqpdf/qpdf/ResourceFinder.hh
index 0ac74eab..ac3d5b4c 100644
--- a/libqpdf/qpdf/ResourceFinder.hh
+++ b/libqpdf/qpdf/ResourceFinder.hh
@@ -3,19 +3,26 @@
#include <qpdf/QPDFObjectHandle.hh>
-class ResourceFinder: public QPDFObjectHandle::TokenFilter
+class ResourceFinder: public QPDFObjectHandle::ParserCallbacks
{
public:
ResourceFinder();
virtual ~ResourceFinder() = default;
- virtual void handleToken(QPDFTokenizer::Token const&) override;
+ virtual void handleObject(QPDFObjectHandle, size_t, size_t) override;
+ virtual void handleWarning() override;
std::set<std::string> const& getNames() const;
+ std::map<std::string,
+ std::map<std::string,
+ std::set<size_t>>> const& getNamesByResourceType() const;
bool sawBad() const;
private:
std::string last_name;
+ size_t last_name_offset;
std::set<std::string> names;
- std::map<std::string, std::set<std::string>> names_by_resource_type;
+ std::map<std::string,
+ std::map<std::string,
+ std::set<size_t>>> names_by_resource_type;
bool saw_bad;
};