From 5cfcd4f361063df8e216489915758ce40a15f15b Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 17 Jan 2019 08:56:58 -0500 Subject: Additional checks for unreferenced resources Explicitly abandon removal of unreferenced resources if there are any lexical errors in the page's contents. This case always generated a warning, but it now also prevents removal of unreferenced resources, thus strongly decreasing the likelihood of data loss. --- libqpdf/QPDFPageObjectHelper.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'libqpdf/QPDFPageObjectHelper.cc') diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index ba647372..c5ede04f 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -99,11 +99,16 @@ QPDFPageObjectHelper::addContentTokenFilter( class NameWatcher: public QPDFObjectHandle::TokenFilter { public: + NameWatcher() : + saw_bad(false) + { + } virtual ~NameWatcher() { } virtual void handleToken(QPDFTokenizer::Token const&); std::set names; + bool saw_bad; }; void @@ -116,6 +121,10 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) this->names.insert( QPDFObjectHandle::newName(token.getValue()).getName()); } + else if (token.getType() == QPDFTokenizer::tt_bad) + { + saw_bad = true; + } writeToken(token); } @@ -134,6 +143,14 @@ QPDFPageObjectHelper::removeUnreferencedResources() "; not attempting to remove unreferenced objects from this page"); return; } + if (nw.saw_bad) + { + QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); + this->oh.warnIfPossible( + "Bad token found while scanning content stream; " + "not attempting to remove unreferenced objects from this page"); + return; + } // Walk through /Font and /XObject dictionaries, removing any // resources that are not referenced. We must make copies of // resource dictionaries down into the dictionaries are mutating -- cgit v1.2.3-54-g00ecf