From 3f632458ae15b5c63b639e46bf2d89653401d8aa Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 24 Nov 2022 16:50:46 +0000 Subject: Refactor QPDF::fixDanglingReferences --- libqpdf/QPDF.cc | 83 ++++++++++++++++++++++----------------------------- libqpdf/QPDFParser.cc | 5 ++++ qpdf/qpdf.testcov | 1 - 3 files changed, 40 insertions(+), 49 deletions(-) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 7446c6da..7e3326e8 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e) } this->m->reconstructed_xref = true; + // We may find more objects, which may contain dangling references. + this->m->fixed_dangling_refs = false; warn(damagedPDF("", 0, "file is damaged")); warn(e); @@ -1290,65 +1292,48 @@ QPDF::showXRefTable() } } +// Ensure all objects in the pdf file, including those in indirect references, +// appear in the object cache. void QPDF::fixDanglingReferences(bool force) { - if (this->m->fixed_dangling_refs && (!force)) { + if (this->m->fixed_dangling_refs && !force) { return; } - this->m->fixed_dangling_refs = true; - - // Create a set of all known indirect objects including those - // we've previously resolved and those that we have created. - std::set to_process; - for (auto const& iter: this->m->obj_cache) { - to_process.insert(iter.first); - } - for (auto const& iter: this->m->xref_table) { - to_process.insert(iter.first); - } - // For each non-scalar item to process, put it in the queue. - std::list queue; - queue.push_back(this->m->trailer); - for (auto const& og: to_process) { - auto obj = getObject(og); - if (obj.isDictionary() || obj.isArray()) { - queue.push_back(obj); - } else if (obj.isStream()) { - queue.push_back(obj.getDict()); - } - } - - // Process the queue by recursively resolving all object - // references. We don't need to do loop detection because we don't - // traverse known indirect objects when processing the queue. - while (!queue.empty()) { - QPDFObjectHandle obj = queue.front(); - queue.pop_front(); - std::list to_check; - if (obj.isDictionary()) { - std::map members = - obj.getDictAsMap(); - for (auto const& iter: members) { - to_check.push_back(iter.second); + if (!this->m->fixed_dangling_refs) { + // First pass is only run if the the xref table has not been + // reconstructed. It will be terminated as soon as reconstruction is + // triggered. + if (!this->m->reconstructed_xref) { + for (auto const& iter: this->m->xref_table) { + auto og = iter.first; + if (!isCached(og)) { + m->obj_cache[og] = + ObjCache(QPDF_Unresolved::create(this, og), -1, -1); + if (this->m->reconstructed_xref) { + break; + } + } } - } else if (obj.isArray()) { - auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj); - arr->addExplicitElementsToList(to_check); - } - for (auto sub: to_check) { - if (sub.isIndirect()) { - if ((sub.getOwningQPDF() == this) && - isUnresolved(sub.getObjGen())) { - QTC::TC("qpdf", "QPDF detected dangling ref"); - queue.push_back(sub); + } + // Second pass is skipped if the first pass did not trigger + // reconstruction of the xref table. + if (this->m->reconstructed_xref) { + for (auto const& iter: this->m->xref_table) { + auto og = iter.first; + if (!isCached(og)) { + m->obj_cache[og] = + ObjCache(QPDF_Unresolved::create(this, og), -1, -1); } - } else { - queue.push_back(sub); } } } + // Final pass adds all indirect references to the object cache. + for (auto const& iter: this->m->obj_cache) { + resolve(iter.first); + } + this->m->fixed_dangling_refs = true; } size_t @@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const& og) QPDFObjectHandle QPDF::getObject(QPDFObjGen const& og) { + // This method is called by the parser and therefore must not + // resolve any objects. if (!isCached(og)) { m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); } diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 452e741b..eca55a71 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream) olist.at(size - 2).getIntValueAsInt(), olist.back().getIntValueAsInt()); if (ref_og.isIndirect()) { + // This action has the desirable side effect + // of causing dangling references (references + // to indirect objects that don't appear in + // the PDF) in any parsed object to appear in + // the object cache. object = context->getObject(ref_og); indirect_ref = true; } else { diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index d9215dd1..b1c9e697 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0 QPDFFormFieldObjectHelper list found 0 QPDFFormFieldObjectHelper list first too low 0 QPDFFormFieldObjectHelper list last too high 0 -QPDF detected dangling ref 0 QPDFJob image optimize no pipeline 0 QPDFJob image optimize no shrink 0 QPDFJob image optimize too small 0 -- cgit v1.2.3-70-g09d2