diff options
Diffstat (limited to 'libqpdf')
-rw-r--r-- | libqpdf/QPDF.cc | 87 | ||||
-rw-r--r-- | libqpdf/QPDFParser.cc | 5 | ||||
-rw-r--r-- | libqpdf/QPDFWriter.cc | 2 |
3 files changed, 37 insertions, 57 deletions
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 7446c6da..86846675 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e) } this->m->reconstructed_xref = true; + // We may find more objects, which may contain dangling references. + this->m->fixed_dangling_refs = false; warn(damagedPDF("", 0, "file is damaged")); warn(e); @@ -1290,65 +1292,36 @@ QPDF::showXRefTable() } } +// Resolve all objects in the xref table. If this triggers a xref table +// reconstruction abort and return false. Otherwise return true. +bool +QPDF::resolveXRefTable() +{ + bool may_change = !this->m->reconstructed_xref; + for (auto& iter: this->m->xref_table) { + if (isUnresolved(iter.first)) { + resolve(iter.first); + if (may_change && this->m->reconstructed_xref) { + return false; + } + } + } + return true; +} + +// Ensure all objects in the pdf file, including those in indirect +// references, appear in the object cache. void QPDF::fixDanglingReferences(bool force) { - if (this->m->fixed_dangling_refs && (!force)) { + if (this->m->fixed_dangling_refs) { return; } - this->m->fixed_dangling_refs = true; - - // Create a set of all known indirect objects including those - // we've previously resolved and those that we have created. - std::set<QPDFObjGen> to_process; - for (auto const& iter: this->m->obj_cache) { - to_process.insert(iter.first); - } - for (auto const& iter: this->m->xref_table) { - to_process.insert(iter.first); - } - - // For each non-scalar item to process, put it in the queue. - std::list<QPDFObjectHandle> queue; - queue.push_back(this->m->trailer); - for (auto const& og: to_process) { - auto obj = getObject(og); - if (obj.isDictionary() || obj.isArray()) { - queue.push_back(obj); - } else if (obj.isStream()) { - queue.push_back(obj.getDict()); - } - } - - // Process the queue by recursively resolving all object - // references. We don't need to do loop detection because we don't - // traverse known indirect objects when processing the queue. - while (!queue.empty()) { - QPDFObjectHandle obj = queue.front(); - queue.pop_front(); - std::list<QPDFObjectHandle> to_check; - if (obj.isDictionary()) { - std::map<std::string, QPDFObjectHandle> members = - obj.getDictAsMap(); - for (auto const& iter: members) { - to_check.push_back(iter.second); - } - } else if (obj.isArray()) { - auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj); - arr->addExplicitElementsToList(to_check); - } - for (auto sub: to_check) { - if (sub.isIndirect()) { - if ((sub.getOwningQPDF() == this) && - isUnresolved(sub.getObjGen())) { - QTC::TC("qpdf", "QPDF detected dangling ref"); - queue.push_back(sub); - } - } else { - queue.push_back(sub); - } - } + if (!resolveXRefTable()) { + QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction"); + resolveXRefTable(); } + this->m->fixed_dangling_refs = true; } size_t @@ -1371,7 +1344,7 @@ QPDF::getAllObjects() { // After fixDanglingReferences is called, all objects are in the // object cache. - fixDanglingReferences(true); + fixDanglingReferences(); std::vector<QPDFObjectHandle> result; for (auto const& iter: this->m->obj_cache) { result.push_back(newIndirect(iter.first, iter.second.object)); @@ -1631,7 +1604,7 @@ QPDF::readObjectAtOffset( bool try_recovery, qpdf_offset_t offset, std::string const& description, - QPDFObjGen const& exp_og, + QPDFObjGen exp_og, QPDFObjGen& og, bool skip_cache_if_in_xref) { @@ -1814,7 +1787,7 @@ QPDF::readObjectAtOffset( } void -QPDF::resolve(QPDFObjGen const& og) +QPDF::resolve(QPDFObjGen og) { if (!isUnresolved(og)) { return; @@ -2082,6 +2055,8 @@ QPDF::reserveStream(QPDFObjGen const& og) QPDFObjectHandle QPDF::getObject(QPDFObjGen const& og) { + // This method is called by the parser and therefore must not + // resolve any objects. if (!isCached(og)) { m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); } diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 452e741b..eca55a71 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream) olist.at(size - 2).getIntValueAsInt(), olist.back().getIntValueAsInt()); if (ref_og.isIndirect()) { + // This action has the desirable side effect + // of causing dangling references (references + // to indirect objects that don't appear in + // the PDF) in any parsed object to appear in + // the object cache. object = context->getObject(ref_og); indirect_ref = true; } else { diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index b29f75b9..b07aef53 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -2266,7 +2266,7 @@ QPDFWriter::prepareFileForWrite() // Make document extension level information direct as required by // the spec. - this->m->pdf.fixDanglingReferences(true); + this->m->pdf.fixDanglingReferences(); QPDFObjectHandle root = this->m->pdf.getRoot(); for (auto const& key: root.getKeys()) { QPDFObjectHandle oh = root.getKey(key); |