From a01359189b32c60c2d55b039f7aefd6c3ce0ebde Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 4 Jan 2019 10:17:33 -0500 Subject: Fix dangling references (fixes #240) On certain operations, such as iterating through all objects and adding new indirect objects, walk through the entire object structure and explicitly resolve any indirect references to non-existent objects. That prevents new objects from springing into existence and causing the previously dangling references to point to them. --- libqpdf/QPDF.cc | 127 ++++++++++++++++++++++++++++++++++++++++++++------ libqpdf/QPDFWriter.cc | 1 + 2 files changed, 113 insertions(+), 15 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 75ee1f13..caea6424 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -94,6 +94,7 @@ QPDF::Members::Members() : pushed_inherited_attributes_to_pages(false), copied_stream_data_provider(0), reconstructed_xref(false), + fixed_dangling_refs(false), first_xref_item_offset(0), uncompressed_after_compressed(false) { @@ -1218,33 +1219,129 @@ QPDF::showXRefTable() } } +void +QPDF::fixDanglingReferences(bool force) +{ + if (this->m->fixed_dangling_refs && (! force)) + { + return; + } + this->m->fixed_dangling_refs = true; + + // Create a set of all known indirect objects including those + // we've previously resolved and those that we have created. + std::set to_process; + for (std::map::iterator iter = + this->m->obj_cache.begin(); + iter != this->m->obj_cache.end(); ++iter) + { + to_process.insert((*iter).first); + } + for (std::map::iterator iter = + this->m->xref_table.begin(); + iter != this->m->xref_table.end(); ++iter) + { + to_process.insert((*iter).first); + } + + // For each non-scalar item to process, put it in the queue. + std::list queue; + queue.push_back(this->m->trailer); + for (std::set::iterator iter = to_process.begin(); + iter != to_process.end(); ++iter) + { + QPDFObjectHandle obj = QPDFObjectHandle::Factory::newIndirect( + this, (*iter).getObj(), (*iter).getGen()); + if (obj.isDictionary() || obj.isArray()) + { + queue.push_back(obj); + } + else if (obj.isStream()) + { + queue.push_back(obj.getDict()); + } + } + + // Process the queue by recursively resolving all object + // references. We don't need to do loop detection because we don't + // traverse known indirect objects when processing the queue. + while (! queue.empty()) + { + QPDFObjectHandle obj = queue.front(); + queue.pop_front(); + std::list to_check; + if (obj.isDictionary()) + { + std::map members = + obj.getDictAsMap(); + for (std::map::iterator iter = + members.begin(); + iter != members.end(); ++iter) + { + to_check.push_back((*iter).second); + } + } + else if (obj.isArray()) + { + std::vector elements = obj.getArrayAsVector(); + for (std::vector::iterator iter = + elements.begin(); + iter != elements.end(); ++iter) + { + to_check.push_back(*iter); + } + } + for (std::list::iterator iter = to_check.begin(); + iter != to_check.end(); ++iter) + { + QPDFObjectHandle sub = *iter; + if (sub.isIndirect()) + { + if (sub.getOwningQPDF() == this) + { + QPDFObjGen og(sub.getObjGen()); + if (this->m->obj_cache.count(og) == 0) + { + QTC::TC("qpdf", "QPDF detected dangling ref"); + queue.push_back(sub); + } + } + } + else + { + queue.push_back(sub); + } + } + + } +} + size_t QPDF::getObjectCount() { // This method returns the next available indirect object number. - // makeIndirectObject uses it for this purpose. - QPDFObjGen o1(0, 0); + // makeIndirectObject uses it for this purpose. After + // fixDanglingReferences is called, all objects in the xref table + // will also be in obj_cache. + fixDanglingReferences(); + QPDFObjGen og(0, 0); if (! this->m->obj_cache.empty()) { - o1 = (*(this->m->obj_cache.rbegin())).first; - } - QPDFObjGen o2(0, 0); - if (! this->m->xref_table.empty()) - { - o2 = (*(this->m->xref_table.rbegin())).first; + og = (*(this->m->obj_cache.rbegin())).first; } - QTC::TC("qpdf", "QPDF indirect last obj from xref", - (o2.getObj() > o1.getObj()) ? 1 : 0); - return std::max(o1.getObj(), o2.getObj()); + return og.getObj(); } std::vector QPDF::getAllObjects() { + // After fixDanglingReferences is called, all objects are in the + // object cache. + fixDanglingReferences(true); std::vector result; - for (std::map::iterator iter = - this->m->xref_table.begin(); - iter != this->m->xref_table.end(); ++iter) + for (std::map::iterator iter = + this->m->obj_cache.begin(); + iter != this->m->obj_cache.end(); ++iter) { QPDFObjGen const& og = (*iter).first; @@ -1752,7 +1849,6 @@ QPDF::resolve(int objid, int generation) } ResolveRecorder rr(this, og); - // PDF spec says unknown objects resolve to the null object. if ((! this->m->obj_cache.count(og)) && this->m->xref_table.count(og)) { QPDFXRefEntry const& entry = this->m->xref_table[og]; @@ -1800,6 +1896,7 @@ QPDF::resolve(int objid, int generation) } if (this->m->obj_cache.count(og) == 0) { + // PDF spec says unknown objects resolve to the null object. QTC::TC("qpdf", "QPDF resolve failure to null"); QPDFObjectHandle oh = QPDFObjectHandle::newNull(); this->m->obj_cache[og] = diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 71c48333..22b990ff 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -2244,6 +2244,7 @@ QPDFWriter::prepareFileForWrite() // includes stream lengths, stream filtering parameters, and // document extension level information. + this->m->pdf.fixDanglingReferences(true); std::list queue; queue.push_back(getTrimmedTrailer()); std::set visited; -- cgit v1.2.3-54-g00ecf