about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf
diff options
context:
space:
mode:
Diffstat (limited to 'libqpdf')
-rw-r--r-- libqpdf/QPDF.cc 87
-rw-r--r-- libqpdf/QPDFParser.cc 5
-rw-r--r-- libqpdf/QPDFWriter.cc 2
3 files changed, 37 insertions, 57 deletions
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 7446c6da..86846675 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e)
}
this->m->reconstructed_xref = true;
+ // We may find more objects, which may contain dangling references.
+ this->m->fixed_dangling_refs = false;
warn(damagedPDF("", 0, "file is damaged"));
warn(e);
@@ -1290,65 +1292,36 @@ QPDF::showXRefTable()
}
}
+// Resolve all objects in the xref table. If this triggers an xref table
+// reconstruction, abort and return false. Otherwise, return true.
+bool
+QPDF::resolveXRefTable()
+{
+ bool may_change = !this->m->reconstructed_xref;
+ for (auto& iter: this->m->xref_table) {
+ if (isUnresolved(iter.first)) {
+ resolve(iter.first);
+ if (may_change && this->m->reconstructed_xref) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Ensure all objects in the pdf file, including those in indirect
+// references, appear in the object cache.
void
QPDF::fixDanglingReferences(bool force)
{
- if (this->m->fixed_dangling_refs && (!force)) {
+ if (this->m->fixed_dangling_refs) {
return;
}
- this->m->fixed_dangling_refs = true;
-
- // Create a set of all known indirect objects including those
- // we've previously resolved and those that we have created.
- std::set<QPDFObjGen> to_process;
- for (auto const& iter: this->m->obj_cache) {
- to_process.insert(iter.first);
- }
- for (auto const& iter: this->m->xref_table) {
- to_process.insert(iter.first);
- }
-
- // For each non-scalar item to process, put it in the queue.
- std::list<QPDFObjectHandle> queue;
- queue.push_back(this->m->trailer);
- for (auto const& og: to_process) {
- auto obj = getObject(og);
- if (obj.isDictionary() || obj.isArray()) {
- queue.push_back(obj);
- } else if (obj.isStream()) {
- queue.push_back(obj.getDict());
- }
- }
-
- // Process the queue by recursively resolving all object
- // references. We don't need to do loop detection because we don't
- // traverse known indirect objects when processing the queue.
- while (!queue.empty()) {
- QPDFObjectHandle obj = queue.front();
- queue.pop_front();
- std::list<QPDFObjectHandle> to_check;
- if (obj.isDictionary()) {
- std::map<std::string, QPDFObjectHandle> members =
- obj.getDictAsMap();
- for (auto const& iter: members) {
- to_check.push_back(iter.second);
- }
- } else if (obj.isArray()) {
- auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj);
- arr->addExplicitElementsToList(to_check);
- }
- for (auto sub: to_check) {
- if (sub.isIndirect()) {
- if ((sub.getOwningQPDF() == this) &&
- isUnresolved(sub.getObjGen())) {
- QTC::TC("qpdf", "QPDF detected dangling ref");
- queue.push_back(sub);
- }
- } else {
- queue.push_back(sub);
- }
- }
+ if (!resolveXRefTable()) {
+ QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
+ resolveXRefTable();
}
+ this->m->fixed_dangling_refs = true;
}
size_t
@@ -1371,7 +1344,7 @@ QPDF::getAllObjects()
{
// After fixDanglingReferences is called, all objects are in the
// object cache.
- fixDanglingReferences(true);
+ fixDanglingReferences();
std::vector<QPDFObjectHandle> result;
for (auto const& iter: this->m->obj_cache) {
result.push_back(newIndirect(iter.first, iter.second.object));
@@ -1631,7 +1604,7 @@ QPDF::readObjectAtOffset(
bool try_recovery,
qpdf_offset_t offset,
std::string const& description,
- QPDFObjGen const& exp_og,
+ QPDFObjGen exp_og,
QPDFObjGen& og,
bool skip_cache_if_in_xref)
{
@@ -1814,7 +1787,7 @@ QPDF::readObjectAtOffset(
}
void
-QPDF::resolve(QPDFObjGen const& og)
+QPDF::resolve(QPDFObjGen og)
{
if (!isUnresolved(og)) {
return;
@@ -2082,6 +2055,8 @@ QPDF::reserveStream(QPDFObjGen const& og)
QPDFObjectHandle
QPDF::getObject(QPDFObjGen const& og)
{
+ // This method is called by the parser and therefore must not
+ // resolve any objects.
if (!isCached(og)) {
m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
}
diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc
index 452e741b..eca55a71 100644
--- a/libqpdf/QPDFParser.cc
+++ b/libqpdf/QPDFParser.cc
@@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
olist.at(size - 2).getIntValueAsInt(),
olist.back().getIntValueAsInt());
if (ref_og.isIndirect()) {
+ // This action has the desirable side effect
+ // of causing dangling references (references
+ // to indirect objects that don't appear in
+ // the PDF) in any parsed object to appear in
+ // the object cache.
object = context->getObject(ref_og);
indirect_ref = true;
} else {
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index b29f75b9..b07aef53 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -2266,7 +2266,7 @@ QPDFWriter::prepareFileForWrite()
// Make document extension level information direct as required by
// the spec.
- this->m->pdf.fixDanglingReferences(true);
+ this->m->pdf.fixDanglingReferences();
QPDFObjectHandle root = this->m->pdf.getRoot();
for (auto const& key: root.getKeys()) {
QPDFObjectHandle oh = root.getKey(key);