aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2022-09-07 22:49:31 +0200
committerJay Berkenbilt <ejb@ql.org>2022-09-08 16:19:38 +0200
commit264e25f391f83bcbeb60590f18ff96719b086454 (patch)
treead7a6daae5a2350d56a521eb022fea30e799b95c
parenta615985865ca73249a7b21e2f28b440cb6c16636 (diff)
downloadqpdf-264e25f391f83bcbeb60590f18ff96719b086454.tar.zst
Clear owning QPDF information for all objects, not just indirect
-rw-r--r--TODO7
-rw-r--r--include/qpdf/QPDFObjectHandle.hh18
-rw-r--r--libqpdf/QPDF.cc23
-rw-r--r--libqpdf/QPDFObjectHandle.cc11
-rw-r--r--libqpdf/QPDFWriter.cc8
-rw-r--r--libqpdf/QPDF_Array.cc6
-rw-r--r--libqpdf/QPDF_Dictionary.cc8
-rw-r--r--libqpdf/QPDF_Stream.cc7
-rw-r--r--libqpdf/SparseOHArray.cc8
-rw-r--r--libqpdf/qpdf/QPDFValue.hh4
-rw-r--r--libqpdf/qpdf/QPDFValueProxy.hh17
-rw-r--r--libqpdf/qpdf/QPDF_Array.hh1
-rw-r--r--libqpdf/qpdf/QPDF_Dictionary.hh1
-rw-r--r--libqpdf/qpdf/QPDF_Stream.hh1
-rw-r--r--libqpdf/qpdf/SparseOHArray.hh1
-rw-r--r--qpdf/test_driver.cc35
16 files changed, 141 insertions, 15 deletions
diff --git a/TODO b/TODO
index f91a7d65..cdec24b7 100644
--- a/TODO
+++ b/TODO
@@ -811,3 +811,10 @@ Rejected Ideas
Note that arrays and dictionaries still need to contain
QPDFObjectHandle because of indirect objects. This only pertains to
direct objects, which are always "resolved" in QPDFObjectHandle.
+
+ If this is addressed, read comments in QPDFWriter.cc::enqueueObject
+ near the call to getOwningQPDF, comments in QPDFValueProxy::reset,
+ and comments in QPDF::~QPDF() near the line that assigns to null.
+ This will also affect test 92 in test_driver.cc. All these
+ references were from the release of qpdf 11 (in case they have moved
+ by such time as this might be resurrected).
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index f3c8c8eb..76e36eae 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -1534,6 +1534,23 @@ class QPDFObjectHandle
};
friend class ObjAccessor;
+ // Provide access to specific classes for recursive
+ // reset().
+ class Resetter
+ {
+ friend class QPDF_Dictionary;
+ friend class QPDF_Stream;
+ friend class SparseOHArray;
+
+ private:
+ static void
+ reset(QPDFObjectHandle& o)
+ {
+ o.reset();
+ }
+ };
+ friend class Resetter;
+
// Convenience routine: Throws if the assumption is violated. Your
// code will be better if you call one of the isType methods and
// handle the case of the type being wrong, but these can be
@@ -1631,6 +1648,7 @@ class QPDFObjectHandle
bool first_level_only,
bool stop_at_streams);
void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only);
+ void reset();
void setParsedOffset(qpdf_offset_t offset);
void parseContentStream_internal(
std::string const& description, ParserCallbacks* callbacks);
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 9764cf70..40390055 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -247,19 +247,24 @@ QPDF::~QPDF()
// having an array or dictionary that contains an indirect
// reference to the other), the circular references in the
// std::shared_ptr objects will prevent the objects from being
- // deleted. Walk through all objects in the object cache, which
- // is those objects that we read from the file, and break all
- // resolved indirect references by replacing them with direct
- // null objects. At this point, obviously no one is still
- // using the QPDF object, but we'll explicitly clear the xref
- // table anyway just to prevent any possibility of resolve()
- // succeeding. Note that we can't break references like this at
- // any time when the QPDF object is active.
+ // deleted. Walk through all objects in the object cache, which is
+ // those objects that we read from the file, and break all
+ // resolved indirect references by replacing them with direct null
+ // objects. At this point, obviously no one is still using the
+ // QPDF object, but we'll explicitly clear the xref table anyway
+ // just to prevent any possibility of resolve() succeeding. Note
+ // that we can't break references like this at any time when the
+ // QPDF object is active. This also causes all QPDFObjectHandle
+ // objects that are reachable from this object to become nulls and
+ // release their association with this QPDF.
this->m->xref_table.clear();
auto null_obj = QPDF_Null::create();
for (auto const& iter: this->m->obj_cache) {
+ iter.second.object->reset();
+ // If the issue discussed in QPDFValueProxy::reset were
+ // resolved, then this assignment to null_obj could be
+ // removed.
iter.second.object->assign(null_obj);
- iter.second.object->resetObjGen();
}
}
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 3d011c1d..fa8aae7f 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const
return this->obj != rhs.obj;
}
+void
+QPDFObjectHandle::reset()
+{
+ // Recursively remove association with any QPDF object. This
+ // method may only be called during final destruction. See
+ // comments in QPDF::~QPDF().
+ if (!isIndirect()) {
+ this->obj->reset();
+ }
+}
+
qpdf_object_type_e
QPDFObjectHandle::getTypeCode()
{
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 028f73dc..83785465 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -1198,6 +1198,14 @@ void
QPDFWriter::enqueueObject(QPDFObjectHandle object)
{
if (object.isIndirect()) {
+ // This owner check should really be done for all objects, not
+ // just indirect objects. As of the time of the release of
+ // qpdf 11, it is known that there are cases of direct objects
+ // from other files getting copied into multiple QPDF objects.
+ // This definitely happens in the page splitting code. If we
+ // were to implement strong checks to prevent objects from
+ // having multiple owners, once that was complete phased in,
+ // this check could be moved outside the if statement.
if (object.getOwningQPDF() != &(this->m->pdf)) {
QTC::TC("qpdf", "QPDFWriter foreign object");
throw std::logic_error(
diff --git a/libqpdf/QPDF_Array.cc b/libqpdf/QPDF_Array.cc
index 5cce8909..010efd1b 100644
--- a/libqpdf/QPDF_Array.cc
+++ b/libqpdf/QPDF_Array.cc
@@ -34,6 +34,12 @@ QPDF_Array::shallowCopy()
return create(elements);
}
+void
+QPDF_Array::reset()
+{
+ elements.reset();
+}
+
std::string
QPDF_Array::unparse()
{
diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc
index 41760993..00d9f098 100644
--- a/libqpdf/QPDF_Dictionary.cc
+++ b/libqpdf/QPDF_Dictionary.cc
@@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy()
return create(items);
}
+void
+QPDF_Dictionary::reset()
+{
+ for (auto& iter: this->items) {
+ QPDFObjectHandle::Resetter::reset(iter.second);
+ }
+}
+
std::string
QPDF_Dictionary::unparse()
{
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index bf11bd92..dd927dfb 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -168,6 +168,13 @@ QPDF_Stream::getFilterOnWrite() const
}
void
+QPDF_Stream::reset()
+{
+ this->stream_provider = nullptr;
+ QPDFObjectHandle::Resetter::reset(this->stream_dict);
+}
+
+void
QPDF_Stream::setObjGen(QPDFObjGen const& og)
{
if (this->og.isIndirect()) {
diff --git a/libqpdf/SparseOHArray.cc b/libqpdf/SparseOHArray.cc
index 7aa553df..a5a8e4e8 100644
--- a/libqpdf/SparseOHArray.cc
+++ b/libqpdf/SparseOHArray.cc
@@ -49,6 +49,14 @@ SparseOHArray::remove_last()
}
void
+SparseOHArray::reset()
+{
+ for (auto& iter: this->elements) {
+ QPDFObjectHandle::Resetter::reset(iter.second);
+ }
+}
+
+void
SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh)
{
if (idx >= this->n_elements) {
diff --git a/libqpdf/qpdf/QPDFValue.hh b/libqpdf/qpdf/QPDFValue.hh
index 69f7eeda..71078ec8 100644
--- a/libqpdf/qpdf/QPDFValue.hh
+++ b/libqpdf/qpdf/QPDFValue.hh
@@ -63,6 +63,10 @@ class QPDFValue
{
return og;
}
+ virtual void
+ reset()
+ {
+ }
protected:
QPDFValue() :
diff --git a/libqpdf/qpdf/QPDFValueProxy.hh b/libqpdf/qpdf/QPDFValueProxy.hh
index 992ad115..ca8db544 100644
--- a/libqpdf/qpdf/QPDFValueProxy.hh
+++ b/libqpdf/qpdf/QPDFValueProxy.hh
@@ -110,8 +110,21 @@ class QPDFValueProxy
value->og = og;
}
void
- resetObjGen()
- {
+ reset()
+ {
+ value->reset();
+ // It would be better if, rather than clearing value->qpdf and
+ // value->og, we completely replaced value with a null object.
+ // However, at the time of the release of qpdf 11, this causes
+ // test failures and would likely break a lot of code since it
+ // possible for a direct object that recursively contains no
+ // indirect objects to be copied into multiple QPDF objects.
+ // For that reason, we have to break the association with the
+ // owning QPDF but not otherwise mutate the object. For
+ // indirect objects, QPDF::~QPDF replaces the object with
+ // null, which clears circular references. If this code were
+ // able to do the null replacement, that code would not have
+ // to.
value->qpdf = nullptr;
value->og = QPDFObjGen();
}
diff --git a/libqpdf/qpdf/QPDF_Array.hh b/libqpdf/qpdf/QPDF_Array.hh
index 4bae85f7..33dd1881 100644
--- a/libqpdf/qpdf/QPDF_Array.hh
+++ b/libqpdf/qpdf/QPDF_Array.hh
@@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue
virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
virtual std::string unparse();
virtual JSON getJSON(int json_version);
+ virtual void reset();
int getNItems() const;
QPDFObjectHandle getItem(int n) const;
diff --git a/libqpdf/qpdf/QPDF_Dictionary.hh b/libqpdf/qpdf/QPDF_Dictionary.hh
index 11970161..5f3e54fa 100644
--- a/libqpdf/qpdf/QPDF_Dictionary.hh
+++ b/libqpdf/qpdf/QPDF_Dictionary.hh
@@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue
virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
virtual std::string unparse();
virtual JSON getJSON(int json_version);
+ virtual void reset();
// hasKey() and getKeys() treat keys with null values as if they
// aren't there. getKey() returns null for the value of a
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index 2f38e486..c448697d 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue
virtual std::string unparse();
virtual JSON getJSON(int json_version);
virtual void setDescription(QPDF*, std::string const&);
+ virtual void reset();
QPDFObjectHandle getDict() const;
bool isDataModified() const;
void setFilterOnWrite(bool);
diff --git a/libqpdf/qpdf/SparseOHArray.hh b/libqpdf/qpdf/SparseOHArray.hh
index 21a0a9c9..4ebe762c 100644
--- a/libqpdf/qpdf/SparseOHArray.hh
+++ b/libqpdf/qpdf/SparseOHArray.hh
@@ -15,6 +15,7 @@ class SparseOHArray
void setAt(size_t idx, QPDFObjectHandle oh);
void erase(size_t idx);
void insert(size_t idx, QPDFObjectHandle oh);
+ void reset();
typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator
const_iterator;
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index 46dda0d0..2446baf8 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -3274,13 +3274,40 @@ test_92(QPDF& pdf, char const* arg2)
{
// Exercise indirect objects owned by destroyed QPDF object.
auto qpdf = QPDF::create();
- qpdf->emptyPDF();
+ qpdf->processFile("minimal.pdf");
auto root = qpdf->getRoot();
- assert(root.getOwningQPDF() != nullptr);
+ assert(root.getOwningQPDF() == qpdf.get());
assert(root.isIndirect());
+ assert(root.isDictionary());
+ auto page1 = root.getKey("/Pages").getKey("/Kids").getArrayItem(0);
+ assert(page1.getOwningQPDF() == qpdf.get());
+ assert(page1.isIndirect());
+ assert(page1.isDictionary());
+ auto resources = page1.getKey("/Resources");
+ assert(resources.getOwningQPDF() == qpdf.get());
+ assert(resources.isDictionary());
+ assert(!resources.isIndirect());
+ auto contents = page1.getKey("/Contents");
+ auto contents_dict = contents.getDict();
qpdf = nullptr;
- assert(root.getOwningQPDF() == nullptr);
- assert(!root.isIndirect());
+ auto check = [](QPDFObjectHandle& oh) {
+ assert(oh.getOwningQPDF() == nullptr);
+ assert(!oh.isIndirect());
+ };
+ // All objects should no longer have an owning QPDF or be indirect.
+ check(root);
+ check(page1);
+ check(resources);
+ check(contents);
+ check(contents_dict);
+ // Objects that were originally indirect should be null.
+ // Otherwise, they should have retained their old values. See
+ // comments in QPDFValueProxy::reset for why this is the case.
+ assert(root.isNull());
+ assert(page1.isNull());
+ assert(contents.isNull());
+ assert(!resources.isNull());
+ assert(!contents_dict.isNull());
}
static void