aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2021-02-25 12:34:03 +0100
committerJay Berkenbilt <ejb@ql.org>2021-02-25 13:32:46 +0100
commita4d6589ff26007c966db8912e4dae1aa937a5968 (patch)
tree7eeba29b6e1b59a69f9e60105c1de4b587246473
parentec6719fd25ebd49c43142a607353bad5df7874aa (diff)
downloadqpdf-a4d6589ff26007c966db8912e4dae1aa937a5968.tar.zst
Have QPDFObjectHandle notice when replaceObject was called
This results in a performance penalty of 1% to 2% when replaceObject and swapObjects are never called and a somewhat larger penalty if they are called, but it's worth it to avoid very confusing behavior as discussed in depth in qpdf#507.
-rw-r--r--ChangeLog7
-rw-r--r--include/qpdf/QPDF.hh41
-rw-r--r--libqpdf/QPDF.cc27
-rw-r--r--libqpdf/QPDFObjectHandle.cc6
-rw-r--r--manual/qpdf-manual.xml26
-rw-r--r--qpdf/qtest/qpdf/test14-in.pdf34
-rw-r--r--qpdf/qtest/qpdf/test14-out.pdf28
-rw-r--r--qpdf/qtest/qpdf/test14.out4
-rw-r--r--qpdf/test_driver.cc20
9 files changed, 133 insertions, 60 deletions
diff --git a/ChangeLog b/ChangeLog
index e7af70d4..7deada34 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2021-02-25 Jay Berkenbilt <ejb@ql.org>
+
+ * Bug fix/behavior change: when QPDF::replaceObject or
+ QPDF::swapObjects is called, existing QPDFObjectHandle instances
+ will now notice the change. This removes a long-standing source of
+ bugs and confusing behavior.
+
2021-02-23 Jay Berkenbilt <ejb@ql.org>
* The test for the input and output files being the same wasn't
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index d4b7e775..f9434024 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -278,34 +278,26 @@ class QPDF
QPDFObjectHandle getObjectByID(int objid, int generation);
// Replace the object with the given object id with the given
- // object. The object handle passed in must be a direct object,
+ // object. The object handle passed in must be a direct object,
// though it may contain references to other indirect objects
- // within it. Calling this method can have somewhat confusing
- // results. Any existing QPDFObjectHandle instances that point to
- // the old object and that have been resolved (which happens
- // automatically if you access them in any way) will continue to
- // point to the old object even though that object will no longer
- // be associated with the PDF file. Note that replacing an object
- // with QPDFObjectHandle::newNull() effectively removes the object
- // from the file since a non-existent object is treated as a null
- // object. To replace a reserved object, call replaceReserved
+ // within it. Prior to qpdf 10.2.1, after calling this method,
+ // existing QPDFObjectHandle instances that pointed to the
+ // original object still pointed to the original object, resulting
+ // in confusing and incorrect behavior. This was fixed in 10.2.1,
+ // so existing QPDFObjectHandle objects will start pointing to the
+ // newly replaced object. Note that replacing an object with
+ // QPDFObjectHandle::newNull() effectively removes the object from
+ // the file since a non-existent object is treated as a null
+ // object. To replace a reserved object, call replaceReserved
// instead.
QPDF_DLL
void replaceObject(QPDFObjGen const& og, QPDFObjectHandle);
QPDF_DLL
void replaceObject(int objid, int generation, QPDFObjectHandle);
- // Swap two objects given by ID. Calling this method can have
- // confusing results. After swapping two objects, existing
- // QPDFObjectHandle instances that reference them will still
- // reference the same underlying objects, at which point those
- // existing QPDFObjectHandle instances will have incorrect
- // information about the object and generation number of those
- // objects. While this does not necessarily cause a problem, it
- // can certainly be confusing. It is therefore recommended that
- // you replace any existing QPDFObjectHandle instances that point
- // to the swapped objects with new ones, possibly by calling
- // getObjectByID.
+ // Swap two objects given by ID. Prior to qpdf 10.2.1, existing
+ // QPDFObjectHandle instances that reference them objects not
+ // notice the swap, but this was fixed in 10.2.1.
QPDF_DLL
void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2);
QPDF_DLL
@@ -697,6 +689,11 @@ class QPDF
{
return qpdf->resolve(objid, generation);
}
+ static bool objectChanged(
+ QPDF* qpdf, QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
+ {
+ return qpdf->objectChanged(og, oph);
+ }
};
friend class Resolver;
@@ -930,6 +927,7 @@ class QPDF
qpdf_offset_t offset, std::string const& description,
int exp_objid, int exp_generation,
int& act_objid, int& act_generation);
+ bool objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph);
PointerHolder<QPDFObject> resolve(int objid, int generation);
void resolveObjectsInStream(int obj_stream_number);
void stopOnError(std::string const& message);
@@ -1441,6 +1439,7 @@ class QPDF
bool in_parse;
bool parsed;
std::set<int> resolved_object_streams;
+ bool ever_replaced_objects;
// Linearization data
qpdf_offset_t first_xref_item_offset; // actual value from file
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 1935d420..b751e602 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -210,6 +210,7 @@ QPDF::Members::Members() :
immediate_copy_from(false),
in_parse(false),
parsed(false),
+ ever_replaced_objects(false),
first_xref_item_offset(0),
uncompressed_after_compressed(false)
{
@@ -2063,6 +2064,30 @@ QPDF::readObjectAtOffset(bool try_recovery,
return oh;
}
+bool
+QPDF::objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
+{
+ // See if the object cached at og, if any, is the one passed in.
+ // QPDFObjectHandle uses this to detect outdated handles to
+ // replaced or swapped objects. This is a somewhat expensive check
+ // because it happens with every dereference of a
+ // QPDFObjectHandle. To reduce the hit somewhat, short-circuit the
+ // check if we never called a function that replaces an object
+ // already in cache. It is important for functions that do this to
+ // set ever_replaced_objects = true.
+
+ if (! this->m->ever_replaced_objects)
+ {
+ return false;
+ }
+ auto c = this->m->obj_cache.find(og);
+ if (c == this->m->obj_cache.end())
+ {
+ return true;
+ }
+ return (c->second.object.getPointer() != oph.getPointer());
+}
+
PointerHolder<QPDFObject>
QPDF::resolve(int objid, int generation)
{
@@ -2308,6 +2333,7 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
// Replace the object in the object cache
QPDFObjGen og(objid, generation);
+ this->m->ever_replaced_objects = true;
this->m->obj_cache[og] =
ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1);
}
@@ -2695,6 +2721,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
QPDFObjGen og1(objid1, generation1);
QPDFObjGen og2(objid2, generation2);
ObjCache t = this->m->obj_cache[og1];
+ this->m->ever_replaced_objects = true;
this->m->obj_cache[og1] = this->m->obj_cache[og2];
this->m->obj_cache[og2] = t;
}
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index c650bdea..6d4f10ce 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -3194,6 +3194,12 @@ QPDFObjectHandle::dereference()
throw std::logic_error(
"attempted to dereference an uninitialized QPDFObjectHandle");
}
+ if (this->obj.getPointer() && this->objid &&
+ QPDF::Resolver::objectChanged(
+ this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj))
+ {
+ this->obj = nullptr;
+ }
if (this->obj.getPointer() == 0)
{
PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve(
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index 565967db..7efbf938 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -5061,6 +5061,32 @@ print "\n";
</varlistentry>
-->
<varlistentry>
+ <term>XXX 10.2.1: Month dd, YYYY</term>
+ <listitem>
+ <itemizedlist>
+ <listitem>
+ <para>
+ Bug Fixes
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ When <function>QPDF::replaceObject</function> or
+ <function>QPDF::swapObjects</function> is called, existing
+ <classname>QPDFObjectHandle</classname> instances no longer
+ point to the old objects. The next time they are
+ accessed, they automatically notice the change to the
+ underlying object and update themselves. This resolves a
+ very longstanding source of confusion, albeit in a very
+ rarely used method call.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term>10.2.0: February 23, 2021</term>
<listitem>
<itemizedlist>
diff --git a/qpdf/qtest/qpdf/test14-in.pdf b/qpdf/qtest/qpdf/test14-in.pdf
index 4d761020..d0cd490a 100644
--- a/qpdf/qtest/qpdf/test14-in.pdf
+++ b/qpdf/qtest/qpdf/test14-in.pdf
@@ -65,6 +65,7 @@ endobj
612
792
]
+ /OrigPage 2
/Parent 4 0 R
/Resources <<
/Font <<
@@ -86,6 +87,7 @@ endobj
612
792
]
+ /OrigPage 3
/Parent 4 0 R
/Resources <<
/Font <<
@@ -237,21 +239,21 @@ xref
0000000140 00000 n
0000000252 00000 n
0000000456 00000 n
-0000000661 00000 n
-0000000866 00000 n
-0000001084 00000 n
-0000001186 00000 n
-0000001206 00000 n
-0000001325 00000 n
-0000001384 00000 n
-0000001487 00000 n
-0000001507 00000 n
-0000001566 00000 n
-0000001669 00000 n
-0000001689 00000 n
-0000001748 00000 n
-0000001851 00000 n
-0000001871 00000 n
+0000000675 00000 n
+0000000894 00000 n
+0000001112 00000 n
+0000001214 00000 n
+0000001234 00000 n
+0000001353 00000 n
+0000001412 00000 n
+0000001515 00000 n
+0000001535 00000 n
+0000001594 00000 n
+0000001697 00000 n
+0000001717 00000 n
+0000001776 00000 n
+0000001879 00000 n
+0000001899 00000 n
trailer <<
/QArray 2 0 R
/QDict 3 0 R
@@ -260,5 +262,5 @@ trailer <<
/ID [<20eb74876a3e8212c1b4fd43153860b0><1bb7a926da191c58f675435d77997d21>]
>>
startxref
-1907
+1935
%%EOF
diff --git a/qpdf/qtest/qpdf/test14-out.pdf b/qpdf/qtest/qpdf/test14-out.pdf
index 315d174d..96c25fbd 100644
--- a/qpdf/qtest/qpdf/test14-out.pdf
+++ b/qpdf/qtest/qpdf/test14-out.pdf
@@ -16,10 +16,10 @@ endobj
<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
endobj
6 0 obj
-<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >>
+<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 3 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >>
endobj
7 0 obj
-<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >>
+<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 2 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >>
endobj
8 0 obj
<< /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 17 0 R >> /Type /Page >>
@@ -88,18 +88,18 @@ xref
0000000122 00000 n
0000000199 00000 n
0000000344 00000 n
-0000000490 00000 n
-0000000636 00000 n
-0000000782 00000 n
-0000000877 00000 n
-0000000985 00000 n
-0000001016 00000 n
-0000001112 00000 n
-0000001143 00000 n
-0000001239 00000 n
-0000001270 00000 n
-0000001366 00000 n
+0000000502 00000 n
+0000000660 00000 n
+0000000806 00000 n
+0000000901 00000 n
+0000001009 00000 n
+0000001040 00000 n
+0000001136 00000 n
+0000001167 00000 n
+0000001263 00000 n
+0000001294 00000 n
+0000001390 00000 n
trailer << /QArray 2 0 R /QDict 3 0 R /Root 1 0 R /Size 18 /ID [<20eb74876a3e8212c1b4fd43153860b0><31415926535897932384626433832795>] >>
startxref
-1397
+1421
%%EOF
diff --git a/qpdf/qtest/qpdf/test14.out b/qpdf/qtest/qpdf/test14.out
index 65b640b9..b6bce056 100644
--- a/qpdf/qtest/qpdf/test14.out
+++ b/qpdf/qtest/qpdf/test14.out
@@ -1,6 +1,6 @@
caught logic error as expected
-old dict: 1
-old dict: 1
+old dict: 2
+swapped array: /Array
new dict: 2
swapped array: /Array
array and dictionary contents are correct
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index 0ab25fb3..614d9025 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -740,7 +740,13 @@ void runtest(int n, char const* filename1, char const* arg2)
" not called 4-page file");
}
// Swap pages 2 and 3
- pdf.swapObjects(pages.at(1).getObjGen(), pages.at(2).getObjGen());
+ auto orig_page2 = pages.at(1);
+ auto orig_page3 = pages.at(2);
+ assert(orig_page2.getKey("/OrigPage").getIntValue() == 2);
+ assert(orig_page3.getKey("/OrigPage").getIntValue() == 3);
+ pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen());
+ assert(orig_page2.getKey("/OrigPage").getIntValue() == 3);
+ assert(orig_page3.getKey("/OrigPage").getIntValue() == 2);
// Replace object and swap objects
QPDFObjectHandle trailer = pdf.getTrailer();
QPDFObjectHandle qdict = trailer.getKey("/QDict");
@@ -759,18 +765,18 @@ void runtest(int n, char const* filename1, char const* arg2)
std::cout << "caught logic error as expected" << std::endl;
}
pdf.replaceObject(qdict.getObjGen(), new_dict);
- // Now qdict still points to the old dictionary
- std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue()
+ // Now qdict points to the new dictionary
+ std::cout << "old dict: " << qdict.getKey("/NewDict").getIntValue()
<< std::endl;
// Swap dict and array
pdf.swapObjects(qdict.getObjGen(), qarray.getObjGen());
- // Now qarray will resolve to new object but qdict is still
- // the old object
- std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue()
+ // Now qarray will resolve to new object and qdict resolves to
+ // the array
+ std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
<< std::endl;
std::cout << "new dict: " << qarray.getKey("/NewDict").getIntValue()
<< std::endl;
- // Reread qdict, now pointing to an array
+ // Reread qdict, still pointing to an array
qdict = pdf.getObjectByObjGen(qdict.getObjGen());
std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
<< std::endl;