aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2022-11-19 22:01:54 +0100
committerJay Berkenbilt <jberkenbilt@users.noreply.github.com>2022-11-19 23:03:17 +0100
commite9980efec87a7a678a1a00cfaf8fc60263c54d24 (patch)
treec3b1379065ff9b1af7b9147ea90c516abf8daf62
parentd79a823d66512b5f5db2c0db739d1a867fcd5774 (diff)
downloadqpdf-e9980efec87a7a678a1a00cfaf8fc60263c54d24.tar.zst
Correctly handle reuse of xref stream (fixes #809)
-rw-r--r--ChangeLog6
-rw-r--r--include/qpdf/QPDF.hh3
-rw-r--r--libqpdf/QPDF.cc63
-rw-r--r--libqpdf/QPDF_linearization.cc7
-rw-r--r--qpdf/qpdf.testcov1
-rw-r--r--qpdf/qtest/qpdf/reuse-xref-stream-obj9.out1
-rw-r--r--qpdf/qtest/qpdf/reuse-xref-stream-xref.out10
-rw-r--r--qpdf/qtest/qpdf/reuse-xref-stream.pdfbin0 -> 1447 bytes
-rw-r--r--qpdf/qtest/xref-streams.test13
9 files changed, 92 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 185b77e6..bb2293ae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2022-11-19 Jay Berkenbilt <ejb@ql.org>
+
+ * Bug fix: handle special case of an earlier xref stream object's
+ object number being reused by an update made by appending the
+ file. Fixes #809.
+
2022-10-08 Jay Berkenbilt <ejb@ql.org>
* Fix major performance bug with the openssl crypto provider when
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 06afc44f..987b78aa 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -1209,7 +1209,8 @@ class QPDF
qpdf_offset_t offset,
std::string const& description,
QPDFObjGen const& exp_og,
- QPDFObjGen& og);
+ QPDFObjGen& og,
+ bool skip_cache_if_in_xref);
void resolve(QPDFObjGen const& og);
void resolveObjectsInStream(int obj_stream_number);
void stopOnError(std::string const& message);
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 262f61e6..8e9a52f8 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
QPDFObjectHandle xref_obj;
try {
xref_obj = readObjectAtOffset(
- false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og);
+ false,
+ xref_offset,
+ "xref stream",
+ QPDFObjGen(0, 0),
+ x_og,
+ true);
} catch (QPDFExc&) {
// ignore -- report error below
}
@@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset(
qpdf_offset_t offset,
std::string const& description,
QPDFObjGen const& exp_og,
- QPDFObjGen& og)
+ QPDFObjGen& og,
+ bool skip_cache_if_in_xref)
{
bool check_og = true;
if (exp_og.getObj() == 0) {
@@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset(
qpdf_offset_t new_offset =
this->m->xref_table[exp_og].getOffset();
QPDFObjectHandle result = readObjectAtOffset(
- false, new_offset, description, exp_og, og);
+ false, new_offset, description, exp_og, og, false);
QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
return result;
} else {
@@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset(
}
}
qpdf_offset_t end_after_space = this->m->file->tell();
- updateCache(
- og,
- QPDFObjectHandle::ObjAccessor::getObject(oh),
- end_before_space,
- end_after_space);
+ if (skip_cache_if_in_xref && this->m->xref_table.count(og)) {
+ // Ordinarily, an object gets read here when resolved
+ // through xref table or stream. In the special case of
+ // the xref stream and linearization hint tables, the
+ // offset comes from another source. For the specific case
+ // of xref streams, the xref stream is read and loaded
+ // into the object cache very early in parsing.
+ // Ordinarily, when a file is updated by appending, items
+ // inserted into the xref table in later updates take
+ // precedence over earlier items. In the special case of
+ // reusing the object number previously used as the xref
+ // stream, we have the following order of events:
+ //
+ // * reused object gets loaded into the xref table
+ // * old object is read here while reading xref streams
+ // * original xref entry is ignored (since already in xref table)
+ //
+ // It is the second step that causes a problem. Even
+ // though the xref table is correct in this case, the old
+ // object is already in the cache and so effectively
+ // prevails over the reused object. To work around this
+ // issue, we have a special case for the xref stream (via
+ // the skip_cache_if_in_xref): if the object is already in
+ // the xref stream, don't cache what we read here.
+ //
+ // It is likely that the same bug may exist for
+ // linearization hint tables, but the existing code uses
+ // end_before_space and end_after_space from the cache, so
+ // fixing that would require more significant rework. The
+ // chances of a linearization hint stream being reused
+ // seems smaller because the xref stream is probably the
+ // highest object in the file and the linearization hint
+ // stream would be some random place in the middle, so I'm
+ // leaving that bug unfixed for now. If the bug were to be
+ // fixed, we could use !check_og in place of
+ // skip_cache_if_in_xref.
+ QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
+ } else {
+ updateCache(
+ og,
+ QPDFObjectHandle::ObjAccessor::getObject(oh),
+ end_before_space,
+ end_after_space);
+ }
}
return oh;
@@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const& og)
// Object stored in cache by readObjectAtOffset
QPDFObjGen a_og;
QPDFObjectHandle oh =
- readObjectAtOffset(true, offset, "", og, a_og);
+ readObjectAtOffset(true, offset, "", og, a_og, false);
}
break;
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
index 888b4262..ac7e3efe 100644
--- a/libqpdf/QPDF_linearization.cc
+++ b/libqpdf/QPDF_linearization.cc
@@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
{
QPDFObjGen og;
QPDFObjectHandle H = readObjectAtOffset(
- false, offset, "linearization hint stream", QPDFObjGen(0, 0), og);
+ false,
+ offset,
+ "linearization hint stream",
+ QPDFObjGen(0, 0),
+ og,
+ false);
ObjCache& oc = this->m->obj_cache[og];
qpdf_offset_t min_end_offset = oc.end_before_space;
qpdf_offset_t max_end_offset = oc.end_after_space;
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index e89f63a0..f99d3f74 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -678,3 +678,4 @@ QPDF_json bad calledgetallpages 0
QPDF_json bad pushedinheritedpageresources 0
QPDFPageObjectHelper copied fallback 0
QPDFPageObjectHelper used fallback without copying 0
+QPDF skipping cache for known unchecked object 0
diff --git a/qpdf/qtest/qpdf/reuse-xref-stream-obj9.out b/qpdf/qtest/qpdf/reuse-xref-stream-obj9.out
new file mode 100644
index 00000000..b2a0b987
--- /dev/null
+++ b/qpdf/qtest/qpdf/reuse-xref-stream-obj9.out
@@ -0,0 +1 @@
+<< /BaseFont /Times-Italics /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
diff --git a/qpdf/qtest/qpdf/reuse-xref-stream-xref.out b/qpdf/qtest/qpdf/reuse-xref-stream-xref.out
new file mode 100644
index 00000000..becc91f3
--- /dev/null
+++ b/qpdf/qtest/qpdf/reuse-xref-stream-xref.out
@@ -0,0 +1,10 @@
+1/0: uncompressed; offset = 25
+2/0: compressed; stream = 1, index = 0
+3/0: compressed; stream = 1, index = 1
+4/0: compressed; stream = 1, index = 2
+5/0: compressed; stream = 1, index = 3
+6/0: uncompressed; offset = 1054
+7/0: uncompressed; offset = 692
+8/0: uncompressed; offset = 791
+9/0: uncompressed; offset = 1087
+10/0: uncompressed; offset = 1196
diff --git a/qpdf/qtest/qpdf/reuse-xref-stream.pdf b/qpdf/qtest/qpdf/reuse-xref-stream.pdf
new file mode 100644
index 00000000..96cb983f
--- /dev/null
+++ b/qpdf/qtest/qpdf/reuse-xref-stream.pdf
Binary files differ
diff --git a/qpdf/qtest/xref-streams.test b/qpdf/qtest/xref-streams.test
index 10a32763..8d8e8bd9 100644
--- a/qpdf/qtest/xref-streams.test
+++ b/qpdf/qtest/xref-streams.test
@@ -14,7 +14,7 @@ cleanup();
my $td = new TestDriver('xref-streams');
-my $n_tests = 3;
+my $n_tests = 5;
# Handle xref stream with more entries than reported (bug 2872265)
$td->runtest("xref with short size",
@@ -32,6 +32,17 @@ $td->runtest("show new xref stream",
{$td->FILE => "xref-with-short-size-new.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
+# Handle appended file that reuses the xref stream object number
+$td->runtest("override xref stream - xref",
+ {$td->COMMAND => "qpdf --show-xref reuse-xref-stream.pdf"},
+ {$td->FILE => "reuse-xref-stream-xref.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("override xref stream - object",
+ {$td->COMMAND => "qpdf --show-object=9 reuse-xref-stream.pdf"},
+ {$td->FILE => "reuse-xref-stream-obj9.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
cleanup();
$td->report($n_tests);