From e9980efec87a7a678a1a00cfaf8fc60263c54d24 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 19 Nov 2022 16:01:54 -0500 Subject: Correctly handle reuse of xref stream (fixes #809) --- libqpdf/QPDF.cc | 63 ++++++++++++++++++++++++++++++++++++------- libqpdf/QPDF_linearization.cc | 7 ++++- 2 files changed, 60 insertions(+), 10 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 262f61e6..8e9a52f8 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) QPDFObjectHandle xref_obj; try { xref_obj = readObjectAtOffset( - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og); + false, + xref_offset, + "xref stream", + QPDFObjGen(0, 0), + x_og, + true); } catch (QPDFExc&) { // ignore -- report error below } @@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset( qpdf_offset_t offset, std::string const& description, QPDFObjGen const& exp_og, - QPDFObjGen& og) + QPDFObjGen& og, + bool skip_cache_if_in_xref) { bool check_og = true; if (exp_og.getObj() == 0) { @@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset( qpdf_offset_t new_offset = this->m->xref_table[exp_og].getOffset(); QPDFObjectHandle result = readObjectAtOffset( - false, new_offset, description, exp_og, og); + false, new_offset, description, exp_og, og, false); QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); return result; } else { @@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset( } } qpdf_offset_t end_after_space = this->m->file->tell(); - updateCache( - og, - QPDFObjectHandle::ObjAccessor::getObject(oh), - end_before_space, - end_after_space); + if (skip_cache_if_in_xref && this->m->xref_table.count(og)) { + // Ordinarily, an object gets read here when resolved + // through xref table or stream. In the special case of + // the xref stream and linearization hint tables, the + // offset comes from another source. For the specific case + // of xref streams, the xref stream is read and loaded + // into the object cache very early in parsing. + // Ordinarily, when a file is updated by appending, items + // inserted into the xref table in later updates take + // precedence over earlier items. In the special case of + // reusing the object number previously used as the xref + // stream, we have the following order of events: + // + // * reused object gets loaded into the xref table + // * old object is read here while reading xref streams + // * original xref entry is ignored (since already in xref table) + // + // It is the second step that causes a problem. Even + // though the xref table is correct in this case, the old + // object is already in the cache and so effectively + // prevails over the reused object. To work around this + // issue, we have a special case for the xref stream (via + // the skip_cache_if_in_xref): if the object is already in + // the xref stream, don't cache what we read here. + // + // It is likely that the same bug may exist for + // linearization hint tables, but the existing code uses + // end_before_space and end_after_space from the cache, so + // fixing that would require more significant rework. The + // chances of a linearization hint stream being reused + // seems smaller because the xref stream is probably the + // highest object in the file and the linearization hint + // stream would be some random place in the middle, so I'm + // leaving that bug unfixed for now. If the bug were to be + // fixed, we could use !check_og in place of + // skip_cache_if_in_xref. + QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); + } else { + updateCache( + og, + QPDFObjectHandle::ObjAccessor::getObject(oh), + end_before_space, + end_after_space); + } } return oh; @@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const& og) // Object stored in cache by readObjectAtOffset QPDFObjGen a_og; QPDFObjectHandle oh = - readObjectAtOffset(true, offset, "", og, a_og); + readObjectAtOffset(true, offset, "", og, a_og, false); } break; diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 888b4262..ac7e3efe 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) { QPDFObjGen og; QPDFObjectHandle H = readObjectAtOffset( - false, offset, "linearization hint stream", QPDFObjGen(0, 0), og); + false, + offset, + "linearization hint stream", + QPDFObjGen(0, 0), + og, + false); ObjCache& oc = this->m->obj_cache[og]; qpdf_offset_t min_end_offset = oc.end_before_space; qpdf_offset_t max_end_offset = oc.end_after_space; -- cgit v1.2.3-54-g00ecf