From 769a4915e8392490b77aba3b6ddda3e4a2e89508 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 4 Jul 2023 14:01:03 +0100 Subject: Add new private method QPDF::insertReconstructedXrefEntry --- include/qpdf/QPDF.hh | 1 + libqpdf/QPDF.cc | 18 ++++++++++++++++-- libqpdf/qpdf/JSONHandler.hh | 1 - 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index e54947d9..6f346422 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1003,6 +1003,7 @@ class QPDF qpdf_offset_t read_xrefStream(qpdf_offset_t offset); qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite = false); + void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2); void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); QPDFObjectHandle readObject( std::shared_ptr<InputSource>, diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 396dfe8f..8c96eb9c 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -564,7 +564,7 @@ QPDF::reconstruct_xref(QPDFExc& e) if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { int obj = QUtil::string_to_int(t1.getValue().c_str()); int gen = QUtil::string_to_int(t2.getValue().c_str()); - insertXrefEntry(obj, 1, token_start, gen, true); + insertReconstructedXrefEntry(obj, token_start, gen); } } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { QPDFObjectHandle t = readObject(m->file, "trailer", QPDFObjGen(), false); @@ -577,6 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e) m->file->seek(next_line_start, SEEK_SET); line_start = next_line_start; } + m->deleted_objects.clear(); if (!m->trailer.isInitialized()) { // We could check the last encountered object to see if it was an xref stream. 
If so, we @@ -1126,7 +1127,6 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) QPDFObjGen og(obj, gen); if (m->xref_table.count(og)) { if (overwrite) { - QTC::TC("qpdf", "QPDF xref overwrite object"); m->xref_table.erase(og); } else { QTC::TC("qpdf", "QPDF xref reused object"); @@ -1160,6 +1160,20 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) } } +// Replace uncompressed object. This is used in xref recovery mode, which reads the file from +// beginning to end. +void +QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) +{ + QPDFObjGen og(obj, f2); + if (!m->deleted_objects.count(obj)) { + // deleted_objects stores the uncompressed objects removed from the xref table at the start + // of recovery. + QTC::TC("qpdf", "QPDF xref overwrite object"); + m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1); + } +} + void QPDF::showXRefTable() { diff --git a/libqpdf/qpdf/JSONHandler.hh b/libqpdf/qpdf/JSONHandler.hh index 653924f6..9b2a0b33 100644 --- a/libqpdf/qpdf/JSONHandler.hh +++ b/libqpdf/qpdf/JSONHandler.hh @@ -53,7 +53,6 @@ class JSONHandler static void usage(std::string const& msg); - class Members; std::unique_ptr<Members> m; -- cgit v1.2.3-54-g00ecf From 250a7364828da874fd728808f48443f8cfa5132d Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 4 Jul 2023 14:24:30 +0100 Subject: Remove parameter overwrite from QPDF::processXRefStream --- include/qpdf/QPDF.hh | 2 +- libqpdf/QPDF.cc | 34 +++++++++++++--------------------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 6f346422..a23e5826 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1002,7 +1002,7 @@ class QPDF qpdf_offset_t read_xrefTable(qpdf_offset_t offset); qpdf_offset_t read_xrefStream(qpdf_offset_t offset); qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); - void insertXrefEntry(int obj, int f0, qpdf_offset_t 
f1, int f2, bool overwrite = false); + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2); void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); QPDFObjectHandle readObject( diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 8c96eb9c..e93ffb85 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1112,31 +1112,23 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) } void -QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) { // Populate the xref table in such a way that the first reference to an object that we see, // which is the one in the latest xref table in which it appears, is the one that gets stored. - // This works because we are reading more recent appends before older ones. Exception: if - // overwrite is true, then replace any existing object. This is used in xref recovery mode, - // which reads the file from beginning to end. + // This works because we are reading more recent appends before older ones. // If there is already an entry for this object and generation in the table, it means that a // later xref table has registered this object. Disregard this one. - { // private scope - int gen = (f0 == 2 ? 0 : f2); - QPDFObjGen og(obj, gen); - if (m->xref_table.count(og)) { - if (overwrite) { - m->xref_table.erase(og); - } else { - QTC::TC("qpdf", "QPDF xref reused object"); - return; - } - } - if (m->deleted_objects.count(obj)) { - QTC::TC("qpdf", "QPDF xref deleted object"); - return; - } + + QPDFObjGen og(obj, (f0 == 2 ? 
0 : f2)); + if (m->xref_table.count(og)) { + QTC::TC("qpdf", "QPDF xref reused object"); + return; + } + if (m->deleted_objects.count(obj)) { + QTC::TC("qpdf", "QPDF xref deleted object"); + return; } switch (f0) { @@ -1147,11 +1139,11 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) case 1: // f2 is generation QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0)); - m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1); + m->xref_table[og] = QPDFXRefEntry(f1); break; case 2: - m->xref_table[QPDFObjGen(obj, 0)] = QPDFXRefEntry(toI(f1), f2); + m->xref_table[og] = QPDFXRefEntry(toI(f1), f2); break; default: -- cgit v1.2.3-54-g00ecf From 8e6974710cc75b3acfb592db9fb6fd8611c39732 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 4 Jul 2023 16:34:22 +0100 Subject: Add private method QPDF::insertFreeXrefEntry --- include/qpdf/QPDF.hh | 1 + libqpdf/QPDF.cc | 34 ++++++++++++++++++++-------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index a23e5826..e15b4443 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1003,6 +1003,7 @@ class QPDF qpdf_offset_t read_xrefStream(qpdf_offset_t offset); qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); + void insertFreeXrefEntry(QPDFObjGen); void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2); void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); QPDFObjectHandle readObject( diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index e93ffb85..53634485 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -890,7 +890,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) // Handle any deleted items now that we've read the /XRefStm. 
for (auto const& og: deleted_items) { - insertXrefEntry(og.getObj(), 0, 0, og.getGen()); + insertFreeXrefEntry(og); } if (cur_trailer.hasKey("/Prev")) { @@ -1088,9 +1088,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) if (fields[0] == 0) { // Ignore fields[2], which we don't care about in this case. This works around the issue // of some PDF files that put invalid values, like -1, here for deleted objects. - fields[2] = 0; + insertFreeXrefEntry(QPDFObjGen(obj, 0)); + } else { + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); } - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); } if (!m->trailer.isInitialized()) { @@ -1121,29 +1122,26 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) // If there is already an entry for this object and generation in the table, it means that a // later xref table has registered this object. Disregard this one. - QPDFObjGen og(obj, (f0 == 2 ? 0 : f2)); - if (m->xref_table.count(og)) { - QTC::TC("qpdf", "QPDF xref reused object"); - return; - } if (m->deleted_objects.count(obj)) { QTC::TC("qpdf", "QPDF xref deleted object"); return; } - switch (f0) { - case 0: - m->deleted_objects.insert(obj); - break; + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2))); + if (!created) { + QTC::TC("qpdf", "QPDF xref reused object"); + return; + } + switch (f0) { case 1: // f2 is generation QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0)); - m->xref_table[og] = QPDFXRefEntry(f1); + iter->second = QPDFXRefEntry(f1); break; case 2: - m->xref_table[og] = QPDFXRefEntry(toI(f1), f2); + iter->second = QPDFXRefEntry(toI(f1), f2); break; default: @@ -1152,6 +1150,14 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) } } +void +QPDF::insertFreeXrefEntry(QPDFObjGen og) +{ + if (!m->xref_table.count(og)) { + m->deleted_objects.insert(og.getObj()); + } +} + // Replace uncompressed object. 
This is used in xref recovery mode, which reads the file from // beginning to end. void -- cgit v1.2.3-54-g00ecf From 098b98c889400aa33c81d71123418eecc4be2162 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 4 Jul 2023 17:07:44 +0100 Subject: Tidy QPDF::read_xrefTable(qpdf_offset_t xref_offset) --- libqpdf/QPDF.cc | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 53634485..9554027c 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -910,7 +910,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) qpdf_offset_t QPDF::read_xrefStream(qpdf_offset_t xref_offset) { - bool found = false; if (!m->ignore_xref_streams) { QPDFObjGen x_og; QPDFObjectHandle xref_obj; @@ -922,17 +921,13 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) } if (xref_obj.isStreamOfType("/XRef")) { QTC::TC("qpdf", "QPDF found xref stream"); - found = true; - xref_offset = processXRefStream(xref_offset, xref_obj); + return processXRefStream(xref_offset, xref_obj); } } - if (!found) { - QTC::TC("qpdf", "QPDF can't find xref"); - throw damagedPDF("", xref_offset, "xref not found"); - } - - return xref_offset; + QTC::TC("qpdf", "QPDF can't find xref"); + throw damagedPDF("", xref_offset, "xref not found"); + return 0; // unreachable } qpdf_offset_t -- cgit v1.2.3-54-g00ecf