From 47f4ebcdac39256b084cdbb91ff04392fca4bb8d Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 4 Nov 2020 07:46:46 -0500 Subject: Ignore unused field in xref entry, avoiding range error (fixes #482) --- ChangeLog | 6 ++++++ libqpdf/QPDF.cc | 23 ++++++++++++++++++++++- qpdf/qtest/qpdf.test | 8 +++++++- qpdf/qtest/qpdf/issue-150.out | 5 ++++- qpdf/qtest/qpdf/xref-range.out | 6 ++++++ qpdf/qtest/qpdf/xref-range.pdf | Bin 0 -> 722 bytes 6 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 qpdf/qtest/qpdf/xref-range.out create mode 100644 qpdf/qtest/qpdf/xref-range.pdf diff --git a/ChangeLog b/ChangeLog index 860e73a9..6e20fabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2020-11-04 Jay Berkenbilt + + * Ignore the value of the offset/generation field in an xref entry + for a deleted object. Also attempt file recovery on lower-level + exceptions thrown while reading the xref table. Fixes #482. + 2020-10-31 Jay Berkenbilt * 10.0.3: release diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 62ad926c..a233f05e 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -425,7 +425,20 @@ QPDF::parse(char const* password) throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), "", 0, "can't find startxref"); } - read_xref(xref_offset); + try + { + read_xref(xref_offset); + } + catch (QPDFExc&) + { + throw; + } + catch (std::exception& e) + { + throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), "", 0, + std::string("error reading xref: ") + e.what()); + + } } catch (QPDFExc& e) { @@ -1250,6 +1263,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) // This is needed by checkLinearization() this->m->first_xref_item_offset = xref_offset; } + if (fields[0] == 0) + { + // Ignore fields[2], which we don't care about in this + // case. This works around the issue of some PDF files + // that put invalid values, like -1, here for deleted + // objects. + fields[2] = 0; + } insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 8254013b..1c136126 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1496,7 +1496,7 @@ $td->runtest("detect loops in pages structure", show_ntests(); # ---------- $td->notify("--- Xref ---"); -$n_tests += 5; +$n_tests += 6; # Handle file with invalid xref table and object 0 as a regular object # (bug 3159950). @@ -1538,6 +1538,12 @@ $td->runtest("show number of pages", {$td->STRING => "20\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +# Issue 482 -- don't range check fields[2] for xref entry type 0. +$td->runtest("out of range in deleted object", + {$td->COMMAND => "qpdf --check xref-range.pdf"}, + {$td->FILE => "xref-range.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- $td->notify("--- Overwrite self ---"); diff --git a/qpdf/qtest/qpdf/issue-150.out b/qpdf/qtest/qpdf/issue-150.out index 862b4d1e..f45a0977 100644 --- a/qpdf/qtest/qpdf/issue-150.out +++ b/qpdf/qtest/qpdf/issue-150.out @@ -1,2 +1,5 @@ WARNING: issue-150.pdf: can't find PDF header -overflow/underflow converting 9900000000000000000 to 64-bit integer +WARNING: issue-150.pdf: file is damaged +WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer +WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table +issue-150.pdf: unable to find trailer dictionary while recovering damaged file diff --git a/qpdf/qtest/qpdf/xref-range.out b/qpdf/qtest/qpdf/xref-range.out new file mode 100644 index 00000000..cb72e559 --- /dev/null +++ b/qpdf/qtest/qpdf/xref-range.out @@ -0,0 +1,6 @@ +checking xref-range.pdf +PDF Version: 1.5 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/xref-range.pdf b/qpdf/qtest/qpdf/xref-range.pdf new file mode 100644 index 00000000..8a1413d0 Binary files /dev/null and b/qpdf/qtest/qpdf/xref-range.pdf differ -- cgit v1.2.3-70-g09d2