about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2020-02-22 17:00:38 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2020-02-22 17:10:31 +0100
commit: bb3137296d4070e268690e8233e9d3eb2d64c652 (patch)
tree: dd6fc652f976d41d64c14e0e3bd29c1b85da075d
parent: 6d2b4d8f81b1baf0b26af30e275a84ed5d00629a (diff)
download: qpdf-bb3137296d4070e268690e8233e9d3eb2d64c652.tar.zst
Handle root /Pages pointing to other than page tree root (fixes #398)
-rw-r--r-- ChangeLog 6
-rw-r--r-- libqpdf/QPDF_pages.cc 32
-rw-r--r-- qpdf/qtest/qpdf.test 10
-rw-r--r-- qpdf/qtest/qpdf/pages-is-page-out.pdf bin 0 -> 1310 bytes
-rw-r--r-- qpdf/qtest/qpdf/pages-is-page.out 2
-rw-r--r-- qpdf/qtest/qpdf/pages-is-page.pdf 79
6 files changed, 126 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 0e6dca1b..3d492e78 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2020-02-22 Jay Berkenbilt <ejb@ql.org>
+
+ * Detect, warn, and correct the case of /Pages in the document
+ catalog incorrectly pointing to a page or intermediate node
+ instead of the root of the pages tree. Fixes #398.
+
2020-01-26 Jay Berkenbilt <ejb@ql.org>
* 9.1.1: release
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index 6435d91e..931ee12b 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -49,8 +49,36 @@ QPDF::getAllPages()
{
std::set<QPDFObjGen> visited;
std::set<QPDFObjGen> seen;
- getAllPagesInternal(getRoot().getKey("/Pages"), this->m->all_pages,
- visited, seen);
+ QPDFObjectHandle pages = getRoot().getKey("/Pages");
+ bool warned = false;
+ bool changed_pages = false;
+ while (pages.isDictionary() && pages.hasKey("/Parent"))
+ {
+ if (seen.count(pages.getObjGen()))
+ {
+ // loop -- will be detected again and reported later
+ break;
+ }
+ // Files have been found in the wild where /Pages in the
+ // catalog points to the first page. Try to work around
+ // this and similar cases with this heuristic.
+ if (! warned)
+ {
+ getRoot().warnIfPossible(
+ "document page tree root (root -> /Pages) doesn't point"
+ " to the root of the page tree; attempting to correct");
+ warned = true;
+ }
+ seen.insert(pages.getObjGen());
+ changed_pages = true;
+ pages = pages.getKey("/Parent");
+ }
+ if (changed_pages)
+ {
+ getRoot().replaceKey("/Pages", pages);
+ }
+ seen.clear();
+ getAllPagesInternal(pages, this->m->all_pages, visited, seen);
}
return this->m->all_pages;
}
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index e42e204a..1d59225f 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -2779,7 +2779,7 @@ for (my $n = 16; $n <= 19; ++$n)
show_ntests();
# ----------
$td->notify("--- Specific File Tests ---");
-$n_tests += 7;
+$n_tests += 9;
# Special PDF files that caused problems at some point
@@ -2810,6 +2810,14 @@ $td->runtest("check output",
$td->runtest("check fix-qdf idempotency",
{$td->COMMAND => "fix-qdf a.pdf"},
{$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});
+$td->runtest("pages points to page",
+ {$td->COMMAND =>
+ "qpdf --static-id --linearize pages-is-page.pdf a.pdf"},
+ {$td->FILE => "pages-is-page.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "pages-is-page-out.pdf"});
show_ntests();
# ----------
diff --git a/qpdf/qtest/qpdf/pages-is-page-out.pdf b/qpdf/qtest/qpdf/pages-is-page-out.pdf
new file mode 100644
index 00000000..15f643d4
--- /dev/null
+++ b/qpdf/qtest/qpdf/pages-is-page-out.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/pages-is-page.out b/qpdf/qtest/qpdf/pages-is-page.out
new file mode 100644
index 00000000..7f410c09
--- /dev/null
+++ b/qpdf/qtest/qpdf/pages-is-page.out
@@ -0,0 +1,2 @@
+WARNING: pages-is-page.pdf, object 1 0 at offset 19: document page tree root (root -> /Pages) doesn't point to the root of the page tree; attempting to correct
+qpdf: operation succeeded with warnings; resulting file may have some problems
diff --git a/qpdf/qtest/qpdf/pages-is-page.pdf b/qpdf/qtest/qpdf/pages-is-page.pdf
new file mode 100644
index 00000000..3c114f46
--- /dev/null
+++ b/qpdf/qtest/qpdf/pages-is-page.pdf
@@ -0,0 +1,79 @@
+%PDF-1.3
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 3 0 R
+>>
+endobj
+
+2 0 obj
+<<
+ /Type /Pages
+ /Kids [
+ 3 0 R
+ ]
+ /Count 1
+>>
+endobj
+
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [0 0 612 792]
+ /Contents 4 0 R
+ /Resources <<
+ /ProcSet 5 0 R
+ /Font <<
+ /F1 6 0 R
+ >>
+ >>
+>>
+endobj
+
+4 0 obj
+<<
+ /Length 44
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+6 0 obj
+<<
+ /Type /Font
+ /Subtype /Type1
+ /Name /F1
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+>>
+endobj
+
+xref
+0 7
+0000000000 65535 f
+0000000009 00000 n
+0000000063 00000 n
+0000000135 00000 n
+0000000307 00000 n
+0000000403 00000 n
+0000000438 00000 n
+trailer <<
+ /Size 7
+ /Root 1 0 R
+>>
+startxref
+556
+%%EOF