about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2019-08-19 00:54:08 +0200
committer Jay Berkenbilt <ejb@ql.org> 2019-08-19 00:54:37 +0200
commit d9dd99eca32e44788165ce169f1e59498ad1c16e (patch)
tree 9e222c7aa27c91c28e8d7f0c9657431bc3b3c546
parent c032f7c972706d5d90b018e9d0a96d56e0b25cc9 (diff)
download qpdf-d9dd99eca32e44788165ce169f1e59498ad1c16e.tar.zst
Attempt to repair /Type key in pages nodes (fixes #349)
-rw-r--r-- ChangeLog 7
-rw-r--r-- libqpdf/QPDF_pages.cc 43
-rw-r--r-- manual/qpdf-manual.xml 7
-rw-r--r-- qpdf/qtest/qpdf.test 13
-rw-r--r-- qpdf/qtest/qpdf/no-pages-types-fix.out 3
-rw-r--r-- qpdf/qtest/qpdf/no-pages-types-fixed.pdf bin 0 -> 811 bytes
-rw-r--r-- qpdf/qtest/qpdf/no-pages-types.out 4
7 files changed, 47 insertions, 30 deletions
diff --git a/ChangeLog b/ChangeLog
index cf51c5b3..51fb9aa9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2019-08-18 Jay Berkenbilt <ejb@ql.org>
+
+ * When traversing the pages tree, if an invalid /Type key is
+ encountered, fix it. This is not done for all operations, but it
+ will be done for any case in which getAllPages is called. This
+ includes all page-based CLI operations. (Hopefully) Fixes #349.
+
2019-08-17 Jay Berkenbilt <ejb@ql.org>
* Change internal implementation of QPDF arrays to use sparse
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index d95002a6..6435d91e 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -56,12 +56,12 @@ QPDF::getAllPages()
}
void
-QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
+QPDF::getAllPagesInternal(QPDFObjectHandle cur_node,
std::vector<QPDFObjectHandle>& result,
std::set<QPDFObjGen>& visited,
std::set<QPDFObjGen>& seen)
{
- QPDFObjGen this_og = cur_pages.getObjGen();
+ QPDFObjGen this_og = cur_node.getObjGen();
if (visited.count(this_og) > 0)
{
throw QPDFExc(
@@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
"Loop detected in /Pages structure (getAllPages)");
}
visited.insert(this_og);
- std::string type;
- QPDFObjectHandle type_key = cur_pages.getKey("/Type");
- if (type_key.isName())
+ std::string wanted_type;
+ if (cur_node.hasKey("/Kids"))
{
- type = type_key.getName();
- }
- else if (cur_pages.hasKey("/Kids"))
- {
- type = "/Pages";
- }
- else
- {
- type = "/Page";
- }
- if (type == "/Pages")
- {
- QPDFObjectHandle kids = cur_pages.getKey("/Kids");
+ wanted_type = "/Pages";
+ QPDFObjectHandle kids = cur_node.getKey("/Kids");
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i)
{
@@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
getAllPagesInternal(kid, result, visited, seen);
}
}
- else if (type == "/Page")
+ else
{
+ wanted_type = "/Page";
seen.insert(this_og);
- result.push_back(cur_pages);
+ result.push_back(cur_node);
}
- else
+
+ QPDFObjectHandle type_key = cur_node.getKey("/Type");
+ if (! (type_key.isName() && (type_key.getName() == wanted_type)))
{
- throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
- this->m->last_object_description,
- this->m->file->getLastOffset(),
- "invalid Type " + type + " in page tree");
+ warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
+ "page tree node",
+ this->m->file->getLastOffset(),
+ "/Type key should be " + wanted_type +
+ " but is not; overriding"));
+ cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
}
visited.erase(this_og);
}
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index e2544cb9..fdd34122 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -4490,6 +4490,13 @@ print "\n";
</listitem>
<listitem>
<para>
+ When traversing the pages tree, if nodes are encountered
+ with invalid types, the types are fixed, and a warning is
+ issued.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
A new helper method
<function>QUtil::read_file_into_memory</function> was added.
</para>
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index a11ccd0b..bfe902f3 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size",
show_ntests();
# ----------
$td->notify("--- Page errors ---");
-$n_tests += 3;
+$n_tests += 5;
$td->runtest("handle page no with contents",
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
-$td->runtest("no type key for page nodes",
+$td->runtest("check no type key for page nodes",
{$td->COMMAND => "qpdf --check no-pages-types.pdf"},
- {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0},
+ {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
+$td->runtest("no type key for page nodes",
+ {$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"},
+ {$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "a-split-out-1.pdf"},
+ {$td->FILE => "no-pages-types-fixed.pdf"});
$td->runtest("detect loops in pages structure",
{$td->COMMAND => "qpdf --check pages-loop.pdf"},
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
diff --git a/qpdf/qtest/qpdf/no-pages-types-fix.out b/qpdf/qtest/qpdf/no-pages-types-fix.out
new file mode 100644
index 00000000..81e71eeb
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-pages-types-fix.out
@@ -0,0 +1,3 @@
+WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
+WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
+qpdf: operation succeeded with warnings; resulting file may have some problems
diff --git a/qpdf/qtest/qpdf/no-pages-types-fixed.pdf b/qpdf/qtest/qpdf/no-pages-types-fixed.pdf
new file mode 100644
index 00000000..b0bf24f8
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-pages-types-fixed.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/no-pages-types.out b/qpdf/qtest/qpdf/no-pages-types.out
index 0d5eb23b..28c172a7 100644
--- a/qpdf/qtest/qpdf/no-pages-types.out
+++ b/qpdf/qtest/qpdf/no-pages-types.out
@@ -2,5 +2,5 @@ checking no-pages-types.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
-No syntax or stream encoding errors found; the file may still contain
-errors that qpdf cannot detect
+WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
+WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding