diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | include/qpdf/QPDFObjectHandle.hh | 5 | ||||
-rw-r--r-- | libqpdf/QPDFObjectHandle.cc | 32 | ||||
-rw-r--r-- | libqpdf/QPDF_pages.cc | 2 | ||||
-rw-r--r-- | qpdf/qtest/qpdf.test | 28 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/no-contents-check.out | 6 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf | 27 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/no-contents-none.pdf | 21 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/no-contents-qdf.pdf | 54 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/no-contents.pdf | 21 |
10 files changed, 189 insertions, 11 deletions
@@ -1,3 +1,7 @@ +2018-03-06 Jay Berkenbilt <ejb@ql.org> + + * Properly handle pages with no contents. Fixes #194. + 2018-03-05 Jay Berkenbilt <ejb@ql.org> * Improve handling of loops while following cross reference diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index a2f54a73..6f78e9a9 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -830,6 +830,11 @@ class QPDFObjectHandle QPDF_DLL void assertNumber(); + // The isPageObject method checks the /Type key of the object. + // This is not completely reliable as there are some otherwise + // valid files whose /Type is wrong for page objects. qpdf is + // slightly more accepting but may still return false here when + // treating the object as a page would work. Use this sparingly. QPDF_DLL bool isPageObject(); QPDF_DLL diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 2e9cc996..c178a492 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -932,8 +932,6 @@ QPDFObjectHandle::getGeneration() const std::map<std::string, QPDFObjectHandle> QPDFObjectHandle::getPageImages() { - assertPageObject(); - // Note: this code doesn't handle inherited resources. If this // page dictionary doesn't have a /Resources key or has one whose // value is null or an empty dictionary, you are supposed to walk @@ -1081,7 +1079,6 @@ QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first) void QPDFObjectHandle::rotatePage(int angle, bool relative) { - assertPageObject(); if ((angle % 90) != 0) { throw std::runtime_error( @@ -1137,7 +1134,6 @@ QPDFObjectHandle::rotatePage(int angle, bool relative) void QPDFObjectHandle::coalesceContentStreams() { - assertPageObject(); QPDFObjectHandle contents = this->getKey("/Contents"); if (contents.isStream()) { @@ -1218,7 +1214,6 @@ QPDFObjectHandle::parse(std::string const& object_str, void QPDFObjectHandle::pipePageContents(Pipeline* p) { - assertPageObject(); std::string description = "page object " + QUtil::int_to_string(this->m->objid) + " " + QUtil::int_to_string(this->m->generation); @@ -1256,7 +1251,6 @@ QPDFObjectHandle::pipeContentStreams( void QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) { - assertPageObject(); std::string description = "page object " + QUtil::int_to_string(this->m->objid) + " " + QUtil::int_to_string(this->m->generation); @@ -1267,7 +1261,6 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) void QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) { - assertPageObject(); std::string description = "token filter for page object " + QUtil::int_to_string(this->m->objid) + " " + QUtil::int_to_string(this->m->generation); @@ -2222,8 +2215,29 @@ QPDFObjectHandle::assertNumber() bool QPDFObjectHandle::isPageObject() { - // Some PDF files have /Type broken on pages. - return (this->isDictionary() && this->hasKey("/Contents")); + // See comments in QPDFObjectHandle.hh. + if (! this->isDictionary()) + { + return false; + } + if (this->hasKey("/Type")) + { + QPDFObjectHandle type = this->getKey("/Type"); + if (type.isName() && (type.getName() == "/Page")) + { + return true; + } + // Files have been seen in the wild that have /Type (Page) + if (type.isString() && (type.getStringValue() == "Page")) + { + return true; + } + } + if (this->hasKey("/Contents")) + { + return true; + } + return false; } bool diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 31307e4e..ea5afdb5 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -191,7 +191,6 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) // pos = npages adds to the end. flattenPagesTree(); - newpage.assertPageObject(); if (! newpage.isIndirect()) { @@ -288,7 +287,6 @@ QPDF::addPage(QPDFObjectHandle newpage, bool first) int QPDF::findPage(QPDFObjectHandle& page) { - page.assertPageObject(); return findPage(page.getObjGen()); } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 877c3f5e..92a9412b 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -921,6 +921,34 @@ $td->runtest("check output", show_ntests(); # ---------- +$td->notify("--- Page with no contents ---"); +$n_tests += 7; + +$td->runtest("check no contents", + {$td->COMMAND => "qpdf --check no-contents.pdf"}, + {$td->FILE => "no-contents-check.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + +foreach my $arg ('--qdf', '--coalesce-contents', '') +{ + $td->runtest("convert no contents ($arg)", + {$td->COMMAND => + "qpdf $arg --static-id no-contents.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + + my $suf = $arg; + $suf =~ s/--//; + if ($suf eq '') + { + $suf = "none"; + } + $td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "no-contents-$suf.pdf"}); +} + +show_ntests(); +# ---------- $td->notify("--- Token filters ---"); $n_tests += 2; diff --git a/qpdf/qtest/qpdf/no-contents-check.out b/qpdf/qtest/qpdf/no-contents-check.out new file mode 100644 index 00000000..ba195b0e --- /dev/null +++ b/qpdf/qtest/qpdf/no-contents-check.out @@ -0,0 +1,6 @@ +checking no-contents.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf b/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf new file mode 100644 index 00000000..c77ad5c9 --- /dev/null +++ b/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf @@ -0,0 +1,27 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents 4 0 R /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >> +endobj +4 0 obj +<< /Length 0 /Filter /FlateDecode >> +stream +endstream +endobj +xref +0 5 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +0000000229 00000 n +trailer << /Root 1 0 R /Size 5 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >> +startxref +298 +%%EOF diff --git a/qpdf/qtest/qpdf/no-contents-none.pdf b/qpdf/qtest/qpdf/no-contents-none.pdf new file mode 100644 index 00000000..28f6b3b1 --- /dev/null +++ b/qpdf/qtest/qpdf/no-contents-none.pdf @@ -0,0 +1,21 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >> +endobj +xref +0 4 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >> +startxref +213 +%%EOF diff --git a/qpdf/qtest/qpdf/no-contents-qdf.pdf b/qpdf/qtest/qpdf/no-contents-qdf.pdf new file mode 100644 index 00000000..40511781 --- /dev/null +++ b/qpdf/qtest/qpdf/no-contents-qdf.pdf @@ -0,0 +1,54 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /MediaBox [ + 0 + 0 + 720 + 720 + ] + /Parent 2 0 R + /Resources << + >> + /Type /Page +>> +endobj + +xref +0 4 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000242 00000 n +trailer << + /Root 1 0 R + /Size 4 + /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] +>> +startxref +361 +%%EOF diff --git a/qpdf/qtest/qpdf/no-contents.pdf b/qpdf/qtest/qpdf/no-contents.pdf new file mode 100644 index 00000000..aa26d580 --- /dev/null +++ b/qpdf/qtest/qpdf/no-contents.pdf @@ -0,0 +1,21 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >> +endobj +xref +0 4 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><52bba3c78160d0c6e851b59110e5d076>] >> +startxref +213 +%%EOF |