about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2018-03-06 17:25:09 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2018-03-06 17:34:07 +0100
commit: e4e2e26d990d038b0d35e7466c8a24dbfafab7d2 (patch)
tree: 3f9341102bdadb710912b700e26b8be6e2288123
parent: 1a4dcb4aaf987631caa93a9d3a5b42ca439bb6ae (diff)
download: qpdf-e4e2e26d990d038b0d35e7466c8a24dbfafab7d2.tar.zst
Properly handle pages with no contents (fixes #194)
Remove calls to assertPageObject(). All cases in the library that called assertPageObject() work fine if you don't call assertPageObject() because nothing assumes anything that was being checked by that call. Removing the calls enables more files to be successfully processed.
-rw-r--r--  ChangeLog  4
-rw-r--r--  include/qpdf/QPDFObjectHandle.hh  5
-rw-r--r--  libqpdf/QPDFObjectHandle.cc  32
-rw-r--r--  libqpdf/QPDF_pages.cc  2
-rw-r--r--  qpdf/qtest/qpdf.test  28
-rw-r--r--  qpdf/qtest/qpdf/no-contents-check.out  6
-rw-r--r--  qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf  27
-rw-r--r--  qpdf/qtest/qpdf/no-contents-none.pdf  21
-rw-r--r--  qpdf/qtest/qpdf/no-contents-qdf.pdf  54
-rw-r--r--  qpdf/qtest/qpdf/no-contents.pdf  21
10 files changed, 189 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index bba500ee..b21baeb0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2018-03-06 Jay Berkenbilt <ejb@ql.org>
+
+ * Properly handle pages with no contents. Fixes #194.
+
2018-03-05 Jay Berkenbilt <ejb@ql.org>
* Improve handling of loops while following cross reference
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index a2f54a73..6f78e9a9 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -830,6 +830,11 @@ class QPDFObjectHandle
QPDF_DLL
void assertNumber();
+ // The isPageObject method checks the /Type key of the object.
+ // This is not completely reliable as there are some otherwise
+ // valid files whose /Type is wrong for page objects. qpdf is
+ // slightly more accepting but may still return false here when
+ // treating the object as a page would work. Use this sparingly.
QPDF_DLL
bool isPageObject();
QPDF_DLL
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 2e9cc996..c178a492 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -932,8 +932,6 @@ QPDFObjectHandle::getGeneration() const
std::map<std::string, QPDFObjectHandle>
QPDFObjectHandle::getPageImages()
{
- assertPageObject();
-
// Note: this code doesn't handle inherited resources. If this
// page dictionary doesn't have a /Resources key or has one whose
// value is null or an empty dictionary, you are supposed to walk
@@ -1081,7 +1079,6 @@ QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
void
QPDFObjectHandle::rotatePage(int angle, bool relative)
{
- assertPageObject();
if ((angle % 90) != 0)
{
throw std::runtime_error(
@@ -1137,7 +1134,6 @@ QPDFObjectHandle::rotatePage(int angle, bool relative)
void
QPDFObjectHandle::coalesceContentStreams()
{
- assertPageObject();
QPDFObjectHandle contents = this->getKey("/Contents");
if (contents.isStream())
{
@@ -1218,7 +1214,6 @@ QPDFObjectHandle::parse(std::string const& object_str,
void
QPDFObjectHandle::pipePageContents(Pipeline* p)
{
- assertPageObject();
std::string description = "page object " +
QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation);
@@ -1256,7 +1251,6 @@ QPDFObjectHandle::pipeContentStreams(
void
QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
{
- assertPageObject();
std::string description = "page object " +
QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation);
@@ -1267,7 +1261,6 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
void
QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
{
- assertPageObject();
std::string description = "token filter for page object " +
QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation);
@@ -2222,8 +2215,29 @@ QPDFObjectHandle::assertNumber()
bool
QPDFObjectHandle::isPageObject()
{
- // Some PDF files have /Type broken on pages.
- return (this->isDictionary() && this->hasKey("/Contents"));
+ // See comments in QPDFObjectHandle.hh.
+ if (! this->isDictionary())
+ {
+ return false;
+ }
+ if (this->hasKey("/Type"))
+ {
+ QPDFObjectHandle type = this->getKey("/Type");
+ if (type.isName() && (type.getName() == "/Page"))
+ {
+ return true;
+ }
+ // Files have been seen in the wild that have /Type (Page)
+ if (type.isString() && (type.getStringValue() == "Page"))
+ {
+ return true;
+ }
+ }
+ if (this->hasKey("/Contents"))
+ {
+ return true;
+ }
+ return false;
}
bool
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index 31307e4e..ea5afdb5 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -191,7 +191,6 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
// pos = npages adds to the end.
flattenPagesTree();
- newpage.assertPageObject();
if (! newpage.isIndirect())
{
@@ -288,7 +287,6 @@ QPDF::addPage(QPDFObjectHandle newpage, bool first)
int
QPDF::findPage(QPDFObjectHandle& page)
{
- page.assertPageObject();
return findPage(page.getObjGen());
}
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 877c3f5e..92a9412b 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -921,6 +921,34 @@ $td->runtest("check output",
show_ntests();
# ----------
+$td->notify("--- Page with no contents ---");
+$n_tests += 7;
+
+$td->runtest("check no contents",
+ {$td->COMMAND => "qpdf --check no-contents.pdf"},
+ {$td->FILE => "no-contents-check.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+
+foreach my $arg ('--qdf', '--coalesce-contents', '')
+{
+ $td->runtest("convert no contents ($arg)",
+ {$td->COMMAND =>
+ "qpdf $arg --static-id no-contents.pdf a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+
+ my $suf = $arg;
+ $suf =~ s/--//;
+ if ($suf eq '')
+ {
+ $suf = "none";
+ }
+ $td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "no-contents-$suf.pdf"});
+}
+
+show_ntests();
+# ----------
$td->notify("--- Token filters ---");
$n_tests += 2;
diff --git a/qpdf/qtest/qpdf/no-contents-check.out b/qpdf/qtest/qpdf/no-contents-check.out
new file mode 100644
index 00000000..ba195b0e
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-check.out
@@ -0,0 +1,6 @@
+checking no-contents.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf b/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf
new file mode 100644
index 00000000..c77ad5c9
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf
@@ -0,0 +1,27 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents 4 0 R /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
+endobj
+4 0 obj
+<< /Length 0 /Filter /FlateDecode >>
+stream
+endstream
+endobj
+xref
+0 5
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+0000000229 00000 n
+trailer << /Root 1 0 R /Size 5 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
+startxref
+298
+%%EOF
diff --git a/qpdf/qtest/qpdf/no-contents-none.pdf b/qpdf/qtest/qpdf/no-contents-none.pdf
new file mode 100644
index 00000000..28f6b3b1
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-none.pdf
@@ -0,0 +1,21 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
+endobj
+xref
+0 4
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
+startxref
+213
+%%EOF
diff --git a/qpdf/qtest/qpdf/no-contents-qdf.pdf b/qpdf/qtest/qpdf/no-contents-qdf.pdf
new file mode 100644
index 00000000..40511781
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-qdf.pdf
@@ -0,0 +1,54 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /MediaBox [
+ 0
+ 0
+ 720
+ 720
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ >>
+ /Type /Page
+>>
+endobj
+
+xref
+0 4
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000242 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 4
+ /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>]
+>>
+startxref
+361
+%%EOF
diff --git a/qpdf/qtest/qpdf/no-contents.pdf b/qpdf/qtest/qpdf/no-contents.pdf
new file mode 100644
index 00000000..aa26d580
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents.pdf
@@ -0,0 +1,21 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
+endobj
+xref
+0 4
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><52bba3c78160d0c6e851b59110e5d076>] >>
+startxref
+213
+%%EOF