summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 4
-rw-r--r-- include/qpdf/QPDFObjectHandle.hh 5
-rw-r--r-- libqpdf/QPDFObjectHandle.cc 32
-rw-r--r-- libqpdf/QPDF_pages.cc 2
-rw-r--r-- qpdf/qtest/qpdf.test 28
-rw-r--r-- qpdf/qtest/qpdf/no-contents-check.out 6
-rw-r--r-- qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf 27
-rw-r--r-- qpdf/qtest/qpdf/no-contents-none.pdf 21
-rw-r--r-- qpdf/qtest/qpdf/no-contents-qdf.pdf 54
-rw-r--r-- qpdf/qtest/qpdf/no-contents.pdf 21
10 files changed, 189 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index bba500ee..b21baeb0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2018-03-06 Jay Berkenbilt <ejb@ql.org>
+
+ * Properly handle pages with no contents. Fixes #194.
+
2018-03-05 Jay Berkenbilt <ejb@ql.org>
* Improve handling of loops while following cross reference
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index a2f54a73..6f78e9a9 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -830,6 +830,11 @@ class QPDFObjectHandle
QPDF_DLL
void assertNumber();
+ // The isPageObject method checks the /Type key of the object and
+ // falls back to the presence of /Contents. This is not completely
+ // reliable, as some otherwise valid files have the wrong /Type on
+ // page objects, so it may still return false when treating the
+ // object as a page would work. Use this sparingly.
QPDF_DLL
bool isPageObject();
QPDF_DLL
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 2e9cc996..c178a492 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -932,8 +932,6 @@ QPDFObjectHandle::getGeneration() const
std::map<std::string, QPDFObjectHandle>
QPDFObjectHandle::getPageImages()
{
- assertPageObject();
-
// Note: this code doesn't handle inherited resources. If this
// page dictionary doesn't have a /Resources key or has one whose
// value is null or an empty dictionary, you are supposed to walk
@@ -1081,7 +1079,6 @@ QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
void
QPDFObjectHandle::rotatePage(int angle, bool relative)
{
- assertPageObject();
if ((angle % 90) != 0)
{
throw std::runtime_error(
@@ -1137,7 +1134,6 @@ QPDFObjectHandle::rotatePage(int angle, bool relative)
void
QPDFObjectHandle::coalesceContentStreams()
{
- assertPageObject();
QPDFObjectHandle contents = this->getKey("/Contents");
if (contents.isStream())
{
@@ -1218,7 +1214,6 @@ QPDFObjectHandle::parse(std::string const& object_str,
void
QPDFObjectHandle::pipePageContents(Pipeline* p)
{
- assertPageObject();
std::string description = "page object " +
QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation);
@@ -1256,7 +1251,6 @@ QPDFObjectHandle::pipeContentStreams(
void
QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
{
- assertPageObject();
std::string description = "page object " +
QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation);
@@ -1267,7 +1261,6 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
void
QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
{
- assertPageObject();
std::string description = "token filter for page object " +
QUtil::int_to_string(this->m->objid) + " " +
QUtil::int_to_string(this->m->generation);
@@ -2222,8 +2215,29 @@ QPDFObjectHandle::assertNumber()
bool
QPDFObjectHandle::isPageObject()
{
- // Some PDF files have /Type broken on pages.
- return (this->isDictionary() && this->hasKey("/Contents"));
+ // See comments in QPDFObjectHandle.hh.
+ if (! this->isDictionary())
+ {
+ return false;
+ }
+ if (this->hasKey("/Type"))
+ {
+ QPDFObjectHandle type = this->getKey("/Type");
+ if (type.isName() && (type.getName() == "/Page"))
+ {
+ return true;
+ }
+ // Files have been seen in the wild that have /Type (Page), i.e. a string instead of the name /Page
+ if (type.isString() && (type.getStringValue() == "Page"))
+ {
+ return true;
+ }
+ }
+ if (this->hasKey("/Contents"))
+ {
+ return true;
+ }
+ return false;
}
bool
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index 31307e4e..ea5afdb5 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -191,7 +191,6 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
// pos = npages adds to the end.
flattenPagesTree();
- newpage.assertPageObject();
if (! newpage.isIndirect())
{
@@ -288,7 +287,6 @@ QPDF::addPage(QPDFObjectHandle newpage, bool first)
int
QPDF::findPage(QPDFObjectHandle& page)
{
- page.assertPageObject();
return findPage(page.getObjGen());
}
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 877c3f5e..92a9412b 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -921,6 +921,34 @@ $td->runtest("check output",
show_ntests();
# ----------
+$td->notify("--- Page with no contents ---");
+$n_tests += 7;
+
+$td->runtest("check no contents",
+ {$td->COMMAND => "qpdf --check no-contents.pdf"},
+ {$td->FILE => "no-contents-check.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+
+foreach my $arg ('--qdf', '--coalesce-contents', '')
+{
+ $td->runtest("convert no contents ($arg)",
+ {$td->COMMAND =>
+ "qpdf $arg --static-id no-contents.pdf a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+
+ my $suf = $arg;
+ $suf =~ s/--//;
+ if ($suf eq '')
+ {
+ $suf = "none";
+ }
+ $td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "no-contents-$suf.pdf"});
+}
+
+show_ntests();
+# ----------
$td->notify("--- Token filters ---");
$n_tests += 2;
diff --git a/qpdf/qtest/qpdf/no-contents-check.out b/qpdf/qtest/qpdf/no-contents-check.out
new file mode 100644
index 00000000..ba195b0e
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-check.out
@@ -0,0 +1,6 @@
+checking no-contents.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf b/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf
new file mode 100644
index 00000000..c77ad5c9
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf
@@ -0,0 +1,27 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents 4 0 R /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
+endobj
+4 0 obj
+<< /Length 0 /Filter /FlateDecode >>
+stream
+endstream
+endobj
+xref
+0 5
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+0000000229 00000 n
+trailer << /Root 1 0 R /Size 5 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
+startxref
+298
+%%EOF
diff --git a/qpdf/qtest/qpdf/no-contents-none.pdf b/qpdf/qtest/qpdf/no-contents-none.pdf
new file mode 100644
index 00000000..28f6b3b1
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-none.pdf
@@ -0,0 +1,21 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
+endobj
+xref
+0 4
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
+startxref
+213
+%%EOF
diff --git a/qpdf/qtest/qpdf/no-contents-qdf.pdf b/qpdf/qtest/qpdf/no-contents-qdf.pdf
new file mode 100644
index 00000000..40511781
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents-qdf.pdf
@@ -0,0 +1,54 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /MediaBox [
+ 0
+ 0
+ 720
+ 720
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ >>
+ /Type /Page
+>>
+endobj
+
+xref
+0 4
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000242 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 4
+ /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>]
+>>
+startxref
+361
+%%EOF
diff --git a/qpdf/qtest/qpdf/no-contents.pdf b/qpdf/qtest/qpdf/no-contents.pdf
new file mode 100644
index 00000000..aa26d580
--- /dev/null
+++ b/qpdf/qtest/qpdf/no-contents.pdf
@@ -0,0 +1,21 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
+endobj
+xref
+0 4
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><52bba3c78160d0c6e851b59110e5d076>] >>
+startxref
+213
+%%EOF