summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jay Berkenbilt <jberkenbilt@users.noreply.github.com> 2023-09-02 21:03:03 +0200
committer GitHub <noreply@github.com> 2023-09-02 21:03:03 +0200
commit c51bdeb19cee981cc3e6d308ecff4297a7ca29e7 (patch)
tree 5b878a0d846a544cee2a61f0a12ae07570b28607
parent ec6784411d4f9fbf99a03f8e092e38fd3816049a (diff)
parent 9a69cbba5b0502d0699d526860446791b9fef6f1 (diff)
download qpdf-c51bdeb19cee981cc3e6d308ecff4297a7ca29e7.tar.zst
Merge pull request #1017 from m-holger/mbox
Check for missing mediaboxes
-rw-r--r-- include/qpdf/QPDF.hh 5
-rw-r--r-- libqpdf/QPDF_pages.cc 19
-rw-r--r-- qpdf/qpdf.testcov 2
-rw-r--r-- qpdf/qtest/copy-foreign-objects.test 2
-rw-r--r-- qpdf/qtest/page-errors.test 18
-rw-r--r-- qpdf/qtest/qpdf/issue-449.out 3
-rw-r--r-- qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf bin 0 -> 1432 bytes
-rw-r--r-- qpdf/qtest/qpdf/page-inherit-mediabox.pdf 184
-rw-r--r-- qpdf/qtest/qpdf/page-missing-mediabox-out.pdf bin 0 -> 1389 bytes
-rw-r--r-- qpdf/qtest/qpdf/page-missing-mediabox.out 1
-rw-r--r-- qpdf/qtest/qpdf/page-no-content.out 2
11 files changed, 229 insertions, 7 deletions
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 2fd0f493..0281f5d0 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -1105,7 +1105,10 @@ class QPDF
// methods to support page handling
void getAllPagesInternal(
- QPDFObjectHandle cur_pages, QPDFObjGen::set& visited, QPDFObjGen::set& seen);
+ QPDFObjectHandle cur_pages,
+ QPDFObjGen::set& visited,
+ QPDFObjGen::set& seen,
+ bool media_box);
void insertPage(QPDFObjectHandle newpage, int pos);
void flattenPagesTree();
void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);
diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc
index e03dabc8..4e3e77c0 100644
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -68,7 +68,7 @@ QPDF::getAllPages()
seen.clear();
if (pages.hasKey("/Kids")) {
// Ensure we actually found a /Pages object.
- getAllPagesInternal(pages, visited, seen);
+ getAllPagesInternal(pages, visited, seen, false);
}
}
return m->all_pages;
@@ -76,7 +76,7 @@ QPDF::getAllPages()
void
QPDF::getAllPagesInternal(
- QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen)
+ QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, bool media_box)
{
if (!visited.add(cur_node)) {
throw QPDFExc(
@@ -90,13 +90,26 @@ QPDF::getAllPagesInternal(
cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding");
cur_node.replaceKey("/Type", "/Pages"_qpdf);
}
+ if (!media_box) {
+ media_box = cur_node.getKey("/MediaBox").isRectangle();
+ QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1);
+ }
auto kids = cur_node.getKey("/Kids");
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i) {
auto kid = kids.getArrayItem(i);
if (kid.hasKey("/Kids")) {
- getAllPagesInternal(kid, visited, seen);
+ getAllPagesInternal(kid, visited, seen, media_box);
} else {
+ if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
+ QTC::TC("qpdf", "QPDF missing mediabox");
+ kid.warnIfPossible(
+ "kid " + std::to_string(i) +
+ " (from 0) MediaBox is undefined; setting to letter / ANSI A");
+ kid.replaceKey(
+ "/MediaBox",
+ QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
+ }
if (!kid.isIndirect()) {
QTC::TC("qpdf", "QPDF handle direct page object");
cur_node.warnIfPossible(
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 014ea571..ec11c57b 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -411,6 +411,8 @@ QPDFPageObjectHelper copy shared attribute 1
QPDFJob from_nr from repeat_nr 0
QPDF resolve duplicated page object 0
QPDF handle direct page object 0
+QPDF missing mediabox 0
+QPDF inherit mediabox 1
QPDFTokenizer finder found wrong word 0
QPDFTokenizer found EI by byte count 0
QPDFTokenizer found EI after more than one try 0
diff --git a/qpdf/qtest/copy-foreign-objects.test b/qpdf/qtest/copy-foreign-objects.test
index e7419e4f..73f50e0a 100644
--- a/qpdf/qtest/copy-foreign-objects.test
+++ b/qpdf/qtest/copy-foreign-objects.test
@@ -56,7 +56,7 @@ foreach my $i (0, 1)
}
$td->runtest("issue 449",
{$td->COMMAND => "test_driver 69 issue-449.pdf"},
- {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0},
+ {$td->FILE => "issue-449.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
cleanup();
diff --git a/qpdf/qtest/page-errors.test b/qpdf/qtest/page-errors.test
index 7e210d73..60894f96 100644
--- a/qpdf/qtest/page-errors.test
+++ b/qpdf/qtest/page-errors.test
@@ -14,12 +14,26 @@ cleanup();
my $td = new TestDriver('page-errors');
-my $n_tests = 5;
+my $n_tests = 9;
$td->runtest("handle page no with contents",
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
- {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
+ {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
+$td->runtest("handle page with missing MediaBox",
+ {$td->COMMAND => "qpdf --static-id --empty --pages page-no-content.pdf -- out.pdf"},
+ {$td->FILE => "page-missing-mediabox.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "out.pdf"},
+ {$td->FILE => "page-missing-mediabox-out.pdf"});
+$td->runtest("handle page with inherited MediaBox",
+ {$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- out.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "out.pdf"},
+ {$td->FILE => "page-inherit-mediabox-out.pdf"});
$td->runtest("check no type key for page nodes",
{$td->COMMAND => "qpdf --check no-pages-types.pdf"},
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
diff --git a/qpdf/qtest/qpdf/issue-449.out b/qpdf/qtest/qpdf/issue-449.out
new file mode 100644
index 00000000..cd212808
--- /dev/null
+++ b/qpdf/qtest/qpdf/issue-449.out
@@ -0,0 +1,3 @@
+WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A
+WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
+test 69 done
diff --git a/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf b/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf
new file mode 100644
index 00000000..67986cfa
--- /dev/null
+++ b/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/page-inherit-mediabox.pdf b/qpdf/qtest/qpdf/page-inherit-mediabox.pdf
new file mode 100644
index 00000000..b098ffbc
--- /dev/null
+++ b/qpdf/qtest/qpdf/page-inherit-mediabox.pdf
@@ -0,0 +1,184 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 3
+ /Kids [
+ 3 0 R
+ 4 0 R
+ 5 0 R
+ ]
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents 6 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 8 0 R
+ >>
+ /ProcSet 9 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Page 2
+%% Original object ID: 4 0
+4 0 obj
+<<
+ /Parent 2 0 R
+ /Type /Page
+>>
+endobj
+
+%% Page 3
+%% Original object ID: 5 0
+5 0 obj
+<<
+ /Contents 10 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 12 0 R
+ >>
+ /ProcSet 13 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 6 0
+6 0 obj
+<<
+ /Length 7 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+7 0 obj
+44
+endobj
+
+%% Original object ID: 7 0
+8 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 8 0
+9 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+%% Contents for page 3
+%% Original object ID: 9 0
+10 0 obj
+<<
+ /Length 11 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+11 0 obj
+44
+endobj
+
+%% Original object ID: 10 0
+12 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 11 0
+13 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 14
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000308 00000 n
+0000000537 00000 n
+0000000626 00000 n
+0000000871 00000 n
+0000000970 00000 n
+0000001016 00000 n
+0000001161 00000 n
+0000001246 00000 n
+0000001347 00000 n
+0000001395 00000 n
+0000001542 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 14
+ /ID [<c302d043194ffe0cc1787745a3d7787f><963eac977ec4dfaf9fbcb48aae925c7a>]
+>>
+startxref
+1578
+%%EOF
diff --git a/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf b/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf
new file mode 100644
index 00000000..8013e36a
--- /dev/null
+++ b/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/page-missing-mediabox.out b/qpdf/qtest/qpdf/page-missing-mediabox.out
new file mode 100644
index 00000000..9db62ed6
--- /dev/null
+++ b/qpdf/qtest/qpdf/page-missing-mediabox.out
@@ -0,0 +1 @@
+WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
diff --git a/qpdf/qtest/qpdf/page-no-content.out b/qpdf/qtest/qpdf/page-no-content.out
index 15df72db..73f078c5 100644
--- a/qpdf/qtest/qpdf/page-no-content.out
+++ b/qpdf/qtest/qpdf/page-no-content.out
@@ -1,3 +1,4 @@
+WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
page 1: 3 0 R
content:
6 0 R
@@ -6,3 +7,4 @@ page 2: 4 0 R
page 3: 5 0 R
content:
9 0 R
+qpdf: operation succeeded with warnings