aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2019-01-17 14:56:58 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2019-01-17 17:43:56 +0100
commit: 5cfcd4f361063df8e216489915758ce40a15f15b (patch)
tree: c27e87747860c51f85072ce70c2222da5604c16e
parent: e09ae710dc3bfe2b64d2905d4d62079f7e664498 (diff)
download: qpdf-5cfcd4f361063df8e216489915758ce40a15f15b.tar.zst
Additional checks for unreferenced resources
Explicitly abandon removal of unreferenced resources if there are any lexical errors in the page's contents. This case always generated a warning, but it now also prevents removal of unreferenced resources, thus strongly decreasing the likelihood of data loss.
-rw-r--r-- libqpdf/QPDFPageObjectHelper.cc 17
-rw-r--r-- qpdf/qpdf.testcov 1
-rw-r--r-- qpdf/qtest/qpdf.test 16
-rw-r--r-- qpdf/qtest/qpdf/coalesce-split-1-2.pdf 231
-rw-r--r-- qpdf/qtest/qpdf/coalesce-split.out 10
5 files changed, 272 insertions, 3 deletions
diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc
index ba647372..c5ede04f 100644
--- a/libqpdf/QPDFPageObjectHelper.cc
+++ b/libqpdf/QPDFPageObjectHelper.cc
@@ -99,11 +99,16 @@ QPDFPageObjectHelper::addContentTokenFilter(
class NameWatcher: public QPDFObjectHandle::TokenFilter
{
public:
+ NameWatcher() :
+ saw_bad(false)
+ {
+ }
virtual ~NameWatcher()
{
}
virtual void handleToken(QPDFTokenizer::Token const&);
std::set<std::string> names;
+ bool saw_bad;
};
void
@@ -116,6 +121,10 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
this->names.insert(
QPDFObjectHandle::newName(token.getValue()).getName());
}
+ else if (token.getType() == QPDFTokenizer::tt_bad)
+ {
+ saw_bad = true;
+ }
writeToken(token);
}
@@ -134,6 +143,14 @@ QPDFPageObjectHelper::removeUnreferencedResources()
"; not attempting to remove unreferenced objects from this page");
return;
}
+ if (nw.saw_bad)
+ {
+ QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
+ this->oh.warnIfPossible(
+ "Bad token found while scanning content stream; "
+ "not attempting to remove unreferenced objects from this page");
+ return;
+ }
// Walk through /Font and /XObject dictionaries, removing any
// resources that are not referenced. We must make copies of
// resource dictionaries down into the dictionaries are mutating
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 58f2cdca..08f82592 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -412,3 +412,4 @@ QPDF copy foreign stream with provider 0
QPDF copy foreign stream with buffer 0
QPDF immediate copy stream data 0
qpdf copy same page more than once 1
+QPDFPageObjectHelper bad token finding names 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index b2b92f2c..44aa5421 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -1384,7 +1384,7 @@ my @sp_cases = (
[11, 'pdf extension', '', 'split-out.Pdf'],
[4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'],
);
-$n_tests += 21;
+$n_tests += 23;
for (@sp_cases)
{
$n_tests += 1 + $_->[0];
@@ -1482,10 +1482,20 @@ $td->runtest("split shared font, xobject",
foreach my $i (qw(1 2 3 4))
{
$td->runtest("check output ($i)",
- {$td->FILE => "shared-font-xobject-split-$i.pdf"},
- {$td->FILE => "split-out-shared-font-xobject-$i.pdf"});
+ {$td->FILE => "split-out-shared-font-xobject-$i.pdf"},
+ {$td->FILE => "shared-font-xobject-split-$i.pdf"});
}
+$td->runtest("unreferenced resources with bad token",
+ {$td->COMMAND =>
+ "qpdf --qdf --static-id --split-pages=2" .
+ " coalesce.pdf split-out-bad-token.pdf"},
+ {$td->FILE => "coalesce-split.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "split-out-bad-token-1-2.pdf"},
+ {$td->FILE => "coalesce-split-1-2.pdf"});
+
show_ntests();
# ----------
$td->notify("--- Keep Files Open ---");
diff --git a/qpdf/qtest/qpdf/coalesce-split-1-2.pdf b/qpdf/qtest/qpdf/coalesce-split-1-2.pdf
new file mode 100644
index 00000000..4542411e
--- /dev/null
+++ b/qpdf/qtest/qpdf/coalesce-split-1-2.pdf
@@ -0,0 +1,231 @@
+%PDF-1.3
+%
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 2
+ /Kids [
+ 3 0 R
+ 4 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents [
+ 5 0 R
+ 7 0 R
+ 9 0 R
+ 11 0 R
+ ]
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 13 0 R
+ >>
+ /ProcSet 14 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Page 2
+%% Original object ID: 14 0
+4 0 obj
+<<
+ /Contents 15 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 17 0 R
+ >>
+ /ProcSet 18 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 4 0
+5 0 obj
+<<
+ /Length 6 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Pot
+endstream
+endobj
+
+%QDF: ignore_newline
+6 0 obj
+33
+endobj
+
+%% Contents for page 1
+%% Original object ID: 6 0
+7 0 obj
+<<
+ /Length 8 0 R
+>>
+stream
+ato) Tj
+ET [ /array
+endstream
+endobj
+
+%QDF: ignore_newline
+8 0 obj
+19
+endobj
+
+%% Contents for page 1
+%% Original object ID: 8 0
+9 0 obj
+<<
+ /Length 10 0 R
+>>
+stream
+/split ] BI
+/CS /G/W 66/H 47/BPC 8/F/Fl/DP<</Predictor 15/Columns 66>>
+ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt
+endstream
+endobj
+
+%QDF: ignore_newline
+10 0 obj
+253
+endobj
+
+%% Contents for page 1
+%% Original object ID: 10 0
+11 0 obj
+<<
+ /Length 12 0 R
+>>
+stream
+QTt*hUw%)p"DiRjDYNUAvF&
+u#cW ߉WO
+EI
+endstream
+endobj
+
+%QDF: ignore_newline
+12 0 obj
+65
+endobj
+
+%% Original object ID: 12 0
+13 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 13 0
+14 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+%% Contents for page 2
+%% Original object ID: 15 0
+15 0 obj
+<<
+ /Length 16 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+16 0 obj
+44
+endobj
+
+%% Original object ID: 17 0
+17 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 18 0
+18 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 19
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000252 00000 n
+0000000525 00000 n
+0000000770 00000 n
+0000000880 00000 n
+0000000949 00000 n
+0000001045 00000 n
+0000001114 00000 n
+0000001445 00000 n
+0000001517 00000 n
+0000001661 00000 n
+0000001709 00000 n
+0000001856 00000 n
+0000001943 00000 n
+0000002044 00000 n
+0000002092 00000 n
+0000002239 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 19
+ /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
+>>
+startxref
+2275
+%%EOF
diff --git a/qpdf/qtest/qpdf/coalesce-split.out b/qpdf/qtest/qpdf/coalesce-split.out
new file mode 100644
index 00000000..5e18173c
--- /dev/null
+++ b/qpdf/qtest/qpdf/coalesce-split.out
@@ -0,0 +1,10 @@
+WARNING: coalesce.pdf, object 3 0 at offset 181: Bad token found while scanning content stream; not attempting to remove unreferenced objects from this page
+WARNING: empty PDF: content normalization encountered bad tokens
+WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
+WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+WARNING: empty PDF: content normalization encountered bad tokens
+WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+WARNING: empty PDF: content normalization encountered bad tokens
+WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
+WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+qpdf: operation succeeded with warnings; resulting file may have some problems