aboutsummaryrefslogtreecommitdiffstats
path: root/qpdf
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2018-02-03 03:16:40 +0100
committerJay Berkenbilt <ejb@ql.org>2018-02-19 03:05:47 +0100
commit5136238f2a973f693cea53c340dcff23a655531f (patch)
tree8cc1d2a1fdf1833fa67454b2707994b3328c879c /qpdf
parent30709935af023dd66a17f2d494aa7dc84b7177e1 (diff)
downloadqpdf-5136238f2a973f693cea53c340dcff23a655531f.tar.zst
Detect and report bad tokens in content normalization
Diffstat (limited to 'qpdf')
-rw-r--r--qpdf/qpdf.testcov1
-rw-r--r--qpdf/qtest/qpdf.test10
-rw-r--r--qpdf/qtest/qpdf/coalesce.qdf231
-rw-r--r--qpdf/qtest/qpdf/good14.out17
-rw-r--r--qpdf/qtest/qpdf/normalize-warnings.out9
5 files changed, 265 insertions, 3 deletions
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index a1ce662d..2c51867f 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -306,3 +306,4 @@ Pl_QPDFTokenizer found ID 0
QPDFObjectHandle non-stream in stream array 0
QPDFObjectHandle coalesce called on stream 0
QPDFObjectHandle coalesce provide stream data 0
+QPDF_Stream bad token at end during normalize 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index a3572859..45c750fd 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -737,8 +737,16 @@ $td->runtest("stream with tiff predictor",
show_ntests();
# ----------
$td->notify("--- Coalesce contents ---");
-$n_tests += 4;
+$n_tests += 6;
+$td->runtest("qdf with normalize warnings",
+ {$td->COMMAND =>
+ "qpdf --qdf --static-id coalesce.pdf a.pdf"},
+ {$td->FILE => "normalize-warnings.out", $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "coalesce.qdf"});
$td->runtest("coalesce contents with qdf",
{$td->COMMAND =>
"qpdf --qdf --static-id" .
diff --git a/qpdf/qtest/qpdf/coalesce.qdf b/qpdf/qtest/qpdf/coalesce.qdf
new file mode 100644
index 00000000..5007dc12
--- /dev/null
+++ b/qpdf/qtest/qpdf/coalesce.qdf
@@ -0,0 +1,231 @@
+%PDF-1.3
+%
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 2
+ /Kids [
+ 3 0 R
+ 4 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents [
+ 5 0 R
+ 7 0 R
+ 9 0 R
+ 11 0 R
+ ]
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 13 0 R
+ >>
+ /ProcSet 14 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Page 2
+%% Original object ID: 4 0
+4 0 obj
+<<
+ /Contents 15 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 17 0 R
+ >>
+ /ProcSet 18 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 5 0
+5 0 obj
+<<
+ /Length 6 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Pot
+endstream
+endobj
+
+%QDF: ignore_newline
+6 0 obj
+33
+endobj
+
+%% Contents for page 1
+%% Original object ID: 7 0
+7 0 obj
+<<
+ /Length 8 0 R
+>>
+stream
+ato) Tj
+ET [ /array
+endstream
+endobj
+
+%QDF: ignore_newline
+8 0 obj
+19
+endobj
+
+%% Contents for page 1
+%% Original object ID: 9 0
+9 0 obj
+<<
+ /Length 10 0 R
+>>
+stream
+/split ] BI
+/CS /G/W 66/H 47/BPC 8/F/Fl/DP<</Predictor 15/Columns 66>>
+ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt
+endstream
+endobj
+
+%QDF: ignore_newline
+10 0 obj
+253
+endobj
+
+%% Contents for page 1
+%% Original object ID: 11 0
+11 0 obj
+<<
+ /Length 12 0 R
+>>
+stream
+QTt*hUw%)p"DiRjDYNUAvF&
+u#cW ߉WO
+EI
+endstream
+endobj
+
+%QDF: ignore_newline
+12 0 obj
+65
+endobj
+
+%% Original object ID: 13 0
+13 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 14 0
+14 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+%% Contents for page 2
+%% Original object ID: 15 0
+15 0 obj
+<<
+ /Length 16 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+16 0 obj
+44
+endobj
+
+%% Original object ID: 17 0
+17 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 18 0
+18 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 19
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000252 00000 n
+0000000524 00000 n
+0000000769 00000 n
+0000000879 00000 n
+0000000948 00000 n
+0000001044 00000 n
+0000001113 00000 n
+0000001444 00000 n
+0000001516 00000 n
+0000001660 00000 n
+0000001708 00000 n
+0000001855 00000 n
+0000001942 00000 n
+0000002043 00000 n
+0000002091 00000 n
+0000002238 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 19
+ /ID [<fa46a90bcf56476b9904a2e7adb75024><31415926535897932384626433832795>]
+>>
+startxref
+2274
+%%EOF
diff --git a/qpdf/qtest/qpdf/good14.out b/qpdf/qtest/qpdf/good14.out
index 87819670..84bf7133 100644
--- a/qpdf/qtest/qpdf/good14.out
+++ b/qpdf/qtest/qpdf/good14.out
@@ -13,7 +13,9 @@ three lines
<8a8b>
(ab)
<8c><dd> ) >
-<610062> (MOO)-- stream 1 --
+<610062> (MOO)WARNING: good14.pdf (file position 628): content normalization encountered bad tokens
+WARNING: good14.pdf (file position 628): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+-- stream 1 --
This stream does end with a newline.
// tests:
// bad tokens preserved
@@ -31,10 +33,18 @@ This stream does end with a newline.
/good name
/bad#00name
+WARNING: good14.pdf (file position 860): content normalization encountered bad tokens
+WARNING: good14.pdf (file position 860): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
-- stream 2 --
(This stream ends with a \001 bad token
+WARNING: good14.pdf (file position 1316): content normalization encountered bad tokens
+WARNING: good14.pdf (file position 1316): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
+WARNING: good14.pdf (file position 1316): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
-- stream 3 --
-<AB X-- stream 4 --
+<AB XWARNING: good14.pdf (file position 1406): content normalization encountered bad tokens
+WARNING: good14.pdf (file position 1406): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
+WARNING: good14.pdf (file position 1406): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+-- stream 4 --
(ends with a name)
/ThisMustBeLast-- stream 5 --
% This stream has an inline image marker that is not terminated
@@ -44,4 +54,7 @@ BI
ID
<506f7
461746f>
+WARNING: good14.pdf (file position 1549): content normalization encountered bad tokens
+WARNING: good14.pdf (file position 1549): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
+WARNING: good14.pdf (file position 1549): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
test 3 done
diff --git a/qpdf/qtest/qpdf/normalize-warnings.out b/qpdf/qtest/qpdf/normalize-warnings.out
new file mode 100644
index 00000000..73947b1a
--- /dev/null
+++ b/qpdf/qtest/qpdf/normalize-warnings.out
@@ -0,0 +1,9 @@
+WARNING: coalesce.pdf (file position 671): content normalization encountered bad tokens
+WARNING: coalesce.pdf (file position 671): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
+WARNING: coalesce.pdf (file position 671): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+WARNING: coalesce.pdf (file position 823): content normalization encountered bad tokens
+WARNING: coalesce.pdf (file position 823): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+WARNING: coalesce.pdf (file position 962): content normalization encountered bad tokens
+WARNING: coalesce.pdf (file position 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
+WARNING: coalesce.pdf (file position 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
+qpdf: operation succeeded with warnings; resulting file may have some problems