From b30deaeeaba3941d7615bc2cc89c664b1273e5df Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 23 Oct 2020 06:40:27 -0400 Subject: Avoid merging adjacent tokens when concatenating contents (fixes #444) --- ChangeLog | 6 + TODO | 1 - libqpdf/QPDFObjectHandle.cc | 53 ++++++- manual/qpdf-manual.xml | 39 ++--- qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf.test | 18 ++- qpdf/qtest/qpdf/coalesce-out.pdf | Bin 1623 -> 2951 bytes qpdf/qtest/qpdf/coalesce-out.qdf | Bin 2192 -> 3520 bytes qpdf/qtest/qpdf/coalesce-split-1-2.pdf | 231 ----------------------------- qpdf/qtest/qpdf/coalesce-split.out | 10 -- qpdf/qtest/qpdf/coalesce.pdf | Bin 2445 -> 3769 bytes qpdf/qtest/qpdf/coalesce.qdf | Bin 2801 -> 4126 bytes qpdf/qtest/qpdf/normalize-warnings.out | 16 +- qpdf/qtest/qpdf/split-tokens-split-1-2.pdf | 231 +++++++++++++++++++++++++++++ qpdf/qtest/qpdf/split-tokens-split.out | 10 ++ qpdf/qtest/qpdf/split-tokens.pdf | 217 +++++++++++++++++++++++++++ qpdf/qtest/qpdf/split-tokens.qdf | 231 +++++++++++++++++++++++++++++ qpdf/qtest/qpdf/token-filters-out.pdf | Bin 2178 -> 3505 bytes 18 files changed, 781 insertions(+), 283 deletions(-) delete mode 100644 qpdf/qtest/qpdf/coalesce-split-1-2.pdf delete mode 100644 qpdf/qtest/qpdf/coalesce-split.out create mode 100644 qpdf/qtest/qpdf/split-tokens-split-1-2.pdf create mode 100644 qpdf/qtest/qpdf/split-tokens-split.out create mode 100644 qpdf/qtest/qpdf/split-tokens.pdf create mode 100644 qpdf/qtest/qpdf/split-tokens.qdf diff --git a/ChangeLog b/ChangeLog index f7ba4f6a..fd057636 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2020-10-23 Jay Berkenbilt + * Bug fix: when concatenating content streams, insert a newline if + needed to prevent the last token from the old stream from being + merged with the first token of the new stream. Qpdf was mistakenly + concatenating the streams without regard to the specification that + content streams are to be broken on token boundaries. Fixes #444. 
+ * Bug fix: fix-qdf: properly handle empty streams with ignore newline. diff --git a/TODO b/TODO index cd6f4c88..2e3898ff 100644 --- a/TODO +++ b/TODO @@ -4,7 +4,6 @@ Candidates for upcoming release * Open "next" issues * bugs * #473: zsh completion with directories - * #444: concatenated stream/whitespace bug * Non-bugs * #446: recognize edited QDF files * #436: parsing of document with form xobject diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 85493680..472ff4e8 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -165,6 +165,47 @@ QPDFObjectHandle::ParserCallbacks::terminateParsing() throw TerminateParsing(); } +class LastChar: public Pipeline +{ + public: + LastChar(Pipeline* next); + virtual ~LastChar() = default; + virtual void write(unsigned char* data, size_t len); + virtual void finish(); + unsigned char getLastChar(); + + private: + unsigned char last_char; +}; + +LastChar::LastChar(Pipeline* next) : + Pipeline("lastchar", next), + last_char(0) +{ +} + +void +LastChar::write(unsigned char* data, size_t len) +{ + if (len > 0) + { + this->last_char = data[len - 1]; + } + getNext()->write(data, len); +} + +void +LastChar::finish() +{ + getNext()->finish(); +} + +unsigned char +LastChar::getLastChar() +{ + return this->last_char; +} + QPDFObjectHandle::QPDFObjectHandle() : initialized(false), qpdf(0), @@ -1600,21 +1641,31 @@ QPDFObjectHandle::pipeContentStreams( std::vector streams = arrayOrStreamToStreamArray( description, all_description); + bool need_newline = false; for (std::vector::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + if (need_newline) + { + p->write(QUtil::unsigned_char_pointer("\n"), 1); + } + LastChar lc(p); QPDFObjectHandle stream = *iter; std::string og = QUtil::int_to_string(stream.getObjectID()) + " " + QUtil::int_to_string(stream.getGeneration()); std::string w_description = "content stream object " + og; - if (! 
stream.pipeStreamData(p, 0, qpdf_dl_specialized)) + if (! stream.pipeStreamData(&lc, 0, qpdf_dl_specialized)) { QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent"); throw QPDFExc(qpdf_e_damaged_pdf, "content stream", w_description, 0, "errors while decoding content stream"); } + lc.finish(); + need_newline = (lc.getLastChar() != static_cast('\n')); + QTC::TC("qpdf", "QPDFObjectHandle need_newline", + need_newline ? 0 : 1); } } diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 866a5016..659fbd08 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -2090,14 +2090,9 @@ outfile.pdf option causes qpdf to combine them into a single stream. Use of this option is never necessary for ordinary usage, but it can help when working with some files in some cases. For - example, some PDF writers split page contents into small - streams at arbitrary points that may fall in the middle of - lexical tokens within the content, and some PDF readers may - get confused on such files. If you use qpdf to coalesce the - content streams, such readers may be able to work with the - file more easily. This can also be combined with QDF mode or - content normalization to make it easier to look at all of a - page's contents at once. + example, this can also be combined with QDF mode or content + normalization to make it easier to look at all of a page's + contents at once. @@ -2398,25 +2393,15 @@ outfile.pdf You should not use this for “production” PDF files. - This paragraph discusses edge cases of content normalization that - are not of concern to most users and are not relevant when content - normalization is not enabled. When normalizing content, if qpdf - runs into any lexical errors, it will print a warning indicating - that content may be damaged. 
The only situation in which qpdf is - known to cause damage during content normalization is when a - page's contents are split across multiple streams and streams are - split in the middle of a lexical token such as a string, name, or - inline image. There may be some pathological cases in which qpdf - could damage content without noticing this, such as if the partial - tokens at the end of one stream and the beginning of the next - stream are both valid, but usually qpdf will be able to detect - this case. For slightly increased safety, you can specify - --coalesce-contents in addition to - --normalize-content or --qdf. - This will cause qpdf to combine all the content streams into one, - thus recombining any split tokens. However doing this will prevent - you from being able to see the original layout of the content - streams. If you must inspect the original content streams in an + When normalizing content, if qpdf runs into any lexical errors, it + will print a warning indicating that content may be damaged. The + only situation in which qpdf is known to cause damage during + content normalization is when a page's contents are split across + multiple streams and streams are split in the middle of a lexical + token such as a string, name, or inline image. Note that files + that do this are invalid since the PDF specification states that + content streams are not to be split in the middle of a token. 
If + you want to inspect the original content streams in an uncompressed format, you can always run with for a QDF file without content normalization, or alternatively diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 621ec53a..ced20279 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -455,3 +455,4 @@ qpdf found shared resources in leaf 0 qpdf found shared xobject in leaf 0 QPDF copy foreign with data 1 QPDF copy foreign with foreign_stream 1 +QPDFObjectHandle need_newline 1 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index a0ff2a57..75021b56 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1591,13 +1591,21 @@ $td->runtest("type checks with object streams", # ---------- $td->notify("--- Coalesce contents ---"); -$n_tests += 6; +$n_tests += 8; $td->runtest("qdf with normalize warnings", {$td->COMMAND => - "qpdf --qdf --static-id coalesce.pdf a.pdf"}, + "qpdf --qdf --static-id split-tokens.pdf a.pdf"}, {$td->FILE => "normalize-warnings.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "split-tokens.qdf"}); +$td->runtest("coalesce to qdf", + {$td->COMMAND => + "qpdf --qdf --static-id coalesce.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "coalesce.qdf"}); @@ -1831,12 +1839,12 @@ $td->runtest("unreferenced resources with bad token", {$td->COMMAND => "qpdf --qdf --static-id --split-pages=2" . " --remove-unreferenced-resources=yes" . 
- " coalesce.pdf split-out-bad-token.pdf"}, - {$td->FILE => "coalesce-split.out", $td->EXIT_STATUS => 3}, + " split-tokens.pdf split-out-bad-token.pdf"}, + {$td->FILE => "split-tokens-split.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); $td->runtest("check output", {$td->FILE => "split-out-bad-token-1-2.pdf"}, - {$td->FILE => "coalesce-split-1-2.pdf"}); + {$td->FILE => "split-tokens-split-1-2.pdf"}); $td->runtest("shared images in form xobject", {$td->COMMAND => "qpdf --qdf --static-id --split-pages". diff --git a/qpdf/qtest/qpdf/coalesce-out.pdf b/qpdf/qtest/qpdf/coalesce-out.pdf index 78505aba..a0dae39d 100644 Binary files a/qpdf/qtest/qpdf/coalesce-out.pdf and b/qpdf/qtest/qpdf/coalesce-out.pdf differ diff --git a/qpdf/qtest/qpdf/coalesce-out.qdf b/qpdf/qtest/qpdf/coalesce-out.qdf index 9a7129f3..822fdd17 100644 Binary files a/qpdf/qtest/qpdf/coalesce-out.qdf and b/qpdf/qtest/qpdf/coalesce-out.qdf differ diff --git a/qpdf/qtest/qpdf/coalesce-split-1-2.pdf b/qpdf/qtest/qpdf/coalesce-split-1-2.pdf deleted file mode 100644 index 4542411e..00000000 --- a/qpdf/qtest/qpdf/coalesce-split-1-2.pdf +++ /dev/null @@ -1,231 +0,0 @@ -%PDF-1.3 -% -%QDF-1.0 - -%% Original object ID: 1 0 -1 0 obj -<< - /Pages 2 0 R - /Type /Catalog ->> -endobj - -%% Original object ID: 2 0 -2 0 obj -<< - /Count 2 - /Kids [ - 3 0 R - 4 0 R - ] - /Type /Pages ->> -endobj - -%% Page 1 -%% Original object ID: 3 0 -3 0 obj -<< - /Contents [ - 5 0 R - 7 0 R - 9 0 R - 11 0 R - ] - /MediaBox [ - 0 - 0 - 612 - 792 - ] - /Parent 2 0 R - /Resources << - /Font << - /F1 13 0 R - >> - /ProcSet 14 0 R - >> - /Type /Page ->> -endobj - -%% Page 2 -%% Original object ID: 14 0 -4 0 obj -<< - /Contents 15 0 R - /MediaBox [ - 0 - 0 - 612 - 792 - ] - /Parent 2 0 R - /Resources << - /Font << - /F1 17 0 R - >> - /ProcSet 18 0 R - >> - /Type /Page ->> -endobj - -%% Contents for page 1 -%% Original object ID: 4 0 -5 0 obj -<< - /Length 6 0 R ->> -stream -BT - /F1 24 Tf - 72 720 Td - (Pot -endstream -endobj - 
-%QDF: ignore_newline -6 0 obj -33 -endobj - -%% Contents for page 1 -%% Original object ID: 6 0 -7 0 obj -<< - /Length 8 0 R ->> -stream -ato) Tj -ET [ /array -endstream -endobj - -%QDF: ignore_newline -8 0 obj -19 -endobj - -%% Contents for page 1 -%% Original object ID: 8 0 -9 0 obj -<< - /Length 10 0 R ->> -stream -/split ] BI -/CS /G/W 66/H 47/BPC 8/F/Fl/DP<> -ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt -endstream -endobj - -%QDF: ignore_newline -10 0 obj -253 -endobj - -%% Contents for page 1 -%% Original object ID: 10 0 -11 0 obj -<< - /Length 12 0 R ->> -stream -QTt*hUw%)p"DiRjDYNUAvF& -u#cW ߉WO -EI -endstream -endobj - -%QDF: ignore_newline -12 0 obj -65 -endobj - -%% Original object ID: 12 0 -13 0 obj -<< - /BaseFont /Helvetica - /Encoding /WinAnsiEncoding - /Name /F1 - /Subtype /Type1 - /Type /Font ->> -endobj - -%% Original object ID: 13 0 -14 0 obj -[ - /PDF - /Text -] -endobj - -%% Contents for page 2 -%% Original object ID: 15 0 -15 0 obj -<< - /Length 16 0 R ->> -stream -BT - /F1 24 Tf - 72 720 Td - (Potato) Tj -ET -endstream -endobj - -16 0 obj -44 -endobj - -%% Original object ID: 17 0 -17 0 obj -<< - /BaseFont /Helvetica - /Encoding /WinAnsiEncoding - /Name /F1 - /Subtype /Type1 - /Type /Font ->> -endobj - -%% Original object ID: 18 0 -18 0 obj -[ - /PDF - /Text -] -endobj - -xref -0 19 -0000000000 65535 f -0000000052 00000 n -0000000133 00000 n -0000000252 00000 n -0000000525 00000 n -0000000770 00000 n -0000000880 00000 n -0000000949 00000 n -0000001045 00000 n -0000001114 00000 n -0000001445 00000 n -0000001517 00000 n -0000001661 00000 n -0000001709 00000 n -0000001856 00000 n -0000001943 00000 n -0000002044 00000 n -0000002092 00000 n -0000002239 00000 n -trailer << - /Root 1 0 R - /Size 19 - /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] ->> -startxref -2275 -%%EOF diff --git a/qpdf/qtest/qpdf/coalesce-split.out b/qpdf/qtest/qpdf/coalesce-split.out deleted file mode 100644 index 
5e18173c..00000000 --- a/qpdf/qtest/qpdf/coalesce-split.out +++ /dev/null @@ -1,10 +0,0 @@ -WARNING: coalesce.pdf, object 3 0 at offset 181: Bad token found while scanning content stream; not attempting to remove unreferenced objects from this page -WARNING: empty PDF: content normalization encountered bad tokens -WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents -WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. -WARNING: empty PDF: content normalization encountered bad tokens -WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. -WARNING: empty PDF: content normalization encountered bad tokens -WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents -WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. 
-qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/coalesce.pdf b/qpdf/qtest/qpdf/coalesce.pdf index ba5d959b..4fa99202 100644 Binary files a/qpdf/qtest/qpdf/coalesce.pdf and b/qpdf/qtest/qpdf/coalesce.pdf differ diff --git a/qpdf/qtest/qpdf/coalesce.qdf b/qpdf/qtest/qpdf/coalesce.qdf index 5007dc12..ab5b08cc 100644 Binary files a/qpdf/qtest/qpdf/coalesce.qdf and b/qpdf/qtest/qpdf/coalesce.qdf differ diff --git a/qpdf/qtest/qpdf/normalize-warnings.out b/qpdf/qtest/qpdf/normalize-warnings.out index 57f038f4..287a583c 100644 --- a/qpdf/qtest/qpdf/normalize-warnings.out +++ b/qpdf/qtest/qpdf/normalize-warnings.out @@ -1,9 +1,9 @@ -WARNING: coalesce.pdf (offset 671): content normalization encountered bad tokens -WARNING: coalesce.pdf (offset 671): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents -WARNING: coalesce.pdf (offset 671): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. -WARNING: coalesce.pdf (offset 823): content normalization encountered bad tokens -WARNING: coalesce.pdf (offset 823): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. -WARNING: coalesce.pdf (offset 962): content normalization encountered bad tokens -WARNING: coalesce.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents -WARNING: coalesce.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. 
For more information on this warning, search for content normalization in the manual. +WARNING: split-tokens.pdf (offset 671): content normalization encountered bad tokens +WARNING: split-tokens.pdf (offset 671): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: split-tokens.pdf (offset 671): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: split-tokens.pdf (offset 823): content normalization encountered bad tokens +WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens +WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. 
qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf b/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf new file mode 100644 index 00000000..4542411e --- /dev/null +++ b/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf @@ -0,0 +1,231 @@ +%PDF-1.3 +% +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 2 + /Kids [ + 3 0 R + 4 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents [ + 5 0 R + 7 0 R + 9 0 R + 11 0 R + ] + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 13 0 R + >> + /ProcSet 14 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +%% Original object ID: 14 0 +4 0 obj +<< + /Contents 15 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 17 0 R + >> + /ProcSet 18 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 4 0 +5 0 obj +<< + /Length 6 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Pot +endstream +endobj + +%QDF: ignore_newline +6 0 obj +33 +endobj + +%% Contents for page 1 +%% Original object ID: 6 0 +7 0 obj +<< + /Length 8 0 R +>> +stream +ato) Tj +ET [ /array +endstream +endobj + +%QDF: ignore_newline +8 0 obj +19 +endobj + +%% Contents for page 1 +%% Original object ID: 8 0 +9 0 obj +<< + /Length 10 0 R +>> +stream +/split ] BI +/CS /G/W 66/H 47/BPC 8/F/Fl/DP<> +ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt +endstream +endobj + +%QDF: ignore_newline +10 0 obj +253 +endobj + +%% Contents for page 1 +%% Original object ID: 10 0 +11 0 obj +<< + /Length 12 0 R +>> +stream +QTt*hUw%)p"DiRjDYNUAvF& +u#cW ߉WO +EI +endstream +endobj + +%QDF: ignore_newline +12 0 obj +65 +endobj + +%% Original object ID: 12 0 +13 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + 
/Type /Font +>> +endobj + +%% Original object ID: 13 0 +14 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 2 +%% Original object ID: 15 0 +15 0 obj +<< + /Length 16 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +16 0 obj +44 +endobj + +%% Original object ID: 17 0 +17 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 18 0 +18 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 19 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000252 00000 n +0000000525 00000 n +0000000770 00000 n +0000000880 00000 n +0000000949 00000 n +0000001045 00000 n +0000001114 00000 n +0000001445 00000 n +0000001517 00000 n +0000001661 00000 n +0000001709 00000 n +0000001856 00000 n +0000001943 00000 n +0000002044 00000 n +0000002092 00000 n +0000002239 00000 n +trailer << + /Root 1 0 R + /Size 19 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] +>> +startxref +2275 +%%EOF diff --git a/qpdf/qtest/qpdf/split-tokens-split.out b/qpdf/qtest/qpdf/split-tokens-split.out new file mode 100644 index 00000000..0a76a46a --- /dev/null +++ b/qpdf/qtest/qpdf/split-tokens-split.out @@ -0,0 +1,10 @@ +WARNING: split-tokens.pdf, object 3 0 at offset 181: Bad token found while scanning content stream; not attempting to remove unreferenced objects from this page +WARNING: empty PDF: content normalization encountered bad tokens +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. 
+WARNING: empty PDF: content normalization encountered bad tokens +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: empty PDF: content normalization encountered bad tokens +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/split-tokens.pdf b/qpdf/qtest/qpdf/split-tokens.pdf new file mode 100644 index 00000000..ba5d959b --- /dev/null +++ b/qpdf/qtest/qpdf/split-tokens.pdf @@ -0,0 +1,217 @@ +%PDF-1.3 +% +%QDF-1.0 + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 2 + /Kids [ + 3 0 R + 4 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents [ + 5 0 R + 7 0 R + 9 0 R + 11 0 R + ] + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 13 0 R + >> + /ProcSet 14 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +4 0 obj +<< + /Contents 15 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 17 0 R + >> + /ProcSet 18 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +5 0 obj +<< + /Length 6 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Pot +endstream +endobj + +%QDF: ignore_newline +6 0 obj +33 +endobj + +%% Contents for page 1 +7 0 obj +<< + /Length 8 0 R +>> +stream +ato) Tj +ET [ /array +endstream +endobj + +%QDF: ignore_newline +8 0 obj +19 +endobj + +%% Contents for page 1 +9 0 obj +<< + /Length 10 0 R +>> +stream +/split ] BI 
+/CS /G/W 66/H 47/BPC 8/F/Fl/DP<> +ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt +endstream +endobj + +%QDF: ignore_newline +10 0 obj +253 +endobj + +%% Contents for page 1 +11 0 obj +<< + /Length 12 0 R +>> +stream +QTt*hUw%)p"DiRjDYNUAvF& u#cW ߉WO +EI +endstream +endobj + +%QDF: ignore_newline +12 0 obj +66 +endobj + +13 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +14 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 2 +15 0 obj +<< + /Length 16 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +16 0 obj +44 +endobj + +17 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +18 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 19 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000171 00000 n +0000000416 00000 n +0000000634 00000 n +0000000744 00000 n +0000000786 00000 n +0000000882 00000 n +0000000924 00000 n +0000001255 00000 n +0000001299 00000 n +0000001444 00000 n +0000001464 00000 n +0000001583 00000 n +0000001642 00000 n +0000001743 00000 n +0000001763 00000 n +0000001882 00000 n +trailer << + /Root 1 0 R + /Size 19 + /ID [<6af379f20e8dcd4e724869daec3ba023>] +>> +startxref +1918 +%%EOF diff --git a/qpdf/qtest/qpdf/split-tokens.qdf b/qpdf/qtest/qpdf/split-tokens.qdf new file mode 100644 index 00000000..5007dc12 --- /dev/null +++ b/qpdf/qtest/qpdf/split-tokens.qdf @@ -0,0 +1,231 @@ +%PDF-1.3 +% +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 2 + /Kids [ + 3 0 R + 4 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents [ + 5 0 R + 7 0 R + 9 0 R + 11 0 R + ] + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 13 0 R + >> + /ProcSet 14 0 R + >> + /Type 
/Page +>> +endobj + +%% Page 2 +%% Original object ID: 4 0 +4 0 obj +<< + /Contents 15 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 17 0 R + >> + /ProcSet 18 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 5 0 +5 0 obj +<< + /Length 6 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Pot +endstream +endobj + +%QDF: ignore_newline +6 0 obj +33 +endobj + +%% Contents for page 1 +%% Original object ID: 7 0 +7 0 obj +<< + /Length 8 0 R +>> +stream +ato) Tj +ET [ /array +endstream +endobj + +%QDF: ignore_newline +8 0 obj +19 +endobj + +%% Contents for page 1 +%% Original object ID: 9 0 +9 0 obj +<< + /Length 10 0 R +>> +stream +/split ] BI +/CS /G/W 66/H 47/BPC 8/F/Fl/DP<> +ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt +endstream +endobj + +%QDF: ignore_newline +10 0 obj +253 +endobj + +%% Contents for page 1 +%% Original object ID: 11 0 +11 0 obj +<< + /Length 12 0 R +>> +stream +QTt*hUw%)p"DiRjDYNUAvF& +u#cW ߉WO +EI +endstream +endobj + +%QDF: ignore_newline +12 0 obj +65 +endobj + +%% Original object ID: 13 0 +13 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 14 0 +14 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 2 +%% Original object ID: 15 0 +15 0 obj +<< + /Length 16 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +16 0 obj +44 +endobj + +%% Original object ID: 17 0 +17 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 18 0 +18 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 19 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000252 00000 n +0000000524 00000 n +0000000769 00000 n +0000000879 00000 n +0000000948 00000 n +0000001044 00000 n +0000001113 00000 n +0000001444 00000 n +0000001516 00000 n 
+0000001660 00000 n +0000001708 00000 n +0000001855 00000 n +0000001942 00000 n +0000002043 00000 n +0000002091 00000 n +0000002238 00000 n +trailer << + /Root 1 0 R + /Size 19 + /ID [<31415926535897932384626433832795>] +>> +startxref +2274 +%%EOF diff --git a/qpdf/qtest/qpdf/token-filters-out.pdf b/qpdf/qtest/qpdf/token-filters-out.pdf index 6d24497c..8f5f14c3 100644 Binary files a/qpdf/qtest/qpdf/token-filters-out.pdf and b/qpdf/qtest/qpdf/token-filters-out.pdf differ -- cgit v1.2.3-54-g00ecf