From bedf35d6a55a9092485d56002b50bc9003ce7931 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 2 Jan 2021 12:05:09 -0500 Subject: Bug fix: avoid extraneous pipeline finish calls with multiple contents Avoid calling finish() multiple times on the pipeline passed to pipeContentStreams. This commit also fixes a bug in which qpdf was not exiting with the proper exit status if warnings found while splitting pages; this was exposed by a test case that changed. --- TODO | 17 ----------------- include/qpdf/QPDFPageObjectHelper.hh | 7 +------ libqpdf/QPDFObjectHandle.cc | 13 +++++++------ qpdf/qpdf.cc | 11 ++++++++--- qpdf/qtest/qpdf/normalize-warnings.out | 3 +++ qpdf/qtest/qpdf/split-tokens-split-1-2.pdf | 19 ++++++++++--------- qpdf/qtest/qpdf/split-tokens-split.out | 3 +++ qpdf/qtest/qpdf/split-tokens.pdf | 19 ++++++++++--------- qpdf/qtest/qpdf/split-tokens.qdf | 19 ++++++++++--------- 9 files changed, 52 insertions(+), 59 deletions(-) diff --git a/TODO b/TODO index 857fb701..0cf8017d 100644 --- a/TODO +++ b/TODO @@ -16,23 +16,6 @@ Candidates for upcoming release definition of an object without breaking compatibility * See if the objects part can be sorted by object number -* QPDFObjectHandle::pipeContentStreams calls finish() after each - stream. In some code paths, Pl_Concatenate is used, which suppresses - that, but in other code paths, it's not used, and the library relies - on the behavior of finish() being called. Then there's the issue of - nested Pl_Concatenate pipelines -- calling manualFinish() on the top - one doesn't call manualFinish() on the lower ones, and there are no - exposed methods that allow us to apply things down the pipeline - stack, so it's hard to fix this without changing the API (at least - making Pipeline::getNext() public, which may be undesirable). To see - this problem in action, stick a Pl_Concatenate in front of the - pipeline in pipeContentStreams and observe the test failure. One - solution might be to add an additional argument indicating whether - or not to delay calling finish() until the end. See comments on - QPDFPageObjectHelper::filterPageContents, - QPDFObjectHandle::filterPageContents, and - QPDFObjectHandle::pipeContentStreams - * Remember to check work `qpdf` project for private issues * file with very slow page extraction * big page even with --remove-unreferenced-resources=yes, even with --empty diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh index 93745446..1152a7a5 100644 --- a/include/qpdf/QPDFPageObjectHelper.hh +++ b/include/qpdf/QPDFPageObjectHelper.hh @@ -199,12 +199,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper // Pipe a page's contents through the given pipeline. This method // works whether the contents are a single stream or an array of - // streams. Call on a page object. Please note that if there is an - // array of content streams, p->finish() is called after each - // stream. If you pass a pipeline that doesn't allow write() to be - // called after finish(), you can wrap it in an instance of - // Pl_Concatenate and then call manualFinish() on the - // Pl_Concatenate pipeline at the end. + // streams. QPDF_DLL void pipePageContents(Pipeline* p); diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index add0b14d..94b81f3d 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -90,13 +89,11 @@ void CoalesceProvider::provideStreamData(int, int, Pipeline* p) { QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data"); - Pl_Concatenate concat("concatenate", p); std::string description = "page object " + QUtil::int_to_string(containing_page.getObjectID()) + " " + QUtil::int_to_string(containing_page.getGeneration()); std::string all_description; - old_contents.pipeContentStreams(&concat, description, all_description); - concat.manualFinish(); + old_contents.pipeContentStreams(p, description, all_description); } void @@ -1630,14 +1627,15 @@ QPDFObjectHandle::pipeContentStreams( arrayOrStreamToStreamArray( description, all_description); bool need_newline = false; + Pl_Buffer buf("concatenated content stream buffer"); for (std::vector::iterator iter = streams.begin(); iter != streams.end(); ++iter) { if (need_newline) { - p->write(QUtil::unsigned_char_pointer("\n"), 1); + buf.write(QUtil::unsigned_char_pointer("\n"), 1); } - LastChar lc(p); + LastChar lc(&buf); QPDFObjectHandle stream = *iter; std::string og = QUtil::int_to_string(stream.getObjectID()) + " " + @@ -1655,6 +1653,9 @@ QPDFObjectHandle::pipeContentStreams( QTC::TC("qpdf", "QPDFObjectHandle need_newline", need_newline ? 0 : 1); } + std::unique_ptr b(buf.getBuffer()); + p->write(b->getBuffer(), b->getSize()); + p->finish(); } void diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 70a5746a..cc02e1fd 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -5569,7 +5569,7 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) } } -static void do_split_pages(QPDF& pdf, Options& o) +static void do_split_pages(QPDF& pdf, Options& o, bool& warnings) { // Generate output file pattern std::string before; @@ -5653,6 +5653,10 @@ static void do_split_pages(QPDF& pdf, Options& o) { std::cout << whoami << ": wrote file " << outfile << std::endl; } + if (outpdf.anyWarnings()) + { + warnings = true; + } } } @@ -5794,6 +5798,7 @@ int realmain(int argc, char* argv[]) } handle_under_overlay(pdf, o); handle_transformations(pdf, o); + bool split_warnings = false; if ((o.outfilename == 0) && (! o.replace_input)) { @@ -5801,13 +5806,13 @@ int realmain(int argc, char* argv[]) } else if (o.split_pages) { - do_split_pages(pdf, o); + do_split_pages(pdf, o, split_warnings); } else { write_outfile(pdf, o); } - if (! pdf.getWarnings().empty()) + if ((! pdf.getWarnings().empty()) || split_warnings) { if (! o.suppress_warnings) { diff --git a/qpdf/qtest/qpdf/normalize-warnings.out b/qpdf/qtest/qpdf/normalize-warnings.out index 287a583c..c89096d4 100644 --- a/qpdf/qtest/qpdf/normalize-warnings.out +++ b/qpdf/qtest/qpdf/normalize-warnings.out @@ -6,4 +6,7 @@ WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted b WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: split-tokens.pdf (offset 1338): content normalization encountered bad tokens +WARNING: split-tokens.pdf (offset 1338): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: split-tokens.pdf (offset 1338): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf b/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf index 4542411e..4f9af09f 100644 --- a/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf +++ b/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf @@ -135,12 +135,13 @@ stream QTt*hUw%)p"DiRjDYNUAvF& u#cW ߉WO EI +/Z#00Z endstream endobj %QDF: ignore_newline 12 0 obj -65 +72 endobj %% Original object ID: 12 0 @@ -214,18 +215,18 @@ xref 0000001114 00000 n 0000001445 00000 n 0000001517 00000 n -0000001661 00000 n -0000001709 00000 n -0000001856 00000 n -0000001943 00000 n -0000002044 00000 n -0000002092 00000 n -0000002239 00000 n +0000001668 00000 n +0000001716 00000 n +0000001863 00000 n +0000001950 00000 n +0000002051 00000 n +0000002099 00000 n +0000002246 00000 n trailer << /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> startxref -2275 +2282 %%EOF diff --git a/qpdf/qtest/qpdf/split-tokens-split.out b/qpdf/qtest/qpdf/split-tokens-split.out index 0a76a46a..7b940622 100644 --- a/qpdf/qtest/qpdf/split-tokens-split.out +++ b/qpdf/qtest/qpdf/split-tokens-split.out @@ -7,4 +7,7 @@ WARNING: empty PDF: Resulting stream data may be corrupted but is may still usef WARNING: empty PDF: content normalization encountered bad tokens WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: empty PDF: content normalization encountered bad tokens +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/split-tokens.pdf b/qpdf/qtest/qpdf/split-tokens.pdf index ba5d959b..cab00950 100644 --- a/qpdf/qtest/qpdf/split-tokens.pdf +++ b/qpdf/qtest/qpdf/split-tokens.pdf @@ -126,12 +126,13 @@ endobj stream QTt*hUw%)p"DiRjDYNUAvF& u#cW ߉WO EI +/Z#00Z endstream endobj %QDF: ignore_newline 12 0 obj -66 +73 endobj 13 0 obj @@ -200,18 +201,18 @@ xref 0000000924 00000 n 0000001255 00000 n 0000001299 00000 n -0000001444 00000 n -0000001464 00000 n -0000001583 00000 n -0000001642 00000 n -0000001743 00000 n -0000001763 00000 n -0000001882 00000 n +0000001451 00000 n +0000001471 00000 n +0000001590 00000 n +0000001649 00000 n +0000001750 00000 n +0000001770 00000 n +0000001889 00000 n trailer << /Root 1 0 R /Size 19 /ID [<6af379f20e8dcd4e724869daec3ba023>] >> startxref -1918 +1925 %%EOF diff --git a/qpdf/qtest/qpdf/split-tokens.qdf b/qpdf/qtest/qpdf/split-tokens.qdf index 5007dc12..141417dd 100644 --- a/qpdf/qtest/qpdf/split-tokens.qdf +++ b/qpdf/qtest/qpdf/split-tokens.qdf @@ -135,12 +135,13 @@ stream QTt*hUw%)p"DiRjDYNUAvF& u#cW ߉WO EI +/Z#00Z endstream endobj %QDF: ignore_newline 12 0 obj -65 +72 endobj %% Original object ID: 13 0 @@ -214,18 +215,18 @@ xref 0000001113 00000 n 0000001444 00000 n 0000001516 00000 n -0000001660 00000 n -0000001708 00000 n -0000001855 00000 n -0000001942 00000 n -0000002043 00000 n -0000002091 00000 n -0000002238 00000 n +0000001667 00000 n +0000001715 00000 n +0000001862 00000 n +0000001949 00000 n +0000002050 00000 n +0000002098 00000 n +0000002245 00000 n trailer << /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795>] >> startxref -2274 +2281 %%EOF -- cgit v1.2.3-70-g09d2