diff options
author | Jay Berkenbilt <ejb@ql.org> | 2019-08-24 01:59:38 +0200 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2019-08-24 02:34:21 +0200 |
commit | 2794bfb1a665cad93a38144bea0ba0daea7152e7 (patch) | |
tree | e83256473254f4935de0477d784a2123a7828d27 | |
parent | dac0598b94c877bec92a1edd78ae00021cfa1638 (diff) | |
download | qpdf-2794bfb1a665cad93a38144bea0ba0daea7152e7.tar.zst |
Add flags to control zlib compression level (fixes #113)
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | include/qpdf/QPDFWriter.hh | 17 | ||||
-rw-r--r-- | libqpdf/QPDFWriter.cc | 20 | ||||
-rw-r--r-- | manual/build.mk | 3 | ||||
-rw-r--r-- | manual/qpdf-manual.xml | 98 | ||||
-rw-r--r-- | qpdf/qpdf.cc | 36 | ||||
-rw-r--r-- | qpdf/qtest/qpdf.test | 12 | ||||
-rw-r--r-- | qpdf/qtest/qpdf/minimal-1.pdf | bin | 0 -> 750 bytes | |||
-rw-r--r-- | qpdf/qtest/qpdf/minimal-9.pdf | bin | 0 -> 743 bytes |
9 files changed, 178 insertions, 22 deletions
@@ -1,8 +1,22 @@ 2019-08-23 Jay Berkenbilt <ejb@ql.org> + * Add --recompress-flate option to qpdf and + QPDFWriter::setRecompressFlate to cause QPDFWriter to recompress + streams that are already compressed with /FlateDecode. + * Add option Pl_Flate::setCompressionLevel to globally set the zlib compression level used by all Pl_Flate pipelines. + * Add --compression-level flag to qpdf to set the zlib compression + level. When combined with --recompress-flate, this will cause most + of qpdf's streams to use the maximum compression level. This + results in only a very small amount of savings in size that comes + at a fairly significant performance cost, but it could be useful + for archival files or other cases where every byte counts and + creation time doesn't matter so much. Note that using + --object-streams=generate in combination with these options gives + you the biggest advantage. Fixes #113. + 2019-08-22 Jay Berkenbilt <ejb@ql.org> * In QPDFObjectHandle::ParserCallbacks, in addition to diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index 860b0630..0fd114db 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -189,10 +189,11 @@ class QPDFWriter // filters on the input. When combined with // setCompressStreams(true), which the default, the effect of this // is that streams filtered with these older and less efficient - // filters will be recompressed with the Flate filter. As a - // special case, if a stream is already compressed with + // filters will be recompressed with the Flate filter. By default, + // as a special case, if a stream is already compressed with // FlateDecode and setCompressStreams is enabled, the original - // compressed data will be preserved. + // compressed data will be preserved. This behavior can be + // overridden by calling setRecompressFlate(true). 
// // qpdf_dl_specialized: In addition to uncompressing the // generalized compression formats, supported non-lossy @@ -209,6 +210,15 @@ class QPDFWriter QPDF_DLL void setDecodeLevel(qpdf_stream_decode_level_e); + // By default, when both the input and output contents of a stream + // are compressed with Flate, qpdf does not uncompress and + // recompress the stream. Passing true here causes it to do so. + // This can be useful if recompressing all streams with a higher + // compression level, which can be set by calling the static + // method Pl_Flate::setCompressionLevel. + QPDF_DLL + void setRecompressFlate(bool); + // Set value of content stream normalization. The default is // "false". If true, we attempt to normalize newlines inside of // content streams. Some constructs such as inline images may @@ -597,6 +607,7 @@ class QPDFWriter bool compress_streams_set; qpdf_stream_decode_level_e stream_decode_level; bool stream_decode_level_set; + bool recompress_flate; bool qdf_mode; bool preserve_unreferenced_objects; bool newline_before_endstream; diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 6c92338d..30bc1fcb 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -37,6 +37,7 @@ QPDFWriter::Members::Members(QPDF& pdf) : compress_streams_set(false), stream_decode_level(qpdf_dl_none), stream_decode_level_set(false), + recompress_flate(false), qdf_mode(false), preserve_unreferenced_objects(false), newline_before_endstream(false), @@ -207,6 +208,12 @@ QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) } void +QPDFWriter::setRecompressFlate(bool val) +{ + this->m->recompress_flate = val; +} + +void QPDFWriter::setContentNormalization(bool val) { this->m->normalize_content_set = true; @@ -1716,13 +1723,14 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, if (this->m->compress_streams) { // Don't filter if the stream is already compressed with - // FlateDecode. 
We don't want to make it worse by getting - // rid of a predictor or otherwise messing with it. We - // should also avoid messing with anything that's - // compressed with a lossy compression scheme, but we - // don't support any of those right now. + // FlateDecode. This way we don't make it worse if the + // original file used a better Flate algorithm, and we + // don't spend time and CPU cycles uncompressing and + // recompressing stuff. This can be overridden with + // setRecompressFlate(true). QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); - if ((! object.isDataModified()) && + if ((! this->m->recompress_flate) && + (! object.isDataModified()) && filter_obj.isName() && ((filter_obj.getName() == "/FlateDecode") || (filter_obj.getName() == "/Fl"))) diff --git a/manual/build.mk b/manual/build.mk index 03e8fe56..3911b8e2 100644 --- a/manual/build.mk +++ b/manual/build.mk @@ -26,7 +26,8 @@ endif $(OUTDOC).pdf: $(OUTDOC).fo qpdf/build/qpdf $(FOP) $< -pdf $@.tmp - qpdf/build/qpdf --linearize $@.tmp $@ + qpdf/build/qpdf --linearize --object-streams=generate \ + --recompress-flate --compression-level=9 $@.tmp $@ $(OUTDOC).html: $(INDOC).xml manual/html.xsl $(VALIDATE) $(XSLTPROC) --output $@ manual/html.xsl $< diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index db2a54fa..6e72456e 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -1433,27 +1433,32 @@ outfile.pdf</option> <listitem> <para> <option>generalized</option>: decode streams filtered with - supported generalized filters: <option>/LZWDecode</option>, - <option>/FlateDecode</option>, - <option>/ASCII85Decode</option>, and - <option>/ASCIIHexDecode</option>. We define generalized + supported generalized filters: + <literal>/LZWDecode</literal>, + <literal>/FlateDecode</literal>, + <literal>/ASCII85Decode</literal>, and + <literal>/ASCIIHexDecode</literal>. 
We define generalized filters as those to be used for general-purpose compression or encoding, as opposed to filters specifically designed - for image data. + for image data. Note that, by default, streams already + compressed with <literal>/FlateDecode</literal> are not + uncompressed and recompressed unless you also specify + <option>--recompress-flate</option>. </para> </listitem> <listitem> <para> <option>specialized</option>: in addition to generalized, decode streams with supported non-lossy specialized - filters; currently this is just <option>/RunLengthDecode</option> + filters; currently this is just + <literal>/RunLengthDecode</literal> </para> </listitem> <listitem> <para> <option>all</option>: in addition to generalized and specialized, decode streams with supported lossy filters; - currently this is just <option>/DCTDecode</option> (JPEG) + currently this is just <literal>/DCTDecode</literal> (JPEG) </para> </listitem> </itemizedlist> @@ -1476,7 +1481,10 @@ outfile.pdf</option> <option>compress</option>: recompress stream data when possible (default); equivalent to <option>--compress-streams=y</option> - <option>--decode-level=generalized</option> + <option>--decode-level=generalized</option>. Does not + recompress streams already compressed with + <literal>/FlateDecode</literal> unless + <option>--recompress-flate</option> is also specified. </para> </listitem> <listitem> @@ -1499,6 +1507,37 @@ outfile.pdf</option> </listitem> </varlistentry> <varlistentry> + <term><option>--recompress-flate</option></term> + <listitem> + <para> + By default, streams already compressed with + <literal>/FlateDecode</literal> are left alone rather than + being uncompressed and recompressed. This option causes qpdf + to uncompress and recompress the streams. There is a + significant performance cost to using this option, but you + probably want to use it if you specify + <option>--compression-level</option>. 
+ </para> + </listitem> + </varlistentry> + <varlistentry> + <term><option>--compression-level=<replaceable>level</replaceable></option></term> + <listitem> + <para> + When writing new streams that are compressed with + <literal>/FlateDecode</literal>, use the specified compression + level. The value of <option>level</option> should be a number + from 1 to 9 and is passed directly to zlib, which implements + deflate compression. Note that qpdf doesn't uncompress and + recompress streams by default. To have this option apply to + already compressed streams, you should also specify + <option>--recompress-flate</option>. If your goal is to shrink + the size of PDF files, you should also use + <option>--object-streams=generate</option>. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>--normalize-content=[yn]</option></term> <listitem> <para> @@ -4449,7 +4488,7 @@ print "\n"; </listitem> <listitem> <para> - Library Enhancements + Library and CLI Enhancements </para> <itemizedlist> <listitem> @@ -4510,6 +4549,41 @@ print "\n"; </listitem> <listitem> <para> + Static method + <function>Pl_Flate::setCompressionLevel</function> can be + called to set the zlib compression level globally used by + all instances of Pl_Flate in deflate mode. + </para> + </listitem> + <listitem> + <para> + The method + <function>QPDFWriter::setRecompressFlate</function> can be + called to tell <classname>QPDFWriter</classname> to + uncompress and recompress streams already compressed with + <literal>/FlateDecode</literal>. + </para> + </listitem> + <listitem> + <para> + CLI enhancement: the <option>--recompress-flate</option> + option instructs <command>qpdf</command> to recompress streams that + are already compressed with <literal>/FlateDecode</literal>. + Useful with <option>--compression-level</option>. 
+ </para> + </listitem> + <listitem> + <para> + CLI enhancement: the + <option>--compression-level=<replaceable>level</replaceable></option> + sets the zlib compression level used for any streams + compressed by <literal>/FlateDecode</literal>. Most + effective when combined with + <option>--recompress-flate</option>. + </para> + </listitem> + <listitem> + <para> The underlying implementation of QPDF arrays has been enhanced to be much more memory efficient when dealing with arrays with lots of nulls. This enables qpdf to use @@ -5699,9 +5773,9 @@ print "\n"; <listitem> <para> Disregard data check errors when uncompressing - <option>/FlateDecode</option> streams. This is consistent with - most other PDF readers and allows qpdf to recover data from - another class of malformed PDF files. + <literal>/FlateDecode</literal> streams. This is consistent + with most other PDF readers and allows qpdf to recover data + from another class of malformed PDF files. </para> </listitem> <listitem> diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index a0f7f7ea..a5eef425 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -13,6 +13,7 @@ #include <qpdf/Pl_Discard.hh> #include <qpdf/Pl_DCT.hh> #include <qpdf/Pl_Count.hh> +#include <qpdf/Pl_Flate.hh> #include <qpdf/PointerHolder.hh> #include <qpdf/QPDF.hh> @@ -124,6 +125,9 @@ struct Options stream_data_mode(qpdf_s_compress), compress_streams(true), compress_streams_set(false), + recompress_flate(false), + recompress_flate_set(false), + compression_level(-1), decode_level(qpdf_dl_generalized), decode_level_set(false), normalize_set(false), @@ -217,6 +221,9 @@ struct Options qpdf_stream_data_e stream_data_mode; bool compress_streams; bool compress_streams_set; + bool recompress_flate; + bool recompress_flate_set; + int compression_level; qpdf_stream_decode_level_e decode_level; bool decode_level_set; bool normalize_set; @@ -632,6 +639,8 @@ class ArgParser void argCollate(); void argStreamData(char* parameter); void argCompressStreams(char* 
parameter); + void argRecompressFlate(); + void argCompressionLevel(char* parameter); void argDecodeLevel(char* parameter); void argNormalizeContent(char* parameter); void argSuppressRecovery(); @@ -847,6 +856,9 @@ ArgParser::initOptionTable() &ArgParser::argStreamData, stream_data_choices); (*t)["compress-streams"] = oe_requiredChoices( &ArgParser::argCompressStreams, yn); + (*t)["recompress-flate"] = oe_bare(&ArgParser::argRecompressFlate); + (*t)["compression-level"] = oe_requiredParameter( + &ArgParser::argCompressionLevel, "level"); char const* decode_level_choices[] = {"none", "generalized", "specialized", "all", 0}; (*t)["decode-level"] = oe_requiredChoices( @@ -1328,6 +1340,9 @@ ArgParser::argHelp() << "--stream-data=option controls transformation of stream data (below)\n" << "--compress-streams=[yn] controls whether to compress streams on output\n" << "--decode-level=option controls how to filter streams from the input\n" + << "--recompress-flate recompress streams already compressed with Flate\n" + << "--compression-level=n set zlib compression level; most effective with\n" + << " --recompress-flate --object-streams=generate\n" << "--normalize-content=[yn] enables or disables normalization of content streams\n" << "--object-streams=mode controls handing of object streams\n" << "--preserve-unreferenced preserve unreferenced objects\n" @@ -1725,6 +1740,19 @@ ArgParser::argCompressStreams(char* parameter) } void +ArgParser::argRecompressFlate() +{ + o.recompress_flate_set = true; + o.recompress_flate = true; +} + +void +ArgParser::argCompressionLevel(char* parameter) +{ + o.compression_level = QUtil::string_to_int(parameter); +} + +void ArgParser::argDecodeLevel(char* parameter) { o.decode_level_set = true; @@ -4889,6 +4917,10 @@ static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w) static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) { + if (o.compression_level >= 0) + { + 
Pl_Flate::setCompressionLevel(o.compression_level); + } if (o.qdf_mode) { w.setQDFMode(true); @@ -4913,6 +4945,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) { w.setCompressStreams(o.compress_streams); } + if (o.recompress_flate_set) + { + w.setRecompressFlate(o.recompress_flate); + } if (o.decode_level_set) { w.setDecodeLevel(o.decode_level); diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index d7046e8b..9474d723 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -3876,8 +3876,20 @@ $td->runtest("convert inline-images to qdf", compare_pdfs("inline-images.pdf", "a.pdf"); show_ntests(); +# ---------- +$td->notify("--- Compression Level ---"); +$n_tests += 4; +check_pdf("recompress with level", + "qpdf --static-id --recompress-flate --compression-level=9" . + " --object-streams=generate minimal.pdf", + "minimal-9.pdf", 0); +check_pdf("recompress with level", + "qpdf --static-id --recompress-flate --compression-level=1" . + " --object-streams=generate minimal.pdf", + "minimal-1.pdf", 0); +show_ntests(); # ---------- $td->notify("--- Specialized filtering Tests ---"); $n_tests += 3; diff --git a/qpdf/qtest/qpdf/minimal-1.pdf b/qpdf/qtest/qpdf/minimal-1.pdf Binary files differnew file mode 100644 index 00000000..726a9d11 --- /dev/null +++ b/qpdf/qtest/qpdf/minimal-1.pdf diff --git a/qpdf/qtest/qpdf/minimal-9.pdf b/qpdf/qtest/qpdf/minimal-9.pdf Binary files differnew file mode 100644 index 00000000..46becb36 --- /dev/null +++ b/qpdf/qtest/qpdf/minimal-9.pdf |