aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2019-08-24 01:59:38 +0200
committer: Jay Berkenbilt <ejb@ql.org> 2019-08-24 02:34:21 +0200
commit: 2794bfb1a665cad93a38144bea0ba0daea7152e7 (patch)
tree: e83256473254f4935de0477d784a2123a7828d27
parent: dac0598b94c877bec92a1edd78ae00021cfa1638 (diff)
download: qpdf-2794bfb1a665cad93a38144bea0ba0daea7152e7.tar.zst
Add flags to control zlib compression level (fixes #113)
-rw-r--r-- ChangeLog | 14
-rw-r--r-- include/qpdf/QPDFWriter.hh | 17
-rw-r--r-- libqpdf/QPDFWriter.cc | 20
-rw-r--r-- manual/build.mk | 3
-rw-r--r-- manual/qpdf-manual.xml | 98
-rw-r--r-- qpdf/qpdf.cc | 36
-rw-r--r-- qpdf/qtest/qpdf.test | 12
-rw-r--r-- qpdf/qtest/qpdf/minimal-1.pdf | bin 0 -> 750 bytes
-rw-r--r-- qpdf/qtest/qpdf/minimal-9.pdf | bin 0 -> 743 bytes
9 files changed, 178 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index cdfa3fce..915d73f8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,22 @@
2019-08-23 Jay Berkenbilt <ejb@ql.org>
+ * Add --recompress-flate option to qpdf and
+ QPDFWriter::setRecompressFlate to cause QPDFWriter to recompress
+ streams that are already compressed with /FlateDecode.
+
* Add option Pl_Flate::setCompressionLevel to globally set the
zlib compression level used by all Pl_Flate pipelines.
+ * Add --compression-level flag to qpdf to set the zlib compression
+ level. When combined with --recompress-flate, this will cause most
+ of qpdf's streams to use the maximum compression level. This
+ results in only a very small amount of savings in size that comes
+ at a fairly significant performance cost, but it could be useful
+ for archival files or other cases where every byte counts and
+ creation time doesn't matter so much. Note that using
+ --object-streams=generate in combination with these options gives
+ you the biggest advantage. Fixes #113.
+
2019-08-22 Jay Berkenbilt <ejb@ql.org>
* In QPDFObjectHandle::ParserCallbacks, in addition to
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index 860b0630..0fd114db 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -189,10 +189,11 @@ class QPDFWriter
// filters on the input. When combined with
// setCompressStreams(true), which is the default, the effect of this
// is that streams filtered with these older and less efficient
- // filters will be recompressed with the Flate filter. As a
- // special case, if a stream is already compressed with
+ // filters will be recompressed with the Flate filter. By default,
+ // as a special case, if a stream is already compressed with
// FlateDecode and setCompressStreams is enabled, the original
- // compressed data will be preserved.
+ // compressed data will be preserved. This behavior can be
+ // overridden by calling setRecompressFlate(true).
//
// qpdf_dl_specialized: In addition to uncompressing the
// generalized compression formats, supported non-lossy
@@ -209,6 +210,15 @@ class QPDFWriter
QPDF_DLL
void setDecodeLevel(qpdf_stream_decode_level_e);
+ // By default, when both the input and output contents of a stream
+ // are compressed with Flate, qpdf does not uncompress and
+ // recompress the stream. Passing true here causes it to do so.
+ // This can be useful if recompressing all streams with a higher
+ // compression level, which can be set by calling the static
+ // method Pl_Flate::setCompressionLevel.
+ QPDF_DLL
+ void setRecompressFlate(bool);
+
// Set value of content stream normalization. The default is
// "false". If true, we attempt to normalize newlines inside of
// content streams. Some constructs such as inline images may
@@ -597,6 +607,7 @@ class QPDFWriter
bool compress_streams_set;
qpdf_stream_decode_level_e stream_decode_level;
bool stream_decode_level_set;
+ bool recompress_flate;
bool qdf_mode;
bool preserve_unreferenced_objects;
bool newline_before_endstream;
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 6c92338d..30bc1fcb 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -37,6 +37,7 @@ QPDFWriter::Members::Members(QPDF& pdf) :
compress_streams_set(false),
stream_decode_level(qpdf_dl_none),
stream_decode_level_set(false),
+ recompress_flate(false),
qdf_mode(false),
preserve_unreferenced_objects(false),
newline_before_endstream(false),
@@ -207,6 +208,12 @@ QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
}
void
+QPDFWriter::setRecompressFlate(bool val)
+{
+ this->m->recompress_flate = val;
+}
+
+void
QPDFWriter::setContentNormalization(bool val)
{
this->m->normalize_content_set = true;
@@ -1716,13 +1723,14 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
if (this->m->compress_streams)
{
// Don't filter if the stream is already compressed with
- // FlateDecode. We don't want to make it worse by getting
- // rid of a predictor or otherwise messing with it. We
- // should also avoid messing with anything that's
- // compressed with a lossy compression scheme, but we
- // don't support any of those right now.
+ // FlateDecode. This way we don't make it worse if the
+ // original file used a better Flate algorithm, and we
+ // don't spend time and CPU cycles uncompressing and
+ // recompressing stuff. This can be overridden with
+ // setRecompressFlate(true).
QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
- if ((! object.isDataModified()) &&
+ if ((! this->m->recompress_flate) &&
+ (! object.isDataModified()) &&
filter_obj.isName() &&
((filter_obj.getName() == "/FlateDecode") ||
(filter_obj.getName() == "/Fl")))
diff --git a/manual/build.mk b/manual/build.mk
index 03e8fe56..3911b8e2 100644
--- a/manual/build.mk
+++ b/manual/build.mk
@@ -26,7 +26,8 @@ endif
$(OUTDOC).pdf: $(OUTDOC).fo qpdf/build/qpdf
$(FOP) $< -pdf $@.tmp
- qpdf/build/qpdf --linearize $@.tmp $@
+ qpdf/build/qpdf --linearize --object-streams=generate \
+ --recompress-flate --compression-level=9 $@.tmp $@
$(OUTDOC).html: $(INDOC).xml manual/html.xsl $(VALIDATE)
$(XSLTPROC) --output $@ manual/html.xsl $<
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index db2a54fa..6e72456e 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -1433,27 +1433,32 @@ outfile.pdf</option>
<listitem>
<para>
<option>generalized</option>: decode streams filtered with
- supported generalized filters: <option>/LZWDecode</option>,
- <option>/FlateDecode</option>,
- <option>/ASCII85Decode</option>, and
- <option>/ASCIIHexDecode</option>. We define generalized
+ supported generalized filters:
+ <literal>/LZWDecode</literal>,
+ <literal>/FlateDecode</literal>,
+ <literal>/ASCII85Decode</literal>, and
+ <literal>/ASCIIHexDecode</literal>. We define generalized
filters as those to be used for general-purpose compression
or encoding, as opposed to filters specifically designed
- for image data.
+ for image data. Note that, by default, streams already
+ compressed with <literal>/FlateDecode</literal> are not
+ uncompressed and recompressed unless you also specify
+ <option>--recompress-flate</option>.
</para>
</listitem>
<listitem>
<para>
<option>specialized</option>: in addition to generalized,
decode streams with supported non-lossy specialized
- filters; currently this is just <option>/RunLengthDecode</option>
+ filters; currently this is just
+ <literal>/RunLengthDecode</literal>
</para>
</listitem>
<listitem>
<para>
<option>all</option>: in addition to generalized and
specialized, decode streams with supported lossy filters;
- currently this is just <option>/DCTDecode</option> (JPEG)
+ currently this is just <literal>/DCTDecode</literal> (JPEG)
</para>
</listitem>
</itemizedlist>
@@ -1476,7 +1481,10 @@ outfile.pdf</option>
<option>compress</option>: recompress stream data when
possible (default); equivalent to
<option>--compress-streams=y</option>
- <option>--decode-level=generalized</option>
+ <option>--decode-level=generalized</option>. Does not
+ recompress streams already compressed with
+ <literal>/FlateDecode</literal> unless
+ <option>--recompress-flate</option> is also specified.
</para>
</listitem>
<listitem>
@@ -1499,6 +1507,37 @@ outfile.pdf</option>
</listitem>
</varlistentry>
<varlistentry>
+ <term><option>--recompress-flate</option></term>
+ <listitem>
+ <para>
+ By default, streams already compressed with
+ <literal>/FlateDecode</literal> are left alone rather than
+ being uncompressed and recompressed. This option causes qpdf
+ to uncompress and recompress the streams. There is a
+ significant performance cost to using this option, but you
+ probably want to use it if you specify
+ <option>--compression-level</option>.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>--compression-level=<replaceable>level</replaceable></option></term>
+ <listitem>
+ <para>
+ When writing new streams that are compressed with
+ <literal>/FlateDecode</literal>, use the specified compression
+ level. The value of <option>level</option> should be a number
+ from 1 to 9 and is passed directly to zlib, which implements
+ deflate compression. Note that qpdf doesn't uncompress and
+ recompress streams by default. To have this option apply to
+ already compressed streams, you should also specify
+ <option>--recompress-flate</option>. If your goal is to shrink
+ the size of PDF files, you should also use
+ <option>--object-streams=generate</option>.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term><option>--normalize-content=[yn]</option></term>
<listitem>
<para>
@@ -4449,7 +4488,7 @@ print "\n";
</listitem>
<listitem>
<para>
- Library Enhancements
+ Library and CLI Enhancements
</para>
<itemizedlist>
<listitem>
@@ -4510,6 +4549,41 @@ print "\n";
</listitem>
<listitem>
<para>
+ Static method
+ <function>Pl_Flate::setCompressionLevel</function> can be
+ called to set the zlib compression level globally used by
+ all instances of Pl_Flate in deflate mode.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The method
+ <function>QPDFWriter::setRecompressFlate</function> can be
+ called to tell <classname>QPDFWriter</classname> to
+ uncompress and recompress streams already compressed with
+ <literal>/FlateDecode</literal>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ CLI enhancement: the <option>--recompress-flate</option>
+ option instructs <command>qpdf</command> to recompress streams that
+ are already compressed with <literal>/FlateDecode</literal>.
+ Useful with <option>--compression-level</option>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ CLI enhancement: the
+ <option>--compression-level=<replaceable>level</replaceable></option>
+ option sets the zlib compression level used for any streams
+ compressed with <literal>/FlateDecode</literal>. Most
+ effective when combined with
+ <option>--recompress-flate</option>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
The underlying implementation of QPDF arrays has been
enhanced to be much more memory efficient when dealing with
arrays with lots of nulls. This enables qpdf to use
@@ -5699,9 +5773,9 @@ print "\n";
<listitem>
<para>
Disregard data check errors when uncompressing
- <option>/FlateDecode</option> streams. This is consistent with
- most other PDF readers and allows qpdf to recover data from
- another class of malformed PDF files.
+ <literal>/FlateDecode</literal> streams. This is consistent
+ with most other PDF readers and allows qpdf to recover data
+ from another class of malformed PDF files.
</para>
</listitem>
<listitem>
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index a0f7f7ea..a5eef425 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -13,6 +13,7 @@
#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Count.hh>
+#include <qpdf/Pl_Flate.hh>
#include <qpdf/PointerHolder.hh>
#include <qpdf/QPDF.hh>
@@ -124,6 +125,9 @@ struct Options
stream_data_mode(qpdf_s_compress),
compress_streams(true),
compress_streams_set(false),
+ recompress_flate(false),
+ recompress_flate_set(false),
+ compression_level(-1),
decode_level(qpdf_dl_generalized),
decode_level_set(false),
normalize_set(false),
@@ -217,6 +221,9 @@ struct Options
qpdf_stream_data_e stream_data_mode;
bool compress_streams;
bool compress_streams_set;
+ bool recompress_flate;
+ bool recompress_flate_set;
+ int compression_level;
qpdf_stream_decode_level_e decode_level;
bool decode_level_set;
bool normalize_set;
@@ -632,6 +639,8 @@ class ArgParser
void argCollate();
void argStreamData(char* parameter);
void argCompressStreams(char* parameter);
+ void argRecompressFlate();
+ void argCompressionLevel(char* parameter);
void argDecodeLevel(char* parameter);
void argNormalizeContent(char* parameter);
void argSuppressRecovery();
@@ -847,6 +856,9 @@ ArgParser::initOptionTable()
&ArgParser::argStreamData, stream_data_choices);
(*t)["compress-streams"] = oe_requiredChoices(
&ArgParser::argCompressStreams, yn);
+ (*t)["recompress-flate"] = oe_bare(&ArgParser::argRecompressFlate);
+ (*t)["compression-level"] = oe_requiredParameter(
+ &ArgParser::argCompressionLevel, "level");
char const* decode_level_choices[] =
{"none", "generalized", "specialized", "all", 0};
(*t)["decode-level"] = oe_requiredChoices(
@@ -1328,6 +1340,9 @@ ArgParser::argHelp()
<< "--stream-data=option controls transformation of stream data (below)\n"
<< "--compress-streams=[yn] controls whether to compress streams on output\n"
<< "--decode-level=option controls how to filter streams from the input\n"
+ << "--recompress-flate recompress streams already compressed with Flate\n"
+ << "--compression-level=n set zlib compression level; most effective with\n"
+ << " --recompress-flate --object-streams=generate\n"
<< "--normalize-content=[yn] enables or disables normalization of content streams\n"
<< "--object-streams=mode controls handing of object streams\n"
<< "--preserve-unreferenced preserve unreferenced objects\n"
@@ -1725,6 +1740,19 @@ ArgParser::argCompressStreams(char* parameter)
}
void
+ArgParser::argRecompressFlate()
+{
+ o.recompress_flate_set = true;
+ o.recompress_flate = true;
+}
+
+void
+ArgParser::argCompressionLevel(char* parameter)
+{
+ o.compression_level = QUtil::string_to_int(parameter);
+}
+
+void
ArgParser::argDecodeLevel(char* parameter)
{
o.decode_level_set = true;
@@ -4889,6 +4917,10 @@ static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
+ if (o.compression_level >= 0)
+ {
+ Pl_Flate::setCompressionLevel(o.compression_level);
+ }
if (o.qdf_mode)
{
w.setQDFMode(true);
@@ -4913,6 +4945,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
w.setCompressStreams(o.compress_streams);
}
+ if (o.recompress_flate_set)
+ {
+ w.setRecompressFlate(o.recompress_flate);
+ }
if (o.decode_level_set)
{
w.setDecodeLevel(o.decode_level);
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index d7046e8b..9474d723 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -3876,8 +3876,20 @@ $td->runtest("convert inline-images to qdf",
compare_pdfs("inline-images.pdf", "a.pdf");
show_ntests();
+# ----------
+$td->notify("--- Compression Level ---");
+$n_tests += 4;
+check_pdf("recompress with level",
+ "qpdf --static-id --recompress-flate --compression-level=9" .
+ " --object-streams=generate minimal.pdf",
+ "minimal-9.pdf", 0);
+check_pdf("recompress with level",
+ "qpdf --static-id --recompress-flate --compression-level=1" .
+ " --object-streams=generate minimal.pdf",
+ "minimal-1.pdf", 0);
+show_ntests();
# ----------
$td->notify("--- Specialized filtering Tests ---");
$n_tests += 3;
diff --git a/qpdf/qtest/qpdf/minimal-1.pdf b/qpdf/qtest/qpdf/minimal-1.pdf
new file mode 100644
index 00000000..726a9d11
--- /dev/null
+++ b/qpdf/qtest/qpdf/minimal-1.pdf
Binary files differ
diff --git a/qpdf/qtest/qpdf/minimal-9.pdf b/qpdf/qtest/qpdf/minimal-9.pdf
new file mode 100644
index 00000000..46becb36
--- /dev/null
+++ b/qpdf/qtest/qpdf/minimal-9.pdf
Binary files differ