aboutsummaryrefslogtreecommitdiffstats
path: root/include/qpdf
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2017-08-19 15:18:14 +0200
committerJay Berkenbilt <ejb@ql.org>2017-08-21 23:44:22 +0200
commit9744414c66e3f85700ebc8b32d90f45ff97221bd (patch)
tree47abed5c4105fe3f0089ab9afdf1d053d806b39d /include/qpdf
parentae90d2c485318beb8b4b938d09ffaf5c6f0a5e21 (diff)
downloadqpdf-9744414c66e3f85700ebc8b32d90f45ff97221bd.tar.zst
Enable finer grained control of stream decoding
This commit adds several API methods that enable control over which types of filters QPDF will attempt to decode. It also adds support for /RunLengthDecode and /DCTDecode filters for both encoding and decoding.
Diffstat (limited to 'include/qpdf')
-rw-r--r--include/qpdf/Constants.h19
-rw-r--r--include/qpdf/QPDFObjectHandle.hh104
-rw-r--r--include/qpdf/QPDFWriter.hh72
3 files changed, 155 insertions, 40 deletions
diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h
index 38f1e71e..c2763956 100644
--- a/include/qpdf/Constants.h
+++ b/include/qpdf/Constants.h
@@ -26,7 +26,7 @@ enum qpdf_error_code_e
qpdf_e_pages, /* erroneous or unsupported pages structure */
};
-/* Write Parameters */
+/* Write Parameters. See QPDFWriter.hh for details. */
enum qpdf_object_stream_e
{
@@ -41,6 +41,23 @@ enum qpdf_stream_data_e
qpdf_s_compress /* compress stream data */
};
+/* Stream data flags */
+
+/* See pipeStreamData in QPDFObjectHandle.hh for details on these flags. */
+enum qpdf_stream_encode_flags_e
+{
+ qpdf_ef_compress = 1 << 0, /* compress uncompressed streams */
+ qpdf_ef_normalize = 1 << 1, /* normalize content stream */
+};
+enum qpdf_stream_decode_level_e
+{
+ /* These must be in order from less to more decoding. */
+ qpdf_dl_none = 0, /* preserve all stream filters */
+ qpdf_dl_generalized, /* decode general-purpose filters */
+ qpdf_dl_specialized, /* also decode other non-lossy filters */
+ qpdf_dl_all /* also decode lossy filters */
+};
+
/* R3 Encryption Parameters */
enum qpdf_r3_print_e
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index fbe02ba8..588768fc 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -10,6 +10,7 @@
#include <qpdf/DLL.h>
#include <qpdf/Types.h>
+#include <qpdf/Constants.h>
#include <string>
#include <vector>
@@ -44,19 +45,19 @@ class QPDFObjectHandle
virtual ~StreamDataProvider()
{
}
- // The implementation of this function must write the
- // unencrypted, raw stream data to the given pipeline. Every
- // call to provideStreamData for a given stream must write the
- // same data. The number of bytes written must agree with the
- // length provided at the time the StreamDataProvider object
- // was associated with the stream. The object ID and
- // generation passed to this method are those that belong to
- // the stream on behalf of which the provider is called. They
- // may be ignored or used by the implementation for indexing
- // or other purposes. This information is made available just
- // to make it more convenient to use a single
- // StreamDataProvider object to provide data for multiple
- // streams.
+ // The implementation of this function must write stream data
+ // to the given pipeline. The stream data must conform to
+ // whatever filters are explicitly associated with the stream.
+ // QPDFWriter may, in some cases, add compression, but if it
+ // does, it will update the filters as needed. Every call to
+ // provideStreamData for a given stream must write the same
+ // data. The object ID and generation passed to this method are
+ // those that belong to the stream on behalf of which the
+ // provider is called. They may be ignored or used by the
+ // implementation for indexing or other purposes. This
+ // information is made available just to make it more
+ // convenient to use a single StreamDataProvider object to
+ // provide data for multiple streams.
virtual void provideStreamData(int objid, int generation,
Pipeline* pipeline) = 0;
};
@@ -370,32 +371,71 @@ class QPDFObjectHandle
// Returns filtered (uncompressed) stream data. Throws an
// exception if the stream is filtered and we can't decode it.
QPDF_DLL
- PointerHolder<Buffer> getStreamData();
+ PointerHolder<Buffer> getStreamData(
+ qpdf_stream_decode_level_e level = qpdf_dl_generalized);
+
// Returns unfiltered (raw) stream data.
QPDF_DLL
PointerHolder<Buffer> getRawStreamData();
- // Write stream data through the given pipeline. A null pipeline
+ // Write stream data through the given pipeline. A null pipeline
// value may be used if all you want to do is determine whether a
- // stream is filterable. If filter is false, write raw stream
- // data and return false. If filter is true, then attempt to
- // apply all the decoding filters to the stream data. If we are
- // successful, return true. Otherwise, return false and write raw
- // data. If filtering is requested and successfully performed,
- // then the normalize and compress flags are used to determine
- // whether stream data should be normalized and compressed. In
- // all cases, if this function returns false, raw data has been
- // written. If it returns true, then any requested filtering has
- // been performed. Note that if the original stream data has no
- // filters applied to it, the return value will be equal to the
- // value of the filter parameter. Callers may use the return
- // value of this function to determine whether or not the /Filter
- // and /DecodeParms keys in the stream dictionary should be
- // replaced if writing a new stream object.
+ // stream is filterable and would be filtered based on the
+ // provided flags. If flags is 0, write raw stream data and return
+ // false. Otherwise, the flags alter the behavior in the following
+ // way:
+ //
+ // encode_flags:
+ //
+ // qpdf_ef_compress -- compress data with /FlateDecode if no other
+ // compression filters are applied.
+ //
+ // qpdf_ef_normalize -- tokenize as content stream and normalize tokens
+ //
+ // decode_level:
+ //
+ // qpdf_dl_none -- do not decode any streams.
+ //
+ // qpdf_dl_generalized -- decode supported general-purpose
+ // filters. This includes /ASCIIHexDecode, /ASCII85Decode,
+ // /LZWDecode, and /FlateDecode.
+ //
+ // qpdf_dl_specialized -- in addition to generalized filters, also
+ // decode supported non-lossy specialized filters. This includes
+ // /RunLengthDecode.
+ //
+ // qpdf_dl_all -- in addition to generalized and non-lossy
+ // specialized filters, decode supported lossy filters. This
+ // includes /DCTDecode.
+ //
+ // If, based on the flags and the filters and decode parameters,
+ // we determine that we know how to apply all requested filters,
+ // do so and return true if we are successful.
+ //
+ // In all cases, a return value of true means that filtered data
+ // has been written successfully. If filtering is requested but
+ // this method returns false, it means there was some error in the
+ // filtering, in which case the resulting data is likely partially
+ // filtered and/or incomplete and may not be consistent with the
+ // configured filters. QPDFWriter handles this by attempting to
+ // get the stream data without filtering, but callers should
+ // consider a false return value when decode_level is not
+ // qpdf_dl_none to be a potential loss of data.
+ QPDF_DLL
+ bool pipeStreamData(Pipeline*,
+ unsigned long encode_flags,
+ qpdf_stream_decode_level_e decode_level,
+ bool suppress_warnings = false);
+
+ // Legacy pipeStreamData. This maps to the flags-based
+ // pipeStreamData as follows:
+ // filter = false -> encode_flags = 0
+ // filter = true -> decode_level = qpdf_dl_generalized
+ // normalize = true -> encode_flags |= qpdf_ef_normalize
+ // compress = true -> encode_flags |= qpdf_ef_compress
QPDF_DLL
bool pipeStreamData(Pipeline*, bool filter,
- bool normalize, bool compress,
- bool suppress_warnings = false);
+ bool normalize, bool compress);
// Replace a stream's dictionary. The new dictionary must be
// consistent with the stream's data. This is most appropriately
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index 2519ed12..c4bc7846 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -118,14 +118,70 @@ class QPDFWriter
QPDF_DLL
void setObjectStreamMode(qpdf_object_stream_e);
- // Set value of stream data mode. In uncompress mode, we attempt
- // to uncompress any stream that we can. In preserve mode, we
- // preserve any filtering applied to streams. In compress mode,
- // if we can apply all filters and the stream is not already
- // optimally compressed, recompress the stream.
+ // Set value of stream data mode. This is an older interface.
+ // Instead of using this, prefer setCompressStreams() and
+ // setDecodeLevel(). This method is retained for compatibility,
+ // but it does not cover the full range of available
+ // configurations. The mapping between this and the new methods is
+ // as follows:
+ //
+ // qpdf_s_uncompress:
+ // setCompressStreams(false)
+ // setDecodeLevel(qpdf_dl_generalized)
+ // qpdf_s_preserve:
+ // setCompressStreams(false)
+ // setDecodeLevel(qpdf_dl_none)
+ // qpdf_s_compress:
+ // setCompressStreams(true)
+ // setDecodeLevel(qpdf_dl_generalized)
+ //
+ // The default is qpdf_s_compress.
QPDF_DLL
void setStreamDataMode(qpdf_stream_data_e);
+ // If true, compress any uncompressed streams when writing them.
+ // Metadata streams are a special case and are not compressed even
+ // if this is true. This is true by default for QPDFWriter. If you
+ // want QPDFWriter to leave uncompressed streams uncompressed,
+ // pass false to this method.
+ QPDF_DLL
+ void setCompressStreams(bool);
+
+ // When QPDFWriter encounters streams, this parameter controls the
+ // behavior with respect to attempting to apply any filters to the
+ // streams when copying to the output. The decode levels are as
+ // follows:
+ //
+ // qpdf_dl_none: Do not attempt to apply any filters. Streams
+ // remain as they appear in the original file. Note that
+ // uncompressed streams may still be compressed on output. You can
+ // disable that by calling setCompressStreams(false).
+ //
+ // qpdf_dl_generalized: This is the default. QPDFWriter will apply
+ // LZWDecode, ASCII85Decode, ASCIIHexDecode, and FlateDecode
+ // filters on the input. When combined with
+ // setCompressStreams(true), which is the default, the effect of this
+ // is that streams filtered with these older and less efficient
+ // filters will be recompressed with the Flate filter. As a
+ // special case, if a stream is already compressed with
+ // FlateDecode and setCompressStreams is enabled, the original
+ // compressed data will be preserved.
+ //
+ // qpdf_dl_specialized: In addition to uncompressing the
+ // generalized compression formats, supported non-lossy
+ // compression will also be decoded. At present, this includes
+ // the RunLengthDecode filter.
+ //
+ // qpdf_dl_all: In addition to generalized and non-lossy
+ // specialized filters, supported lossy compression filters will
+ // be applied. At present, this includes DCTDecode (JPEG)
+ // compression. Note that compressing the resulting data with
+ // DCTDecode again will accumulate loss, so avoid multiple
+ // compression and decompression cycles. This is mostly useful for
+ // retrieving image data.
+ QPDF_DLL
+ void setDecodeLevel(qpdf_stream_decode_level_e);
+
// Set value of content stream normalization. The default is
// "false". If true, we attempt to normalize newlines inside of
// content streams. Some constructs such as inline images may
@@ -434,8 +490,10 @@ class QPDFWriter
Buffer* output_buffer;
bool normalize_content_set;
bool normalize_content;
- bool stream_data_mode_set;
- qpdf_stream_data_e stream_data_mode;
+ bool compress_streams;
+ bool compress_streams_set;
+ qpdf_stream_decode_level_e stream_decode_level;
+ bool stream_decode_level_set;
bool qdf_mode;
bool precheck_streams;
bool preserve_unreferenced_objects;