aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2020-12-23 12:12:49 +0100
committerJay Berkenbilt <ejb@ql.org>2020-12-28 18:58:19 +0100
commit39bfa0130713defc9abb478a70717ca07377cdab (patch)
tree18b6370d5f2f7d10a3f1ef09a8f0dd3b9281bd3c
parent1fb26f08ad91d08f67ac30e2557ddcadd8b9ccac (diff)
downloadqpdf-39bfa0130713defc9abb478a70717ca07377cdab.tar.zst
Implement user-provided stream filters
Refactor QPDF_Stream to use stream filter classes to handle supported stream filters as well.
-rw-r--r--ChangeLog10
-rw-r--r--TODO7
-rw-r--r--include/qpdf/QPDF.hh17
-rw-r--r--include/qpdf/QPDFStreamFilter.hh78
-rw-r--r--libqpdf/QPDF.cc8
-rw-r--r--libqpdf/QPDFStreamFilter.cc19
-rw-r--r--libqpdf/QPDF_Stream.cc381
-rw-r--r--libqpdf/SF_FlateLzwDecode.cc153
-rw-r--r--libqpdf/build.mk2
-rw-r--r--libqpdf/qpdf/QPDF_Stream.hh23
-rw-r--r--libqpdf/qpdf/SF_ASCII85Decode.hh30
-rw-r--r--libqpdf/qpdf/SF_ASCIIHexDecode.hh30
-rw-r--r--libqpdf/qpdf/SF_DCTDecode.hh39
-rw-r--r--libqpdf/qpdf/SF_FlateLzwDecode.hh30
-rw-r--r--libqpdf/qpdf/SF_RunLengthDecode.hh35
-rw-r--r--qpdf/qpdf.testcov4
16 files changed, 587 insertions, 279 deletions
diff --git a/ChangeLog b/ChangeLog
index 1af2520e..7c1b43d1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2020-12-23 Jay Berkenbilt <ejb@ql.org>
+
+ * Allow library users to provide their own decoders for stream
+ filters by deriving classes from QPDFStreamFilter and registering
+ them using QPDF::registerStreamFilter. Registered stream filters
+ provide code to validate and interpret /DecodeParms for a specific
+ /Filter and also to provide a pipeline that will decode. Note that
+ it is possible to encode to a filter type that is not supported
+ even without this feature.
+
2020-12-22 Jay Berkenbilt <ejb@ql.org>
* Add QPDFObjectHandle::makeDirect(bool allow_streams) -- if
diff --git a/TODO b/TODO
index 1479aa56..28917b66 100644
--- a/TODO
+++ b/TODO
@@ -317,13 +317,6 @@ I find it useful to make reference to them in this list
is exercised elsewhere in qpdf's test suite, so this is not that
pressing.
- * Support user-pluggable stream filters. This would enable external
- code to provide interpretation for filters that are missing from
- qpdf. Make it possible for user-provided filters to override
- built-in filters. Make sure that the pluggable filters can be
- prioritized so that we can poll all registered filters to see
- whether they are capable of filtering a particular stream.
-
* If possible, consider adding CCITT3, CCITT4, or any other easy
filters. For some reference code that we probably can't use but may
be handy anyway, see
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 285ba1e3..b0e9b717 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -31,6 +31,8 @@
#include <list>
#include <iostream>
#include <vector>
+#include <functional>
+#include <memory>
#include <qpdf/QIntC.hh>
#include <qpdf/QPDFExc.hh>
@@ -39,6 +41,7 @@
#include <qpdf/QPDFXRefEntry.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFTokenizer.hh>
+#include <qpdf/QPDFStreamFilter.hh>
#include <qpdf/Buffer.hh>
#include <qpdf/InputSource.hh>
@@ -132,6 +135,20 @@ class QPDF
QPDF_DLL
void emptyPDF();
+ // From 10.1: register a new filter implementation for a specific
+ // stream filter. You can add your own implementations for new
+ // filter types or override existing ones provided by the library.
+ // Registered stream filters are used for decoding only as you can
+ // override encoding with stream data providers. For example, you
+ // could use this method to add support for one of the other filter
+ // types by using additional third-party libraries that qpdf does
+ // not presently use. The standard filters are implemented using
+ // QPDFStreamFilter classes.
+ QPDF_DLL
+ static void registerStreamFilter(
+ std::string const& filter_name,
+ std::function<std::shared_ptr<QPDFStreamFilter> ()> factory);
+
// Parameter settings
// By default, warning messages are issued to std::cerr and output
diff --git a/include/qpdf/QPDFStreamFilter.hh b/include/qpdf/QPDFStreamFilter.hh
new file mode 100644
index 00000000..5fdcf5ca
--- /dev/null
+++ b/include/qpdf/QPDFStreamFilter.hh
@@ -0,0 +1,78 @@
+// Copyright (c) 2005-2020 Jay Berkenbilt
+//
+// This file is part of qpdf.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Versions of qpdf prior to version 7 were released under the terms
+// of version 2.0 of the Artistic License. At your option, you may
+// continue to consider qpdf to be licensed under those terms. Please
+// see the manual for additional information.
+
+#ifndef QPDFSTREAMFILTER_HH
+#define QPDFSTREAMFILTER_HH
+
+#include <qpdf/DLL.h>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/Pipeline.hh>
+
+class QPDF_DLL_CLASS QPDFStreamFilter
+{
+ public:
+ QPDF_DLL
+ QPDFStreamFilter() = default;
+
+ QPDF_DLL
+ virtual ~QPDFStreamFilter() = default;
+
+ // A QPDFStreamFilter class must implement, at a minimum,
+ // setDecodeParms() and getDecodePipeline(). QPDF will always call
+ // setDecodeParms() before calling getDecodePipeline(). It is
+ // expected that you will store any needed information from
+ // decode_parms (or the decode_parms object itself) in your
+ // instance so that it can be used to construct the decode
+ // pipeline.
+
+ // Return a boolean indicating whether your filter can proceed
+ // with the given /DecodeParms. The default implementation accepts
+ // a null object and rejects everything else.
+ QPDF_DLL
+ virtual bool setDecodeParms(QPDFObjectHandle decode_parms);
+
+ // Return a pipeline that will decode data encoded with your
+ // filter. Your implementation must ensure that the pipeline is
+ // deleted when the instance of your class is destroyed.
+ QPDF_DLL
+ virtual Pipeline* getDecodePipeline(Pipeline* next) = 0;
+
+ // If your filter implements "specialized" compression or lossy
+ // compression, override one or both of these methods. The default
+ // implementations return false. See comments in QPDFWriter for
+ // details. QPDF defines specialized compression as non-lossy
+ // compression not intended for general-purpose data. qpdf, by
+ // default, doesn't mess with streams that are compressed with
+ // specialized compression, the idea being that the decision to
+ // use that compression scheme would fall outside of what
+ // QPDFWriter would know anything about, so any attempt to decode
+ // and re-encode would probably be undesirable.
+ QPDF_DLL
+ virtual bool isSpecializedCompression();
+ QPDF_DLL
+ virtual bool isLossyCompression();
+
+ private:
+ QPDFStreamFilter(QPDFStreamFilter const&) = delete;
+ QPDFStreamFilter& operator=(QPDFStreamFilter const&) = delete;
+};
+
+#endif // QPDFSTREAMFILTER_HH
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 793ce2fc..ff4866f0 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -270,6 +270,14 @@ QPDF::emptyPDF()
}
void
+QPDF::registerStreamFilter(
+ std::string const& filter_name,
+ std::function<std::shared_ptr<QPDFStreamFilter> ()> factory)
+{
+ QPDF_Stream::registerStreamFilter(filter_name, factory);
+}
+
+void
QPDF::setIgnoreXRefStreams(bool val)
{
this->m->ignore_xref_streams = val;
diff --git a/libqpdf/QPDFStreamFilter.cc b/libqpdf/QPDFStreamFilter.cc
new file mode 100644
index 00000000..66a2f762
--- /dev/null
+++ b/libqpdf/QPDFStreamFilter.cc
@@ -0,0 +1,19 @@
+#include <qpdf/QPDFStreamFilter.hh>
+
+bool
+QPDFStreamFilter::setDecodeParms(QPDFObjectHandle decode_parms)
+{
+ return decode_parms.isNull(); // default: only a null /DecodeParms is accepted
+}
+
+bool
+QPDFStreamFilter::isSpecializedCompression()
+{
+ return false; // default; subclasses override to report specialized compression
+}
+
+bool
+QPDFStreamFilter::isLossyCompression()
+{
+ return false; // default; subclasses override to report lossy compression
+}
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index e4a3c039..8f9b4b52 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -3,15 +3,7 @@
#include <qpdf/QUtil.hh>
#include <qpdf/Pipeline.hh>
#include <qpdf/Pl_Flate.hh>
-#include <qpdf/Pl_PNGFilter.hh>
-#include <qpdf/Pl_TIFFPredictor.hh>
-#include <qpdf/Pl_RC4.hh>
#include <qpdf/Pl_Buffer.hh>
-#include <qpdf/Pl_ASCII85Decoder.hh>
-#include <qpdf/Pl_ASCIIHexDecoder.hh>
-#include <qpdf/Pl_LZWDecoder.hh>
-#include <qpdf/Pl_RunLength.hh>
-#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Count.hh>
#include <qpdf/ContentNormalizer.hh>
#include <qpdf/QTC.hh>
@@ -19,10 +11,78 @@
#include <qpdf/QPDFExc.hh>
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QIntC.hh>
+#include <qpdf/SF_FlateLzwDecode.hh>
+#include <qpdf/SF_DCTDecode.hh>
+#include <qpdf/SF_RunLengthDecode.hh>
+#include <qpdf/SF_ASCII85Decode.hh>
+#include <qpdf/SF_ASCIIHexDecode.hh>
#include <stdexcept>
-std::map<std::string, std::string> QPDF_Stream::filter_abbreviations;
+// Stream filter registered for /Crypt. It only validates
+// /DecodeParms and never supplies a pipeline of its own; per the
+// comments in the method bodies, the actual work is done in
+// decryptStream / pipeStreamData.
+class SF_Crypt: public QPDFStreamFilter
+{
+  public:
+    SF_Crypt() = default;
+    virtual ~SF_Crypt() = default;
+
+    // Returns true if decode_parms is null, or is a dictionary whose
+    // only keys are /Type and /Name and whose /Type is either absent
+    // or the name /CryptFilterDecodeParms. Returns false for anything
+    // else, including a value that is neither null nor a dictionary.
+    virtual bool setDecodeParms(QPDFObjectHandle decode_parms) override
+    {
+        if (decode_parms.isNull())
+        {
+            return true;
+        }
+        if (! decode_parms.isDictionary())
+        {
+            // A /DecodeParms entry that is neither null nor a
+            // dictionary cannot be interpreted; decline to filter
+            // rather than asking a non-dictionary for its keys.
+            return false;
+        }
+
+        // The /Type test is the same for every key, so evaluate it
+        // once instead of on each loop iteration.
+        QPDFObjectHandle type = decode_parms.getKey("/Type");
+        bool const type_okay =
+            (type.isNull() ||
+             (type.isName() &&
+              (type.getName() == "/CryptFilterDecodeParms")));
+
+        bool filterable = true;
+        for (auto const& key: decode_parms.getKeys())
+        {
+            if (((key == "/Type") || (key == "/Name")) && type_okay)
+            {
+                // we handle this in decryptStream
+            }
+            else
+            {
+                filterable = false;
+            }
+        }
+        return filterable;
+    }
+
+    // Always returns nullptr: this filter adds no stage to the
+    // decode pipeline.
+    virtual Pipeline* getDecodePipeline(Pipeline*) override
+    {
+        // Not used -- handled by pipeStreamData
+        return nullptr;
+    }
+};
+
+std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
+ // The PDF specification provides these filter abbreviations for
+ // use in inline images, but according to table H.1 in the pre-ISO
+ // versions of the PDF specification, Adobe Reader also accepts
+ // them for stream filters.
+ {"/AHx", "/ASCIIHexDecode"},
+ {"/A85", "/ASCII85Decode"},
+ {"/LZW", "/LZWDecode"},
+ {"/Fl", "/FlateDecode"},
+ {"/RL", "/RunLengthDecode"},
+ {"/CCF", "/CCITTFaxDecode"},
+ {"/DCT", "/DCTDecode"},
+};
+
+std::map<
+ std::string,
+ std::function<std::shared_ptr<QPDFStreamFilter>()>>
+QPDF_Stream::filter_factories = { // built-in decoders keyed by full filter name; registerStreamFilter() adds or replaces entries
+ {"/Crypt", []() { return std::make_shared<SF_Crypt>(); }},
+ {"/FlateDecode", SF_FlateLzwDecode::flate_factory},
+ {"/LZWDecode", SF_FlateLzwDecode::lzw_factory},
+ {"/RunLengthDecode", SF_RunLengthDecode::factory},
+ {"/DCTDecode", SF_DCTDecode::factory},
+ {"/ASCII85Decode", SF_ASCII85Decode::factory},
+ {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
+};
QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
QPDFObjectHandle stream_dict,
@@ -48,6 +108,14 @@ QPDF_Stream::~QPDF_Stream()
}
void
+QPDF_Stream::registerStreamFilter(
+ std::string const& filter_name,
+ std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
+{
+ filter_factories[filter_name] = factory;
+}
+
+void
QPDF_Stream::releaseResolved()
{
this->stream_provider = 0;
@@ -190,125 +258,18 @@ QPDF_Stream::getRawStreamData()
}
bool
-QPDF_Stream::understandDecodeParams(
- std::string const& filter, QPDFObjectHandle decode_obj,
- int& predictor, int& columns,
- int& colors, int& bits_per_component,
- bool& early_code_change)
-{
- bool filterable = true;
- std::set<std::string> keys = decode_obj.getKeys();
- for (std::set<std::string>::iterator iter = keys.begin();
- iter != keys.end(); ++iter)
- {
- std::string const& key = *iter;
- if (((filter == "/FlateDecode") || (filter == "/LZWDecode")) &&
- (key == "/Predictor"))
- {
- QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
- if (predictor_obj.isInteger())
- {
- predictor = predictor_obj.getIntValueAsInt();
- if (! ((predictor == 1) || (predictor == 2) ||
- ((predictor >= 10) && (predictor <= 15))))
- {
- filterable = false;
- }
- }
- else
- {
- filterable = false;
- }
- }
- else if ((filter == "/LZWDecode") && (key == "/EarlyChange"))
- {
- QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
- if (earlychange_obj.isInteger())
- {
- int earlychange = earlychange_obj.getIntValueAsInt();
- early_code_change = (earlychange == 1);
- if (! ((earlychange == 0) || (earlychange == 1)))
- {
- filterable = false;
- }
- }
- else
- {
- filterable = false;
- }
- }
- else if ((key == "/Columns") ||
- (key == "/Colors") ||
- (key == "/BitsPerComponent"))
- {
- QPDFObjectHandle param_obj = decode_obj.getKey(key);
- if (param_obj.isInteger())
- {
- int val = param_obj.getIntValueAsInt();
- if (key == "/Columns")
- {
- columns = val;
- }
- else if (key == "/Colors")
- {
- colors = val;
- }
- else if (key == "/BitsPerComponent")
- {
- bits_per_component = val;
- }
- }
- else
- {
- filterable = false;
- }
- }
- else if ((filter == "/Crypt") &&
- (((key == "/Type") || (key == "/Name")) &&
- (decode_obj.getKey("/Type").isNull() ||
- (decode_obj.getKey("/Type").isName() &&
- (decode_obj.getKey("/Type").getName() ==
- "/CryptFilterDecodeParms")))))
- {
- // we handle this in decryptStream
- }
- else
- {
- filterable = false;
- }
- }
-
- return filterable;
-}
-
-bool
-QPDF_Stream::filterable(std::vector<std::string>& filters,
- bool& specialized_compression,
- bool& lossy_compression,
- int& predictor, int& columns,
- int& colors, int& bits_per_component,
- bool& early_code_change)
+QPDF_Stream::filterable(
+ std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
+ bool& specialized_compression,
+ bool& lossy_compression)
{
- if (filter_abbreviations.empty())
- {
- // The PDF specification provides these filter abbreviations
- // for use in inline images, but according to table H.1 in the
- // pre-ISO versions of the PDF specification, Adobe Reader
- // also accepts them for stream filters.
- filter_abbreviations["/AHx"] = "/ASCIIHexDecode";
- filter_abbreviations["/A85"] = "/ASCII85Decode";
- filter_abbreviations["/LZW"] = "/LZWDecode";
- filter_abbreviations["/Fl"] = "/FlateDecode";
- filter_abbreviations["/RL"] = "/RunLengthDecode";
- filter_abbreviations["/CCF"] = "/CCITTFaxDecode";
- filter_abbreviations["/DCT"] = "/DCTDecode";
- }
-
// Check filters
QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
bool filters_okay = true;
+ std::vector<std::string> filter_names;
+
if (filter_obj.isNull())
{
// No filters
@@ -316,7 +277,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
else if (filter_obj.isName())
{
// One filter
- filters.push_back(filter_obj.getName());
+ filter_names.push_back(filter_obj.getName());
}
else if (filter_obj.isArray())
{
@@ -327,7 +288,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
QPDFObjectHandle item = filter_obj.getArrayItem(i);
if (item.isName())
{
- filters.push_back(item.getName());
+ filter_names.push_back(item.getName());
}
else
{
@@ -351,34 +312,23 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
bool filterable = true;
- for (std::vector<std::string>::iterator iter = filters.begin();
- iter != filters.end(); ++iter)
+ for (auto& filter_name: filter_names)
{
- std::string& filter = *iter;
-
- if (filter_abbreviations.count(filter))
+ if (filter_abbreviations.count(filter_name))
{
QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation");
- filter = filter_abbreviations[filter];
+ filter_name = filter_abbreviations[filter_name];
}
- if (filter == "/RunLengthDecode")
+ auto ff = filter_factories.find(filter_name);
+ if (ff == filter_factories.end())
{
- specialized_compression = true;
+ filterable = false;
}
- else if (filter == "/DCTDecode")
+ else
{
- specialized_compression = true;
- lossy_compression = true;
+ filters.push_back((ff->second)());
}
- else if (! ((filter == "/Crypt") ||
- (filter == "/FlateDecode") ||
- (filter == "/LZWDecode") ||
- (filter == "/ASCII85Decode") ||
- (filter == "/ASCIIHexDecode")))
- {
- filterable = false;
- }
}
if (! filterable)
@@ -386,15 +336,8 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
return false;
}
- // `filters' now contains a list of filters to be applied in
- // order. See which ones we can support.
-
- // Initialize values to their defaults as per the PDF spec
- predictor = 1;
- columns = 0;
- colors = 1;
- bits_per_component = 8;
- early_code_change = true;
+ // filters now contains a list of filters to be applied in order.
+ // See which ones we can support.
// See if we can support any decode parameters that are specified.
@@ -413,7 +356,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
}
else
{
- for (unsigned int i = 0; i < filters.size(); ++i)
+ for (unsigned int i = 0; i < filter_names.size(); ++i)
{
decode_parms.push_back(decode_obj);
}
@@ -436,21 +379,21 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
return false;
}
- for (unsigned int i = 0; i < filters.size(); ++i)
+ for (size_t i = 0; i < filters.size(); ++i)
{
- QPDFObjectHandle decode_item = decode_parms.at(i);
- if (decode_item.isNull())
- {
- // okay
- }
- else if (decode_item.isDictionary())
+ auto filter = filters.at(i);
+ auto decode_item = decode_parms.at(i);
+
+ if (filter->setDecodeParms(decode_item))
{
- if (! understandDecodeParams(
- filters.at(i), decode_item,
- predictor, columns, colors, bits_per_component,
- early_code_change))
+ if (filter->isSpecializedCompression())
{
- filterable = false;
+ specialized_compression = true;
+ }
+ if (filter->isLossyCompression())
+ {
+ specialized_compression = true;
+ lossy_compression = true;
}
}
else
@@ -459,17 +402,6 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
}
}
- if ((predictor > 1) && (columns == 0))
- {
- // invalid
- filterable = false;
- }
-
- if (! filterable)
- {
- return false;
- }
-
return filterable;
}
@@ -479,12 +411,7 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings, bool will_retry)
{
- std::vector<std::string> filters;
- int predictor = 1;
- int columns = 0;
- int colors = 1;
- int bits_per_component = 8;
- bool early_code_change = true;
+ std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
bool specialized_compression = false;
bool lossy_compression = false;
bool ignored;
@@ -497,10 +424,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
bool success = true;
if (filter)
{
- filter = filterable(filters, specialized_compression, lossy_compression,
- predictor, columns,
- colors, bits_per_component,
- early_code_change);
+ filter = filterable(
+ filters, specialized_compression, lossy_compression);
if ((decode_level < qpdf_dl_all) && lossy_compression)
{
filter = false;
@@ -523,9 +448,11 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
return filter;
}
- // Construct the pipeline in reverse order. Force pipelines we
- // create to be deleted when this function finishes.
- std::vector<PointerHolder<Pipeline> > to_delete;
+ // Construct the pipeline in reverse order. Force pipelines we
+ // create to be deleted when this function finishes. Pipelines
+ // created by QPDFStreamFilter objects will be deleted by those
+ // objects.
+ std::vector<PointerHolder<Pipeline>> to_delete;
PointerHolder<ContentNormalizer> normalizer;
if (filter)
@@ -555,80 +482,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
to_delete.push_back(pipeline);
}
- for (std::vector<std::string>::reverse_iterator f_iter =
- filters.rbegin();
- f_iter != filters.rend(); ++f_iter)
+ for (auto f_iter = filters.rbegin();
+ f_iter != filters.rend(); ++f_iter)
{
- std::string const& filter_name = *f_iter;
-
- if ((filter_name == "/FlateDecode") ||
- (filter_name == "/LZWDecode"))
+ auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline);
+ if (decode_pipeline)
{
- if ((predictor >= 10) && (predictor <= 15))
- {
- QTC::TC("qpdf", "QPDF_Stream PNG filter");
- pipeline = new Pl_PNGFilter(
- "png decode", pipeline, Pl_PNGFilter::a_decode,
- QIntC::to_uint(columns),
- QIntC::to_uint(colors),
- QIntC::to_uint(bits_per_component));
- to_delete.push_back(pipeline);
- }
- else if (predictor == 2)
- {
- QTC::TC("qpdf", "QPDF_Stream TIFF predictor");
- pipeline = new Pl_TIFFPredictor(
- "tiff decode", pipeline, Pl_TIFFPredictor::a_decode,
- QIntC::to_uint(columns),
- QIntC::to_uint(colors),
- QIntC::to_uint(bits_per_component));
- to_delete.push_back(pipeline);
- }
+ pipeline = decode_pipeline;
}
-
- if (filter_name == "/Crypt")
- {
- // Ignore -- handled by pipeStreamData
- }
- else if (filter_name == "/FlateDecode")
- {
- pipeline = new Pl_Flate("stream inflate",
- pipeline, Pl_Flate::a_inflate);
- to_delete.push_back(pipeline);
- }
- else if (filter_name == "/ASCII85Decode")
- {
- pipeline = new Pl_ASCII85Decoder("ascii85 decode", pipeline);
- to_delete.push_back(pipeline);
- }
- else if (filter_name == "/ASCIIHexDecode")
- {
- pipeline = new Pl_ASCIIHexDecoder("asciiHex decode", pipeline);
- to_delete.push_back(pipeline);
- }
- else if (filter_name == "/LZWDecode")
- {
- pipeline = new Pl_LZWDecoder("lzw decode", pipeline,
- early_code_change);
- to_delete.push_back(pipeline);
- }
- else if (filter_name == "/RunLengthDecode")
- {
- pipeline = new Pl_RunLength("runlength decode", pipeline,
- Pl_RunLength::a_decode);
- to_delete.push_back(pipeline);
- }
- else if (filter_name == "/DCTDecode")
- {
- pipeline = new Pl_DCT("DCT decode", pipeline);
- to_delete.push_back(pipeline);
- }
- else
- {
- throw std::logic_error(
- "INTERNAL ERROR: QPDFStream: unknown filter "
- "encountered after check");
- }
}
}
diff --git a/libqpdf/SF_FlateLzwDecode.cc b/libqpdf/SF_FlateLzwDecode.cc
new file mode 100644
index 00000000..29064cc0
--- /dev/null
+++ b/libqpdf/SF_FlateLzwDecode.cc
@@ -0,0 +1,153 @@
+#include <qpdf/SF_FlateLzwDecode.hh>
+#include <qpdf/Pl_PNGFilter.hh>
+#include <qpdf/Pl_TIFFPredictor.hh>
+#include <qpdf/Pl_Flate.hh>
+#include <qpdf/Pl_LZWDecoder.hh>
+#include <qpdf/QTC.hh>
+#include <qpdf/QIntC.hh>
+
+SF_FlateLzwDecode::SF_FlateLzwDecode(bool lzw) :
+ lzw(lzw), // true: build an LZW decoder; false: build a Flate (inflate) decoder
+ // Initialize values to their defaults as per the PDF spec
+ predictor(1),
+ columns(0),
+ colors(1),
+ bits_per_component(8),
+ early_code_change(true)
+{
+}
+
+// Validate /DecodeParms for /FlateDecode or /LZWDecode and remember
+// the values needed later by getDecodePipeline().
+//
+// Returns true when decode_parms is null (the defaults set by the
+// constructor apply) or is a dictionary whose recognized entries are
+// all usable:
+//   /Predictor                             integer: 1, 2, or 10..15
+//   /Columns, /Colors, /BitsPerComponent   integer
+//   /EarlyChange                           integer 0 or 1; examined
+//                                          only when decoding LZW
+// Keys other than these are ignored. A predictor greater than 1 also
+// requires a nonzero /Columns. Returns false otherwise, including
+// when decode_parms is neither null nor a dictionary.
+bool
+SF_FlateLzwDecode::setDecodeParms(QPDFObjectHandle decode_parms)
+{
+    if (decode_parms.isNull())
+    {
+        return true;
+    }
+    if (! decode_parms.isDictionary())
+    {
+        // A /DecodeParms entry that is neither null nor a dictionary
+        // cannot be interpreted; decline to filter rather than asking
+        // a non-dictionary for its keys.
+        return false;
+    }
+
+    bool filterable = true;
+    std::set<std::string> keys = decode_parms.getKeys();
+    for (auto const& key: keys)
+    {
+        QPDFObjectHandle value = decode_parms.getKey(key);
+        if (key == "/Predictor")
+        {
+            if (value.isInteger())
+            {
+                this->predictor = value.getIntValueAsInt();
+                if (! ((this->predictor == 1) || (this->predictor == 2) ||
+                       ((this->predictor >= 10) && (this->predictor <= 15))))
+                {
+                    filterable = false;
+                }
+            }
+            else
+            {
+                filterable = false;
+            }
+        }
+        else if ((key == "/Columns") ||
+                 (key == "/Colors") ||
+                 (key == "/BitsPerComponent"))
+        {
+            if (value.isInteger())
+            {
+                int val = value.getIntValueAsInt();
+                if (key == "/Columns")
+                {
+                    this->columns = val;
+                }
+                else if (key == "/Colors")
+                {
+                    this->colors = val;
+                }
+                else if (key == "/BitsPerComponent")
+                {
+                    this->bits_per_component = val;
+                }
+            }
+            else
+            {
+                filterable = false;
+            }
+        }
+        else if (lzw && (key == "/EarlyChange"))
+        {
+            if (value.isInteger())
+            {
+                int earlychange = value.getIntValueAsInt();
+                this->early_code_change = (earlychange == 1);
+                if (! ((earlychange == 0) || (earlychange == 1)))
+                {
+                    filterable = false;
+                }
+            }
+            else
+            {
+                filterable = false;
+            }
+        }
+    }
+
+    // A predictor needs to know the row width; without /Columns the
+    // parameters are unusable.
+    if ((this->predictor > 1) && (this->columns == 0))
+    {
+        filterable = false;
+    }
+
+    return filterable;
+}
+
+
+
+// Build the decode stages for this filter and return the first one.
+// When a PNG (10..15) or TIFF (2) predictor was configured, a
+// predictor stage is created with next as its downstream pipeline and
+// the decompressor is then created with that predictor stage as its
+// downstream pipeline; otherwise the decompressor is attached to next
+// directly. Every stage created here is stored in this->pipelines, so
+// it stays alive as long as this filter object does.
+Pipeline*
+SF_FlateLzwDecode::getDecodePipeline(Pipeline* next)
+{
+    bool const png_predictor =
+        (this->predictor >= 10) && (this->predictor <= 15);
+    bool const tiff_predictor =
+        (! png_predictor) && (this->predictor == 2);
+
+    if (png_predictor)
+    {
+        QTC::TC("qpdf", "SF_FlateLzwDecode PNG filter");
+        auto png_stage = std::make_shared<Pl_PNGFilter>(
+            "png decode", next, Pl_PNGFilter::a_decode,
+            QIntC::to_uint(this->columns),
+            QIntC::to_uint(this->colors),
+            QIntC::to_uint(this->bits_per_component));
+        this->pipelines.push_back(png_stage);
+        next = png_stage.get();
+    }
+    else if (tiff_predictor)
+    {
+        QTC::TC("qpdf", "SF_FlateLzwDecode TIFF predictor");
+        auto tiff_stage = std::make_shared<Pl_TIFFPredictor>(
+            "tiff decode", next, Pl_TIFFPredictor::a_decode,
+            QIntC::to_uint(this->columns),
+            QIntC::to_uint(this->colors),
+            QIntC::to_uint(this->bits_per_component));
+        this->pipelines.push_back(tiff_stage);
+        next = tiff_stage.get();
+    }
+
+    // The decompressor is always the head of the chain.
+    std::shared_ptr<Pipeline> decoder;
+    if (this->lzw)
+    {
+        decoder = std::make_shared<Pl_LZWDecoder>(
+            "lzw decode", next, this->early_code_change);
+    }
+    else
+    {
+        decoder = std::make_shared<Pl_Flate>(
+            "stream inflate", next, Pl_Flate::a_inflate);
+    }
+    this->pipelines.push_back(decoder);
+    return decoder.get();
+}
+
+std::shared_ptr<QPDFStreamFilter>
+SF_FlateLzwDecode::flate_factory()
+{
+ return std::make_shared<SF_FlateLzwDecode>(false); // lzw = false: Flate decoder
+}
+
+std::shared_ptr<QPDFStreamFilter>
+SF_FlateLzwDecode::lzw_factory()
+{
+ return std::make_shared<SF_FlateLzwDecode>(true); // lzw = true: LZW decoder
+}
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index ec13b46b..40b022d6 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -70,6 +70,7 @@ SRCS_libqpdf = \
libqpdf/QPDFPageDocumentHelper.cc \
libqpdf/QPDFPageLabelDocumentHelper.cc \
libqpdf/QPDFPageObjectHelper.cc \
+ libqpdf/QPDFStreamFilter.cc \
libqpdf/QPDFSystemError.cc \
libqpdf/QPDFTokenizer.cc \
libqpdf/QPDFWriter.cc \
@@ -94,6 +95,7 @@ SRCS_libqpdf = \
libqpdf/QUtil.cc \
libqpdf/RC4.cc \
libqpdf/SecureRandomDataProvider.cc \
+ libqpdf/SF_FlateLzwDecode.cc \
libqpdf/SparseOHArray.cc \
libqpdf/qpdf-c.cc
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index da9f91a0..b11de6a2 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -5,6 +5,10 @@
#include <qpdf/QPDFObject.hh>
#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/QPDFStreamFilter.hh>
+
+#include <functional>
+#include <memory>
class Pipeline;
class QPDF;
@@ -49,6 +53,10 @@ class QPDF_Stream: public QPDFObject
void replaceDict(QPDFObjectHandle new_dict);
+ static void registerStreamFilter(
+ std::string const& filter_name,
+ std::function<std::shared_ptr<QPDFStreamFilter>()> factory);
+
// Replace object ID and generation. This may only be called if
// object ID and generation are 0. It is used by QPDFObjectHandle
// when adding streams to files.
@@ -59,20 +67,15 @@ class QPDF_Stream: public QPDFObject
private:
static std::map<std::string, std::string> filter_abbreviations;
+ static std::map<
+ std::string,
+ std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
void replaceFilterData(QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms,
size_t length);
- bool understandDecodeParams(
- std::string const& filter, QPDFObjectHandle decode_params,
- int& predictor, int& columns,
- int& colors, int& bits_per_component,
- bool& early_code_change);
- bool filterable(std::vector<std::string>& filters,
- bool& specialized_compression, bool& lossy_compression,
- int& predictor, int& columns,
- int& colors, int& bits_per_component,
- bool& early_code_change);
+ bool filterable(std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
+ bool& specialized_compression, bool& lossy_compression);
void warn(QPDFExc const& e);
void setDictDescription();
void setStreamDescription();
diff --git a/libqpdf/qpdf/SF_ASCII85Decode.hh b/libqpdf/qpdf/SF_ASCII85Decode.hh
new file mode 100644
index 00000000..b0b2f2e3
--- /dev/null
+++ b/libqpdf/qpdf/SF_ASCII85Decode.hh
@@ -0,0 +1,30 @@
+#include <qpdf/QPDFStreamFilter.hh>
+#include <qpdf/Pl_ASCII85Decoder.hh>
+#include <memory>
+
+#ifndef SF_ASCII85DECODE_HH
+#define SF_ASCII85DECODE_HH
+
+class SF_ASCII85Decode: public QPDFStreamFilter // stream filter that decodes with Pl_ASCII85Decoder
+{
+ public:
+ SF_ASCII85Decode() = default;
+ virtual ~SF_ASCII85Decode() = default;
+
+ virtual Pipeline* getDecodePipeline(Pipeline* next) override
+ {
+ this->pipeline = std::make_shared<Pl_ASCII85Decoder>( // replaces any pipeline stored by an earlier call
+ "ascii85 decode", next);
+ return this->pipeline.get();
+ }
+
+ static std::shared_ptr<QPDFStreamFilter> factory()
+ {
+ return std::make_shared<SF_ASCII85Decode>();
+ }
+
+ private:
+ std::shared_ptr<Pipeline> pipeline; // owns the pipeline handed out by getDecodePipeline()
+};
+
+#endif // SF_ASCII85DECODE_HH
diff --git a/libqpdf/qpdf/SF_ASCIIHexDecode.hh b/libqpdf/qpdf/SF_ASCIIHexDecode.hh
new file mode 100644
index 00000000..869d0722
--- /dev/null
+++ b/libqpdf/qpdf/SF_ASCIIHexDecode.hh
@@ -0,0 +1,30 @@
+#include <qpdf/QPDFStreamFilter.hh>
+#include <qpdf/Pl_ASCIIHexDecoder.hh>
+#include <memory>
+
+#ifndef SF_ASCIIHEXDECODE_HH
+#define SF_ASCIIHEXDECODE_HH
+
+class SF_ASCIIHexDecode: public QPDFStreamFilter // stream filter that decodes with Pl_ASCIIHexDecoder
+{
+ public:
+ SF_ASCIIHexDecode() = default;
+ virtual ~SF_ASCIIHexDecode() = default;
+
+ virtual Pipeline* getDecodePipeline(Pipeline* next) override
+ {
+ this->pipeline = std::make_shared<Pl_ASCIIHexDecoder>( // replaces any pipeline stored by an earlier call
+ "asciiHex decode", next);
+ return this->pipeline.get();
+ }
+
+ static std::shared_ptr<QPDFStreamFilter> factory()
+ {
+ return std::make_shared<SF_ASCIIHexDecode>();
+ }
+
+ private:
+ std::shared_ptr<Pipeline> pipeline; // owns the pipeline handed out by getDecodePipeline()
+};
+
+#endif // SF_ASCIIHEXDECODE_HH
diff --git a/libqpdf/qpdf/SF_DCTDecode.hh b/libqpdf/qpdf/SF_DCTDecode.hh
new file mode 100644
index 00000000..28aa42c1
--- /dev/null
+++ b/libqpdf/qpdf/SF_DCTDecode.hh
@@ -0,0 +1,39 @@
+#include <qpdf/QPDFStreamFilter.hh>
+#include <qpdf/Pl_DCT.hh>
+#include <memory>
+
+#ifndef SF_DCTDECODE_HH
+#define SF_DCTDECODE_HH
+
+class SF_DCTDecode: public QPDFStreamFilter // stream filter that decodes with Pl_DCT
+{
+ public:
+ SF_DCTDecode() = default;
+ virtual ~SF_DCTDecode() = default;
+
+ virtual Pipeline* getDecodePipeline(Pipeline* next) override
+ {
+ this->pipeline = std::make_shared<Pl_DCT>("DCT decode", next); // replaces any pipeline stored by an earlier call
+ return this->pipeline.get();
+ }
+
+ static std::shared_ptr<QPDFStreamFilter> factory()
+ {
+ return std::make_shared<SF_DCTDecode>();
+ }
+
+ virtual bool isSpecializedCompression() override
+ {
+ return true; // overrides the base-class default of false
+ }
+
+ virtual bool isLossyCompression() override
+ {
+ return true; // overrides the base-class default of false
+ }
+
+ private:
+ std::shared_ptr<Pipeline> pipeline; // owns the pipeline handed out by getDecodePipeline()
+};
+
+#endif // SF_DCTDECODE_HH
diff --git a/libqpdf/qpdf/SF_FlateLzwDecode.hh b/libqpdf/qpdf/SF_FlateLzwDecode.hh
new file mode 100644
index 00000000..9c72eff9
--- /dev/null
+++ b/libqpdf/qpdf/SF_FlateLzwDecode.hh
@@ -0,0 +1,30 @@
+#include <qpdf/QPDFStreamFilter.hh>
+#include <memory>
+#include <vector>
+
+#ifndef SF_FLATELZWDECODE_HH
+#define SF_FLATELZWDECODE_HH
+
+// Stream filter shared by /FlateDecode and /LZWDecode. The two differ
+// only in the decompressor that getDecodePipeline() creates; both
+// accept the same predictor-related /DecodeParms.
+class SF_FlateLzwDecode: public QPDFStreamFilter
+{
+  public:
+    // lzw: true to decode LZW, false to decode Flate.
+    explicit SF_FlateLzwDecode(bool lzw);
+    virtual ~SF_FlateLzwDecode() = default;
+
+    // Validates decode_parms and stores the values used later by
+    // getDecodePipeline(); returns false if they are not usable.
+    virtual bool setDecodeParms(QPDFObjectHandle decode_parms) override;
+    // Returns the head of the decode chain placed in front of next.
+    // The stages are held in `pipelines` below.
+    virtual Pipeline* getDecodePipeline(Pipeline* next) override;
+
+    // Factories with the signature expected by the filter registry.
+    static std::shared_ptr<QPDFStreamFilter> flate_factory();
+    static std::shared_ptr<QPDFStreamFilter> lzw_factory();
+
+  private:
+    bool lzw;
+    // Values taken from /DecodeParms (or their defaults).
+    int predictor;
+    int columns;
+    int colors;
+    int bits_per_component;
+    bool early_code_change;
+    // Keeps every pipeline stage created by getDecodePipeline() alive
+    // for the lifetime of this object.
+    std::vector<std::shared_ptr<Pipeline>> pipelines;
+};
+
+#endif // SF_FLATELZWDECODE_HH
diff --git a/libqpdf/qpdf/SF_RunLengthDecode.hh b/libqpdf/qpdf/SF_RunLengthDecode.hh
new file mode 100644
index 00000000..1bdfb4c0
--- /dev/null
+++ b/libqpdf/qpdf/SF_RunLengthDecode.hh
@@ -0,0 +1,35 @@
+#include <qpdf/QPDFStreamFilter.hh>
+#include <qpdf/Pl_RunLength.hh>
+#include <memory>
+
+#ifndef SF_RUNLENGTHDECODE_HH
+#define SF_RUNLENGTHDECODE_HH
+
+class SF_RunLengthDecode: public QPDFStreamFilter // stream filter that decodes with Pl_RunLength
+{
+ public:
+ SF_RunLengthDecode() = default;
+ virtual ~SF_RunLengthDecode() = default;
+
+ virtual Pipeline* getDecodePipeline(Pipeline* next) override
+ {
+ this->pipeline = std::make_shared<Pl_RunLength>( // replaces any pipeline stored by an earlier call
+ "runlength decode", next, Pl_RunLength::a_decode);
+ return this->pipeline.get();
+ }
+
+ static std::shared_ptr<QPDFStreamFilter> factory()
+ {
+ return std::make_shared<SF_RunLengthDecode>();
+ }
+
+ virtual bool isSpecializedCompression() override
+ {
+ return true; // overrides the base-class default of false
+ }
+
+ private:
+ std::shared_ptr<Pipeline> pipeline; // owns the pipeline handed out by getDecodePipeline()
+};
+
+#endif // SF_RUNLENGTHDECODE_HH
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 15f6cf1e..f0f96242 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -86,7 +86,7 @@ QPDF prev key in trailer dictionary 0
QPDF found xref stream 0
QPDF ignoring XRefStm in trailer 0
QPDF xref deleted object 0
-QPDF_Stream PNG filter 0
+SF_FlateLzwDecode PNG filter 0
QPDF xref /Index is null 0
QPDF xref /Index is array 1
QPDFWriter copy Extends 0
@@ -294,7 +294,7 @@ qpdf-c called qpdf_set_decode_level 0
qpdf-c called qpdf_set_compress_streams 0
qpdf-c called qpdf_set_preserve_unreferenced_objects 0
qpdf-c called qpdf_set_newline_before_endstream 0
-QPDF_Stream TIFF predictor 0
+SF_FlateLzwDecode TIFF predictor 0
QPDFTokenizer inline image at EOF 0
Pl_QPDFTokenizer found ID 0
QPDFObjectHandle non-stream in stream array 0