about | summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/qpdf/QPDFObjectHandle.hh 87
-rw-r--r-- include/qpdf/QPDFTokenizer.hh 11
2 files changed, 89 insertions, 9 deletions
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index 14dadd6c..f0b8f2af 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -35,6 +35,7 @@
#include <qpdf/PointerHolder.hh>
#include <qpdf/Buffer.hh>
#include <qpdf/InputSource.hh>
+#include <qpdf/QPDFTokenizer.hh>
#include <qpdf/QPDFObject.hh>
@@ -76,6 +77,66 @@ class QPDFObjectHandle
Pipeline* pipeline) = 0;
};
+ // The TokenFilter class provides a way to filter content streams
+ // in a lexically aware fashion. TokenFilters can be attached to
+ // streams using the addTokenFilter or addContentTokenFilter
+ // methods. The handleToken method is called for each token,
+ // including the eof token, and then handleEOF is called at the
+ // very end. Handlers may call write (or writeToken) to pass data
+ // downstream. The finish() method must be called exactly one time
+ // to ensure that any written data is flushed out. The default
+ // handleEOF calls finish. If you override handleEOF, you must
+ // ensure that finish() is called either there or in response to
+ // whatever event causes you to terminate creation of output.
+ // Failure to call finish() may result in some of the data you
+ // have written being lost. You should not rely on a destructor
+ // for calling finish() since the destructor call may occur later
+ // than you expect. Please see examples/token-filters.cc for
+ // examples of using TokenFilters.
+ //
+ // Please note that when you call token.getValue() on a token of
+ // type tt_string, you get the string value without any
+ // delimiters. token.getRawValue() will return something suitable
+ // for being written to output, or calling writeToken with a
+ // string token will also work. The correct way to construct a
+ // string token that would write the literal value (str) is
+ // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "str").
+ class TokenFilter
+ {
+   public:
+     // Start with no pipeline attached so the member never holds an
+     // indeterminate pointer before setPipeline is called.
+     QPDF_DLL
+     TokenFilter() :
+         pipeline(0)
+     {
+     }
+     QPDF_DLL
+     virtual ~TokenFilter()
+     {
+     }
+     // Called for each token, including the eof token.
+     virtual void handleToken(QPDFTokenizer::Token const&) = 0;
+     // Called at the very end, after the tokens have been handled.
+     virtual void handleEOF()
+     {
+         // If you override handleEOF, you must be sure to call
+         // finish().
+         finish();
+     }
+
+     // This is called internally by the qpdf library. It carries
+     // QPDF_DLL like every other member that is declared here
+     // without an inline body.
+     QPDF_DLL
+     void setPipeline(Pipeline*);
+
+   protected:
+     // write and writeToken pass data downstream; handlers call
+     // them from handleToken or handleEOF.
+     QPDF_DLL
+     void write(char const* data, size_t len);
+     QPDF_DLL
+     void write(std::string const& str);
+     QPDF_DLL
+     void writeToken(QPDFTokenizer::Token const&);
+     // Must be called exactly one time so that written data is
+     // flushed out; the default handleEOF does this.
+     QPDF_DLL
+     void finish();
+
+   private:
+     // Pipeline supplied through setPipeline; 0 until then.
+     // NOTE(review): presumably the target of write/finish -- their
+     // definitions are not in this header; confirm in the .cc file.
+     Pipeline* pipeline;
+ };
+
// This class is used by parse to decrypt strings when reading an
// object that contains encrypted strings.
class StringDecrypter
@@ -223,6 +284,23 @@ class QPDFObjectHandle
static void parseContentStream(QPDFObjectHandle stream_or_array,
ParserCallbacks* callbacks);
+ // Attach a token filter to a page's contents. If the page's
+ // contents is an array of streams, it is automatically coalesced.
+ // The token filter is applied to the page's contents as a single
+ // stream.
+ QPDF_DLL
+ void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
+
+ // As of qpdf 8, it is possible to add custom token filters to a
+ // stream. The tokenized stream data is passed through the token
+ // filter after all original filters but before content stream
+ // normalization if requested. This is a low-level interface to
+ // add it to a stream. You will usually want to call
+ // addContentTokenFilter instead, which can be applied to a page
+ // object, and which will automatically handle the case of pages
+ // whose contents are split across multiple streams.
+ QPDF_DLL
+ void addTokenFilter(PointerHolder<TokenFilter> token_filter);
+
// Type-specific factories
QPDF_DLL
static QPDFObjectHandle newNull();
@@ -414,6 +492,13 @@ class QPDFObjectHandle
QPDF_DLL
QPDFObjectHandle getDict();
+ // If addTokenFilter has been called for this stream, then the
+ // original data should be considered to be modified. This means we
+ // should avoid optimizations such as not filtering a stream that
+ // is already compressed.
+ QPDF_DLL
+ bool isDataModified();
+
// Returns filtered (uncompressed) stream data. Throws an
// exception if the stream is filtered and we can't decode it.
QPDF_DLL
@@ -608,7 +693,7 @@ class QPDFObjectHandle
// stream or an array of streams. If this page's content is an
// array, concatenate the streams into a single stream. This can
// be useful when working with files that split content streams in
- // arbitary spots, such as in the middle of a token, as that can
+ // arbitrary spots, such as in the middle of a token, as that can
// confuse some software. You could also call this after calling
// addPageContents.
QPDF_DLL
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index fe2e95f7..eb9215aa 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -62,13 +62,8 @@ class QPDFTokenizer
{
public:
Token() : type(tt_bad) {}
-
- Token(token_type_e type, std::string const& value) :
- type(type),
- value(value)
- {
- }
-
+ QPDF_DLL
+ Token(token_type_e type, std::string const& value);
Token(token_type_e type, std::string const& value,
std::string raw_value, std::string error_message) :
type(type),
@@ -93,7 +88,7 @@ class QPDFTokenizer
{
return this->error_message;
}
- bool operator==(Token const& rhs)
+ bool operator==(Token const& rhs) const
{
// Ignore fields other than type and value
return ((this->type != tt_bad) &&