// Source path: examples/pdf-filter-tokens.cc
// (blob e764a86afd3b5255166472280650d306358065f4)
//
// This example illustrates the use of QPDFObjectHandle::TokenFilter
// with addContentTokenFilter. Please see comments inline for details.
// See also pdf-count-strings.cc for a use of
// QPDFObjectHandle::TokenFilter with filterContents.
//

#include <iostream>
#include <string.h>
#include <stdlib.h>
#include <algorithm>
#include <deque>

#include <qpdf/QPDF.hh>
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QPDFObjectHandle.hh>

// Name used to identify this program in diagnostics. It starts out
// null and is read by usage().
static char const* whoami = 0;

// Print a two-line usage summary to standard error and terminate the
// process with exit status 2. This function does not return.
void usage()
{
    std::cerr << "Usage: " << whoami << " infile outfile" << std::endl;
    std::cerr << "Applies token filters to infile and writes outfile"
              << std::endl;
    exit(2);
}

// The StringReverser class is a trivial example of using a token
// filter. This class only overrides the pure virtual handleToken
// function and preserves the default handleEOF function.
class StringReverser: public QPDFObjectHandle::TokenFilter
{
  public:
    // Nothing to release; the destructor is declared virtual with an
    // empty body.
    virtual ~StringReverser() {}

    // Called with each token of the filtered content; defined out of
    // line below. handleEOF is deliberately not overridden.
    virtual void handleToken(QPDFTokenizer::Token const& token);
};

void
StringReverser::handleToken(QPDFTokenizer::Token const& token)
{
    // Anything that is not a string token is forwarded untouched.
    if (token.getType() != QPDFTokenizer::tt_string)
    {
        writeToken(token);
        return;
    }

    // String tokens are emitted with their characters in reverse
    // order. A brand new string token is constructed and written
    // rather than raw text, which leaves it to the library to deal
    // with the details of encoding unprintable characters. The
    // reversal is done character by character with no awareness of
    // multibyte characters; this is a minimal demonstration of a
    // filter that writes a constructed token, not a model of how
    // to reverse strings correctly.
    std::string const& original = token.getValue();
    std::string reversed(original.rbegin(), original.rend());
    writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, reversed));
}

// The ColorToGray filter finds all "rg" operators in the content
// stream and replaces them with "g" operators, thus mapping color to
// grayscale. Note that it only applies to content streams, not
// images, so this will not replace color images with grayscale
// images.
class ColorToGray: public QPDFObjectHandle::TokenFilter
{
  public:
    // Declared virtual with an empty body; the members clean up
    // after themselves.
    virtual ~ColorToGray() {}

    // Per-token callback; defined out of line.
    virtual void handleToken(QPDFTokenizer::Token const& token);

    // End-of-input callback; defined out of line.
    virtual void handleEOF();

  private:
    // True for integer and real token types.
    bool isNumeric(QPDFTokenizer::token_type_e token_type);

    // True for space and comment token types.
    bool isIgnorable(QPDFTokenizer::token_type_e token_type);

    // Numeric value obtained by parsing the token's text.
    double numericValue(QPDFTokenizer::Token const& token);

    // Pending tokens of every kind, oldest first.
    std::deque<QPDFTokenizer::Token> all_stack;

    // Pending tokens excluding spaces and comments, oldest first.
    std::deque<QPDFTokenizer::Token> stack;
};

// Report whether token_type is one of the two numeric token types
// (integer or real).
bool
ColorToGray::isNumeric(QPDFTokenizer::token_type_e token_type)
{
    switch (token_type)
    {
      case QPDFTokenizer::tt_integer:
      case QPDFTokenizer::tt_real:
        return true;

      default:
        return false;
    }
}

// Report whether token_type is a space or a comment, the two token
// types this filter treats as not meaningful.
bool
ColorToGray::isIgnorable(QPDFTokenizer::token_type_e token_type)
{
    if (token_type == QPDFTokenizer::tt_space)
    {
        return true;
    }
    if (token_type == QPDFTokenizer::tt_comment)
    {
        return true;
    }
    return false;
}

// Convert a token to a double by parsing its text as an object and
// asking that object for its numeric value. Callers in this file only
// pass tokens that isNumeric has accepted.
double
ColorToGray::numericValue(QPDFTokenizer::Token const& token)
{
    QPDFObjectHandle parsed = QPDFObjectHandle::parse(token.getValue());
    return parsed.getNumericValue();
}

void
ColorToGray::handleToken(QPDFTokenizer::Token const& token)
{
    // Track the number of non-ignorable tokens we've seen. If we see
    // an "rg" following three numbers, convert it to a grayscale
    // value. Keep writing tokens to the output as we can.

    // There are several things to notice here. We keep two stacks:
    // one of "meaningful" tokens, and one of all tokens. This way we
    // can preserve whitespace or comments that we encounter in the
    // stream and thereby preserve layout. As we receive tokens, we
    // keep the last four meaningful tokens. If we see three numbers
    // followed by rg, we use the three numbers to calculate a gray
    // value that is perceptually similar to the color value and then
    // write the "g" operator to the output, discarding any spaces or
    // comments encountered embedded in the "rg" operator.

    // The stack and all_stack members are updated in such a way that
    // they always contain exactly the same non-ignorable tokens. The
    // stack member contains the tokens that would be left if you
    // removed all space and comment tokens from all_stack.

    // On each new token, flush out any space or comment tokens. Store
    // the incoming token. If we just got an rg preceded by the right
    // kinds of operands, replace the command. Flush any additional
    // accumulated tokens to keep the stack only four tokens deep.

    while ((! this->all_stack.empty()) &&
           isIgnorable(this->all_stack.at(0).getType()))
    {
        writeToken(this->all_stack.at(0));
        this->all_stack.pop_front();
    }
    this->all_stack.push_back(token);
    QPDFTokenizer::token_type_e token_type = token.getType();
    if (! isIgnorable(token_type))
    {
        this->stack.push_back(token);
        if ((this->stack.size() == 4) &&
            (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) &&
            (isNumeric(this->stack.at(0).getType())) &&
            (isNumeric(this->stack.at(1).getType())) &&
            (isNumeric(this->stack.at(2).getType())))
        {
            // The operands of "rg" appear in the order red, green,
            // blue.
            double r = numericValue(this->stack.at(0));
            double g = numericValue(this->stack.at(1));
            double b = numericValue(this->stack.at(2));
            // Standard RGB-to-gray weighting: green contributes the
            // most to perceived brightness and blue the least.
            double gray = ((0.3 * r) + (0.59 * g) + (0.11 * b));
            // Clamp in case the operands were outside [0, 1].
            if (gray > 1.0)
            {
                gray = 1.0;
            }
            if (gray < 0.0)
            {
                gray = 0.0;
            }
            write(QUtil::double_to_string(gray, 3));
            write(" g");
            // The three operands and the operator have been replaced
            // by the text just written, so drop them along with any
            // spaces or comments that were interleaved with them.
            this->stack.clear();
            this->all_stack.clear();
        }
    }
    if (this->stack.size() == 4)
    {
        // No replacement happened. The oldest pending token can no
        // longer be an operand of a future "rg", so emit it. The
        // front of all_stack is that same token because leading
        // ignorable tokens were flushed above.
        writeToken(this->all_stack.at(0));
        this->all_stack.pop_front();
        this->stack.pop_front();
    }
}

void
ColorToGray::handleEOF()
{
    // Flush out any remaining accumulated tokens.
    while (! this->all_stack.empty())
    {
        writeToken(this->all_stack.at(0));
        this->all_stack.pop_front();
    }
    // Everything pending has now been written, so the queue of
    // meaningful tokens must be emptied as well. Otherwise it would
    // still hold up to three tokens that are no longer in all_stack,
    // and if this filter object were run over another stream, those
    // stale tokens could be mistaken for operands of an "rg".
    this->stack.clear();
}

int main(int argc, char* argv[])
{
    whoami = QUtil::getWhoami(argv[0]);

    // For libtool's sake....
    if (strncmp(whoami, "lt-", 3) == 0)
    {
	whoami += 3;
    }

    if (argc != 3)
    {
	usage();
    }
    char const* infilename = argv[1];
    char const* outfilename = argv[2];

    try
    {
	QPDF pdf;
	pdf.processFile(infilename);
        std::vector<QPDFPageObjectHelper> pages =
            QPDFPageDocumentHelper(pdf).getAllPages();
        for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
             iter != pages.end(); ++iter)
        {
            // Attach two token filters to each page of this file.
            // When the file is written, or when the pages' contents
            // are retrieved in any other way, the filters will be
            // applied. See comments on the filters for additional
            // details.
            QPDFPageObjectHelper& page(*iter);
            page.addContentTokenFilter(new StringReverser);
            page.addContentTokenFilter(new ColorToGray);
        }

        QPDFWriter w(pdf, outfilename);
        w.setStaticID(true);    // for testing only
        w.write();
    }
    catch (std::exception& e)
    {
	std::cerr << whoami << ": " << e.what() << std::endl;
	exit(2);
    }

    return 0;
}