aboutsummaryrefslogtreecommitdiffstats
path: root/examples/pdf-count-strings.cc
blob: 600042f7e06e365e11a90bc88ab62cbeae6a14e5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
//
// This example illustrates the use of QPDFObjectHandle::TokenFilter
// with filterContents. See also pdf-filter-tokens.cc for an example
// that uses QPDFObjectHandle::TokenFilter with addContentTokenFilter.
//

#include <iostream>
#include <string.h>
#include <stdlib.h>

#include <qpdf/QPDF.hh>
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/Pl_StdioFile.hh>

static char const* whoami = 0;

// Print a two-line usage message to standard error, naming the
// program via whoami, then terminate the process with exit status 2.
void usage()
{
    std::cerr << "Usage: " << whoami << " infile" << std::endl;
    std::cerr << "Applies token filters to infile" << std::endl;
    exit(2);
}

// Token filter that keeps a running tally of the string tokens it is
// handed. The tally starts at zero for each new instance and can be
// read back with getCount().
class StringCounter: public QPDFObjectHandle::TokenFilter
{
  public:
    StringCounter() : count(0) {}
    virtual ~StringCounter() {}

    // Called with each token of the content being filtered.
    virtual void handleToken(QPDFTokenizer::Token const&);
    // Called once after the last token has been delivered.
    virtual void handleEOF();

    // Number of string tokens seen so far.
    int getCount() const;

  private:
    // Running total of string tokens.
    int count;
};

// Token callback: bump the tally when the token is a string, then
// forward the token unchanged to any downstream filter so that the
// input is preserved verbatim.
void
StringCounter::handleToken(QPDFTokenizer::Token const& token)
{
    bool const is_string = (token.getType() == QPDFTokenizer::tt_string);
    if (is_string)
    {
        this->count += 1;
    }
    writeToken(token);
}

// End-of-stream callback: append a trailing comment carrying the
// final tally, demonstrating that a filter may add to its output.
void
StringCounter::handleEOF()
{
    std::string const total = QUtil::int_to_string(this->count);
    write("\n% strings found: ");
    write(total);
}

int
StringCounter::getCount() const
{
    return this->count;
}

int main(int argc, char* argv[])
{
    whoami = QUtil::getWhoami(argv[0]);

    // For libtool's sake....
    if (strncmp(whoami, "lt-", 3) == 0)
    {
        whoami += 3;
    }

    if (argc != 2)
    {
        usage();
    }
    char const* infilename = argv[1];

    try
    {
        QPDF pdf;
        pdf.processFile(infilename);
        std::vector<QPDFPageObjectHelper> pages =
            QPDFPageDocumentHelper(pdf).getAllPages();
        int pageno = 0;
        for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
             iter != pages.end(); ++iter)
        {
            QPDFPageObjectHelper& ph(*iter);
            ++pageno;
            // Pass the contents of a page through our string counter.
            // If it's an even page, capture the output. This
            // illustrates that you may capture any output generated
            // by the filter, or you may ignore it.
            StringCounter counter;
            if (pageno % 2)
            {
                // Ignore output for odd pages.
                ph.filterContents(&counter);
            }
            else
            {
                // Write output to stdout for even pages.
                Pl_StdioFile out("stdout", stdout);
                std::cout << "% Contents of page " << pageno << std::endl;
                ph.filterContents(&counter, &out);
                std::cout << "\n% end " << pageno << std::endl;
            }
            std::cout << "Page " << pageno
                      << ": strings = " << counter.getCount() << std::endl;
        }
    }
    catch (std::exception& e)
    {
        std::cerr << whoami << ": " << e.what() << std::endl;
        exit(2);
    }

    return 0;
}