// Source: examples/pdf-count-strings.cc (blob 2b5b5573d877dffe7a755da70b6cda2e06f93143)
//
// This example illustrates the use of QPDFObjectHandle::TokenFilter with filterContents. See also
// pdf-filter-tokens.cc for an example that uses QPDFObjectHandle::TokenFilter with
// addContentTokenFilter.
//

#include <cstdlib>
#include <iostream>

#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QUtil.hh>

// Program name used as a prefix in diagnostics. Null until main() sets it from the result of
// QUtil::getWhoami(argv[0]).
static char const* whoami{nullptr};

// Print a two-line usage message to standard error, then terminate the process with exit
// status 2. This function does not return.
void
usage()
{
    std::cerr << "Usage: " << whoami << " infile" << std::endl;
    std::cerr << "Applies token filters to infile" << std::endl;
    std::exit(2);
}

// Token filter that keeps a running tally of the string tokens it is handed. Tokens are passed
// through to the output unchanged, and a trailing comment with the tally is written at EOF.
class StringCounter: public QPDFObjectHandle::TokenFilter
{
  public:
    StringCounter() = default;
    ~StringCounter() override = default;

    // Number of string tokens seen so far.
    int getCount() const;

    // TokenFilter callbacks.
    void handleToken(QPDFTokenizer::Token const& token) override;
    void handleEOF() override;

  private:
    // Running total of string tokens; starts at zero for every new counter.
    int count = 0;
};

// Per-token callback: bump the tally when the token is a string, then forward the token as-is.
void
StringCounter::handleToken(QPDFTokenizer::Token const& token)
{
    bool const is_string = (QPDFTokenizer::tt_string == token.getType());
    if (is_string) {
        this->count += 1;
    }
    // Every token, string or not, is written back out so the filtered output mirrors the input.
    writeToken(token);
}

// End-of-stream callback: append a comment reporting how many strings were counted. This only
// demonstrates that a filter may add output of its own.
void
StringCounter::handleEOF()
{
    char const* const label = "\n% strings found: ";
    auto const total = std::to_string(this->count);
    // Emitted as two separate writes: the fixed label first, then the number.
    write(label);
    write(total);
}

int
StringCounter::getCount() const
{
    return this->count;
}

// Entry point. Expects exactly one argument, the input PDF. Each page's contents are run through
// a fresh StringCounter and the per-page string count is printed to standard output. Returns 0 on
// success; exits with status 2 on a usage error or if an exception is caught.
int
main(int argc, char* argv[])
{
    whoami = QUtil::getWhoami(argv[0]);

    // usage() terminates the process, so execution only continues with a single argument.
    if (argc != 2) {
        usage();
    }

    try {
        QPDF pdf;
        pdf.processFile(argv[1]);

        int pageno = 0;
        for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) {
            pageno += 1;
            // A new counter per page, so the reported count is per page rather than cumulative.
            StringCounter counter;
            // Even pages have their filtered contents captured and echoed; odd pages are only
            // counted. This shows that filter output may be kept or discarded.
            bool const capture_output = (pageno % 2 == 0);
            if (capture_output) {
                Pl_StdioFile out("stdout", stdout);
                std::cout << "% Contents of page " << pageno << std::endl;
                page.filterContents(&counter, &out);
                std::cout << "\n% end " << pageno << std::endl;
            } else {
                page.filterContents(&counter);
            }
            std::cout << "Page " << pageno << ": strings = " << counter.getCount() << std::endl;
        }
    } catch (std::exception const& e) {
        std::cerr << whoami << ": " << e.what() << std::endl;
        std::exit(2);
    }

    return 0;
}