From 5708b5d0aa9c94ab663509fbb865aa27a134aeb3 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 11 Feb 2018 15:41:02 -0500 Subject: Add additional interface for filtering page contents --- examples/pdf-count-strings.cc | 131 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 examples/pdf-count-strings.cc (limited to 'examples/pdf-count-strings.cc') diff --git a/examples/pdf-count-strings.cc b/examples/pdf-count-strings.cc new file mode 100644 index 00000000..81718298 --- /dev/null +++ b/examples/pdf-count-strings.cc @@ -0,0 +1,131 @@ +// +// This example illustrates the use of QPDFObjectHandle::TokenFilter +// with filterPageContents. See also pdf-filter-tokens.cc for an +// example that uses QPDFObjectHandle::TokenFilter with +// addContentTokenFilter. +// + +#include +#include +#include + +#include +#include +#include +#include + +static char const* whoami = 0; + +void usage() +{ + std::cerr << "Usage: " << whoami << " infile" << std::endl + << "Applies token filters to infile" + << std::endl; + exit(2); +} + +class StringCounter: public QPDFObjectHandle::TokenFilter +{ + public: + StringCounter() : + count(0) + { + } + virtual ~StringCounter() + { + } + virtual void handleToken(QPDFTokenizer::Token const&); + virtual void handleEOF(); + int getCount() const; + + private: + int count; +}; + +void +StringCounter::handleToken(QPDFTokenizer::Token const& token) +{ + // Count string tokens + if (token.getType() == QPDFTokenizer::tt_string) + { + ++this->count; + } + // Preserve input verbatim by passing each token to any specified + // downstream filter. + writeToken(token); +} + +void +StringCounter::handleEOF() +{ + // Write a comment at the end of the stream just to show how we + // can enhance the output if we want. + write("\n% strings found: "); + write(QUtil::int_to_string(this->count)); + // If you override handleEOF, you must always remember to call finish(). + finish(); +} + +int +StringCounter::getCount() const +{ + return this->count; +} + +int main(int argc, char* argv[]) +{ + whoami = QUtil::getWhoami(argv[0]); + + // For libtool's sake.... + if (strncmp(whoami, "lt-", 3) == 0) + { + whoami += 3; + } + + if (argc != 2) + { + usage(); + } + char const* infilename = argv[1]; + + try + { + QPDF pdf; + pdf.processFile(infilename); + std::vector pages = pdf.getAllPages(); + int pageno = 0; + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFObjectHandle page = *iter; + ++pageno; + // Pass the contents of a page through our string counter. + // If it's an even page, capture the output. This + // illustrates that you may capture any output generated + // by the filter, or you may ignore it. + StringCounter counter; + if (pageno % 2) + { + // Ignore output for odd pages. + page.filterPageContents(&counter); + } + else + { + // Write output to stdout for even pages. + Pl_StdioFile out("stdout", stdout); + std::cout << "% Contents of page " << pageno << std::endl; + page.filterPageContents(&counter, &out); + std::cout << "\n% end " << pageno << std::endl; + } + std::cout << "Page " << pageno + << ": strings = " << counter.getCount() << std::endl; + } + } + catch (std::exception& e) + { + std::cerr << whoami << ": " << e.what() << std::endl; + exit(2); + } + + return 0; +} -- cgit v1.2.3-54-g00ecf