aboutsummaryrefslogtreecommitdiffstats
path: root/examples/pdf-count-strings.cc
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2018-02-11 21:41:02 +0100
committer Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:47 +0100
commit 5708b5d0aa9c94ab663509fbb865aa27a134aeb3 (patch)
tree 30a85d51d3d720dfca0a09b9dba4eef0c3fe2bec /examples/pdf-count-strings.cc
parent fd02944e1953931e07f124448350db91038020af (diff)
download qpdf-5708b5d0aa9c94ab663509fbb865aa27a134aeb3.tar.zst
Add additional interface for filtering page contents
Diffstat (limited to 'examples/pdf-count-strings.cc')
-rw-r--r-- examples/pdf-count-strings.cc 131
1 files changed, 131 insertions, 0 deletions
diff --git a/examples/pdf-count-strings.cc b/examples/pdf-count-strings.cc
new file mode 100644
index 00000000..81718298
--- /dev/null
+++ b/examples/pdf-count-strings.cc
@@ -0,0 +1,131 @@
+//
+// This example illustrates the use of QPDFObjectHandle::TokenFilter
+// with filterPageContents. See also pdf-filter-tokens.cc for an
+// example that uses QPDFObjectHandle::TokenFilter with
+// addContentTokenFilter.
+//
+
+#include <iostream>
+#include <string.h>
+#include <stdlib.h>
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/Pl_StdioFile.hh>
+
+static char const* whoami = 0;
+
+// Print a short usage summary to standard error and terminate the
+// process with exit status 2. This function does not return.
+void usage()
+{
+    std::cerr << "Usage: " << whoami << " infile" << std::endl;
+    std::cerr << "Applies token filters to infile" << std::endl;
+    exit(2);
+}
+
+// Token filter that keeps a tally of the string tokens it sees.
+// handleToken, handleEOF and getCount are defined out of line.
+class StringCounter: public QPDFObjectHandle::TokenFilter
+{
+  public:
+    // The tally starts at zero.
+    StringCounter() : count(0) {}
+    virtual ~StringCounter() {}
+
+    // Per-token callback.
+    virtual void handleToken(QPDFTokenizer::Token const& token);
+    // End-of-input callback.
+    virtual void handleEOF();
+    // Current value of the tally.
+    int getCount() const;
+
+  private:
+    // Number of string tokens counted so far.
+    int count;
+};
+
+// Tally the token when it is a string, then forward it unchanged.
+void
+StringCounter::handleToken(QPDFTokenizer::Token const& token)
+{
+    bool const is_string =
+        (token.getType() == QPDFTokenizer::tt_string);
+    if (is_string)
+    {
+        this->count += 1;
+    }
+    // Every token, string or not, goes to writeToken so that the
+    // input is preserved verbatim for any specified downstream filter.
+    writeToken(token);
+}
+
+// At end of input, append a trailing comment carrying the string
+// tally, then finish the filter.
+void
+StringCounter::handleEOF()
+{
+    std::string const total = QUtil::int_to_string(this->count);
+    // The comment is here only to demonstrate that a filter can add
+    // to the output if it wants to.
+    write("\n% strings found: ");
+    write(total);
+    // An overridden handleEOF must always remember to call finish().
+    finish();
+}
+
+// Report how many string tokens this filter has counted so far.
+int
+StringCounter::getCount() const
+{
+    return count;
+}
+
+// Entry point. Expects exactly one argument, the input PDF file. Each
+// page's contents are passed through a fresh StringCounter and the
+// per-page string count is printed; for even-numbered pages the
+// filter's output is also written to stdout. The process exits with
+// status 2 on bad usage or when a std::exception is caught.
+int main(int argc, char* argv[])
+{
+    whoami = QUtil::getWhoami(argv[0]);
+
+    // For libtool's sake: skip a leading "lt-" in the program name.
+    if (0 == strncmp(whoami, "lt-", 3))
+    {
+        whoami += 3;
+    }
+
+    if (argc != 2)
+    {
+        usage();
+    }
+
+    try
+    {
+        QPDF pdf;
+        pdf.processFile(argv[1]);
+        std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+        for (size_t idx = 0; idx < pages.size(); ++idx)
+        {
+            QPDFObjectHandle page = pages[idx];
+            // Page numbers reported to the user are 1-based.
+            int const pageno = static_cast<int>(idx) + 1;
+
+            // Run the page's contents through a string counter. The
+            // output the filter generates may be captured or ignored;
+            // it is captured for even pages only, to show both.
+            StringCounter counter;
+            bool const even_page = ((pageno % 2) == 0);
+            if (even_page)
+            {
+                // Even page: send the filtered content to stdout.
+                Pl_StdioFile out("stdout", stdout);
+                std::cout << "% Contents of page " << pageno << std::endl;
+                page.filterPageContents(&counter, &out);
+                std::cout << "\n% end " << pageno << std::endl;
+            }
+            else
+            {
+                // Odd page: count only, discarding the filter output.
+                page.filterPageContents(&counter);
+            }
+            std::cout << "Page " << pageno
+                      << ": strings = " << counter.getCount() << std::endl;
+        }
+    }
+    catch (std::exception& err)
+    {
+        std::cerr << whoami << ": " << err.what() << std::endl;
+        exit(2);
+    }
+
+    return 0;
+}