aboutsummaryrefslogtreecommitdiffstats
path: root/examples
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2018-02-11 21:41:02 +0100
committer Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:47 +0100
commit 5708b5d0aa9c94ab663509fbb865aa27a134aeb3 (patch)
tree 30a85d51d3d720dfca0a09b9dba4eef0c3fe2bec /examples
parent fd02944e1953931e07f124448350db91038020af (diff)
download qpdf-5708b5d0aa9c94ab663509fbb865aa27a134aeb3.tar.zst
Add additional interface for filtering page contents
Diffstat (limited to 'examples')
-rw-r--r-- examples/build.mk 3
-rw-r--r-- examples/pdf-count-strings.cc 131
-rw-r--r-- examples/pdf-filter-tokens.cc 6
-rw-r--r-- examples/qtest/count-strings.test 17
-rw-r--r-- examples/qtest/count-strings/in.pdf bin 0 -> 1348 bytes
-rw-r--r-- examples/qtest/count-strings/out 16
6 files changed, 170 insertions, 3 deletions
diff --git a/examples/build.mk b/examples/build.mk
index f5b44669..b5748c11 100644
--- a/examples/build.mk
+++ b/examples/build.mk
@@ -7,7 +7,8 @@ BINS_examples = \
pdf-create \
pdf-parse-content \
pdf-split-pages \
- pdf-filter-tokens
+ pdf-filter-tokens \
+ pdf-count-strings
CBINS_examples = pdf-linearize
TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
diff --git a/examples/pdf-count-strings.cc b/examples/pdf-count-strings.cc
new file mode 100644
index 00000000..81718298
--- /dev/null
+++ b/examples/pdf-count-strings.cc
@@ -0,0 +1,131 @@
+//
+// This example illustrates the use of QPDFObjectHandle::TokenFilter
+// with filterPageContents. See also pdf-filter-tokens.cc for an
+// example that uses QPDFObjectHandle::TokenFilter with
+// addContentTokenFilter.
+//
+
+#include <iostream>
+#include <string.h>
+#include <stdlib.h>
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/Pl_StdioFile.hh>
+
+static char const* whoami = 0;
+
+void usage()
+{
+ std::cerr << "Usage: " << whoami << " infile" << std::endl
+ << "Applies token filters to infile"
+ << std::endl;
+ exit(2);
+}
+
+// Token filter that keeps a tally of the tokens of type tt_string
+// handed to handleToken(). The running total is available through
+// getCount().
+class StringCounter: public QPDFObjectHandle::TokenFilter
+{
+  public:
+    // Start with nothing counted.
+    StringCounter() : count(0) {}
+    virtual ~StringCounter() {}
+
+    virtual void handleToken(QPDFTokenizer::Token const&);
+    virtual void handleEOF();
+
+    // Number of string tokens counted so far.
+    int getCount() const;
+
+  private:
+    int count;
+};
+
+// Count the token if it is a string, then forward it unchanged.
+void
+StringCounter::handleToken(QPDFTokenizer::Token const& token)
+{
+    bool const is_string =
+        (token.getType() == QPDFTokenizer::tt_string);
+    if (is_string)
+    {
+        this->count += 1;
+    }
+    // Every token, string or not, is passed along so that the input
+    // is preserved verbatim for any downstream filter.
+    writeToken(token);
+}
+
+// Append a trailing comment carrying the string count, then finish.
+void
+StringCounter::handleEOF()
+{
+    // The comment at the end of the stream is only there to show
+    // that a filter may add to the output if it wants to.
+    std::string const label("\n% strings found: ");
+    std::string const total = QUtil::int_to_string(this->count);
+    write(label);
+    write(total);
+    // An overridden handleEOF must always call finish().
+    finish();
+}
+
+// Return the number of string tokens counted so far.
+int
+StringCounter::getCount() const
+{
+    return count;
+}
+
+// Entry point: run a StringCounter over the contents of every page
+// of the input file and report the number of strings on each page.
+int main(int argc, char* argv[])
+{
+    whoami = QUtil::getWhoami(argv[0]);
+
+    // For libtool's sake: skip a leading "lt-" in the program name.
+    if (strncmp(whoami, "lt-", 3) == 0)
+    {
+        whoami += 3;
+    }
+
+    // Exactly one argument, the input file name, is accepted.
+    if (argc != 2)
+    {
+        usage();
+    }
+    char const* infilename = argv[1];
+
+    try
+    {
+        QPDF pdf;
+        pdf.processFile(infilename);
+        std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+        for (size_t idx = 0; idx < pages.size(); ++idx)
+        {
+            QPDFObjectHandle page = pages[idx];
+            // Pages are reported with 1-based numbers.
+            int const pageno = static_cast<int>(idx) + 1;
+            // Each page gets a fresh counter. The filter's output is
+            // captured for even pages and ignored for odd pages,
+            // which illustrates that either choice is possible.
+            StringCounter counter;
+            bool const even_page = ((pageno % 2) == 0);
+            if (even_page)
+            {
+                // Even page: send the filtered contents to stdout.
+                Pl_StdioFile out("stdout", stdout);
+                std::cout << "% Contents of page " << pageno << std::endl;
+                page.filterPageContents(&counter, &out);
+                std::cout << "\n% end " << pageno << std::endl;
+            }
+            else
+            {
+                // Odd page: count only, discard the output.
+                page.filterPageContents(&counter);
+            }
+            std::cout << "Page " << pageno
+                      << ": strings = " << counter.getCount() << std::endl;
+        }
+    }
+    catch (std::exception const& e)
+    {
+        std::cerr << whoami << ": " << e.what() << std::endl;
+        exit(2);
+    }
+
+    return 0;
+}
diff --git a/examples/pdf-filter-tokens.cc b/examples/pdf-filter-tokens.cc
index 2566f72c..809c160b 100644
--- a/examples/pdf-filter-tokens.cc
+++ b/examples/pdf-filter-tokens.cc
@@ -1,6 +1,8 @@
//
-// This example illustrates the use of QPDFObjectHandle::TokenFilter.
-// Please see comments inline for details.
+// This example illustrates the use of QPDFObjectHandle::TokenFilter
+// with addContentTokenFilter. Please see comments inline for details.
+// See also pdf-count-strings.cc for a use of
+// QPDFObjectHandle::TokenFilter with filterPageContents.
//
#include <iostream>
diff --git a/examples/qtest/count-strings.test b/examples/qtest/count-strings.test
new file mode 100644
index 00000000..ba3f835b
--- /dev/null
+++ b/examples/qtest/count-strings.test
@@ -0,0 +1,17 @@
+#!/usr/bin/env perl
+require 5.008;
+BEGIN { $^W = 1; }
+use strict;
+
+# Run from the directory holding this test's input (in.pdf) and
+# expected output (out). Stop immediately if it cannot be entered;
+# otherwise the test below would fail for a misleading reason.
+chdir("count-strings") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = new TestDriver('pdf-count-strings');
+
+# Run the example on in.pdf and compare what it prints with the file
+# "out", expecting exit status 0.
+$td->runtest("filter tokens",
+	     {$td->COMMAND => "pdf-count-strings in.pdf"},
+	     {$td->FILE => "out", $td->EXIT_STATUS => 0},
+	     $td->NORMALIZE_NEWLINES);
+
+$td->report(1);
diff --git a/examples/qtest/count-strings/in.pdf b/examples/qtest/count-strings/in.pdf
new file mode 100644
index 00000000..591614c4
--- /dev/null
+++ b/examples/qtest/count-strings/in.pdf
Binary files differ
diff --git a/examples/qtest/count-strings/out b/examples/qtest/count-strings/out
new file mode 100644
index 00000000..87b024fc
--- /dev/null
+++ b/examples/qtest/count-strings/out
@@ -0,0 +1,16 @@
+Page 1: strings = 3
+% Contents of page 2
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Four ) Tj
+ (Five ) Tj
+ (Six )
+ (beautiful ) Tj
+ (strings) Tj
+ (!) Tj
+ET
+
+% strings found: 6
+% end 2
+Page 2: strings = 6