diff options
Diffstat (limited to 'examples')
-rw-r--r-- | examples/build.mk | 3 | ||||
-rw-r--r-- | examples/pdf-count-strings.cc | 131 | ||||
-rw-r--r-- | examples/pdf-filter-tokens.cc | 6 | ||||
-rw-r--r-- | examples/qtest/count-strings.test | 17 | ||||
-rw-r--r-- | examples/qtest/count-strings/in.pdf | bin | 0 -> 1348 bytes | |||
-rw-r--r-- | examples/qtest/count-strings/out | 16 |
6 files changed, 170 insertions, 3 deletions
diff --git a/examples/build.mk b/examples/build.mk index f5b44669..b5748c11 100644 --- a/examples/build.mk +++ b/examples/build.mk @@ -7,7 +7,8 @@ BINS_examples = \ pdf-create \ pdf-parse-content \ pdf-split-pages \ - pdf-filter-tokens + pdf-filter-tokens \ + pdf-count-strings CBINS_examples = pdf-linearize TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B))) diff --git a/examples/pdf-count-strings.cc b/examples/pdf-count-strings.cc new file mode 100644 index 00000000..81718298 --- /dev/null +++ b/examples/pdf-count-strings.cc @@ -0,0 +1,131 @@ +// +// This example illustrates the use of QPDFObjectHandle::TokenFilter +// with filterPageContents. See also pdf-filter-tokens.cc for an +// example that uses QPDFObjectHandle::TokenFilter with +// addContentTokenFilter. +// + +#include <iostream> +#include <string.h> +#include <stdlib.h> + +#include <qpdf/QPDF.hh> +#include <qpdf/QUtil.hh> +#include <qpdf/QPDFObjectHandle.hh> +#include <qpdf/Pl_StdioFile.hh> + +static char const* whoami = 0; + +void usage() +{ + std::cerr << "Usage: " << whoami << " infile" << std::endl + << "Applies token filters to infile" + << std::endl; + exit(2); +} + +class StringCounter: public QPDFObjectHandle::TokenFilter +{ + public: + StringCounter() : + count(0) + { + } + virtual ~StringCounter() + { + } + virtual void handleToken(QPDFTokenizer::Token const&); + virtual void handleEOF(); + int getCount() const; + + private: + int count; +}; + +void +StringCounter::handleToken(QPDFTokenizer::Token const& token) +{ + // Count string tokens + if (token.getType() == QPDFTokenizer::tt_string) + { + ++this->count; + } + // Preserve input verbatim by passing each token to any specified + // downstream filter. + writeToken(token); +} + +void +StringCounter::handleEOF() +{ + // Write a comment at the end of the stream just to show how we + // can enhance the output if we want. 
+ write("\n% strings found: "); + write(QUtil::int_to_string(this->count)); + // If you override handleEOF, you must always remember to call finish(). + finish(); +} + +int +StringCounter::getCount() const +{ + return this->count; +} + +int main(int argc, char* argv[]) +{ + whoami = QUtil::getWhoami(argv[0]); + + // For libtool's sake.... + if (strncmp(whoami, "lt-", 3) == 0) + { + whoami += 3; + } + + if (argc != 2) + { + usage(); + } + char const* infilename = argv[1]; + + try + { + QPDF pdf; + pdf.processFile(infilename); + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); + int pageno = 0; + for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFObjectHandle page = *iter; + ++pageno; + // Pass the contents of a page through our string counter. + // If it's an even page, capture the output. This + // illustrates that you may capture any output generated + // by the filter, or you may ignore it. + StringCounter counter; + if (pageno % 2) + { + // Ignore output for odd pages. + page.filterPageContents(&counter); + } + else + { + // Write output to stdout for even pages. + Pl_StdioFile out("stdout", stdout); + std::cout << "% Contents of page " << pageno << std::endl; + page.filterPageContents(&counter, &out); + std::cout << "\n% end " << pageno << std::endl; + } + std::cout << "Page " << pageno + << ": strings = " << counter.getCount() << std::endl; + } + } + catch (std::exception& e) + { + std::cerr << whoami << ": " << e.what() << std::endl; + exit(2); + } + + return 0; +} diff --git a/examples/pdf-filter-tokens.cc b/examples/pdf-filter-tokens.cc index 2566f72c..809c160b 100644 --- a/examples/pdf-filter-tokens.cc +++ b/examples/pdf-filter-tokens.cc @@ -1,6 +1,8 @@ // -// This example illustrates the use of QPDFObjectHandle::TokenFilter. -// Please see comments inline for details. +// This example illustrates the use of QPDFObjectHandle::TokenFilter +// with addContentTokenFilter. 
Please see comments inline for details. +// See also pdf-count-strings.cc for a use of +// QPDFObjectHandle::TokenFilter with filterPageContents. // #include <iostream> diff --git a/examples/qtest/count-strings.test b/examples/qtest/count-strings.test new file mode 100644 index 00000000..ba3f835b --- /dev/null +++ b/examples/qtest/count-strings.test @@ -0,0 +1,17 @@ +#!/usr/bin/env perl +require 5.008; +BEGIN { $^W = 1; } +use strict; + +chdir("count-strings"); + +require TestDriver; + +my $td = new TestDriver('pdf-count-strings'); + +$td->runtest("filter tokens", + {$td->COMMAND => "pdf-count-strings in.pdf"}, + {$td->FILE => "out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + +$td->report(1); diff --git a/examples/qtest/count-strings/in.pdf b/examples/qtest/count-strings/in.pdf Binary files differ new file mode 100644 index 00000000..591614c4 --- /dev/null +++ b/examples/qtest/count-strings/in.pdf diff --git a/examples/qtest/count-strings/out b/examples/qtest/count-strings/out new file mode 100644 index 00000000..87b024fc --- /dev/null +++ b/examples/qtest/count-strings/out @@ -0,0 +1,16 @@ +Page 1: strings = 3 +% Contents of page 2 +BT + /F1 24 Tf + 72 720 Td + (Four ) Tj + (Five ) Tj + (Six ) + (beautiful ) Tj + (strings) Tj + (!) Tj +ET + +% strings found: 6 +% end 2 +Page 2: strings = 6 |