From 0b2e9cb16886d422e6645a5d65c74a340ae6deff Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 7 Jun 2014 13:04:30 -0400 Subject: Example: fast split into single pages This is faster than using qpdf --pages to do it. --- ChangeLog | 3 ++ examples/build.mk | 3 +- examples/pdf-split-pages.cc | 77 ++++++++++++++++++++++++++++++++ examples/qtest/pdf-split-pages.test | 33 ++++++++++++++ examples/qtest/pdf-split-pages/exp1.pdf | 40 +++++++++++++++++ examples/qtest/pdf-split-pages/exp2.pdf | 40 +++++++++++++++++ examples/qtest/pdf-split-pages/in.pdf | Bin 0 -> 1287 bytes 7 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 examples/pdf-split-pages.cc create mode 100644 examples/qtest/pdf-split-pages.test create mode 100644 examples/qtest/pdf-split-pages/exp1.pdf create mode 100644 examples/qtest/pdf-split-pages/exp2.pdf create mode 100644 examples/qtest/pdf-split-pages/in.pdf diff --git a/ChangeLog b/ChangeLog index a83108cb..6e3caa48 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2014-06-07 Jay Berkenbilt + * New example program: pdf-split-pages: efficiently split PDF + files into individual pages. + * Bug fix: don't fail on files that contain streams where /Filter or /DecodeParms references a stream. 
Before, qpdf would try to
 	convert these to direct objects, which would fail because of the
diff --git a/examples/build.mk b/examples/build.mk
index bcb4440e..bd2119a5 100644
--- a/examples/build.mk
+++ b/examples/build.mk
@@ -5,7 +5,8 @@ BINS_examples = \
     pdf-double-page-size \
     pdf-invert-images \
     pdf-create \
-    pdf-parse-content
+    pdf-parse-content \
+    pdf-split-pages
 CBINS_examples = pdf-linearize
 
 TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
diff --git a/examples/pdf-split-pages.cc b/examples/pdf-split-pages.cc
new file mode 100644
index 00000000..0a70ed8b
--- /dev/null
+++ b/examples/pdf-split-pages.cc
@@ -0,0 +1,93 @@
+//
+// This is a stand-alone example of splitting a PDF into individual
+// pages. It is much faster than using the qpdf command-line tool to
+// split into separate files per page.
+//
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFWriter.hh>
+#include <qpdf/QUtil.hh>
+#include <exception>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <string.h>
+
+// Set by the test-suite-only " --static-id" argument; selects static
+// IDs and uncompressed streams in the output files.
+static bool static_id = false;
+
+// Write each page of infile to its own PDF file. Each output file is
+// named outprefix + page number + ".pdf"; the page number is formatted
+// by QUtil::int_to_string, passing the digit count of the total number
+// of pages as the length argument. whoami is currently unused.
+static void process(char const* whoami,
+                    char const* infile,
+                    std::string outprefix)
+{
+    QPDF inpdf;
+    inpdf.processFile(infile);
+    std::vector<QPDFObjectHandle> const& pages = inpdf.getAllPages();
+    int pageno_len = QUtil::int_to_string(pages.size()).length();
+    int pageno = 0;
+    for (std::vector<QPDFObjectHandle>::const_iterator iter = pages.begin();
+         iter != pages.end(); ++iter)
+    {
+        QPDFObjectHandle page = *iter;
+        std::string outfile =
+            outprefix + QUtil::int_to_string(++pageno, pageno_len) + ".pdf";
+        QPDF outpdf;
+        outpdf.emptyPDF();
+        outpdf.addPage(page, false);
+        QPDFWriter outpdfw(outpdf, outfile.c_str());
+        if (static_id)
+        {
+            // For the test suite, uncompress streams and use static
+            // IDs.
+            outpdfw.setStaticID(true);
+            outpdfw.setStreamDataMode(qpdf_s_uncompress);
+        }
+        outpdfw.write();
+    }
+}
+
+// Entry point. Usage: <program> infile outprefix, optionally preceded
+// by the test-suite-only " --static-id" argument. Returns 0 on
+// success and 2 on a usage error or when processing throws.
+int main(int argc, char* argv[])
+{
+    char* whoami = QUtil::getWhoami(argv[0]);
+
+    // For libtool's sake....
+    if (strncmp(whoami, "lt-", 3) == 0)
+    {
+        whoami += 3;
+    }
+    // For test suite
+    if ((argc > 1) && (strcmp(argv[1], " --static-id") == 0))
+    {
+        static_id = true;
+        --argc;
+        ++argv;
+    }
+
+    if (argc != 3)
+    {
+        std::cerr << "Usage: " << whoami << " infile outprefix" << std::endl;
+        // Stop here: without both arguments, falling through to
+        // process() would read missing argv entries.
+        return 2;
+    }
+    try
+    {
+        process(whoami, argv[1], argv[2]);
+    }
+    catch (std::exception const& e)
+    {
+        // Catch by reference so what() reports the message of the
+        // exception that was thrown rather than of a sliced copy.
+        std::cerr << whoami << ": exception: " << e.what() << std::endl;
+        return 2;
+    }
+    return 0;
+}
diff --git a/examples/qtest/pdf-split-pages.test b/examples/qtest/pdf-split-pages.test
new file mode 100644
index 00000000..ce271b35
--- /dev/null
+++ b/examples/qtest/pdf-split-pages.test
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+require 5.008;
+use warnings;
+use strict;
+
+chdir("pdf-split-pages");
+
+require TestDriver;
+
+my $td = new TestDriver('pdf-split-pages');
+
+cleanup();
+
+$td->runtest("split",
+             {$td->COMMAND => "pdf-split-pages ' --static-id' in.pdf out"},
+             {$td->STRING => "", $td->EXIT_STATUS => 0});
+
+$td->runtest("check page 1",
+             {$td->FILE => "out1.pdf"},
+             {$td->FILE => "exp1.pdf"});
+
+$td->runtest("check page 2",
+             {$td->FILE => "out2.pdf"},
+             {$td->FILE => "exp2.pdf"});
+
+cleanup();
+
+$td->report(3);
+
+sub cleanup
+{
+    unlink (<out*.pdf>);
+}
diff --git a/examples/qtest/pdf-split-pages/exp1.pdf b/examples/qtest/pdf-split-pages/exp1.pdf
new file mode 100644
index 00000000..fa2225dc
--- /dev/null
+++ b/examples/qtest/pdf-split-pages/exp1.pdf
@@ -0,0 +1,40 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
+endobj
+4 0 obj
+<< /Length 44 >>
+stream
+BT
+  /F1 24 Tf
+  72 720 Td
+  (Page 1) Tj
+ET
+endstream
+endobj
+5 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+6 0 obj
+[ /PDF /Text ]
+endobj
+xref
+0 7
+0000000000
65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +0000000266 00000 n +0000000359 00000 n +0000000466 00000 n +trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> +startxref +496 +%%EOF diff --git a/examples/qtest/pdf-split-pages/exp2.pdf b/examples/qtest/pdf-split-pages/exp2.pdf new file mode 100644 index 00000000..cd76e4de --- /dev/null +++ b/examples/qtest/pdf-split-pages/exp2.pdf @@ -0,0 +1,40 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >> +endobj +4 0 obj +<< /Length 44 >> +stream +BT + /F1 24 Tf + 72 720 Td + (Page 2) Tj +ET +endstream +endobj +5 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +6 0 obj +[ /PDF /Text ] +endobj +xref +0 7 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +0000000266 00000 n +0000000359 00000 n +0000000466 00000 n +trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> +startxref +496 +%%EOF diff --git a/examples/qtest/pdf-split-pages/in.pdf b/examples/qtest/pdf-split-pages/in.pdf new file mode 100644 index 00000000..4ac18ed6 Binary files /dev/null and b/examples/qtest/pdf-split-pages/in.pdf differ -- cgit v1.2.3-54-g00ecf