summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2014-06-07 19:04:30 +0200
committerJay Berkenbilt <ejb@ql.org>2014-06-07 22:40:38 +0200
commit0b2e9cb16886d422e6645a5d65c74a340ae6deff (patch)
tree6942cf401c7c4a3b2746c5c463a6291976d51217
parent9f8aba1db7f101e424cbc2c742abe92868cc4bff (diff)
downloadqpdf-0b2e9cb16886d422e6645a5d65c74a340ae6deff.tar.zst
Example: fast split into single pages
This is faster than using qpdf --pages to do it.
-rw-r--r--ChangeLog3
-rw-r--r--examples/build.mk3
-rw-r--r--examples/pdf-split-pages.cc77
-rw-r--r--examples/qtest/pdf-split-pages.test33
-rw-r--r--examples/qtest/pdf-split-pages/exp1.pdf40
-rw-r--r--examples/qtest/pdf-split-pages/exp2.pdf40
-rw-r--r--examples/qtest/pdf-split-pages/in.pdfbin0 -> 1287 bytes
7 files changed, 195 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index a83108cb..6e3caa48 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2014-06-07 Jay Berkenbilt <ejb@ql.org>
+ * New example program: pdf-split-pages: efficiently split PDF
+ files into individual pages.
+
* Bug fix: don't fail on files that contain streams where /Filter
or /DecodeParms references a stream. Before, qpdf would try to
convert these to direct objects, which would fail because of the
diff --git a/examples/build.mk b/examples/build.mk
index bcb4440e..bd2119a5 100644
--- a/examples/build.mk
+++ b/examples/build.mk
@@ -5,7 +5,8 @@ BINS_examples = \
pdf-double-page-size \
pdf-invert-images \
pdf-create \
- pdf-parse-content
+ pdf-parse-content \
+ pdf-split-pages
CBINS_examples = pdf-linearize
TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
diff --git a/examples/pdf-split-pages.cc b/examples/pdf-split-pages.cc
new file mode 100644
index 00000000..0a70ed8b
--- /dev/null
+++ b/examples/pdf-split-pages.cc
@@ -0,0 +1,77 @@
+//
+// This is a stand-alone example of splitting a PDF into individual
+// pages. It is much faster than using the qpdf command-line tool to
+// split into separate files per page.
+//
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFWriter.hh>
+#include <qpdf/QUtil.hh>
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <iostream>
+#include <string>
+#include <vector>
+
+static bool static_id = false;
+
+// Split the PDF named by infile into one output file per page.
+//
+// whoami:    program name; accepted for symmetry with main() but not
+//            used inside this function.
+// infile:    path of the PDF to read.
+// outprefix: leading part of every output file name; the page number
+//            and ".pdf" are appended to it.
+//
+// Page numbers start at 1 and are formatted by QUtil::int_to_string
+// with a length equal to the number of characters in the total page
+// count (presumably so that file names sort in page order -- confirm
+// against QUtil's documentation).
+static void process(char const* whoami,
+                    char const* infile,
+                    std::string outprefix)
+{
+    QPDF source;
+    source.processFile(infile);
+
+    std::vector<QPDFObjectHandle> const& all_pages = source.getAllPages();
+    int digits = QUtil::int_to_string(all_pages.size()).length();
+
+    int number = 0;
+    for (size_t idx = 0; idx < all_pages.size(); ++idx)
+    {
+        QPDFObjectHandle current = all_pages[idx];
+
+        ++number;
+        std::string target = outprefix;
+        target += QUtil::int_to_string(number, digits);
+        target += ".pdf";
+
+        // Each output starts as an empty document that receives
+        // exactly one page from the input.
+        QPDF single;
+        single.emptyPDF();
+        single.addPage(current, false);
+
+        QPDFWriter writer(single, target.c_str());
+        if (static_id)
+        {
+            // Test-suite mode: static IDs and uncompressed streams
+            // so the output can be compared with stored files.
+            writer.setStaticID(true);
+            writer.setStreamDataMode(qpdf_s_uncompress);
+        }
+        writer.write();
+    }
+}
+
+// Program entry point.
+//
+// Usage: pdf-split-pages infile outprefix
+//
+// An optional first argument of " --static-id" switches on the
+// test-suite output mode (see static_id).
+//
+// Returns 0 on success and 2 if the arguments are wrong or if
+// process() throws a std::exception.
+int main(int argc, char* argv[])
+{
+    char* whoami = QUtil::getWhoami(argv[0]);
+
+    // For libtool's sake....
+    if (strncmp(whoami, "lt-", 3) == 0)
+    {
+        whoami += 3;
+    }
+    // For test suite. The option is compared including its leading
+    // space, which is how the qtest script passes it.
+    if ((argc > 1) && (strcmp(argv[1], " --static-id") == 0))
+    {
+        static_id = true;
+        --argc;
+        ++argv;
+    }
+
+    if (argc != 3)
+    {
+        std::cerr << "Usage: " << whoami << " infile outprefix" << std::endl;
+        // Do not fall through: argv[1] and argv[2] are not both valid
+        // here, so passing them to process() would be an error.
+        return 2;
+    }
+    try
+    {
+        process(whoami, argv[1], argv[2]);
+    }
+    // Catch by const reference so that what() reports the message of
+    // the exception actually thrown instead of a sliced copy.
+    catch (std::exception const& e)
+    {
+        std::cerr << whoami << ": exception: " << e.what() << std::endl;
+        return 2;
+    }
+    return 0;
+}
diff --git a/examples/qtest/pdf-split-pages.test b/examples/qtest/pdf-split-pages.test
new file mode 100644
index 00000000..ce271b35
--- /dev/null
+++ b/examples/qtest/pdf-split-pages.test
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+require 5.008;
+use warnings;
+use strict;
+
+# The input file and the expected outputs live in this directory.
+chdir("pdf-split-pages");
+
+require TestDriver;
+
+my $td = TestDriver->new('pdf-split-pages');
+
+# Remove any output files left over from a previous run.
+cleanup();
+
+# Split in.pdf into out1.pdf, out2.pdf. The ' --static-id' argument
+# (leading space included) is what the example program checks for.
+$td->runtest("split",
+             {$td->COMMAND => "pdf-split-pages ' --static-id' in.pdf out"},
+             {$td->STRING => "", $td->EXIT_STATUS => 0});
+
+# Compare every generated page with its stored expected file.
+foreach my $n (1, 2)
+{
+    $td->runtest("check page ${n}",
+                 {$td->FILE => "out${n}.pdf"},
+                 {$td->FILE => "exp${n}.pdf"});
+}
+
+cleanup();
+
+# Three tests in total: the split plus the two page comparisons.
+$td->report(3);
+
+# Delete the single-character-numbered output files.
+sub cleanup
+{
+    unlink(glob("out?.pdf"));
+}
diff --git a/examples/qtest/pdf-split-pages/exp1.pdf b/examples/qtest/pdf-split-pages/exp1.pdf
new file mode 100644
index 00000000..fa2225dc
--- /dev/null
+++ b/examples/qtest/pdf-split-pages/exp1.pdf
@@ -0,0 +1,40 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
+endobj
+4 0 obj
+<< /Length 44 >>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Page 1) Tj
+ET
+endstream
+endobj
+5 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+6 0 obj
+[ /PDF /Text ]
+endobj
+xref
+0 7
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+0000000266 00000 n
+0000000359 00000 n
+0000000466 00000 n
+trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
+startxref
+496
+%%EOF
diff --git a/examples/qtest/pdf-split-pages/exp2.pdf b/examples/qtest/pdf-split-pages/exp2.pdf
new file mode 100644
index 00000000..cd76e4de
--- /dev/null
+++ b/examples/qtest/pdf-split-pages/exp2.pdf
@@ -0,0 +1,40 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
+endobj
+4 0 obj
+<< /Length 44 >>
+stream
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Page 2) Tj
+ET
+endstream
+endobj
+5 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+6 0 obj
+[ /PDF /Text ]
+endobj
+xref
+0 7
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+0000000266 00000 n
+0000000359 00000 n
+0000000466 00000 n
+trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
+startxref
+496
+%%EOF
diff --git a/examples/qtest/pdf-split-pages/in.pdf b/examples/qtest/pdf-split-pages/in.pdf
new file mode 100644
index 00000000..4ac18ed6
--- /dev/null
+++ b/examples/qtest/pdf-split-pages/in.pdf
Binary files differ