From a0768e419064b66ea6eb3e06a4398806b24311e8 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 21 Jun 2012 23:06:48 -0400 Subject: Add QPDF::emptyPDF() and pdf_from_scratch test code --- ChangeLog | 4 ++ TODO | 31 ++++++++- include/qpdf/QPDF.hh | 10 +++ libqpdf/QPDF.cc | 24 +++++++ libqpdf/QPDF_optimization.cc | 2 +- libqpdf/QPDF_pages.cc | 2 +- qpdf/build.mk | 2 +- qpdf/pdf_from_scratch.cc | 137 +++++++++++++++++++++++++++++++++++++ qpdf/qtest/qpdf.test | 11 +++ qpdf/qtest/qpdf/from-scratch-0.pdf | 36 ++++++++++ 10 files changed, 254 insertions(+), 5 deletions(-) create mode 100644 qpdf/pdf_from_scratch.cc create mode 100644 qpdf/qtest/qpdf/from-scratch-0.pdf diff --git a/ChangeLog b/ChangeLog index 12609a21..5be2c8ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2012-06-21 Jay Berkenbilt + * Add QPDF::emptyPDF() to create an empty QPDF object suitable for + adding pages and other objects to. pdf_from_scratch.cc is test + code that exercises it. + * make/libtool.mk: Place user-specified CPPFLAGS and LDFLAGS later in the compilation so that if a user installs things in a non-standard place that they have to tell the build about, earlier diff --git a/TODO b/TODO index 6a20fa15..7f997b52 100644 --- a/TODO +++ b/TODO @@ -13,6 +13,35 @@ Next - update README-windows.txt docs to indicate that MSVC 2010 is the supported version and to update the information about mingw. + * Testing for files > 4GB + + - Create a PDF from scratch. Each page has a page number as text + and an image. The image can be 5000x5000 pixels using 8-bit + gray scale. It will be divided into 10 stripes of 500 pixels + each. The left and right 500 pixels of each stripe will + alternate black and white. The remaining part of the image will + have white stripes indicating 1 and black stripes indicating 0 + with the most-significant bit on top to indicate the page + number. In this way, every page will be unique and will consume + approximately 25 megabytes. 
Creating 200 pages like this will + make a file that is 5 GB. + + - The file will have to have object streams since a regular xref + table won't be able to support offsets that large. + + - A separate test program can create this file and do various + manipulations on it. This can be enabled with an environment + variable controlled by configure in much the same way image + comparison tests are enabled now. The argument to + --enable-large-file-test should be a path that has enough disk + space to do the tests, probably enough space for two copies of + the file. The test program should also have an interactive mode + so we can generate the large file and then look at it with a + PDF viewer like Adobe Reader. + + * Consider adding an example that uses the page APIs, or update the + documentation to refer the user to the test suite. + Soon ==== @@ -24,8 +53,6 @@ Soon * See if I can support the new encryption formats mentioned in the open bug on sourceforge. Check other sourceforge bugs. - * Would be nice to confirm that it's working for > 4GB files. - * Splitting/merging concepts newPDF() could create a PDF with just a trailer, no pages, and a diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 6a910e58..ef958f8e 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -69,6 +69,16 @@ class QPDF char const* buf, size_t length, char const* password = 0); + // Create a QPDF object for an empty PDF. This PDF has no pages + // or objects other than a minimal trailer, a document catalog, + // and a /Pages tree containing zero pages. Pages and other + // objects can be added to the file in the normal way, and the + // trailer and document catalog can be mutated. Calling this + // method is equivalent to calling processFile on an equivalent + // PDF file. 
+ QPDF_DLL + void emptyPDF(); + // Parameter settings // By default, warning messages are issued to std::cerr and output diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index e97da295..b0a8741c 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -17,6 +17,24 @@ std::string QPDF::qpdf_version = "2.3.1"; +static char const* EMPTY_PDF = + "%PDF-1.3\n" + "1 0 obj\n" + "<< /Type /Catalog /Pages 2 0 R >>\n" + "endobj\n" + "2 0 obj\n" + "<< /Type /Pages /Kids [] /Count 0 >>\n" + "endobj\n" + "xref\n" + "0 3\n" + "0000000000 65535 f \n" + "0000000009 00000 n \n" + "0000000058 00000 n \n" + "trailer << /Size 3 /Root 1 0 R >>\n" + "startxref\n" + "110\n" + "%%EOF\n"; + void QPDF::InputSource::setLastOffset(qpdf_offset_t offset) { @@ -349,6 +367,12 @@ QPDF::processMemoryFile(char const* description, parse(password); } +void +QPDF::emptyPDF() +{ + processMemoryFile("empty file", EMPTY_PDF, strlen(EMPTY_PDF)); +} + void QPDF::setIgnoreXRefStreams(bool val) { diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index 10b56b60..f2d33863 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -365,7 +365,7 @@ QPDF::optimizePagesTreeInternal( throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), this->last_object_description, this->file->getLastOffset(), - "invalid Type in page tree"); + "invalid Type " + type + " in page tree"); } } diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 2b8a831e..89f5f195 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -73,7 +73,7 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), this->last_object_description, this->file->getLastOffset(), - ": invalid Type in page tree"); + "invalid Type " + type + " in page tree"); } } diff --git a/qpdf/build.mk b/qpdf/build.mk index 7bfed548..dfe169ab 100644 --- a/qpdf/build.mk +++ b/qpdf/build.mk @@ -1,4 +1,4 @@ -BINS_qpdf = qpdf test_driver +BINS_qpdf = qpdf test_driver 
pdf_from_scratch CBINS_qpdf = qpdf-ctest TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B))) diff --git a/qpdf/pdf_from_scratch.cc b/qpdf/pdf_from_scratch.cc new file mode 100644 index 00000000..22d1999e --- /dev/null +++ b/qpdf/pdf_from_scratch.cc @@ -0,0 +1,137 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static char const* whoami = 0; + +void usage() +{ + std::cerr << "Usage: " << whoami << " n" << std::endl; + exit(2); +} + +static QPDFObjectHandle createPageContents(QPDF& pdf, std::string const& text) +{ + std::string contents = "BT /F1 15 Tf 72 720 Td (" + text + ") Tj ET\n"; + PointerHolder b = new Buffer(contents.length()); + unsigned char* bp = b->getBuffer(); + memcpy(bp, (char*)contents.c_str(), contents.length()); + return QPDFObjectHandle::newStream(&pdf, b); +} + +QPDFObjectHandle newName(std::string const& name) +{ + return QPDFObjectHandle::newName(name); +} + +void runtest(int n) +{ + QPDF pdf; + pdf.emptyPDF(); + if (n == 0) + { + // Create a minimal PDF from scratch. 
+ + std::map keys; + std::vector items; + + keys.clear(); + keys["/Type"] = newName("/Font"); + keys["/Subtype"] = newName("/Type1"); + keys["/Name"] = newName("/F1"); + keys["/BaseFont"] = newName("/Helvetica"); + keys["/Encoding"] = newName("/WinAnsiEncoding"); + QPDFObjectHandle font = pdf.makeIndirectObject( + QPDFObjectHandle::newDictionary(keys)); + + items.clear(); + items.push_back(newName("/PDF")); + items.push_back(newName("/Text")); + QPDFObjectHandle procset = pdf.makeIndirectObject( + QPDFObjectHandle::newArray(items)); + + QPDFObjectHandle contents = createPageContents(pdf, "First Page"); + + items.clear(); + items.push_back(QPDFObjectHandle::newInteger(0)); + items.push_back(QPDFObjectHandle::newInteger(0)); + items.push_back(QPDFObjectHandle::newInteger(612)); + items.push_back(QPDFObjectHandle::newInteger(792)); + QPDFObjectHandle mediabox = QPDFObjectHandle::newArray(items); + + keys.clear(); + keys["/F1"] = font; + QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(keys); + + keys.clear(); + keys["/ProcSet"] = procset; + keys["/Font"] = rfont; + QPDFObjectHandle resources = QPDFObjectHandle::newDictionary(keys); + + keys.clear(); + keys["/Type"] = newName("/Page"); + keys["/MediaBox"] = mediabox; + keys["/Contents"] = contents; + keys["/Resources"] = resources; + QPDFObjectHandle page = pdf.makeIndirectObject( + QPDFObjectHandle::newDictionary(keys)); + + pdf.addPage(page, true); + + QPDFWriter w(pdf, "a.pdf"); + w.setStaticID(true); + w.setStreamDataMode(qpdf_s_preserve); + w.write(); + } + else + { + throw std::runtime_error(std::string("invalid test ") + + QUtil::int_to_string(n)); + } + + std::cout << "test " << n << " done" << std::endl; +} + +int main(int argc, char* argv[]) +{ + QUtil::setLineBuf(stdout); + if ((whoami = strrchr(argv[0], '/')) == NULL) + { + whoami = argv[0]; + } + else + { + ++whoami; + } + // For libtool's sake.... 
+ if (strncmp(whoami, "lt-", 3) == 0) + { + whoami += 3; + } + + if (argc != 2) + { + usage(); + } + + try + { + int n = atoi(argv[1]); + runtest(n); + } + catch (std::exception& e) + { + std::cerr << e.what() << std::endl; + exit(2); + } + + return 0; +} diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index a1a76ce3..4ec68c53 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -352,6 +352,17 @@ $td->runtest("shallow copy a stream", show_ntests(); # ---------- +$td->notify("--- PDF From Scratch ---"); +$n_tests += 2; + +$td->runtest("basic qpdf from scratch", + {$td->COMMAND => "pdf_from_scratch 0"}, + {$td->STRING => "test 0 done\n", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "from-scratch-0.pdf"}); +# ---------- $td->notify("--- Error Condition Tests ---"); # $n_tests incremented after initialization of badfiles below. diff --git a/qpdf/qtest/qpdf/from-scratch-0.pdf b/qpdf/qtest/qpdf/from-scratch-0.pdf new file mode 100644 index 00000000..5750ec0e --- /dev/null +++ b/qpdf/qtest/qpdf/from-scratch-0.pdf @@ -0,0 +1,36 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >> +endobj +4 0 obj +<< /Length 42 >> +stream +BT /F1 15 Tf 72 720 Td (First Page) Tj ET +endstream +endobj +5 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +6 0 obj +[ /PDF /Text ] +endobj +xref +0 7 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +0000000266 00000 n +0000000357 00000 n +0000000464 00000 n +trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> +startxref +494 +%%EOF -- cgit v1.2.3-70-g09d2