summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2012-06-24 21:26:28 +0200
committer: Jay Berkenbilt <ejb@ql.org> 2012-06-24 21:56:50 +0200
commit: 8318d81ada86d4ec8e343c47103932b6bbe45a42 (patch)
tree: a1ff22dc1584d84829d32c7b1d8698a332877763
parent: 781c313058e26b6ab6fda060a652a395d27cdb7a (diff)
download: qpdf-8318d81ada86d4ec8e343c47103932b6bbe45a42.tar.zst
Fix and test support for files >= 4 GB
-rw-r--r-- Makefile 1
-rw-r--r-- TODO 34
-rw-r--r-- autoconf.mk.in 2
-rw-r--r-- configure.ac 8
-rw-r--r-- include/qpdf/QPDF.hh 47
-rw-r--r-- include/qpdf/QPDFWriter.hh 48
-rw-r--r-- include/qpdf/QPDFXRefEntry.hh 4
-rw-r--r-- include/qpdf/qpdf-c.h 4
-rw-r--r-- libqpdf/BitStream.cc 2
-rw-r--r-- libqpdf/BitWriter.cc 2
-rw-r--r-- libqpdf/QPDF.cc 20
-rw-r--r-- libqpdf/QPDFWriter.cc 35
-rw-r--r-- libqpdf/QPDF_linearization.cc 24
-rw-r--r-- libqpdf/bits.icc 4
-rw-r--r-- libqpdf/qpdf-c.cc 6
-rw-r--r-- libqpdf/qpdf/BitStream.hh 2
-rw-r--r-- libqpdf/qpdf/BitWriter.hh 2
-rw-r--r-- qpdf/build.mk 2
-rw-r--r-- qpdf/qpdf-ctest.c 8
-rw-r--r-- qpdf/qtest/qpdf.test 107
-rw-r--r-- qpdf/qtest/qpdf/large_file-check-linearized.out 5
-rw-r--r-- qpdf/qtest/qpdf/large_file-check-normal.out 5
-rw-r--r-- qpdf/qtest/qpdf/large_file-check-ostream-linearized.out 5
-rw-r--r-- qpdf/qtest/qpdf/large_file-check-ostream.out 5
-rw-r--r-- qpdf/qtest/qpdf/large_file.out 200
-rw-r--r-- qpdf/qtest/qpdf/large_file_xref_reconstruct.out 203
-rw-r--r-- qpdf/test_large_file.cc 368
27 files changed, 1030 insertions, 123 deletions
diff --git a/Makefile b/Makefile
index 9ea1817b..571f3e2e 100644
--- a/Makefile
+++ b/Makefile
@@ -82,6 +82,7 @@ CLEAN_TARGETS = $(foreach B,$(BUILD_ITEMS),clean_$(B))
# For test suites
export QPDF_BIN = $(abspath qpdf/$(OUTPUT_DIR)/qpdf)
export SKIP_TEST_COMPARE_IMAGES
+export LARGE_FILE_TEST_PATH
clean:: $(CLEAN_TARGETS)
diff --git a/TODO b/TODO
index 6bd34aad..89b10d2d 100644
--- a/TODO
+++ b/TODO
@@ -15,32 +15,14 @@ Next
* Testing for files > 4GB
- - Create a PDF from scratch. Each page has a page number as text
- and an image. The image can be 5000x5000 pixels using 8-bit
- gray scale. It will be divided into 10 stripes of 500 pixels
- each. The left and right 500 pixels of each stripe will
- alternate black and white. The remaining part of the image will
- have white stripes indicating 1 and black stripes indicating 0
- with the most-significant bit on top to indicate the page
- number. In this way, every page will be unique and will consume
- approximately 25 megabytes. Creating 200 pages like this will
- make a file that is 5 GB.
-
- - The file will have to have object streams since a regular xref
- table won't be able to support offsets that large.
-
- - A separate test program can create this file and do various
- manipulations on it. This can be enabled with an environment
- variable controlled by configure in much the same way image
- comparison tests are enabled now. The argument to
- --enable-large-file-test should be a path that has enough disk
- space to do the tests, probably enough space for two coipes of
- the file. The test program should also have an interactive mode
- so we can generate the large file and then look at it with a
- PDF viewer like Adobe Reader. The test suite should actually
- read the file back in and look at all the page and stream
- contents to make sure the file is really correct. We need to
- test normal writing and linearization.
+ The large file test can be enabled with an environment variable
+ controlled by configure in much the same way image comparison tests
+ are enabled now. The argument to --with-large-file-test-path should be
+ a path that has enough disk space to do the tests, probably enough
+ space for two copies of the file.
+
+ The tests will take a very long time (possibly hours) to run, so we
+ will run them infrequently.
Soon
diff --git a/autoconf.mk.in b/autoconf.mk.in
index baf11658..860fc4ab 100644
--- a/autoconf.mk.in
+++ b/autoconf.mk.in
@@ -36,3 +36,5 @@ SKIP_TEST_COMPARE_IMAGES=@SKIP_TEST_COMPARE_IMAGES@
BUILDRULES=@BUILDRULES@
HAVE_LD_VERSION_SCRIPT=@HAVE_LD_VERSION_SCRIPT@
WINDOWS_WORDSIZE=@WINDOWS_WORDSIZE@
+SHOW_FAILED_TEST_OUTPUT=@SHOW_FAILED_TEST_OUTPUT@
+LARGE_FILE_TEST_PATH=@LARGE_FILE_TEST_PATH@
diff --git a/configure.ac b/configure.ac
index 21481311..3fe85fe7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,6 +54,14 @@ if test "$BUILD_INTERNAL_LIBS" = "0"; then
AC_SEARCH_LIBS(pcre_compile,pcre,,[MISSING_PCRE=1; MISSING_ANY=1])
fi
+LARGE_FILE_TEST_PATH=
+AC_SUBST(LARGE_FILE_TEST_PATH)
+AC_ARG_WITH(large-file-test-path,
+ AS_HELP_STRING([--with-large-file-test-path=path],
+ [To enable testing of files > 4GB, give the path to a directory with at least 11 GB free. The test suite will write temporary files to this directory. Alternatively, just set the LARGE_FILE_TEST_PATH environment variable to the path before running the test suite.]),
+ [LARGE_FILE_TEST_PATH=$withval],
+ [LARGE_FILE_TEST_PATH=])
+
AC_SYS_LARGEFILE
AC_FUNC_FSEEKO
AC_TYPE_UINT16_T
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 518069d7..7849cb76 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -337,7 +337,7 @@ class QPDF
QPDF_DLL
void generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber,
PointerHolder<Buffer>& hint_stream,
int& S, int& O);
@@ -531,8 +531,9 @@ class QPDF
void reconstruct_xref(QPDFExc& e);
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
- int processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
- void insertXrefEntry(int obj, int f0, int f1, int f2,
+ qpdf_offset_t processXRefStream(
+ qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
+ void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2,
bool overwrite = false);
void setLastObjectDescription(std::string const& description,
int objid, int generation);
@@ -609,13 +610,13 @@ class QPDF
}
int delta_nobjects; // 1
- int delta_page_length; // 2
+ qpdf_offset_t delta_page_length; // 2
int nshared_objects; // 3
// vectors' sizes = nshared_objects
std::vector<int> shared_identifiers; // 4
std::vector<int> shared_numerators; // 5
- int delta_content_offset; // 6
- int delta_content_length; // 7
+ qpdf_offset_t delta_content_offset; // 6
+ qpdf_offset_t delta_content_length; // 7
};
// PDF 1.4: Table F.3
@@ -639,7 +640,7 @@ class QPDF
}
int min_nobjects; // 1
- int first_page_offset; // 2
+ qpdf_offset_t first_page_offset; // 2
int nbits_delta_nobjects; // 3
int min_page_length; // 4
int nbits_delta_page_length; // 5
@@ -686,7 +687,7 @@ class QPDF
}
int first_shared_obj; // 1
- int first_shared_offset; // 2
+ qpdf_offset_t first_shared_offset; // 2
int nshared_first_page; // 3
int nshared_total; // 4
int nbits_nobjects; // 5
@@ -708,7 +709,7 @@ class QPDF
}
int first_object; // 1
- int first_object_offset; // 2
+ qpdf_offset_t first_object_offset; // 2
int nobjects; // 3
int group_length; // 4
};
@@ -730,14 +731,14 @@ class QPDF
{
}
- int file_size; // /L
- int first_page_object; // /O
- int first_page_end; // /E
- int npages; // /N
- int xref_zero_offset; // /T
- int first_page; // /P
- int H_offset; // offset of primary hint stream
- int H_length; // length of primary hint stream
+ qpdf_offset_t file_size; // /L
+ int first_page_object; // /O
+ qpdf_offset_t first_page_end; // /E
+ int npages; // /N
+ qpdf_offset_t xref_zero_offset; // /T
+ int first_page; // /P
+ qpdf_offset_t H_offset; // offset of primary hint stream
+ qpdf_offset_t H_length; // length of primary hint stream
};
// Computed hint table value data structures. These tables
@@ -851,7 +852,7 @@ class QPDF
void readHSharedObject(BitStream);
void readHGeneric(BitStream, HGeneric&);
int maxEnd(ObjUser const& ou);
- int getLinearizationOffset(ObjGen const&);
+ qpdf_offset_t getLinearizationOffset(ObjGen const&);
QPDFObjectHandle getUncompressedObject(
QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
int lengthNextN(int first_object, int n,
@@ -878,19 +879,19 @@ class QPDF
std::map<int, int> const& object_stream_data);
int outputLengthNextN(
int in_object, int n,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void calculateHPageOffset(
std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void calculateHSharedObject(
std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void calculateHOutline(
std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void writeHPageOffset(BitWriter&);
void writeHSharedObject(BitWriter&);
@@ -942,7 +943,7 @@ class QPDF
std::vector<QPDFExc> warnings;
// Linearization data
- int first_xref_item_offset; // actual value from file
+ qpdf_offset_t first_xref_item_offset; // actual value from file
bool uncompressed_after_compressed;
// Linearization parameter dictionary and hint table data: may be
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index bcb9335b..25acbb14 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -212,8 +212,8 @@ class QPDFWriter
enum trailer_e { t_normal, t_lin_first, t_lin_second };
void init();
- int bytesNeeded(unsigned long n);
- void writeBinary(unsigned long val, unsigned int bytes);
+ int bytesNeeded(unsigned long long n);
+ void writeBinary(unsigned long long val, unsigned int bytes);
void writeString(std::string const& str);
void writeBuffer(PointerHolder<Buffer>&);
void writeStringQDF(std::string const& str);
@@ -226,7 +226,7 @@ class QPDFWriter
void writeObjectStream(QPDFObjectHandle object);
void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
void writeTrailer(trailer_e which, int size,
- bool xref_stream, int prev = 0);
+ bool xref_stream, qpdf_offset_t prev = 0);
void unparseObject(QPDFObjectHandle object, int level,
unsigned int flags);
void unparseObject(QPDFObjectHandle object, int level,
@@ -263,24 +263,28 @@ class QPDFWriter
void writeEncryptionDictionary();
void writeHeader();
void writeHintStream(int hint_id);
- int writeXRefTable(trailer_e which, int first, int last, int size);
- int writeXRefTable(trailer_e which, int first, int last, int size,
- // for linearization
- int prev,
- bool suppress_offsets,
- int hint_id,
- qpdf_offset_t hint_offset,
- qpdf_offset_t hint_length);
- int writeXRefStream(int objid, int max_id, int max_offset,
- trailer_e which, int first, int last, int size);
- int writeXRefStream(int objid, int max_id, int max_offset,
- trailer_e which, int first, int last, int size,
- // for linearization
- int prev,
- int hint_id,
- qpdf_offset_t hint_offset,
- qpdf_offset_t hint_length,
- bool skip_compression);
+ qpdf_offset_t writeXRefTable(
+ trailer_e which, int first, int last, int size);
+ qpdf_offset_t writeXRefTable(
+ trailer_e which, int first, int last, int size,
+ // for linearization
+ qpdf_offset_t prev,
+ bool suppress_offsets,
+ int hint_id,
+ qpdf_offset_t hint_offset,
+ qpdf_offset_t hint_length);
+ qpdf_offset_t writeXRefStream(
+ int objid, int max_id, qpdf_offset_t max_offset,
+ trailer_e which, int first, int last, int size);
+ qpdf_offset_t writeXRefStream(
+ int objid, int max_id, qpdf_offset_t max_offset,
+ trailer_e which, int first, int last, int size,
+ // for linearization
+ qpdf_offset_t prev,
+ int hint_id,
+ qpdf_offset_t hint_offset,
+ qpdf_offset_t hint_length,
+ bool skip_compression);
int calculateXrefStreamPadding(int xref_bytes);
// When filtering subsections, push additional pipelines to the
@@ -336,7 +340,7 @@ class QPDFWriter
std::list<QPDFObjectHandle> object_queue;
std::map<int, int> obj_renumber;
std::map<int, QPDFXRefEntry> xref;
- std::map<int, size_t> lengths;
+ std::map<int, qpdf_offset_t> lengths;
int next_objid;
int cur_stream_length_id;
size_t cur_stream_length;
diff --git a/include/qpdf/QPDFXRefEntry.hh b/include/qpdf/QPDFXRefEntry.hh
index f8d3f930..338c3ed3 100644
--- a/include/qpdf/QPDFXRefEntry.hh
+++ b/include/qpdf/QPDFXRefEntry.hh
@@ -28,9 +28,9 @@ class QPDFXRefEntry
QPDF_DLL
int getType() const;
QPDF_DLL
- qpdf_offset_t getOffset() const; // only for type 1
+ qpdf_offset_t getOffset() const; // only for type 1
QPDF_DLL
- int getObjStreamNumber() const; // only for type 2
+ int getObjStreamNumber() const; // only for type 2
QPDF_DLL
int getObjStreamIndex() const; // only for type 2
diff --git a/include/qpdf/qpdf-c.h b/include/qpdf/qpdf-c.h
index 1a65e4af..ee199026 100644
--- a/include/qpdf/qpdf-c.h
+++ b/include/qpdf/qpdf-c.h
@@ -154,7 +154,7 @@ extern "C" {
QPDF_DLL
char const* qpdf_get_error_filename(qpdf_data q, qpdf_error e);
QPDF_DLL
- unsigned long qpdf_get_error_file_position(qpdf_data q, qpdf_error e);
+ unsigned long long qpdf_get_error_file_position(qpdf_data q, qpdf_error e);
QPDF_DLL
char const* qpdf_get_error_message_detail(qpdf_data q, qpdf_error e);
@@ -195,7 +195,7 @@ extern "C" {
QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf,
char const* description,
char const* buffer,
- unsigned long size,
+ unsigned long long size,
char const* password);
/* Read functions below must be called after qpdf_read or
diff --git a/libqpdf/BitStream.cc b/libqpdf/BitStream.cc
index 703ce8f2..eb511f72 100644
--- a/libqpdf/BitStream.cc
+++ b/libqpdf/BitStream.cc
@@ -19,7 +19,7 @@ BitStream::reset()
bits_available = 8 * nbytes;
}
-unsigned long
+unsigned long long
BitStream::getBits(int nbits)
{
return read_bits(this->p, this->bit_offset,
diff --git a/libqpdf/BitWriter.cc b/libqpdf/BitWriter.cc
index 441501cb..4fb375cb 100644
--- a/libqpdf/BitWriter.cc
+++ b/libqpdf/BitWriter.cc
@@ -12,7 +12,7 @@ BitWriter::BitWriter(Pipeline* pl) :
}
void
-BitWriter::writeBits(unsigned long val, unsigned int bits)
+BitWriter::writeBits(unsigned long long val, unsigned int bits)
{
write_bits(this->ch, this->bit_offset, val, bits, this->pl);
}
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 6b275d28..743ba93b 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -571,7 +571,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
in_obj = true;
int obj = atoi(m.getMatch(1).c_str());
int gen = atoi(m.getMatch(2).c_str());
- int offset = this->file->getLastOffset();
+ qpdf_offset_t offset = this->file->getLastOffset();
insertXrefEntry(obj, 1, offset, gen, true);
}
else if ((! this->trailer.isInitialized()) &&
@@ -634,6 +634,11 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
}
}
+ if (! this->trailer.isInitialized())
+ {
+ throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
+ "unable to find trailer while reading xref");
+ }
int size = this->trailer.getKey("/Size").getIntValue();
int max_obj = 0;
if (! xref_table.empty())
@@ -704,7 +709,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
QUtil::int_to_string(i) + ")");
}
- int f1 = atoi(m2.getMatch(1).c_str());
+ // f1 may be a file offset too large for an int, so parse it as a long long
+ qpdf_offset_t f1 = QUtil::string_to_ll(m2.getMatch(1).c_str());
int f2 = atoi(m2.getMatch(2).c_str());
char type = m2.getMatch(3)[0];
if (type == 'f')
@@ -855,7 +861,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
return xref_offset;
}
-int
+qpdf_offset_t
QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
{
QPDFObjectHandle dict = xref_obj.getDict();
@@ -957,7 +963,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
{
// Read this entry
unsigned char const* entry = data + (entry_size * i);
- int fields[3];
+ qpdf_offset_t fields[3];
unsigned char const* p = entry;
for (int j = 0; j < 3; ++j)
{
@@ -1002,7 +1008,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
// This is needed by checkLinearization()
this->first_xref_item_offset = xref_offset;
}
- insertXrefEntry(obj, fields[0], fields[1], fields[2]);
+ insertXrefEntry(obj, (int)fields[0], fields[1], (int)fields[2]);
}
if (! this->trailer.isInitialized())
@@ -1031,7 +1037,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
}
void
-QPDF::insertXrefEntry(int obj, int f0, int f1, int f2, bool overwrite)
+QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
{
// Populate the xref table in such a way that the first reference
// to an object that we see, which is the one in the latest xref
@@ -1558,7 +1564,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
QPDFXRefEntry const& entry = (*iter).second;
if (entry.getType() == 1)
{
- int obj_offset = entry.getOffset();
+ qpdf_offset_t obj_offset = entry.getOffset();
if ((obj_offset > stream_offset) &&
((this_obj_offset == 0) ||
(this_obj_offset > obj_offset)))
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 308f4c53..26713225 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -540,7 +540,7 @@ QPDFWriter::setDataKey(int objid)
}
int
-QPDFWriter::bytesNeeded(unsigned long n)
+QPDFWriter::bytesNeeded(unsigned long long n)
{
int bytes = 0;
while (n)
@@ -552,10 +552,10 @@ QPDFWriter::bytesNeeded(unsigned long n)
}
void
-QPDFWriter::writeBinary(unsigned long val, unsigned int bytes)
+QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
{
- assert(bytes <= sizeof(unsigned long));
- unsigned char data[sizeof(unsigned long)];
+ assert(bytes <= sizeof(unsigned long long));
+ unsigned char data[sizeof(unsigned long long)];
for (unsigned int i = 0; i < bytes; ++i)
{
data[bytes - i - 1] = (unsigned char)(val & 0xff);
@@ -849,7 +849,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
}
void
-QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, int prev)
+QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
+ qpdf_offset_t prev)
{
QPDFObjectHandle trailer = pdf.getTrailer();
if (! xref_stream)
@@ -1812,15 +1813,15 @@ QPDFWriter::writeHintStream(int hint_id)
closeObject(hint_id);
}
-int
+qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0);
}
-int
+qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
- int prev, bool suppress_offsets,
+ qpdf_offset_t prev, bool suppress_offsets,
int hint_id, qpdf_offset_t hint_offset,
qpdf_offset_t hint_length)
{
@@ -1838,7 +1839,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
}
else
{
- int offset = 0;
+ qpdf_offset_t offset = 0;
if (! suppress_offsets)
{
offset = this->xref[i].getOffset();
@@ -1858,24 +1859,24 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
return space_before_zero;
}
-int
-QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset,
+qpdf_offset_t
+QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
trailer_e which, int first, int last, int size)
{
return writeXRefStream(objid, max_id, max_offset,
which, first, last, size, 0, 0, 0, 0, false);
}
-int
-QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
+qpdf_offset_t
+QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
trailer_e which, int first, int last, int size,
- int prev, int hint_id,
+ qpdf_offset_t prev, int hint_id,
qpdf_offset_t hint_offset,
qpdf_offset_t hint_length,
bool skip_compression)
{
qpdf_offset_t xref_offset = this->pipeline->getCount();
- int space_before_zero = xref_offset - 1;
+ qpdf_offset_t space_before_zero = xref_offset - 1;
// field 1 contains offsets and object stream identifiers
int f1_size = std::max(bytesNeeded(max_offset),
@@ -1921,7 +1922,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
case 1:
{
- int offset = e.getOffset();
+ qpdf_offset_t offset = e.getOffset();
if ((hint_id != 0) &&
(i != hint_id) &&
(offset >= hint_offset))
@@ -2309,7 +2310,7 @@ QPDFWriter::writeLinearized()
// Save hint offset since it will be set to zero by
// calling openObject.
- int hint_offset = this->xref[hint_id].getOffset();
+ qpdf_offset_t hint_offset = this->xref[hint_id].getOffset();
// Write hint stream to a buffer
pushPipeline(new Pl_Buffer("hint buffer"));
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
index 48bb4d2b..fdd0d702 100644
--- a/libqpdf/QPDF_linearization.cc
+++ b/libqpdf/QPDF_linearization.cc
@@ -18,10 +18,10 @@
#include <math.h>
#include <string.h>
-template <class T>
+template <class T, class int_type>
static void
load_vector_int(BitStream& bit_stream, int nitems, std::vector<T>& vec,
- int bits_wanted, int T::*field)
+ int bits_wanted, int_type T::*field)
{
// nitems times, read bits_wanted from the given bit stream,
// storing results in the ith vector entry.
@@ -144,7 +144,7 @@ QPDF::isLinearized()
QPDFObjectHandle L = candidate.getKey("/L");
if (L.isInteger())
{
- int Li = L.getIntValue();
+ qpdf_offset_t Li = L.getIntValue();
this->file->seek(0, SEEK_END);
if (Li != this->file->tell())
{
@@ -649,11 +649,11 @@ QPDF::maxEnd(ObjUser const& ou)
return end;
}
-int
+qpdf_offset_t
QPDF::getLinearizationOffset(ObjGen const& og)
{
QPDFXRefEntry entry = this->xref_table[og];
- int result = 0;
+ qpdf_offset_t result = 0;
switch (entry.getType())
{
case 1:
@@ -1787,7 +1787,7 @@ static inline int nbits(int val)
int
QPDF::outputLengthNextN(
int in_object, int n,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
// Figure out the length of a series of n consecutive objects in
@@ -1808,7 +1808,7 @@ QPDF::outputLengthNextN(
void
QPDF::calculateHPageOffset(
std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
// Page Offset Hint Table
@@ -1900,7 +1900,7 @@ QPDF::calculateHPageOffset(
void
QPDF::calculateHSharedObject(
std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
CHSharedObject& cso = this->c_shared_object_data;
@@ -1946,7 +1946,7 @@ QPDF::calculateHSharedObject(
void
QPDF::calculateHOutline(
std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
HGeneric& cho = this->c_outline_data;
@@ -1967,10 +1967,10 @@ QPDF::calculateHOutline(
cho.first_object, ho.nobjects, lengths, obj_renumber);
}
-template <class T>
+template <class T, class int_type>
static void
write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec,
- int bits, int T::*field)
+ int bits, int_type T::*field)
{
// nitems times, write bits bits from the given field of the ith
// vector to the given bit writer.
@@ -2095,7 +2095,7 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t)
void
QPDF::generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
- std::map<int, size_t> const& lengths,
+ std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber,
PointerHolder<Buffer>& hint_buffer,
int& S, int& O)
diff --git a/libqpdf/bits.icc b/libqpdf/bits.icc
index 31765986..bcd7dd85 100644
--- a/libqpdf/bits.icc
+++ b/libqpdf/bits.icc
@@ -15,7 +15,7 @@
// this code includes with the symbol defined.
#ifdef BITS_READ
-static unsigned long
+static unsigned long long
read_bits(unsigned char const*& p, unsigned int& bit_offset,
unsigned int& bits_available, unsigned int bits_wanted)
{
@@ -95,7 +95,7 @@ read_bits(unsigned char const*& p, unsigned int& bit_offset,
#ifdef BITS_WRITE
static void
write_bits(unsigned char& ch, unsigned int& bit_offset,
- unsigned long val, unsigned int bits, Pipeline* pipeline)
+ unsigned long long val, unsigned int bits, Pipeline* pipeline)
{
if (bits > 32)
{
diff --git a/libqpdf/qpdf-c.cc b/libqpdf/qpdf-c.cc
index 65a5de72..0312ae50 100644
--- a/libqpdf/qpdf-c.cc
+++ b/libqpdf/qpdf-c.cc
@@ -31,7 +31,7 @@ struct _qpdf_data
// Parameters for functions we call
char const* filename; // or description
char const* buffer;
- unsigned long size;
+ unsigned long long size;
char const* password;
bool write_memory;
Buffer* output_buffer;
@@ -218,7 +218,7 @@ char const* qpdf_get_error_filename(qpdf_data qpdf, qpdf_error e)
return e->exc->getFilename().c_str();
}
-unsigned long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e)
+unsigned long long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e)
{
if (e == 0)
{
@@ -268,7 +268,7 @@ QPDF_ERROR_CODE qpdf_read(qpdf_data qpdf, char const* filename,
QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf,
char const* description,
char const* buffer,
- unsigned long size,
+ unsigned long long size,
char const* password)
{
QPDF_ERROR_CODE status = QPDF_SUCCESS;
diff --git a/libqpdf/qpdf/BitStream.hh b/libqpdf/qpdf/BitStream.hh
index 92bbd735..e45a90ee 100644
--- a/libqpdf/qpdf/BitStream.hh
+++ b/libqpdf/qpdf/BitStream.hh
@@ -13,7 +13,7 @@ class BitStream
QPDF_DLL
void reset();
QPDF_DLL
- unsigned long getBits(int nbits);
+ unsigned long long getBits(int nbits);
QPDF_DLL
void skipToNextByte();
diff --git a/libqpdf/qpdf/BitWriter.hh b/libqpdf/qpdf/BitWriter.hh
index 5eae398f..7e3b07a9 100644
--- a/libqpdf/qpdf/BitWriter.hh
+++ b/libqpdf/qpdf/BitWriter.hh
@@ -15,7 +15,7 @@ class BitWriter
QPDF_DLL
BitWriter(Pipeline* pl);
QPDF_DLL
- void writeBits(unsigned long val, unsigned int bits);
+ void writeBits(unsigned long long val, unsigned int bits);
// Force any partial byte to be written to the pipeline.
QPDF_DLL
void flush();
diff --git a/qpdf/build.mk b/qpdf/build.mk
index dfe169ab..e3d50976 100644
--- a/qpdf/build.mk
+++ b/qpdf/build.mk
@@ -1,4 +1,4 @@
-BINS_qpdf = qpdf test_driver pdf_from_scratch
+BINS_qpdf = qpdf test_driver pdf_from_scratch test_large_file
CBINS_qpdf = qpdf-ctest
TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B)))
diff --git a/qpdf/qpdf-ctest.c b/qpdf/qpdf-ctest.c
index ffb1fff8..0b6c5f1f 100644
--- a/qpdf/qpdf-ctest.c
+++ b/qpdf/qpdf-ctest.c
@@ -17,7 +17,10 @@ static void report_errors()
printf("warning: %s\n", qpdf_get_error_full_text(qpdf, e));
printf(" code: %d\n", qpdf_get_error_code(qpdf, e));
printf(" file: %s\n", qpdf_get_error_filename(qpdf, e));
- printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e));
+ /* If your compiler doesn't support %lld, change to %ld and
+ * lose precision in the error message.
+ */
+ printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e));
printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e));
}
if (qpdf_has_error(qpdf))
@@ -27,7 +30,8 @@ static void report_errors()
printf("error: %s\n", qpdf_get_error_full_text(qpdf, e));
printf(" code: %d\n", qpdf_get_error_code(qpdf, e));
printf(" file: %s\n", qpdf_get_error_filename(qpdf, e));
- printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e));
+ /* see above comment about %lld */
+ printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e));
printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e));
}
else
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 94252db4..fa26597d 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -21,6 +21,7 @@ if ((exists $ENV{'SKIP_TEST_COMPARE_IMAGES'}) &&
{
$compare_images = 0;
}
+my $large_file_test_path = $ENV{'LARGE_FILE_TEST_PATH'} || undef;
my $have_acroread = 0;
@@ -1447,8 +1448,114 @@ for (my $n = 1; $n <= 2; ++$n)
}
show_ntests();
+# ----------
+$td->notify("--- Large File Tests ---");
+my $nlarge = 1;
+if (defined $large_file_test_path)
+{
+ $nlarge = 2;
+}
+else
+{
+ $td->notify("--- Skipping tests on actual large files ---");
+}
+$n_tests += $nlarge * 13;
+for (my $large = 0; $large < $nlarge; ++$large)
+{
+ if ($large)
+ {
+ $td->notify("--- Running tests on actual large files ---");
+ }
+ else
+ {
+ $td->notify("--- Running large file tests on small files ---");
+ }
+ my $size = ($large ? "large" : "small");
+ my $file = $large ? "$large_file_test_path/a.pdf" : "a.pdf";
+ $td->runtest("write test file",
+ {$td->COMMAND => "test_large_file write $size $file"},
+ {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+ $td->runtest("read test file",
+ {$td->COMMAND => "test_large_file read $size $file"},
+ {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+ $td->runtest("check",
+ {$td->COMMAND => "qpdf --suppress-recovery --check $file",
+ $td->FILTER => "grep -v checking"},
+ {$td->FILE => "large_file-check-normal.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+
+ for my $ostream (0, 1)
+ {
+ for my $linearize (0, 1)
+ {
+ if (($ostream == 0) && ($linearize == 0))
+ {
+ # Original file has no object streams and is not linearized.
+ next;
+ }
+ my $args = "";
+ my $omode = $ostream ? "generate" : "disable";
+ my $lin = $linearize ? "--linearize" : "";
+ my $newfile = "$file-new";
+
+ $td->runtest("transform: ostream=$ostream, linearize=$linearize",
+ {$td->COMMAND =>
+ "qpdf --stream-data=preserve" .
+ " --object-streams=$omode" .
+ " $lin $file $newfile"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+ $td->runtest("read test file",
+ {$td->COMMAND =>
+ "test_large_file read $size $newfile"},
+ {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+ my $check_out =
+ ($linearize
+ ? ($ostream
+ ? "large_file-check-ostream-linearized.out"
+ : "large_file-check-linearized.out")
+ : ($ostream
+ ? "large_file-check-ostream.out"
+ : "large_file-check-normal.out"));
+ $td->runtest("check: ostream=$ostream, linearize=$linearize",
+ {$td->COMMAND =>
+ "qpdf --suppress-recovery --check $newfile",
+ $td->FILTER => "grep -v checking"},
+ {$td->FILE => $check_out, $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+ unlink $newfile;
+ }
+ }
+
+ # Clobber xref
+ open(F, "+<$file") or die;
+ seek(F, -50, 2);
+ my $pos = tell F;
+ my $buf;
+ read(F, $buf, 50);
+ die unless $buf =~ m/^(.*startxref\n)\d+/s;
+ $pos += length($1);
+ seek(F, $pos, 0) or die;
+ print F "oops" or die;
+ close(F);
+ my $cmd = +{$td->COMMAND => "test_large_file read $size $file"};
+ if ($large)
+ {
+ $cmd->{$td->FILTER} = "sed -e s,$large_file_test_path/,,";
+ }
+ $td->runtest("reconstruct xref table",
+ $cmd,
+ {$td->FILE => "large_file_xref_reconstruct.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+ unlink $file;
+}
# ----------
+
cleanup();
# See comments at beginning about calculation of number of tests. We
diff --git a/qpdf/qtest/qpdf/large_file-check-linearized.out b/qpdf/qtest/qpdf/large_file-check-linearized.out
new file mode 100644
index 00000000..6d3407bd
--- /dev/null
+++ b/qpdf/qtest/qpdf/large_file-check-linearized.out
@@ -0,0 +1,5 @@
+PDF Version: 1.3
+File is not encrypted
+File is linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/large_file-check-normal.out b/qpdf/qtest/qpdf/large_file-check-normal.out
new file mode 100644
index 00000000..c5cc5b5f
--- /dev/null
+++ b/qpdf/qtest/qpdf/large_file-check-normal.out
@@ -0,0 +1,5 @@
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/large_file-check-ostream-linearized.out b/qpdf/qtest/qpdf/large_file-check-ostream-linearized.out
new file mode 100644
index 00000000..079bb65d
--- /dev/null
+++ b/qpdf/qtest/qpdf/large_file-check-ostream-linearized.out
@@ -0,0 +1,5 @@
+PDF Version: 1.5
+File is not encrypted
+File is linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/large_file-check-ostream.out b/qpdf/qtest/qpdf/large_file-check-ostream.out
new file mode 100644
index 00000000..8ea0e86f
--- /dev/null
+++ b/qpdf/qtest/qpdf/large_file-check-ostream.out
@@ -0,0 +1,5 @@
+PDF Version: 1.5
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/large_file.out b/qpdf/qtest/qpdf/large_file.out
new file mode 100644
index 00000000..b1990d25
--- /dev/null
+++ b/qpdf/qtest/qpdf/large_file.out
@@ -0,0 +1,200 @@
+page 1 of 200
+page 2 of 200
+page 3 of 200
+page 4 of 200
+page 5 of 200
+page 6 of 200
+page 7 of 200
+page 8 of 200
+page 9 of 200
+page 10 of 200
+page 11 of 200
+page 12 of 200
+page 13 of 200
+page 14 of 200
+page 15 of 200
+page 16 of 200
+page 17 of 200
+page 18 of 200
+page 19 of 200
+page 20 of 200
+page 21 of 200
+page 22 of 200
+page 23 of 200
+page 24 of 200
+page 25 of 200
+page 26 of 200
+page 27 of 200
+page 28 of 200
+page 29 of 200
+page 30 of 200
+page 31 of 200
+page 32 of 200
+page 33 of 200
+page 34 of 200
+page 35 of 200
+page 36 of 200
+page 37 of 200
+page 38 of 200
+page 39 of 200
+page 40 of 200
+page 41 of 200
+page 42 of 200
+page 43 of 200
+page 44 of 200
+page 45 of 200
+page 46 of 200
+page 47 of 200
+page 48 of 200
+page 49 of 200
+page 50 of 200
+page 51 of 200
+page 52 of 200
+page 53 of 200
+page 54 of 200
+page 55 of 200
+page 56 of 200
+page 57 of 200
+page 58 of 200
+page 59 of 200
+page 60 of 200
+page 61 of 200
+page 62 of 200
+page 63 of 200
+page 64 of 200
+page 65 of 200
+page 66 of 200
+page 67 of 200
+page 68 of 200
+page 69 of 200
+page 70 of 200
+page 71 of 200
+page 72 of 200
+page 73 of 200
+page 74 of 200
+page 75 of 200
+page 76 of 200
+page 77 of 200
+page 78 of 200
+page 79 of 200
+page 80 of 200
+page 81 of 200
+page 82 of 200
+page 83 of 200
+page 84 of 200
+page 85 of 200
+page 86 of 200
+page 87 of 200
+page 88 of 200
+page 89 of 200
+page 90 of 200
+page 91 of 200
+page 92 of 200
+page 93 of 200
+page 94 of 200
+page 95 of 200
+page 96 of 200
+page 97 of 200
+page 98 of 200
+page 99 of 200
+page 100 of 200
+page 101 of 200
+page 102 of 200
+page 103 of 200
+page 104 of 200
+page 105 of 200
+page 106 of 200
+page 107 of 200
+page 108 of 200
+page 109 of 200
+page 110 of 200
+page 111 of 200
+page 112 of 200
+page 113 of 200
+page 114 of 200
+page 115 of 200
+page 116 of 200
+page 117 of 200
+page 118 of 200
+page 119 of 200
+page 120 of 200
+page 121 of 200
+page 122 of 200
+page 123 of 200
+page 124 of 200
+page 125 of 200
+page 126 of 200
+page 127 of 200
+page 128 of 200
+page 129 of 200
+page 130 of 200
+page 131 of 200
+page 132 of 200
+page 133 of 200
+page 134 of 200
+page 135 of 200
+page 136 of 200
+page 137 of 200
+page 138 of 200
+page 139 of 200
+page 140 of 200
+page 141 of 200
+page 142 of 200
+page 143 of 200
+page 144 of 200
+page 145 of 200
+page 146 of 200
+page 147 of 200
+page 148 of 200
+page 149 of 200
+page 150 of 200
+page 151 of 200
+page 152 of 200
+page 153 of 200
+page 154 of 200
+page 155 of 200
+page 156 of 200
+page 157 of 200
+page 158 of 200
+page 159 of 200
+page 160 of 200
+page 161 of 200
+page 162 of 200
+page 163 of 200
+page 164 of 200
+page 165 of 200
+page 166 of 200
+page 167 of 200
+page 168 of 200
+page 169 of 200
+page 170 of 200
+page 171 of 200
+page 172 of 200
+page 173 of 200
+page 174 of 200
+page 175 of 200
+page 176 of 200
+page 177 of 200
+page 178 of 200
+page 179 of 200
+page 180 of 200
+page 181 of 200
+page 182 of 200
+page 183 of 200
+page 184 of 200
+page 185 of 200
+page 186 of 200
+page 187 of 200
+page 188 of 200
+page 189 of 200
+page 190 of 200
+page 191 of 200
+page 192 of 200
+page 193 of 200
+page 194 of 200
+page 195 of 200
+page 196 of 200
+page 197 of 200
+page 198 of 200
+page 199 of 200
+page 200 of 200
diff --git a/qpdf/qtest/qpdf/large_file_xref_reconstruct.out b/qpdf/qtest/qpdf/large_file_xref_reconstruct.out
new file mode 100644
index 00000000..da5b25b1
--- /dev/null
+++ b/qpdf/qtest/qpdf/large_file_xref_reconstruct.out
@@ -0,0 +1,203 @@
+WARNING: a.pdf: file is damaged
+WARNING: a.pdf: can't find startxref
+WARNING: a.pdf: Attempting to reconstruct cross-reference table
+page 1 of 200
+page 2 of 200
+page 3 of 200
+page 4 of 200
+page 5 of 200
+page 6 of 200
+page 7 of 200
+page 8 of 200
+page 9 of 200
+page 10 of 200
+page 11 of 200
+page 12 of 200
+page 13 of 200
+page 14 of 200
+page 15 of 200
+page 16 of 200
+page 17 of 200
+page 18 of 200
+page 19 of 200
+page 20 of 200
+page 21 of 200
+page 22 of 200
+page 23 of 200
+page 24 of 200
+page 25 of 200
+page 26 of 200
+page 27 of 200
+page 28 of 200
+page 29 of 200
+page 30 of 200
+page 31 of 200
+page 32 of 200
+page 33 of 200
+page 34 of 200
+page 35 of 200
+page 36 of 200
+page 37 of 200
+page 38 of 200
+page 39 of 200
+page 40 of 200
+page 41 of 200
+page 42 of 200
+page 43 of 200
+page 44 of 200
+page 45 of 200
+page 46 of 200
+page 47 of 200
+page 48 of 200
+page 49 of 200
+page 50 of 200
+page 51 of 200
+page 52 of 200
+page 53 of 200
+page 54 of 200
+page 55 of 200
+page 56 of 200
+page 57 of 200
+page 58 of 200
+page 59 of 200
+page 60 of 200
+page 61 of 200
+page 62 of 200
+page 63 of 200
+page 64 of 200
+page 65 of 200
+page 66 of 200
+page 67 of 200
+page 68 of 200
+page 69 of 200
+page 70 of 200
+page 71 of 200
+page 72 of 200
+page 73 of 200
+page 74 of 200
+page 75 of 200
+page 76 of 200
+page 77 of 200
+page 78 of 200
+page 79 of 200
+page 80 of 200
+page 81 of 200
+page 82 of 200
+page 83 of 200
+page 84 of 200
+page 85 of 200
+page 86 of 200
+page 87 of 200
+page 88 of 200
+page 89 of 200
+page 90 of 200
+page 91 of 200
+page 92 of 200
+page 93 of 200
+page 94 of 200
+page 95 of 200
+page 96 of 200
+page 97 of 200
+page 98 of 200
+page 99 of 200
+page 100 of 200
+page 101 of 200
+page 102 of 200
+page 103 of 200
+page 104 of 200
+page 105 of 200
+page 106 of 200
+page 107 of 200
+page 108 of 200
+page 109 of 200
+page 110 of 200
+page 111 of 200
+page 112 of 200
+page 113 of 200
+page 114 of 200
+page 115 of 200
+page 116 of 200
+page 117 of 200
+page 118 of 200
+page 119 of 200
+page 120 of 200
+page 121 of 200
+page 122 of 200
+page 123 of 200
+page 124 of 200
+page 125 of 200
+page 126 of 200
+page 127 of 200
+page 128 of 200
+page 129 of 200
+page 130 of 200
+page 131 of 200
+page 132 of 200
+page 133 of 200
+page 134 of 200
+page 135 of 200
+page 136 of 200
+page 137 of 200
+page 138 of 200
+page 139 of 200
+page 140 of 200
+page 141 of 200
+page 142 of 200
+page 143 of 200
+page 144 of 200
+page 145 of 200
+page 146 of 200
+page 147 of 200
+page 148 of 200
+page 149 of 200
+page 150 of 200
+page 151 of 200
+page 152 of 200
+page 153 of 200
+page 154 of 200
+page 155 of 200
+page 156 of 200
+page 157 of 200
+page 158 of 200
+page 159 of 200
+page 160 of 200
+page 161 of 200
+page 162 of 200
+page 163 of 200
+page 164 of 200
+page 165 of 200
+page 166 of 200
+page 167 of 200
+page 168 of 200
+page 169 of 200
+page 170 of 200
+page 171 of 200
+page 172 of 200
+page 173 of 200
+page 174 of 200
+page 175 of 200
+page 176 of 200
+page 177 of 200
+page 178 of 200
+page 179 of 200
+page 180 of 200
+page 181 of 200
+page 182 of 200
+page 183 of 200
+page 184 of 200
+page 185 of 200
+page 186 of 200
+page 187 of 200
+page 188 of 200
+page 189 of 200
+page 190 of 200
+page 191 of 200
+page 192 of 200
+page 193 of 200
+page 194 of 200
+page 195 of 200
+page 196 of 200
+page 197 of 200
+page 198 of 200
+page 199 of 200
+page 200 of 200
diff --git a/qpdf/test_large_file.cc b/qpdf/test_large_file.cc
new file mode 100644
index 00000000..c1ee4060
--- /dev/null
+++ b/qpdf/test_large_file.cc
@@ -0,0 +1,368 @@
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFWriter.hh>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/QUtil.hh>
+#include <iostream>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+// Run "test_large_file write small a.pdf" to get a PDF file that you
+// can look at in a reader.
+
+// This program reads and writes specially crafted files for testing
+// large file support. In write mode, write a file of npages pages
+// where each page contains unique text and a unique image. The image
+// is a binary representation of the page number. The image contains
+// horizontal stripes with light stripes representing 1, dark stripes
+// representing 0, and the high bit on top. In read mode, read the
+// file back checking to make sure all the image data and page
+// contents are as expected.
+
+// Running this in small mode produces a small file that is easy to
+// look at in any viewer. Since there is no question about proper
+// functionality for small files, writing and reading the small file
+// allows the qpdf library to test this test program. Writing and
+// reading the large file then allows us to verify large file support
+// with confidence.
+
+// Program name shown in the usage message; set at the top of main().
+static char const* whoami = 0;
+
+// Height should be a multiple of 10
+// (set_parameters() computes height as nstripes * stripesize, and
+// get_pixel_color() maps one bit of the page number to each stripe).
+static int const nstripes = 10;
+// Value assigned to stripesize in "large" and "small" mode respectively.
+static int const stripesize_large = 500;
+static int const stripesize_small = 5;
+// Number of pages written by create_pdf() and expected by check_pdf().
+static int const npages = 200;
+
+// initialized in main
+// (via set_parameters(), before any PDF is created or checked)
+int stripesize = 0;
+int width = 0;
+int height = 0;
+// Scratch buffer holding one stripe of image data; allocated on first
+// use in ImageProvider::provideStreamData() and released at the end of
+// main().
+static unsigned char* buf = 0;
+
+// Return the gray level for stripe "row" of the image that encodes
+// the number n. Stripe 0 corresponds to bit (nstripes - 1) of n and
+// the last stripe to bit 0; a set bit yields the light value 0xc0, a
+// clear bit the dark value 0x40. Callers must pass a row in the range
+// [0, nstripes).
+static inline unsigned char get_pixel_color(int n, int row)
+{
+    int const bit = nstripes - 1 - row;
+    if (n & (1 << bit))
+    {
+        return '\xc0';
+    }
+    return '\x40';
+}
+
+// Pipeline sink that verifies the image data of page n. Every byte
+// written to it is compared with the stripe color expected at that
+// offset (see get_pixel_color). A mismatch, data beyond the end of the
+// image, or a total length other than width * height marks the image
+// as bad; finish() then reports the problem on standard output.
+class ImageChecker: public Pipeline
+{
+  public:
+    ImageChecker(int n);
+    virtual ~ImageChecker();
+    virtual void write(unsigned char* data, size_t len);
+    virtual void finish();
+
+  private:
+    int n;          // page number whose image is being checked
+    size_t offset;  // number of bytes seen so far
+    bool okay;      // false once any discrepancy has been detected
+};
+
+ImageChecker::ImageChecker(int n) :
+    Pipeline("image checker", 0),
+    n(n),
+    offset(0),
+    okay(true)
+{
+}
+
+ImageChecker::~ImageChecker()
+{
+}
+
+// Check len bytes of image data that start at the current offset.
+void
+ImageChecker::write(unsigned char* data, size_t len)
+{
+    for (size_t i = 0; i < len; ++i)
+    {
+        size_t const row = (this->offset + i) / width / stripesize;
+        if (row >= (size_t)nstripes)
+        {
+            // The stream holds more data than the image should
+            // contain. Stop here: passing an out-of-range row to
+            // get_pixel_color would shift by a negative count.
+            okay = false;
+            break;
+        }
+        unsigned char color = get_pixel_color(n, (int)row);
+        if (data[i] != color)
+        {
+            okay = false;
+        }
+    }
+    this->offset += len;
+}
+
+// Report the result once all data has been written.
+void
+ImageChecker::finish()
+{
+    // Truncated (or oversized) image data is an error even when every
+    // byte that was seen had the expected value.
+    if (this->offset != (size_t)width * (size_t)height)
+    {
+        okay = false;
+    }
+    if (! okay)
+    {
+        std::cout << "errors found checking image data for page " << n
+                  << std::endl;
+    }
+}
+
+// Stream data provider that generates the image for page n on demand:
+// nstripes horizontal stripes, each width * stripesize bytes of a
+// single gray value chosen by get_pixel_color.
+class ImageProvider: public QPDFObjectHandle::StreamDataProvider
+{
+  public:
+    ImageProvider(int n);
+    virtual ~ImageProvider();
+    virtual void provideStreamData(int objid, int generation,
+                                   Pipeline* pipeline);
+    size_t getLength() const;
+
+  private:
+    int n;
+};
+
+ImageProvider::ImageProvider(int n) :
+    n(n)
+{
+}
+
+ImageProvider::~ImageProvider()
+{
+}
+
+// Write the image for page n to pipeline, one stripe at a time, and
+// print a progress line. objid and generation are not used.
+void
+ImageProvider::provideStreamData(int objid, int generation,
+                                 Pipeline* pipeline)
+{
+    int const stripe_bytes = width * stripesize;
+    if (buf == 0)
+    {
+        // The file-level scratch buffer is shared by all providers.
+        buf = new unsigned char[stripe_bytes];
+    }
+    std::cout << "page " << n << " of " << npages << std::endl;
+    for (int stripe = 0; stripe != nstripes; ++stripe)
+    {
+        unsigned char const shade = get_pixel_color(n, stripe);
+        memset(buf, (int) shade, stripe_bytes);
+        pipeline->write(buf, stripe_bytes);
+    }
+    pipeline->finish();
+}
+
+// Number of bytes provideStreamData writes: one byte per pixel.
+size_t
+ImageProvider::getLength() const
+{
+    int const npixels = width * height;
+    return npixels;
+}
+
+// Print the command-line synopsis to standard error and terminate the
+// program with exit status 2. Does not return.
+void usage()
+{
+    std::cerr << "Usage: " << whoami;
+    std::cerr << " {read|write} {large|small} outfile" << std::endl;
+    exit(2);
+}
+
+// Select the image geometry for large or small mode. Images are
+// square: both width and height equal nstripes * stripesize.
+static void set_parameters(bool large)
+{
+    if (large)
+    {
+        stripesize = stripesize_large;
+    }
+    else
+    {
+        stripesize = stripesize_small;
+    }
+    height = nstripes * stripesize;
+    width = height;
+}
+
+// Return the content stream for page pageno: a line of text reading
+// "page <pageno>" followed by an operator sequence that draws the
+// image /Im1. The same text is generated when writing and when
+// checking, so it must be deterministic.
+std::string generate_page_contents(int pageno)
+{
+    std::string result("BT /F1 24 Tf 72 720 Td (page ");
+    result += QUtil::int_to_string(pageno);
+    result += ") Tj ET\n";
+    result += "q 468 0 0 468 72 72 cm /Im1 Do Q\n";
+    return result;
+}
+
+// Create a new stream object in pdf whose data is the content stream
+// text for page pageno (see generate_page_contents).
+static QPDFObjectHandle create_page_contents(QPDF& pdf, int pageno)
+{
+    std::string const text = generate_page_contents(pageno);
+    size_t const nbytes = text.length();
+    PointerHolder<Buffer> holder = new Buffer(nbytes);
+    // Copy the text into the buffer that backs the stream.
+    memcpy(holder->getBuffer(), (char*)text.c_str(), nbytes);
+    return QPDFObjectHandle::newStream(&pdf, holder);
+}
+
+// Shorthand for QPDFObjectHandle::newName.
+QPDFObjectHandle newName(std::string const& name)
+{
+    QPDFObjectHandle result = QPDFObjectHandle::newName(name);
+    return result;
+}
+
+// Shorthand for QPDFObjectHandle::newInteger.
+QPDFObjectHandle newInteger(int val)
+{
+    QPDFObjectHandle result = QPDFObjectHandle::newInteger(val);
+    return result;
+}
+
+// Build a PDF of npages pages from scratch and write it to filename.
+// Every page refers to the same font, procset and media box and gets
+// its own content stream plus its own image, whose data is produced
+// by an ImageProvider for that page number.
+static void create_pdf(char const* filename)
+{
+ QPDF pdf;
+
+ pdf.emptyPDF();
+
+ // Objects shared by all pages: font, procset, font resource
+ // dictionary and media box.
+ QPDFObjectHandle font = pdf.makeIndirectObject(
+ QPDFObjectHandle::newDictionary());
+ font.replaceKey("/Type", newName("/Font"));
+ font.replaceKey("/Subtype", newName("/Type1"));
+ font.replaceKey("/Name", newName("/F1"));
+ font.replaceKey("/BaseFont", newName("/Helvetica"));
+ font.replaceKey("/Encoding", newName("/WinAnsiEncoding"));
+
+ QPDFObjectHandle procset =
+ pdf.makeIndirectObject(QPDFObjectHandle::newArray());
+ procset.appendItem(newName("/PDF"));
+ procset.appendItem(newName("/Text"));
+ procset.appendItem(newName("/ImageC"));
+
+ QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
+ rfont.replaceKey("/F1", font);
+
+ QPDFObjectHandle mediabox = QPDFObjectHandle::newArray();
+ mediabox.appendItem(newInteger(0));
+ mediabox.appendItem(newInteger(0));
+ mediabox.appendItem(newInteger(612));
+ mediabox.appendItem(newInteger(792));
+
+ for (int pageno = 1; pageno <= npages; ++pageno)
+ {
+ // 8-bit grayscale image of width x height pixels. Its data is not
+ // stored here; it is supplied by the provider when requested.
+ QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
+ QPDFObjectHandle image_dict = image.getDict();
+ image_dict.replaceKey("/Type", newName("/XObject"));
+ image_dict.replaceKey("/Subtype", newName("/Image"));
+ image_dict.replaceKey("/ColorSpace", newName("/DeviceGray"));
+ image_dict.replaceKey("/BitsPerComponent", newInteger(8));
+ image_dict.replaceKey("/Width", newInteger(width));
+ image_dict.replaceKey("/Height", newInteger(height));
+ // Keep the raw pointer so getLength() can be called below; the
+ // PointerHolder is what gets handed to replaceStreamData.
+ ImageProvider* p = new ImageProvider(pageno);
+ PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
+ image.replaceStreamData(provider,
+ QPDFObjectHandle::newNull(),
+ QPDFObjectHandle::newNull(),
+ p->getLength());
+
+ // Per-page resources: the image is reachable as /Im1, the name
+ // used by generate_page_contents.
+ QPDFObjectHandle xobject = QPDFObjectHandle::newDictionary();
+ xobject.replaceKey("/Im1", image);
+
+ QPDFObjectHandle resources = QPDFObjectHandle::newDictionary();
+ resources.replaceKey("/ProcSet", procset);
+ resources.replaceKey("/Font", rfont);
+ resources.replaceKey("/XObject", xobject);
+
+ QPDFObjectHandle contents = create_page_contents(pdf, pageno);
+
+ QPDFObjectHandle page = pdf.makeIndirectObject(
+ QPDFObjectHandle::newDictionary());
+ page.replaceKey("/Type", newName("/Page"));
+ page.replaceKey("/MediaBox", mediabox);
+ page.replaceKey("/Contents", contents);
+ page.replaceKey("/Resources", resources);
+
+ // NOTE(review): second argument false presumably appends the page
+ // at the end -- confirm against QPDF::addPage.
+ pdf.addPage(page, false);
+ }
+
+ // NOTE(review): the modes below presumably keep stream data as
+ // provided and turn off object streams -- confirm against the
+ // QPDFWriter documentation.
+ QPDFWriter w(pdf, filename);
+ w.setStaticID(true); // for testing only
+ w.setStreamDataMode(qpdf_s_preserve);
+ w.setObjectStreamMode(qpdf_o_disable);
+ w.write();
+}
+
+// Compare the content stream of page with the text that
+// generate_page_contents produces for pageno. On a mismatch, print
+// both versions to standard output; nothing is printed on success.
+static void check_page_contents(int pageno, QPDFObjectHandle page)
+{
+    PointerHolder<Buffer> data = page.getKey("/Contents").getStreamData();
+    std::string const actual((char *)(data->getBuffer()), data->getSize());
+    std::string const expected = generate_page_contents(pageno);
+    if (actual == expected)
+    {
+        return;
+    }
+    std::cout << "page contents wrong for page " << pageno << std::endl;
+    std::cout << "ACTUAL: " << actual;
+    std::cout << "EXPECTED: " << expected;
+    std::cout << "----\n";
+}
+
+// Pipe the data of the page's /Im1 image through an ImageChecker for
+// pageno; the checker reports any discrepancy when it is finished.
+static void check_image(int pageno, QPDFObjectHandle page)
+{
+    QPDFObjectHandle resources = page.getKey("/Resources");
+    QPDFObjectHandle xobjects = resources.getKey("/XObject");
+    QPDFObjectHandle image = xobjects.getKey("/Im1");
+    ImageChecker checker(pageno);
+    image.pipeStreamData(&checker, true, false, false);
+}
+
+// Read filename and verify that it holds exactly npages pages whose
+// content streams and images are the ones create_pdf generates. A
+// progress line is printed for each page. A wrong page count is
+// reported on standard error and ends the program with status 2;
+// errors raised by the qpdf library propagate as exceptions.
+static void check_pdf(char const* filename)
+{
+    QPDF pdf;
+    pdf.processFile(filename);
+    std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
+    // Checked at run time rather than with assert(): assert is
+    // compiled out when NDEBUG is defined, and the loop below must
+    // never index past the end of pages.
+    if (pages.size() != (size_t)npages)
+    {
+        std::cerr << whoami << ": expected " << npages
+                  << " pages but found " << pages.size() << std::endl;
+        exit(2);
+    }
+    for (int i = 0; i < npages; ++i)
+    {
+        int pageno = i + 1;
+        std::cout << "page " << pageno << " of " << npages << std::endl;
+        check_page_contents(pageno, pages[i]);
+        check_image(pageno, pages[i]);
+    }
+}
+
+// Entry point: test_large_file {read|write} {large|small} file.
+// "write" creates the test PDF and "read" verifies it; the size
+// argument selects the image geometry. Any other argument, or a wrong
+// number of arguments, prints the usage message and exits with
+// status 2, as does an exception from the qpdf library.
+int main(int argc, char* argv[])
+{
+    whoami = QUtil::getWhoami(argv[0]);
+    QUtil::setLineBuf(stdout);
+
+    // For libtool's sake: drop a leading "lt-" from the program name.
+    if (strncmp(whoami, "lt-", 3) == 0)
+    {
+        whoami += 3;
+    }
+    if (argc != 4)
+    {
+        usage();
+    }
+    char const* const operation = argv[1];
+    char const* const size = argv[2];
+    char const* const filename = argv[3];
+
+    // usage() does not return, so each flag is valid past its check.
+    bool const op_write = (strcmp(operation, "write") == 0);
+    if (! (op_write || (strcmp(operation, "read") == 0)))
+    {
+        usage();
+    }
+
+    bool const size_large = (strcmp(size, "large") == 0);
+    if (! (size_large || (strcmp(size, "small") == 0)))
+    {
+        usage();
+    }
+
+    set_parameters(size_large);
+
+    try
+    {
+        if (! op_write)
+        {
+            check_pdf(filename);
+        }
+        else
+        {
+            create_pdf(filename);
+        }
+    }
+    catch (std::exception& e)
+    {
+        std::cerr << e.what() << std::endl;
+        exit(2);
+    }
+
+    // Release the stripe buffer that write mode may have allocated.
+    delete [] buf;
+
+    return 0;
+}