65 files changed, 12412 insertions, 0 deletions
diff --git a/libqpdf/BitStream.cc b/libqpdf/BitStream.cc
new file mode 100644
index 00000000..c6fda4e6
--- /dev/null
+++ b/libqpdf/BitStream.cc
@@ -0,0 +1,45 @@
+
+
+#include <qpdf/BitStream.hh>
+
+// See comments in bits.cc
+#define BITS_READ 1
+#include "bits.icc"
+
+// Wrap the nbytes bytes starting at p and rewind to the first bit.
+BitStream::BitStream(unsigned char const* p, int nbytes) :
+    start(p), nbytes(nbytes)
+{
+    this->reset();
+}
+
+// Rewind: point at the first byte again, with bit offset 7 next.
+void BitStream::reset()
+{
+    this->p = this->start;
+    this->bits_available = 8 * this->nbytes;
+    this->bit_offset = 7;
+}
+
+// Read nbits bits by handing this stream's cursor state to read_bits.
+unsigned long BitStream::getBits(int nbits)
+{
+    unsigned long const value = read_bits(p, bit_offset, bits_available, nbits);
+    return value;
+}
+
+// Drop the unread remainder of the current byte.  A cursor that is
+// already on a byte boundary (bit_offset == 7) is left alone.
+void BitStream::skipToNextByte()
+{
+    if (bit_offset == 7)
+	return;
+    unsigned int const remainder = bit_offset + 1;
+    if (bits_available < remainder)
+    {
+	throw QEXC::Internal("overflow skipping to next byte in bitstream");
+    }
+    bits_available -= remainder;
+    ++p;
+    bit_offset = 7;
+}
diff --git a/libqpdf/BitWriter.cc b/libqpdf/BitWriter.cc
new file mode 100644
index 00000000..f682aac5
--- /dev/null
+++ b/libqpdf/BitWriter.cc
@@ -0,0 +1,30 @@
+
+
+#include <qpdf/BitWriter.hh>
+
+// See comments in bits.cc
+#define BITS_WRITE 1
+#include "bits.icc"
+
+// Bind the writer to its output pipeline.  The pending byte starts out
+// as zero and bit_offset starts at 7.
+BitWriter::BitWriter(Pipeline* pl) :
+    pl(pl), ch(0), bit_offset(7)
+{
+}
+
+// Forward to the write_bits helper with this writer's pending-byte state.
+void BitWriter::writeBits(unsigned long val, int bits)
+{
+    write_bits(ch, bit_offset, val, bits, pl);
+}
+
+// Complete a partially filled byte by writing zero-valued bits up to
+// the byte boundary.  Nothing is written when bit_offset is 7 or more.
+void BitWriter::flush()
+{
+    if (bit_offset >= 7)
+	return;
+    int pad_bits = bit_offset + 1;
+    write_bits(ch, bit_offset, 0, pad_bits, pl);
+}
diff --git a/libqpdf/Buffer.cc b/libqpdf/Buffer.cc
new file mode 100644
index 00000000..3dde1f90
--- /dev/null
+++ b/libqpdf/Buffer.cc
@@ -0,0 +1,79 @@
+
+#include <qpdf/Buffer.hh>
+
+#include <string.h>
+
+// Default constructor: an empty buffer, i.e. size 0 and a null data
+// pointer.
+Buffer::Buffer()
+{ this->init(0); }
+
+// Construct a buffer of `size` bytes.  The bytes are not initialized
+// here, and no storage is allocated when size is 0.
+Buffer::Buffer(unsigned long size)
+{ this->init(size); }
+
+Buffer::Buffer(Buffer const& rhs) // deep copy of rhs
+{
+    this->init(0);	// start empty so copy() can safely destroy() first
+    this->copy(rhs);
+}
+
+// Assignment: deep-copy rhs; copy() itself ignores self-assignment.
+Buffer& Buffer::operator=(Buffer const& rhs)
+{
+    this->copy(rhs);
+    return *this;
+}
+
+// Destructor: releases the owned byte array, if any, by way of
+// destroy().
+Buffer::~Buffer()
+{ this->destroy(); }
+
+// Record size and allocate that many bytes (null for 0); frees nothing.
+void Buffer::init(unsigned long size)
+{
+    this->size = size;
+    this->buf = (size == 0) ? 0 : new unsigned char[size];
+}
+
+// Replace this buffer's contents with a private copy of rhs.  Copying
+// an object onto itself is ignored.
+void Buffer::copy(Buffer const& rhs)
+{
+    if (this == &rhs)
+    {
+	return;
+    }
+    destroy();
+    init(rhs.size);
+    if (this->size != 0)
+	memcpy(this->buf, rhs.buf, this->size);
+}
+
+// Free the byte array and return to the empty state (null, size 0).
+void Buffer::destroy()
+{
+    delete [] buf;
+    buf = 0;
+    size = 0;
+}
+
+// Number of bytes held by this buffer.
+unsigned long Buffer::getSize() const
+{
+    return size;
+}
+
+// Read-only access to the bytes; null when the buffer is empty.
+unsigned char const* Buffer::getBuffer() const
+{
+    return buf;
+}
+
+// Writable access to the bytes; null when the buffer is empty.
+unsigned char* Buffer::getBuffer()
+{
+    return buf;
+}
diff --git a/libqpdf/MD5.cc b/libqpdf/MD5.cc
new file mode 100644
index 00000000..ecdd8a33
--- /dev/null
+++ b/libqpdf/MD5.cc
@@ -0,0 +1,441 @@
+// This file implements a class for computation of MD5 checksums.
+// It is derived from the reference algorithm for MD5 as given in
+// RFC 1321.  The original copyright notice is as follows:
+//
+/////////////////////////////////////////////////////////////////////////
+//
+// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+// rights reserved.
+//
+// License to copy and use this software is granted provided that it
+// is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+// Algorithm" in all material mentioning or referencing this software
+// or this function.
+//
+// License is also granted to make and use derivative works provided
+// that such works are identified as "derived from the RSA Data
+// Security, Inc. MD5 Message-Digest Algorithm" in all material
+// mentioning or referencing the derived work.
+//
+// RSA Data Security, Inc. makes no representations concerning either
+// the merchantability of this software or the suitability of this
+// software for any particular purpose. It is provided "as is"
+// without express or implied warranty of any kind.
+//
+// These notices must be retained in any copies of any part of this
+// documentation and/or software.
+//
+/////////////////////////////////////////////////////////////////////////
+
+#include <qpdf/MD5.hh>
+
+#include <stdio.h>
+#include <memory.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+int const S11 = 7;   // Sxy: rotation count passed to ROTATE_LEFT in round x, position y of each group of four steps
+int const S12 = 12;
+int const S13 = 17;
+int const S14 = 22;
+int const S21 = 5;
+int const S22 = 9;
+int const S23 = 14;
+int const S24 = 20;
+int const S31 = 4;
+int const S32 = 11;
+int const S33 = 16;
+int const S34 = 23;
+int const S41 = 6;
+int const S42 = 10;
+int const S43 = 15;
+int const S44 = 21;
+
+static unsigned char PADDING[64] = {  // 0x80 followed by zeros; final() feeds a prefix of this to update()
+  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+// F, G, H and I are the basic MD5 bitwise functions used by FF, GG, HH and II.
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))  // NOTE(review): ~x here, and ~z in G and I, lack inner parentheses; FF..II pass parenthesized arguments
+#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~z)))
+
+// ROTATE_LEFT rotates x left n bits (written for a 32-bit wide x).
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.  Each
+// adds into a, rotates a left by s bits, then adds b.
+#define FF(a, b, c, d, x, s, ac) { \
+ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+#define GG(a, b, c, d, x, s, ac) { \
+ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+#define HH(a, b, c, d, x, s, ac) { \
+ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+#define II(a, b, c, d, x, s, ac) { \
+ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+
+// MD5 initialization.  Starts a fresh digest computation: clears any
+// old digest, zeroes the bit counter and loads the starting state.
+void MD5::init()
+{
+    finalized = false;
+    memset(digest_val, 0, sizeof(digest_val));
+
+    count[0] = 0;
+    count[1] = 0;
+    // Load magic initialization constants.
+    state[0] = 0x67452301; state[1] = 0xefcdab89;
+    state[2] = 0x98badcfe; state[3] = 0x10325476;
+}
+
+// MD5 block update operation.  Continues an MD5 message-digest
+// operation: adds inputLen bytes to the running bit count, transforms
+// every complete 64-byte block, and keeps any leftover bytes in buffer
+// for the next call.
+void MD5::update(unsigned char *input,
+		 unsigned int inputLen)
+{
+    unsigned int i, index, partLen;
+
+    // Number of bytes already held in buffer (byte count mod 64)
+    index = (unsigned int)((count[0] >> 3) & 0x3F);
+
+    // Update number of bits; a wrap of count[0] carries into count[1]
+    if ((count[0] += ((UINT4)inputLen << 3))
+	< ((UINT4)inputLen << 3))
+	count[1]++;
+    count[1] += ((UINT4)inputLen >> 29);
+
+    partLen = 64 - index;  // bytes needed to fill buffer to a full block
+
+    // Transform as many times as possible.
+
+    if (inputLen >= partLen) {
+	memcpy
+	    ((POINTER)&buffer[index], (POINTER)input, partLen);
+	transform(state, buffer);
+
+	for (i = partLen; i + 63 < inputLen; i += 64)
+	    transform(state, &input[i]);  // whole blocks straight from input
+
+	index = 0;
+    }
+    else
+	i = 0;
+
+    // Buffer remaining input
+    memcpy
+	((POINTER)&buffer[index], (POINTER)&input[i],
+	 inputLen-i);
+}
+
+// MD5 finalization.  Pads the message, appends its bit length, stores
+// the resulting digest in digest_val, and wipes the working context.
+// Calling it again after the digest has been produced does nothing.
+void MD5::final()
+{
+    if (! finalized)
+    {
+	// Capture the length in bits before padding changes count.
+	unsigned char length_bytes[8];
+	encode(length_bytes, count, 8);
+
+	// Pad out to 56 mod 64.
+	unsigned int const buffered =
+	    (unsigned int)((count[0] >> 3) & 0x3f);
+	unsigned int pad_len = 120 - buffered;
+	if (buffered < 56)
+	{
+	    pad_len = 56 - buffered;
+	}
+	update(PADDING, pad_len);
+
+	// Append length (before padding)
+	update(length_bytes, 8);
+	// Store state in digest_val
+	encode(digest_val, state, 16);
+
+	// Zeroize sensitive information.
+	memset(state, 0, sizeof(state));
+	memset(count, 0, sizeof(count));
+	memset(buffer, 0, sizeof(buffer));
+	finalized = true;
+    }
+}
+
+// MD5 basic transformation: folds one 64-byte block into the 4-word state.
+void MD5::transform(UINT4 state[4], unsigned char block[64])
+{
+    UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+    decode(x, block, 64);  // 64 bytes become the sixteen words x[0..15]
+
+    // Round 1: F function, words taken in order 0..15
+    FF (a, b, c, d, x[ 0], S11, 0xd76aa478); // 1
+    FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); // 2
+    FF (c, d, a, b, x[ 2], S13, 0x242070db); // 3
+    FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); // 4
+    FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); // 5
+    FF (d, a, b, c, x[ 5], S12, 0x4787c62a); // 6
+    FF (c, d, a, b, x[ 6], S13, 0xa8304613); // 7
+    FF (b, c, d, a, x[ 7], S14, 0xfd469501); // 8
+    FF (a, b, c, d, x[ 8], S11, 0x698098d8); // 9
+    FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); // 10
+    FF (c, d, a, b, x[10], S13, 0xffff5bb1); // 11
+    FF (b, c, d, a, x[11], S14, 0x895cd7be); // 12
+    FF (a, b, c, d, x[12], S11, 0x6b901122); // 13
+    FF (d, a, b, c, x[13], S12, 0xfd987193); // 14
+    FF (c, d, a, b, x[14], S13, 0xa679438e); // 15
+    FF (b, c, d, a, x[15], S14, 0x49b40821); // 16
+
+    // Round 2: G function
+    GG (a, b, c, d, x[ 1], S21, 0xf61e2562); // 17
+    GG (d, a, b, c, x[ 6], S22, 0xc040b340); // 18
+    GG (c, d, a, b, x[11], S23, 0x265e5a51); // 19
+    GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); // 20
+    GG (a, b, c, d, x[ 5], S21, 0xd62f105d); // 21
+    GG (d, a, b, c, x[10], S22,  0x2441453); // 22
+    GG (c, d, a, b, x[15], S23, 0xd8a1e681); // 23
+    GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); // 24
+    GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); // 25
+    GG (d, a, b, c, x[14], S22, 0xc33707d6); // 26
+    GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); // 27
+    GG (b, c, d, a, x[ 8], S24, 0x455a14ed); // 28
+    GG (a, b, c, d, x[13], S21, 0xa9e3e905); // 29
+    GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); // 30
+    GG (c, d, a, b, x[ 7], S23, 0x676f02d9); // 31
+    GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); // 32
+
+    // Round 3: H function
+    HH (a, b, c, d, x[ 5], S31, 0xfffa3942); // 33
+    HH (d, a, b, c, x[ 8], S32, 0x8771f681); // 34
+    HH (c, d, a, b, x[11], S33, 0x6d9d6122); // 35
+    HH (b, c, d, a, x[14], S34, 0xfde5380c); // 36
+    HH (a, b, c, d, x[ 1], S31, 0xa4beea44); // 37
+    HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); // 38
+    HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); // 39
+    HH (b, c, d, a, x[10], S34, 0xbebfbc70); // 40
+    HH (a, b, c, d, x[13], S31, 0x289b7ec6); // 41
+    HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); // 42
+    HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); // 43
+    HH (b, c, d, a, x[ 6], S34,  0x4881d05); // 44
+    HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); // 45
+    HH (d, a, b, c, x[12], S32, 0xe6db99e5); // 46
+    HH (c, d, a, b, x[15], S33, 0x1fa27cf8); // 47
+    HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); // 48
+
+    // Round 4: I function
+    II (a, b, c, d, x[ 0], S41, 0xf4292244); // 49
+    II (d, a, b, c, x[ 7], S42, 0x432aff97); // 50
+    II (c, d, a, b, x[14], S43, 0xab9423a7); // 51
+    II (b, c, d, a, x[ 5], S44, 0xfc93a039); // 52
+    II (a, b, c, d, x[12], S41, 0x655b59c3); // 53
+    II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); // 54
+    II (c, d, a, b, x[10], S43, 0xffeff47d); // 55
+    II (b, c, d, a, x[ 1], S44, 0x85845dd1); // 56
+    II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); // 57
+    II (d, a, b, c, x[15], S42, 0xfe2ce6e0); // 58
+    II (c, d, a, b, x[ 6], S43, 0xa3014314); // 59
+    II (b, c, d, a, x[13], S44, 0x4e0811a1); // 60
+    II (a, b, c, d, x[ 4], S41, 0xf7537e82); // 61
+    II (d, a, b, c, x[11], S42, 0xbd3af235); // 62
+    II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); // 63
+    II (b, c, d, a, x[ 9], S44, 0xeb86d391); // 64
+
+    state[0] += a;  // fold the working values back into the state
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+
+    // Zeroize sensitive information (the decoded copy of the block).
+
+    memset ((POINTER)x, 0, sizeof (x));
+}
+
+// Serialize 32-bit words into bytes, least significant byte first.  len
+// is the number of output bytes; assumes len is a multiple of 4.
+void MD5::encode(unsigned char *output, UINT4 *input, unsigned int len)
+{
+    for (unsigned int pos = 0; pos < len; pos += 4)
+    {
+	UINT4 const word = input[pos / 4];
+	for (unsigned int k = 0; k < 4; ++k)
+	{
+	    output[pos + k] = (unsigned char)((word >> (8 * k)) & 0xff);
+	}
+    }
+}
+
+// Assemble groups of four bytes, least significant first, into 32-bit
+// words.  len is the input byte count; assumes a multiple of 4.
+void MD5::decode(UINT4 *output, unsigned char *input, unsigned int len)
+{
+    for (unsigned int pos = 0; pos < len; pos += 4)
+    {
+	output[pos / 4] = ((UINT4)input[pos]) | (((UINT4)input[pos + 1]) << 8) |
+	    (((UINT4)input[pos + 2]) << 16) | (((UINT4)input[pos + 3]) << 24);
+    }
+}
+
+// Public functions
+
+// Construct a context that is ready to accept data; init() sets up all
+// of the state.
+MD5::MD5()
+{ this->init(); }
+
+// Discard any accumulated input and any computed digest, and start
+// over as if newly constructed.
+void MD5::reset()
+{ this->init(); }
+
+// Hash the bytes of the NUL-terminated string str (terminator not
+// included) and finalize the digest.
+void MD5::encodeString(char const* str)
+{
+    update((unsigned char *)str, (unsigned int)strlen(str));
+    final();
+}
+
+// Feed the bytes of a NUL-terminated string into the digest.  The
+// digest is not finalized, so more data may follow.
+void MD5::appendString(char const* input_string)
+{ update((unsigned char *)input_string, strlen(input_string)); }
+
+// Feed len bytes starting at data into the digest.  The digest is not
+// finalized, so more data may follow.
+void MD5::encodeDataIncrementally(char const* data, int len)
+{ update((unsigned char *)data, len); }
+
+// Hash the contents of the named file and finalize the digest.  When
+// up_to_size is non-negative, at most that many bytes are read.
+// Throws QEXC::System if the file cannot be opened or a read fails.
+void MD5::encodeFile(char const *filename, int up_to_size)
+    throw (QEXC::System)
+{
+    FILE* file = fopen(filename, "rb");
+    int const open_errno = errno;	// saved before anything can change it
+    if (file == NULL)
+    {
+	throw QEXC::System(
+	    std::string("MD5: can't open ") + filename, open_errno);
+    }
+
+    unsigned char buffer[1024];
+    int len;
+    int so_far = 0;
+    int to_try = 1024;
+    do
+    {
+	if ((up_to_size >= 0) && ((so_far + to_try) > up_to_size))
+	    to_try = up_to_size - so_far;
+	len = fread(buffer, 1, to_try, file);
+	if (len > 0)
+	{
+	    update(buffer, len);
+	    so_far += len;
+	}
+    } while ((len > 0) && ((up_to_size < 0) || (so_far < up_to_size)));
+
+    // Assume, perhaps incorrectly, that errno was set by the underlying
+    // read.  Save it before fclose(), which may overwrite errno.
+    bool const read_failed = (ferror(file) != 0);
+    int const read_errno = errno;
+    (void) fclose(file);
+    if (read_failed)
+    {
+	throw QEXC::System(
+	    std::string("MD5: read error on ") + filename, read_errno);
+    }
+    final();
+}
+
+void MD5::digest(Digest result) // finalizes first; final() is a no-op if already done
+{
+    this->final();
+    memcpy(result, this->digest_val, sizeof(this->digest_val));
+}
+
+// Finalize if needed, then print the digest as lowercase hex plus "\n".
+void MD5::print()
+{
+    final();
+    unsigned char const* const end = digest_val + 16;
+    for (unsigned char const* b = digest_val; b != end; ++b)
+    {
+	printf("%02x", *b);
+    }
+    printf("\n");
+}
+
+// Finalize if necessary and return the digest as a 32-character
+// lowercase hexadecimal string.
+std::string MD5::unparse()
+{
+    final();
+
+    // Two hex digits per digest byte plus sprintf's terminating NUL.
+    char hex[2 * 16 + 1];
+    for (unsigned int i = 0; i < 16; ++i)
+    {
+	sprintf(hex + (2 * i), "%02x", digest_val[i]);
+    }
+    return std::string(hex);
+}
+
+// Convenience helper: hex MD5 digest of the len bytes starting at buf.
+std::string MD5::getDataChecksum(char const* buf, int len)
+{
+    MD5 hasher;
+    hasher.encodeDataIncrementally(buf, len);
+    return hasher.unparse();
+}
+
+// Convenience helper: hex MD5 digest of a file; see encodeFile().
+std::string MD5::getFileChecksum(char const* filename, int up_to_size)
+{
+    MD5 hasher;
+    hasher.encodeFile(filename, up_to_size);
+    return hasher.unparse();
+}
+
+// True when the hex digest of the data equals checksum exactly.  The
+// comparison is case-sensitive.
+bool MD5::checkDataChecksum(char const* const checksum,
+			    char const* buf, int len)
+{
+    return (getDataChecksum(buf, len) == checksum);
+}
+
+// Return true when the hex MD5 digest of the file (limited to
+// up_to_size bytes when that is non-negative) equals checksum.  A file
+// that cannot be opened or read yields false instead of an exception.
+bool MD5::checkFileChecksum(char const* const checksum,
+			    char const* filename, int up_to_size)
+{
+    try
+    {
+	return (getFileChecksum(filename, up_to_size) == checksum);
+    }
+    catch (QEXC::System const&)
+    {
+	// Deliberate best effort.  Catch by reference: no copy, no slicing.
+	return false;
+    }
+}
diff --git a/libqpdf/Makefile b/libqpdf/Makefile
new file mode 100644
index 00000000..90899055
--- /dev/null
+++ b/libqpdf/Makefile
@@ -0,0 +1 @@
+include ../make/proxy.mk
diff --git a/libqpdf/PCRE.cc b/libqpdf/PCRE.cc
new file mode 100644
index 00000000..afa6e954
--- /dev/null
+++ b/libqpdf/PCRE.cc
@@ -0,0 +1,365 @@
+
+
+#include <qpdf/PCRE.hh>
+#include <qpdf/QUtil.hh>
+
+#include <iostream>
+
+// Build an exception whose text is "PCRE error: " followed by the
+// supplied message.
+PCRE::Exception::Exception(std::string const& message)
+{ setMessage("PCRE error: " + message); }
+
+// Thrown by Match::getOffsetLength() when a backreference is not
+// available.  The message text is fixed.
+PCRE::NoBackref::NoBackref() : Exception("no match")
+{}
+
+// A new Match starts in the "no match" state (nmatches == -1) with room
+// for nbackrefs backreferences.  subject is kept by pointer, not copied.
+PCRE::Match::Match(int nbackrefs, char const* subject)
+{ init(-1, nbackrefs, subject); }
+
+// Destructor: frees the offset vector that init() allocated, by way of
+// destroy().
+PCRE::Match::~Match()
+{ destroy(); }
+
+// Copy constructor: allocates a separate offset vector and copies the
+// offsets of rhs into it.  The subject pointer is shared with rhs.
+PCRE::Match::Match(Match const& rhs)
+{ copy(rhs); }
+
+// Assignment: drop the current offset vector and take a fresh copy of
+// rhs.  Self-assignment leaves the object untouched.
+PCRE::Match& PCRE::Match::operator=(Match const& rhs)
+{
+    if (this == &rhs)
+	return *this;
+    destroy();
+    copy(rhs);
+    return *this;
+}
+
+// Set every field and allocate the offset vector.  The vector holds
+// three ints for the whole match plus three per backreference.  Any
+// vector already held is not freed here.
+void PCRE::Match::init(int nmatches, int nbackrefs, char const* subject)
+{
+    this->subject = subject;
+    this->nbackrefs = nbackrefs;
+    this->nmatches = nmatches;
+    this->ovecsize = 3 * (1 + nbackrefs);
+    this->ovector = 0;
+    if (this->ovecsize != 0)
+	this->ovector = new int[this->ovecsize];
+}
+
+// Re-initialize from rhs and duplicate its offset vector element by
+// element.  Any vector held before the call is not freed here.
+void PCRE::Match::copy(Match const& rhs)
+{
+    init(rhs.nmatches, rhs.nbackrefs, rhs.subject);
+    for (int i = this->ovecsize - 1; i >= 0; --i)
+    {
+	ovector[i] = rhs.ovector[i];
+    }
+}
+
+// Free the offset vector.  The pointer itself is left as is, not reset.
+void PCRE::Match::destroy()
+{
+    delete [] ovector;
+}
+
+// A Match tests true when a match was recorded, that is, when nmatches
+// is not negative.
+PCRE::Match::operator bool()
+{ return ! (nmatches < 0); }
+
+
+// Return the text of backreference n (0 is the whole match).  For a
+// backreference that is unavailable, return "" if flags contains
+// gm_no_substring_returns_empty; otherwise NoBackref propagates.
+std::string PCRE::Match::getMatch(int n, int flags)
+    throw(QEXC::General, Exception)
+{
+    // This method used to be implemented in terms of
+    // pcre_get_substring, but that function gives you an empty string
+    // for an unmatched backreference that is in range.
+
+    int offset;
+    int length;
+    try
+    {
+	getOffsetLength(n, offset, length);
+    }
+    catch (NoBackref&)
+    {
+	if (flags & gm_no_substring_returns_empty)
+	{
+	    return "";
+	}
+	throw;
+    }
+    // Copy only the matched range.  Copying the whole subject first costs
+    // time on every call and stops at an embedded NUL byte.
+    return std::string(this->subject + offset, length);
+}
+
+// Report the start offset and length of backreference n.  Throws NoBackref
+// if nothing matched, n is out of range (negative too), or group n is unset.
+void PCRE::Match::getOffsetLength(int n, int& offset, int& length) throw(Exception)
+{
+    if ((n < 0) || (this->nmatches < 0) || (n > this->nmatches - 1) ||
+	(this->ovector[n * 2] == -1))
+    {
+	throw NoBackref();
+    }
+    offset = this->ovector[n * 2];
+    length = this->ovector[n * 2 + 1] - offset;
+}
+
+
+// Start offset of backreference n within the subject.  Throws NoBackref
+// (from getOffsetLength) when that backreference is not available.
+int PCRE::Match::getOffset(int n) throw(Exception)
+{
+    int start, unused_length;
+    getOffsetLength(n, start, unused_length);
+    return start;
+}
+
+
+// Length of the text matched by backreference n.  Throws NoBackref
+// (from getOffsetLength) when that backreference is not available.
+int PCRE::Match::getLength(int n) throw(Exception)
+{
+    int unused_start, span;
+    getOffsetLength(n, unused_start, span);
+    return span;
+}
+
+
+// The stored match count; negative means that no match was recorded.
+int PCRE::Match::nMatches() const
+{
+    return nmatches;
+}
+
+// Compile pattern with the given options.  On success the value from
+// pcre_info() is kept as the backreference count; on failure an
+// Exception describing the failing offset is thrown.
+PCRE::PCRE(char const* pattern, int options) throw (Exception)
+{
+    char const* error_text;
+    int error_offset;
+    this->code = pcre_compile(pattern, options, &error_text, &error_offset, 0);
+    if (this->code == 0)
+    {
+	std::string message("compilation of ");
+	message += pattern;
+	message += " failed at offset " + QUtil::int_to_string(error_offset);
+	message += std::string(": ") + error_text;
+	throw Exception(message);
+    }
+    this->nbackrefs = pcre_info(this->code, 0, 0);
+}
+
+// Destructor: hands the compiled pattern obtained from pcre_compile()
+// back to pcre_free().
+PCRE::~PCRE()
+{ pcre_free(this->code); }
+
+// Run the compiled pattern against subject.  size is the subject length
+// in bytes; -1 means "use strlen(subject)".  The returned Match tests
+// false when the pattern simply did not match.  Throws Exception for a
+// bad option or memory exhaustion, and QEXC::Internal for any other
+// pcre_exec failure.
+PCRE::Match
+PCRE::match(char const* subject, int options, int startoffset, int size)
+    throw (QEXC::General, Exception)
+{
+    if (size == -1)
+    {
+	size = strlen(subject);
+    }
+
+    Match result(this->nbackrefs, subject);
+    int const status = pcre_exec(this->code, 0, subject, size,
+				 startoffset, options,
+				 result.ovector, result.ovecsize);
+
+    if (status >= 0)
+    {
+	// Success: status is what nMatches() will report.
+	result.nmatches = status;
+	return result;
+    }
+
+    if (status == PCRE_ERROR_NOMATCH)
+    {
+	// Not an error; result keeps the no-match state it started with.
+	return result;
+    }
+
+    if (status == PCRE_ERROR_BADOPTION)
+    {
+	throw Exception("bad option passed to PCRE::match()");
+    }
+
+    if (status == PCRE_ERROR_NOMEMORY)
+    {
+	throw Exception("insufficient memory");
+    }
+
+    // PCRE_ERROR_NULL, PCRE_ERROR_BADMAGIC, PCRE_ERROR_UNKNOWN_NODE and
+    // any other status are all reported as internal errors.
+    throw QEXC::Internal("pcre_exec returned " +
+			 QUtil::int_to_string(status));
+}
+
+void
+PCRE::test(int n) // self-test: prints results to std::cout; n == 1 runs only the UTF-8 check
+{
+    try
+    {
+	if (n == 1) // compare matching with and without PCRE_UTF8
+	{
+	    static char const* utf8 = "abπdefq";
+	    PCRE u1("^([[:alpha:]]+)");
+	    PCRE u2("^([\\p{L}]+)", PCRE_UTF8);
+	    PCRE::Match m1 = u1.match(utf8);
+	    if (m1)
+	    {
+		std::cout << "no utf8: " << m1.getMatch(1) << std::endl;
+	    }
+	    PCRE::Match m2 = u2.match(utf8);
+	    if (m2)
+	    {
+		std::cout << "utf8: " << m2.getMatch(1) << std::endl;
+	    }
+	    return;
+	}
+
+	try
+	{
+	    PCRE pcre1("a**"); // presumably fails to compile; the error text is printed below
+	}
+	catch (Exception& e)
+	{
+	    std::cout << e.unparse() << std::endl;
+	}
+
+	PCRE pcre2("^([^\\s:]*)\\s*:\\s*(.*?)\\s*$");
+	PCRE::Match m2 = pcre2.match("key: value one two three ");
+	if (m2)
+	{
+	    std::cout << m2.nMatches() << std::endl;
+	    std::cout << m2.getMatch(0) << std::endl;
+	    std::cout << m2.getOffset(0) << std::endl;
+	    std::cout << m2.getLength(0) << std::endl;
+	    std::cout << m2.getMatch(1) << std::endl;
+	    std::cout << m2.getOffset(1) << std::endl;
+	    std::cout << m2.getLength(1) << std::endl;
+	    std::cout << m2.getMatch(2) << std::endl;
+	    std::cout << m2.getOffset(2) << std::endl;
+	    std::cout << m2.getLength(2) << std::endl;
+	    try
+	    {
+		std::cout << m2.getMatch(3) << std::endl; // the pattern has only two groups
+	    }
+	    catch (Exception& e)
+	    {
+		std::cout << e.unparse() << std::endl;
+	    }
+	    try
+	    {
+		std::cout << m2.getOffset(3) << std::endl;
+	    }
+	    catch (Exception& e)
+	    {
+		std::cout << e.unparse() << std::endl;
+	    }
+	}
+	PCRE pcre3("^(a+)(b+)?$");
+	PCRE::Match m3 = pcre3.match("aaa");
+	try
+	{
+	    if (m3)
+	    {
+		std::cout << m3.nMatches() << std::endl;
+		std::cout << m3.getMatch(0) << std::endl;
+		std::cout << m3.getMatch(1) << std::endl;
+		std::cout << "-"
+			  << m3.getMatch(
+			      2, Match::gm_no_substring_returns_empty)
+			  << "-" << std::endl;
+		std::cout << "hello" << std::endl;
+		std::cout << m3.getMatch(2) << std::endl; // should throw: the optional group has no text in "aaa"
+		std::cout << "can't see this" << std::endl;
+	    }
+	}
+	catch (Exception& e)
+	{
+	    std::cout << e.unparse() << std::endl;
+	}
+
+	// backref: 1   2 3        4      5
+	PCRE pcre4("^((?:(a(b)?)(?:,(c))?)|(c))?$");
+	static char const* candidates[] = {
+	    "qqqcqqq",		// no match
+	    "ab,c",		// backrefs: 0, 1, 2, 3, 4
+	    "ab",		// backrefs: 0, 1, 2, 3
+	    "a",		// backrefs: 0, 1, 2
+	    "a,c",		// backrefs: 0, 1, 2, 4
+	    "c",		// backrefs: 0, 1, 5
+	    "",			// backrefs: 0
+	    0
+	};
+	for (char const** p = candidates; *p; ++p) // the list ends at the null entry
+	{
+	    PCRE::Match m(pcre4.match(*p));
+	    if (m)
+	    {
+		int nmatches = m.nMatches();
+		for (int i = 0; i < nmatches; ++i)
+		{
+		    std::cout << *p << ": " << i << ": ";
+		    try
+		    {
+			std::string match = m.getMatch(i);
+			std::cout << match;
+		    }
+		    catch (NoBackref&)
+		    {
+			std::cout << "no backref (getMatch)";
+		    }
+		    std::cout << std::endl;
+
+		    std::cout << *p << ": " << i << ": ";
+		    try
+		    {
+			int offset;
+			int length;
+			m.getOffsetLength(i, offset, length);
+			std::cout << offset << ", " << length;
+		    }
+		    catch (NoBackref&)
+		    {
+			std::cout << "no backref (getOffsetLength)";
+		    }
+		    std:: cout << std::endl;
+		}
+	    }
+	    else
+	    {
+		std::cout << *p << ": no match" << std::endl;
+	    }
+	}
+    }
+    catch (QEXC::General& e) // anything not handled by the inner try blocks
+    {
+	std::cout << "unexpected exception: " << e.unparse() << std::endl;
+    }
+}
diff --git a/libqpdf/Pipeline.cc b/libqpdf/Pipeline.cc
new file mode 100644
index 00000000..17c0c8b2
--- /dev/null
+++ b/libqpdf/Pipeline.cc
@@ -0,0 +1,25 @@
+
+
+#include <qpdf/Pipeline.hh>
+
+// Record this stage's identifier (used in error messages) and the next
+// stage, which may be null.
+Pipeline::Pipeline(char const* identifier, Pipeline* next) :
+    identifier(identifier), next(next)
+{}
+
+// Destructor: no explicit cleanup; the next stage is left untouched.
+Pipeline::~Pipeline()
+{}
+
+// Return the next stage.  A missing next stage is an error unless the
+// caller passes allow_null, in which case null is returned.
+Pipeline* Pipeline::getNext(bool allow_null)
+{
+    if (allow_null || (this->next != 0))
+    {
+	return this->next;
+    }
+    throw Exception(this->identifier +
+		    ": Pipeline::getNext() called on pipeline with no next");
+}
diff --git a/libqpdf/Pl_ASCII85Decoder.cc b/libqpdf/Pl_ASCII85Decoder.cc
new file mode 100644
index 00000000..4ecdaf41
--- /dev/null
+++ b/libqpdf/Pl_ASCII85Decoder.cc
@@ -0,0 +1,131 @@
+#include <qpdf/Pl_ASCII85Decoder.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+#include <string.h>
+
+// Start with an empty group.  inbuf is pre-filled with 117, the highest
+// character value write() accepts, so a short group is padded with it.
+Pl_ASCII85Decoder::Pl_ASCII85Decoder(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next), pos(0), eod(0)
+{
+    memset(inbuf, 117, 5);
+}
+
+// Destructor: no explicit cleanup is performed here.
+Pl_ASCII85Decoder::~Pl_ASCII85Decoder()
+{}
+
+// Consume a chunk of ASCII85-encoded text.
+//
+// Whitespace is skipped.  'z' between groups stands for four zero bytes.
+// Each completed group of five digits (characters 33 through 117) is
+// decoded by flush().  "~>" marks the end of the data: whatever is
+// pending is flushed, and all later input, in this call or any later
+// one, is ignored.  Throws QEXC::General on a misplaced 'z', an
+// out-of-range character, or a '~' that is not followed by '>'.
+void
+Pl_ASCII85Decoder::write(unsigned char* buf, int len)
+{
+    // The loop condition also stops the scan once "~>" is complete.
+    for (int i = 0; (i < len) && (eod <= 1); ++i)
+    {
+	unsigned char const c = buf[i];
+
+	if (eod == 1)
+	{
+	    // A '~' has been seen, so only '>' may follow.
+	    if (c != '>')
+	    {
+		throw QEXC::General(
+		    "broken end-of-data sequence in base 85 data");
+	    }
+	    flush();
+	    eod = 2;
+	    continue;
+	}
+
+	// Normal decoding state: classify the character.
+	switch (c)
+	{
+	  case ' ':
+	  case '\f':
+	  case '\v':
+	  case '\t':
+	  case '\r':
+	  case '\n':
+	    // ignore whitespace
+	    QTC::TC("libtests", "Pl_ASCII85Decoder ignore space");
+	    break;
+
+	  case '~':
+	    // First half of the "~>" end-of-data marker.
+	    eod = 1;
+	    break;
+
+	  case 'z':
+	    // Shorthand for a group of four zero bytes; it is only
+	    // accepted when no digits are pending.
+	    if (pos != 0)
+	    {
+		throw QEXC::General(
+		    "unexpected z during base 85 decode");
+	    }
+	    QTC::TC("libtests", "Pl_ASCII85Decoder read z");
+	    getNext()->write((unsigned char*)"\000\000\000\000", 4);
+	    break;
+
+	  default:
+	    // Anything else must be a base-85 digit.
+	    if ((c < 33) || (c > 117))
+	    {
+		throw QEXC::General
+		    ("character out of range during base 85 decode");
+	    }
+	    // Store the digit and decode once five are held.
+	    this->inbuf[this->pos++] = c;
+	    if (pos == 5)
+	    {
+		flush();
+	    }
+	    break;
+	}
+    }
+}
+
+// Decode the pending group.  The five slots of inbuf (unused ones still
+// hold the padding value) form a base-85 number whose bytes, most
+// significant first, are the output; pos digits yield pos - 1 bytes.
+void Pl_ASCII85Decoder::flush()
+{
+    if (pos == 0)
+    {
+	QTC::TC("libtests", "Pl_ASCII85Decoder no-op flush");
+	return;
+    }
+    // Accumulate the five digits, most significant first.
+    unsigned long group = 0;
+    for (int i = 0; i < 5; ++i)
+    {
+	group = (group * 85) + (inbuf[i] - 33);
+    }
+
+    // Split the value into four bytes, most significant first.
+    unsigned char decoded[4];
+    decoded[3] = group & 0xff;
+    decoded[2] = (group >> 8) & 0xff;
+    decoded[1] = (group >> 16) & 0xff;
+    decoded[0] = (group >> 24) & 0xff;
+
+    QTC::TC("libtests", "Pl_ASCII85Decoder partial flush",
+	    (pos == 5) ? 0 : 1);
+    getNext()->write(decoded, pos - 1);
+    pos = 0;
+    memset(inbuf, 117, 5);
+}
+
+// Decode any group still pending, then pass finish() down the pipeline.
+void Pl_ASCII85Decoder::finish()
+{
+    this->flush();
+    this->getNext()->finish();
+}
diff --git a/libqpdf/Pl_ASCIIHexDecoder.cc b/libqpdf/Pl_ASCIIHexDecoder.cc
new file mode 100644
index 00000000..d1b4ef1c
--- /dev/null
+++ b/libqpdf/Pl_ASCIIHexDecoder.cc
@@ -0,0 +1,108 @@
+#include <qpdf/Pl_ASCIIHexDecoder.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+#include <string.h>
+#include <ctype.h>
+
+// Start with no pending digits.  inbuf is primed with "00" so that a
+// lone trailing digit is decoded as if it were followed by '0'.
+Pl_ASCIIHexDecoder::Pl_ASCIIHexDecoder(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next), pos(0), eod(false)
+{
+    strcpy(inbuf, "00");
+}
+
+// Destructor: no explicit cleanup is performed here.
+Pl_ASCIIHexDecoder::~Pl_ASCIIHexDecoder()
+{}
+
+// Consume a chunk of ASCII hex text.
+//
+// Whitespace is skipped and letters are accepted in either case.  Every
+// two hex digits produce one byte via flush().  '>' marks the end of
+// the data: a pending single digit is flushed, and all later input, in
+// this call or any later one, is ignored.  Throws QEXC::General on any
+// other character.
+void
+Pl_ASCIIHexDecoder::write(unsigned char* buf, int len)
+{
+    // The loop condition also stops the scan once '>' has been handled.
+    for (int i = 0; (i < len) && (! this->eod); ++i)
+    {
+	char const ch = toupper(buf[i]);
+	bool const is_digit = ((ch >= '0') && (ch <= '9'));
+	bool const is_alpha = ((ch >= 'A') && (ch <= 'F'));
+
+	switch (ch)
+	{
+	  case ' ':
+	  case '\f':
+	  case '\v':
+	  case '\t':
+	  case '\r':
+	  case '\n':
+	    // ignore whitespace
+	    QTC::TC("libtests", "Pl_ASCIIHexDecoder ignore space");
+	    break;
+
+	  case '>':
+	    // End of data: emit a pending half byte, if there is one.
+	    this->eod = true;
+	    flush();
+	    break;
+
+	  default:
+	    if (! (is_digit || is_alpha))
+	    {
+		// Report the offending character in its upper-cased form.
+		char t[2] = { ch, 0 };
+		throw QEXC::General(
+		    std::string("character out of range during base Hex decode: ") + t);
+	    }
+	    // A valid digit: store it and decode once two are held.
+	    this->inbuf[this->pos++] = ch;
+	    if (this->pos == 2)
+	    {
+		flush();
+	    }
+	    break;
+	}
+    }
+}
+
+// Emit the byte described by the two hex digits in inbuf.  When only
+// one digit is pending, the second slot still holds the '0' it was
+// primed with.  Does nothing if no digit is pending.
+void Pl_ASCIIHexDecoder::flush()
+{
+    if (pos == 0)
+    {
+	QTC::TC("libtests", "Pl_ASCIIHexDecoder no-op flush");
+	return;
+    }
+
+    // write() stores upper-cased digits, so 'A'..'F' map to 10..15.
+    int value = 0;
+    for (int i = 0; i < 2; ++i)
+    {
+	char const digit = inbuf[i];
+	int const nibble =
+	    (digit >= 'A') ? (digit - 'A' + 10) : (digit - '0');
+	value = (value << 4) + nibble;
+    }
+    unsigned char ch = (unsigned char)value;
+
+    QTC::TC("libtests", "Pl_ASCIIHexDecoder partial flush",
+	    (pos == 2) ? 0 : 1);
+    getNext()->write(&ch, 1);
+
+    pos = 0;
+    strcpy(inbuf, "00");
+}
+
+// Decode any digit still pending, then pass finish() down the pipeline.
+void Pl_ASCIIHexDecoder::finish()
+{
+    this->flush();
+    this->getNext()->finish();
+}
diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc
new file mode 100644
index 00000000..185cf636
--- /dev/null
+++ b/libqpdf/Pl_Buffer.cc
@@ -0,0 +1,67 @@
+
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/QEXC.hh>
+#include <assert.h>
+
+// Start with nothing buffered.  getBuffer() refuses to run until
+// finish() has been called.
+Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next), ready(false), total_size(0)
+{
+}
+
+// Destructor: no explicit cleanup is performed here.
+Pl_Buffer::~Pl_Buffer()
+{}
+
+// Keep a copy of the data and pass it to the next pipeline, if any.  An
+// empty write stores nothing: Buffer(0) is null and unfit for memcpy().
+void Pl_Buffer::write(unsigned char* buf, int len)
+{
+    if (len != 0)
+    {
+	Buffer* b = new Buffer(len);
+	memcpy(b->getBuffer(), buf, len);
+	this->data.push_back(b);
+	this->total_size += len;
+    }
+    this->ready = false;
+    if (getNext(true)) { getNext()->write(buf, len); }
+}
+
+// Mark the accumulated data as retrievable through getBuffer() and
+// forward finish() to the next pipeline when one is attached.
+void Pl_Buffer::finish()
+{
+    this->ready = true;
+    Pipeline* const downstream = getNext(true);
+    if (downstream != 0)
+	downstream->finish();
+}
+
+// Join all stored chunks into one new Buffer, leaving this pipeline empty
+// and not ready.  Returns a raw pointer from new (presumably the caller
+// deletes it).  Throws QEXC::Internal unless finish() ran after the last write.
+Buffer* Pl_Buffer::getBuffer()
+{
+    if (! this->ready)
+    {
+	throw QEXC::Internal("Pl_Buffer::getBuffer() called when not ready");
+    }
+
+    Buffer* b = new Buffer(this->total_size);
+    unsigned char* p = b->getBuffer();
+    for (; ! this->data.empty(); this->data.pop_front())
+    {
+	PointerHolder<Buffer> bph = this->data.front();
+	size_t const bytes = bph.getPointer()->getSize();
+	if (bytes == 0)
+	    continue;	// an empty chunk holds a null pointer: skip memcpy
+	memcpy(p, bph.getPointer()->getBuffer(), bytes);
+	p += bytes;
+	this->total_size -= bytes;
+    }
+    assert(this->total_size == 0);
+    this->ready = false;
+    return b;
+}
diff --git a/libqpdf/Pl_Count.cc b/libqpdf/Pl_Count.cc
new file mode 100644
index 00000000..8a361ad5
--- /dev/null
+++ b/libqpdf/Pl_Count.cc
@@ -0,0 +1,42 @@
+
+#include <qpdf/Pl_Count.hh>
+
+// Counting starts at zero, and last_char reads as NUL until some data
+// has been written.
+Pl_Count::Pl_Count(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next), count(0), last_char('\0')
+{
+}
+
+// Destructor: no explicit cleanup is performed here.
+Pl_Count::~Pl_Count()
+{}
+
+// Add len to the running byte count, pass the data downstream, and
+// remember its final byte.  Empty writes are dropped entirely.
+void Pl_Count::write(unsigned char* buf, int len)
+{
+    if (len == 0)
+	return;
+    this->count += len;
+    getNext()->write(buf, len);
+    this->last_char = buf[len - 1];
+}
+
+// Nothing to finalize for the count itself; forward finish() downstream.
+void Pl_Count::finish()
+{
+    this->getNext()->finish();
+}
+
+// Total number of bytes written through this pipeline so far.
+int Pl_Count::getCount() const
+{
+    return count;
+}
+
+// Final byte of the most recent non-empty write; NUL before any write.
+unsigned char Pl_Count::getLastChar() const
+{
+    return last_char;
+}
diff --git a/libqpdf/Pl_Discard.cc b/libqpdf/Pl_Discard.cc
new file mode 100644
index 00000000..1632ea23
--- /dev/null
+++ b/libqpdf/Pl_Discard.cc
@@ -0,0 +1,23 @@
+
+#include <qpdf/Pl_Discard.hh>
+
+// Exercised in md5 test suite
+
+// A pipeline named "discard" that has no next stage.
+Pl_Discard::Pl_Discard() : Pipeline("discard", 0)
+{
+}
+
+// Destructor: no explicit cleanup is performed here.
+Pl_Discard::~Pl_Discard()
+{}
+
+// Accept the data and drop it: nothing is stored and nothing is
+// forwarded.
+void Pl_Discard::write(unsigned char* buf, int len)
+{}
+
+// Nothing was kept and there is no next stage, so there is nothing to
+// finish.
+void Pl_Discard::finish()
+{}
diff --git a/libqpdf/Pl_Flate.cc b/libqpdf/Pl_Flate.cc
new file mode 100644
index 00000000..ba60c472
--- /dev/null
+++ b/libqpdf/Pl_Flate.cc
@@ -0,0 +1,198 @@
+
+#include <qpdf/Pl_Flate.hh>
+
+#include <qpdf/QUtil.hh>
+
+// Set up a zlib-based pipeline that deflates or inflates, according
+// to action, through an internal output buffer of out_bufsize bytes.
+// Only the z_stream fields are filled in here; the zlib init call
+// itself is made by handleData() the first time it runs.
+Pl_Flate::Pl_Flate(char const* identifier, Pipeline* next,
+		   action_e action, int out_bufsize) :
+    Pipeline(identifier, next),
+    out_bufsize(out_bufsize),
+    action(action),
+    initialized(false)
+{
+    this->outbuf = new unsigned char[out_bufsize];
+
+    // Null allocator hooks (zlib documents these as selecting its
+    // default allocation routines).
+    zstream.zalloc = (alloc_func)0;
+    zstream.zfree = (free_func)0;
+    zstream.opaque = (voidpf)0;
+    zstream.next_in = 0;
+    zstream.avail_in = 0;
+    zstream.next_out = this->outbuf;
+    zstream.avail_out = out_bufsize;
+}
+
+// Release resources held by the pipeline.  outbuf is still non-null
+// only if finish() did not run to completion.  In that case a zlib
+// stream that handleData() initialized may never have been ended, so
+// end it here to avoid leaking zlib's internal state.  The return
+// value is deliberately ignored: a destructor must not throw, and
+// ending an already-ended stream merely reports an error code.
+Pl_Flate::~Pl_Flate()
+{
+    if (this->outbuf)
+    {
+	if (this->initialized)
+	{
+	    if (this->action == a_deflate)
+	    {
+		(void) deflateEnd(&this->zstream);
+	    }
+	    else
+	    {
+		(void) inflateEnd(&this->zstream);
+	    }
+	}
+	delete [] this->outbuf;
+	this->outbuf = 0;
+    }
+}
+
+// Feed len bytes of data to zlib without forcing a flush.  Once
+// finish() has released the output buffer, further writes are
+// rejected with an Exception.
+void
+Pl_Flate::write(unsigned char* data, int len)
+{
+    bool const already_finished = (this->outbuf == 0);
+    if (already_finished)
+    {
+	throw Exception(
+	    this->identifier +
+	    ": Pl_Flate: write() called after finish() called");
+    }
+    handleData(data, len, Z_NO_FLUSH);
+}
+
+// Run len bytes of data through deflate() or inflate() with the
+// given zlib flush mode, writing each batch of produced output to
+// the next pipeline.  The zlib stream is initialized on the first
+// call.  Any zlib status other than Z_OK, Z_STREAM_END or
+// Z_BUF_ERROR is reported through checkError(), which throws.
+void
+Pl_Flate::handleData(unsigned char* data, int len, int flush)
+{
+    this->zstream.next_in = data;
+    this->zstream.avail_in = len;
+
+    if (! this->initialized)
+    {
+	// Lazy initialization: a pipeline that never sees data never
+	// calls into zlib's init routines.
+	int err = Z_OK;
+	if (this->action == a_deflate)
+	{
+	    err = deflateInit(&this->zstream, Z_DEFAULT_COMPRESSION);
+	}
+	else
+	{
+	    err = inflateInit(&this->zstream);
+	}
+	checkError("Init", err);
+	this->initialized = true;
+    }
+
+    int err = Z_OK;
+
+    // Keep calling zlib until it has consumed all input and had room
+    // left over in the output buffer, or reports end of stream.
+    bool done = false;
+    while (! done)
+    {
+	if (action == a_deflate)
+	{
+	    err = deflate(&this->zstream, flush);
+	}
+	else
+	{
+	    err = inflate(&this->zstream, flush);
+	}
+	switch (err)
+	{
+	  case Z_BUF_ERROR:
+	    // Probably shouldn't be able to happen, but possible as a
+	    // boundary condition: if the last call to inflate exactly
+	    // filled the output buffer, it's possible that the next
+	    // call to inflate could have nothing to do.
+	    done = true;
+	    break;
+
+	  case Z_STREAM_END:
+	    done = true;
+	    // fall through
+
+	  case Z_OK:
+	    {
+		if ((this->zstream.avail_in == 0) &&
+		    (this->zstream.avail_out > 0))
+		{
+		    // There is nothing left to read, and there was
+		    // sufficient buffer space to write everything we
+		    // needed, so we're done for now.
+		    done = true;
+		}
+		// Pass along whatever was produced and reset the
+		// output window to the start of outbuf.
+		uLong ready = (this->out_bufsize - this->zstream.avail_out);
+		if (ready > 0)
+		{
+		    this->getNext()->write(this->outbuf, ready);
+		    this->zstream.next_out = this->outbuf;
+		    this->zstream.avail_out = this->out_bufsize;
+		}
+	    }
+	    break;
+
+	  default:
+	    // checkError throws for any non-Z_OK code, so the loop
+	    // does not continue past this point.
+	    this->checkError("data", err);
+	    break;
+	}
+    }
+}
+
+// Flush any pending zlib output, end the zlib stream if it was ever
+// initialized, release the output buffer, and propagate finish() to
+// the next pipeline.  Calling finish() again after the buffer has
+// been released only forwards the call downstream.
+void
+Pl_Flate::finish()
+{
+    if (this->outbuf)
+    {
+	if (this->initialized)
+	{
+	    // Zero bytes of input are supplied; the one-byte dummy
+	    // just gives next_in a valid address.
+	    unsigned char dummy[1] = { '\0' };
+	    handleData(dummy, 0, Z_FINISH);
+
+	    int const status = (this->action == a_deflate
+				? deflateEnd(&this->zstream)
+				: inflateEnd(&this->zstream));
+	    checkError("End", status);
+	}
+
+	delete [] this->outbuf;
+	this->outbuf = 0;
+    }
+    this->getNext()->finish();
+}
+
+// Throw an Exception describing error_code unless it is Z_OK.  The
+// message has the form "<identifier>: <deflate|inflate>: <prefix>: "
+// followed by zlib's own message when the stream supplies one, or a
+// generic description of the error code otherwise.
+void
+Pl_Flate::checkError(char const* prefix, int error_code)
+{
+    if (error_code == Z_OK)
+    {
+	return;
+    }
+
+    std::string msg = this->identifier + ": ";
+    msg += (this->action == a_deflate ? "deflate" : "inflate");
+    msg += ": ";
+    msg += prefix;
+    msg += ": ";
+
+    if (this->zstream.msg)
+    {
+	msg += this->zstream.msg;
+	throw Exception(msg);
+    }
+
+    // No message from zlib itself; describe the code ourselves.
+    char const* description = 0;
+    switch (error_code)
+    {
+      case Z_ERRNO:
+	description = "zlib system error";
+	break;
+
+      case Z_STREAM_ERROR:
+	description = "zlib stream error";
+	break;
+
+      case Z_DATA_ERROR:
+	description = "zlib data error";
+	break;
+
+      case Z_MEM_ERROR:
+	description = "zlib memory error";
+	break;
+
+      case Z_BUF_ERROR:
+	description = "zlib buffer error";
+	break;
+
+      case Z_VERSION_ERROR:
+	description = "zlib version error";
+	break;
+
+      default:
+	break;
+    }
+
+    if (description)
+    {
+	msg += description;
+    }
+    else
+    {
+	msg += std::string("zlib unknown error (") +
+	    QUtil::int_to_string(error_code) + ")";
+    }
+
+    throw Exception(msg);
+}
diff --git a/libqpdf/Pl_LZWDecoder.cc b/libqpdf/Pl_LZWDecoder.cc
new file mode 100644
index 00000000..e85531e9
--- /dev/null
+++ b/libqpdf/Pl_LZWDecoder.cc
@@ -0,0 +1,229 @@
+#include <qpdf/Pl_LZWDecoder.hh>
+
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+#include <string.h>
+#include <assert.h>
+
+// Create an LZW decoding pipeline.  Decoding starts with 9-bit codes
+// and with last_code set to 256, the same value handleCode() treats
+// as the clear-table code.  early_code_change selects a
+// code_change_delta of 1 instead of 0, which shifts the points at
+// which handleCode() increases the code size.
+Pl_LZWDecoder::Pl_LZWDecoder(char const* identifier, Pipeline* next,
+			     bool early_code_change) :
+    Pipeline(identifier, next),
+    code_size(9),
+    next(0),
+    byte_pos(0),
+    bit_pos(0),
+    bits_available(0),
+    code_change_delta(early_code_change ? 1 : 0),
+    eod(false),
+    last_code(256)
+{
+    // buf is the three-byte window that write() fills and
+    // sendNextCode() reads.
+    memset(buf, 0, 3);
+}
+
+
+// Nothing to release explicitly.
+Pl_LZWDecoder::~Pl_LZWDecoder()
+{
+}
+
+// Accept len bytes of LZW-encoded input.  Each byte is stored in the
+// three-byte circular window; as soon as the window holds at least
+// code_size unread bits, one code is extracted and processed.
+void
+Pl_LZWDecoder::write(unsigned char* bytes, int len)
+{
+    for (int i = 0; i < len; ++i)
+    {
+	this->buf[this->next] = bytes[i];
+	this->next = (this->next + 1) % 3;
+	this->bits_available += 8;
+	if (this->bits_available < this->code_size)
+	{
+	    // Not enough bits for a whole code yet.
+	    continue;
+	}
+	sendNextCode();
+    }
+}
+
+// Propagate finish() to the next pipeline.  Any bits still waiting
+// in the window that do not form a complete code are not processed.
+void
+Pl_LZWDecoder::finish()
+{
+    getNext()->finish();
+}
+
+// Extract the next code_size-bit code from the three-byte circular
+// window, starting bit_pos bits into buf[byte_pos], advance the read
+// position past it, and hand the code to handleCode().  The code's
+// bits are taken most significant first and may span two or three of
+// the window's bytes.
+void
+Pl_LZWDecoder::sendNextCode()
+{
+    // Window indices of the bytes holding the high, middle and low
+    // parts of the code.
+    int high = this->byte_pos;
+    int med = (this->byte_pos + 1) % 3;
+    int low = (this->byte_pos + 2) % 3;
+
+    // Number of bits contributed by each byte.
+    int bits_from_high = 8 - this->bit_pos;
+    int bits_from_med = this->code_size - bits_from_high;
+    int bits_from_low = 0;
+    if (bits_from_med > 8)
+    {
+	bits_from_low = bits_from_med - 8;
+	bits_from_med = 8;
+    }
+    // high_mask keeps the low-order bits of the first byte; med_mask
+    // and low_mask keep the high-order bits of the following bytes.
+    int high_mask = (1 << bits_from_high) - 1;
+    int med_mask = 0xff - ((1 << (8 - bits_from_med)) - 1);
+    int low_mask = 0xff - ((1 << (8 - bits_from_low)) - 1);
+    int code = 0;
+    code += (this->buf[high] & high_mask) << bits_from_med;
+    code += ((this->buf[med] & med_mask) >> (8 - bits_from_med));
+    if (bits_from_low)
+    {
+	code <<= bits_from_low;
+	code += ((this->buf[low] & low_mask) >> (8 - bits_from_low));
+	this->byte_pos = low;
+	this->bit_pos = bits_from_low;
+    }
+    else
+    {
+	this->byte_pos = med;
+	this->bit_pos = bits_from_med;
+    }
+    // A byte that was consumed completely: move on to the start of
+    // the next byte in the window.
+    if (this->bit_pos == 8)
+    {
+	this->bit_pos = 0;
+	++this->byte_pos;
+	this->byte_pos %= 3;
+    }
+    this->bits_available -= this->code_size;
+
+    handleCode(code);
+}
+
+// Return the first byte of the sequence that code stands for.  Codes
+// below 256 stand for themselves; codes above 257 refer to table
+// entry (code - 258), which must already exist.  Codes 256 and 257
+// are not valid here.
+unsigned char
+Pl_LZWDecoder::getFirstChar(int code)
+{
+    if (code < 256)
+    {
+	return (unsigned char) code;
+    }
+
+    assert(code > 257);
+    unsigned int table_idx = code - 258;
+    assert(table_idx < table.size());
+    Buffer& entry = table[table_idx];
+    return entry.getBuffer()[0];
+}
+
+// Append a new table entry made of the byte sequence for last_code
+// followed by the single byte next.  last_code must be either a
+// literal (below 256) or the code of an existing table entry.
+void
+Pl_LZWDecoder::addToTable(unsigned char next)
+{
+    unsigned char literal[1];
+    unsigned char const* prefix = 0;
+    unsigned int prefix_len = 0;
+
+    if (this->last_code >= 256)
+    {
+	assert(this->last_code > 257);
+	unsigned int table_idx = this->last_code - 258;
+	assert(table_idx < table.size());
+	Buffer& previous = table[table_idx];
+	prefix = previous.getBuffer();
+	prefix_len = previous.getSize();
+    }
+    else
+    {
+	literal[0] = this->last_code;
+	prefix = literal;
+	prefix_len = 1;
+    }
+
+    // Build the entry completely before appending it; prefix may
+    // point into the table itself.
+    Buffer entry(1 + prefix_len);
+    unsigned char* entry_data = entry.getBuffer();
+    memcpy(entry_data, prefix, prefix_len);
+    entry_data[prefix_len] = next;
+    this->table.push_back(entry);
+}
+
+// Process one LZW code extracted by sendNextCode().  Code 256 clears
+// the table and resets the code size to 9 bits; code 257 marks end
+// of data, after which all further codes are ignored.  Any other
+// code first extends the table based on the previous code (unless
+// the previous code was 256) and then writes the decoded bytes to
+// the next pipeline.  Throws QEXC::General if the code cannot be
+// valid for the current table or if the table is full.
+void
+Pl_LZWDecoder::handleCode(int code)
+{
+    if (this->eod)
+    {
+	return;
+    }
+
+    if (code == 256)
+    {
+	if (! this->table.empty())
+	{
+	    QTC::TC("libtests", "Pl_LZWDecoder intermediate reset");
+	}
+	this->table.clear();
+	this->code_size = 9;
+    }
+    else if (code == 257)
+    {
+	this->eod = true;
+    }
+    else
+    {
+	if (this->last_code != 256)
+	{
+	    // Add to the table from last time.  New table entry would
+	    // be what we read last plus the first character of what
+	    // we're reading now.
+	    unsigned char next = '\0';
+	    unsigned int table_size = table.size();
+	    if (code < 256)
+	    {
+		// just read < 256; last time's next was code
+		next = code;
+	    }
+	    else if (code > 257)
+	    {
+		unsigned int idx = code - 258;
+		if (idx > table_size)
+		{
+		    throw QEXC::General("LZWDecoder: bad code received");
+		}
+		else if (idx == table_size)
+		{
+		    // The encoder would have just created this entry,
+		    // so the first character of this entry would have
+		    // been the same as the first character of the
+		    // last entry.
+		    QTC::TC("libtests", "Pl_LZWDecoder last was table size");
+		    next = getFirstChar(this->last_code);
+		}
+		else
+		{
+		    next = getFirstChar(code);
+		}
+	    }
+	    unsigned int last_idx = 258 + table_size;
+	    if (last_idx == 4095)
+	    {
+		throw QEXC::General("LZWDecoder: table full");
+	    }
+	    addToTable(next);
+	    // The code size grows when the index just filled, shifted
+	    // by code_change_delta, reaches the top of the current
+	    // code range.
+	    unsigned int change_idx = last_idx + code_change_delta;
+	    if ((change_idx == 511) ||
+		(change_idx == 1023) ||
+		(change_idx == 2047))
+	    {
+		++this->code_size;
+	    }
+	}
+
+	if (code < 256)
+	{
+	    unsigned char ch = (unsigned char) code;
+	    getNext()->write(&ch, 1);
+	}
+	else
+	{
+	    // The validation above is skipped when the previous code
+	    // was a clear code (which is also the initial state), so
+	    // check the index here as well.  Without this, a table
+	    // code arriving while the table is empty would read past
+	    // the end of the table.
+	    unsigned int idx = code - 258;
+	    if (idx >= table.size())
+	    {
+		throw QEXC::General("LZWDecoder: bad code received");
+	    }
+	    Buffer& b = table[idx];
+	    getNext()->write(b.getBuffer(), b.getSize());
+	}
+    }
+
+    this->last_code = code;
+}
diff --git a/libqpdf/Pl_MD5.cc b/libqpdf/Pl_MD5.cc
new file mode 100644
index 00000000..0a2711b8
--- /dev/null
+++ b/libqpdf/Pl_MD5.cc
@@ -0,0 +1,43 @@
+
+#include <qpdf/Pl_MD5.hh>
+
+#include <qpdf/QEXC.hh>
+
+// Create an MD5 pipeline that passes data through to next.  No
+// digest is in progress until the first write().
+Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next),
+    in_progress(false)
+{
+}
+
+// Nothing to release.
+Pl_MD5::~Pl_MD5()
+{
+}
+
+// Add buf to the running digest and pass it on unchanged to the
+// next pipeline.  The first write after construction or after
+// finish() resets the digest, so each write/finish cycle is digested
+// independently.
+void
+Pl_MD5::write(unsigned char* buf, int len)
+{
+    if (! this->in_progress)
+    {
+	this->md5.reset();
+	this->in_progress = true;
+    }
+    this->md5.encodeDataIncrementally((char*) buf, len);
+    this->getNext()->write(buf, len);
+}
+
+// Finish the next pipeline, then mark the digest as no longer in
+// progress so that getHexDigest() may be called.
+void
+Pl_MD5::finish()
+{
+    this->getNext()->finish();
+    this->in_progress = false;
+}
+
+// Return md5.unparse() for the data seen so far.  Throws
+// QEXC::General if called between a write() and the matching
+// finish().
+std::string
+Pl_MD5::getHexDigest()
+{
+    if (this->in_progress)
+    {
+	throw QEXC::General("digest requested for in-progress MD5 Pipeline");
+    }
+    return this->md5.unparse();
+}
diff --git a/libqpdf/Pl_PNGFilter.cc b/libqpdf/Pl_PNGFilter.cc
new file mode 100644
index 00000000..28b87c5e
--- /dev/null
+++ b/libqpdf/Pl_PNGFilter.cc
@@ -0,0 +1,146 @@
+
+#include <qpdf/Pl_PNGFilter.hh>
+#include <string.h>
+
+// Create a PNG row-filter pipeline that encodes or decodes rows of
+// the given number of columns (bytes).  Two row buffers of
+// columns + 1 bytes are allocated; the extra byte holds the filter
+// type on the decoding side.
+// NOTE(review): bytes_per_pixel is not used anywhere in this
+// constructor.
+Pl_PNGFilter::Pl_PNGFilter(char const* identifier, Pipeline* next,
+			   action_e action, unsigned int columns,
+			   unsigned int bytes_per_pixel) :
+    Pipeline(identifier, next),
+    action(action),
+    columns(columns),
+    cur_row(0),
+    prev_row(0),
+    buf1(0),
+    buf2(0),
+    pos(0)
+{
+    this->buf1 = new unsigned char[columns + 1];
+    this->buf2 = new unsigned char[columns + 1];
+    this->cur_row = buf1;
+
+    // number of bytes per incoming row
+    this->incoming = (action == a_encode ? columns : columns + 1);
+}
+
+// Release both row buffers.
+Pl_PNGFilter::~Pl_PNGFilter()
+{
+    delete [] buf1;
+    delete [] buf2;
+}
+
+// Accumulate incoming bytes into the current row.  Each time a row
+// is complete it is processed, the current and previous row buffers
+// trade places, and the new current row is zeroed.  Bytes left over
+// after the last complete row are kept for the next call.
+void
+Pl_PNGFilter::write(unsigned char* data, int len)
+{
+    int needed = this->incoming - this->pos;
+    unsigned int consumed = 0;
+    while (len >= needed)
+    {
+	// Complete the row in progress.
+	memcpy(this->cur_row + this->pos, data + consumed, needed);
+	consumed += needed;
+	len -= needed;
+
+	processRow();
+
+	// The old previous row becomes the new current row; before
+	// the first swap there is no previous row, so buf2 is used.
+	unsigned char* recycled = this->prev_row;
+	this->prev_row = this->cur_row;
+	if (recycled)
+	{
+	    this->cur_row = recycled;
+	}
+	else
+	{
+	    this->cur_row = this->buf2;
+	}
+	memset(this->cur_row, 0, this->columns + 1);
+	needed = this->incoming;
+	this->pos = 0;
+    }
+    if (len)
+    {
+	memcpy(this->cur_row + this->pos, data + consumed, len);
+    }
+    this->pos += len;
+}
+
+// Dispatch the completed row to the encoder or the decoder
+// according to the action chosen at construction.
+void
+Pl_PNGFilter::processRow()
+{
+    if (this->action != a_encode)
+    {
+	decodeRow();
+	return;
+    }
+    encodeRow();
+}
+
+// Decode the current row, whose first byte is the PNG filter type,
+// and write the columns data bytes that follow it to the next
+// pipeline.  Only filter types 0 (none) and 2 (up) are implemented;
+// types 1, 3 and 4 throw, and unknown types leave the data as is.
+// NOTE(review): the filter type is examined only when a previous
+// row exists, so a first row using types 1, 3 or 4 is passed through
+// unmodified instead of throwing -- confirm this is intended.
+void
+Pl_PNGFilter::decodeRow()
+{
+    int filter = (int) this->cur_row[0];
+    if (this->prev_row)
+    {
+	switch (filter)
+	{
+	  case 0:			// none
+	    break;
+
+	  case 1:			// sub
+	    throw Exception("sub filter not implemented");
+	    break;
+
+	  case 2:			// up
+	    // Index 0 is the filter byte, so data bytes are 1..columns.
+	    for (unsigned int i = 1; i <= this->columns; ++i)
+	    {
+		this->cur_row[i] += this->prev_row[i];
+	    }
+	    break;
+
+	  case 3:			// average
+	    throw Exception("average filter not implemented");
+	    break;
+
+	  case 4:			// Paeth
+	    throw Exception("Paeth filter not implemented");
+	    break;
+
+	  default:
+	    // ignore
+	    break;
+	}
+    }
+
+    getNext()->write(this->cur_row + 1, this->columns);
+}
+
+// Encode the current row and write it to the next pipeline.  Every
+// row is emitted with filter type 2 (up): a type byte followed by
+// the difference between each byte and the byte above it.  The
+// first row has nothing above it and is written unchanged.
+void
+Pl_PNGFilter::encodeRow()
+{
+    unsigned char out = 2;
+    getNext()->write(&out, 1);
+
+    if (! this->prev_row)
+    {
+	getNext()->write(this->cur_row, this->columns);
+	return;
+    }
+
+    for (unsigned int col = 0; col < this->columns; ++col)
+    {
+	out = this->cur_row[col] - this->prev_row[col];
+	getNext()->write(&out, 1);
+    }
+}
+
+// Process any partially filled row, reset the filter to its initial
+// state (no previous row, buf1 zeroed as the current row), and
+// propagate finish() to the next pipeline.
+void
+Pl_PNGFilter::finish()
+{
+    if (this->pos)
+    {
+	// write partial row
+	processRow();
+    }
+    this->prev_row = 0;
+    this->cur_row = buf1;
+    this->pos = 0;
+    memset(this->cur_row, 0, this->columns + 1);
+
+    getNext()->finish();
+}
diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc
new file mode 100644
index 00000000..63f0caaf
--- /dev/null
+++ b/libqpdf/Pl_QPDFTokenizer.cc
@@ -0,0 +1,179 @@
+
+#include <qpdf/Pl_QPDFTokenizer.hh>
+#include <qpdf/QPDF_String.hh>
+#include <qpdf/QPDF_Name.hh>
+
+// Create a tokenizing pipeline that writes its output to next.  All
+// state flags start out cleared.
+Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next),
+    newline_after_next_token(false),
+    just_wrote_nl(false),
+    last_char_was_cr(false),
+    unread_char(false),
+    char_to_unread('\0'),
+    pass_through(false)
+{
+}
+
+// Nothing to release.
+Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
+{
+}
+
+// Write len bytes from buf to the next pipeline and record whether
+// the last byte written was a newline.  Zero-length writes do
+// nothing.  The data is passed downstream from a private copy
+// because Pipeline::write() takes a non-const pointer; the copy is
+// held in a std::string so that it is released even if the
+// downstream write throws.
+void
+Pl_QPDFTokenizer::writeNext(char const* buf, int len)
+{
+    if (len)
+    {
+	std::string copy(buf, len);
+	getNext()->write((unsigned char*) &copy[0], len);
+	this->just_wrote_nl = (buf[len-1] == '\n');
+    }
+}
+
+// Write a token to the next pipeline.  String and name tokens are
+// re-generated from their parsed value via QPDF_String and
+// QPDF_Name; every other token type is written using its raw value.
+void
+Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
+{
+    std::string value = token.getRawValue();
+
+    switch (token.getType())
+    {
+      case QPDFTokenizer::tt_string:
+	value = QPDF_String(token.getValue()).unparse();
+	break;
+
+      case QPDFTokenizer::tt_name:
+	value = QPDF_Name(token.getValue()).unparse();
+	break;
+
+      default:
+	break;
+    }
+    writeNext(value.c_str(), value.length());
+}
+
+// Feed one character to the tokenizer.  When the character
+// completes a token, the token is written out, followed by a newline
+// if one was seen while the token was being read.  Characters that
+// do not complete a token are written only when they fall between
+// tokens, with "\r" and "\r\n" both turned into a single "\n".  Once
+// the word BI has been seen, everything else is copied through
+// untouched.
+void
+Pl_QPDFTokenizer::processChar(char ch)
+{
+    if (this->pass_through)
+    {
+	// We're not normalizing anymore -- just write this without
+	// looking at it.
+	writeNext(&ch, 1);
+	return;
+    }
+
+    tokenizer.presentCharacter(ch);
+    QPDFTokenizer::Token token;
+    if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
+    {
+	writeToken(token);
+	if (this->newline_after_next_token)
+	{
+	    writeNext("\n", 1);
+	    this->newline_after_next_token = false;
+	}
+	if ((token.getType() == QPDFTokenizer::tt_word) &&
+	    (token.getValue() == "BI"))
+	{
+	    // Uh oh.... we're not sophisticated enough to handle
+	    // inline images safely.  We'd have to set up all the
+	    // filters and pipe the image data through it until the
+	    // filtered output was the right size for an image of the
+	    // specified dimensions.  Then we'd either have to write
+	    // out raw image data or continue to write filtered data,
+	    // resuming normalization when we get to the end.
+	    // Instead, for now, we'll just turn off normalization for
+	    // the remainder of this stream.
+	    this->pass_through = true;
+	    if (this->unread_char)
+	    {
+		// The character that terminated "BI" must not be lost;
+		// write it here since it will not be processed again.
+		writeNext(&this->char_to_unread, 1);
+		this->unread_char = false;
+	    }
+	}
+    }
+    else
+    {
+	bool suppress = false;
+	if ((ch == '\n') && (this->last_char_was_cr))
+	{
+	    // Always ignore \n following \r
+	    suppress = true;
+	}
+
+	// Remember whether this was a carriage return and, if so,
+	// treat it as a newline from here on.
+	if ((this->last_char_was_cr = (ch == '\r')))
+	{
+	    ch = '\n';
+	}
+
+	if (this->tokenizer.betweenTokens())
+	{
+	    if (! suppress)
+	    {
+		writeNext(&ch, 1);
+	    }
+	}
+	else
+	{
+	    // Inside a token: defer the newline until the token has
+	    // been written.
+	    if (ch == '\n')
+	    {
+		this->newline_after_next_token = true;
+	    }
+	}
+    }
+}
+
+
+// If the tokenizer handed back a character to be re-read, process
+// it now.  Processing it must clear the unread flag; if the flag is
+// still set afterwards, QEXC::Internal is thrown.
+void
+Pl_QPDFTokenizer::checkUnread()
+{
+    if (! this->unread_char)
+    {
+	return;
+    }
+
+    processChar(this->char_to_unread);
+    if (this->unread_char)
+    {
+	throw QEXC::Internal("unread_char still true after processing "
+			     "unread character");
+    }
+}
+
+// Process each byte of buf in turn, re-processing any character the
+// tokenizer hands back before moving on to the next one.
+void
+Pl_QPDFTokenizer::write(unsigned char* buf, int len)
+{
+    checkUnread();
+    for (int i = 0; i < len; ++i)
+    {
+	processChar(buf[i]);
+	checkUnread();
+    }
+}
+
+// Signal end of input to the tokenizer and, unless in pass-through
+// mode, write out any final token together with a pending unread
+// character (a carriage return is written as a newline).  The
+// output is then made to end with a newline, and finish() is
+// propagated to the next pipeline.
+void
+Pl_QPDFTokenizer::finish()
+{
+    this->tokenizer.presentEOF();
+    if (! this->pass_through)
+    {
+	QPDFTokenizer::Token token;
+	if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
+	{
+	    writeToken(token);
+	    if (unread_char)
+	    {
+		if (this->char_to_unread == '\r')
+		{
+		    this->char_to_unread = '\n';
+		}
+		writeNext(&this->char_to_unread, 1);
+	    }
+	}
+    }
+    if (! this->just_wrote_nl)
+    {
+	writeNext("\n", 1);
+    }
+
+    getNext()->finish();
+}
diff --git a/libqpdf/Pl_RC4.cc b/libqpdf/Pl_RC4.cc
new file mode 100644
index 00000000..74e53c8b
--- /dev/null
+++ b/libqpdf/Pl_RC4.cc
@@ -0,0 +1,57 @@
+
+#include <qpdf/Pl_RC4.hh>
+
+#include <qpdf/QUtil.hh>
+
+// Create an RC4 pipeline keyed with key_len bytes of key_data.
+// Output is produced through an internal buffer of out_bufsize
+// bytes.
+Pl_RC4::Pl_RC4(char const* identifier, Pipeline* next,
+	       unsigned char const* key_data, int key_len,
+	       int out_bufsize) :
+    Pipeline(identifier, next),
+    out_bufsize(out_bufsize),
+    rc4(key_data, key_len)
+{
+    this->outbuf = new unsigned char[out_bufsize];
+}
+
+// Release the output buffer if finish() has not already done so.
+Pl_RC4::~Pl_RC4()
+{
+    if (this->outbuf)
+    {
+	delete [] this->outbuf;
+	this->outbuf = 0;
+    }
+}
+
+// Run len bytes of data through RC4 and write the result to the
+// next pipeline, at most out_bufsize bytes at a time.  Throws
+// Exception if called after finish() has released the output
+// buffer.
+void
+Pl_RC4::write(unsigned char* data, int len)
+{
+    if (this->outbuf == 0)
+    {
+	throw Exception(
+	    this->identifier +
+	    ": Pl_RC4: write() called after finish() called");
+    }
+
+    unsigned char* cursor = data;
+    for (int remaining = len; remaining > 0; )
+    {
+	// Never process more than fits in the output buffer.
+	int chunk = this->out_bufsize;
+	if (remaining < chunk)
+	{
+	    chunk = remaining;
+	}
+	remaining -= chunk;
+	rc4.process(cursor, chunk, outbuf);
+	cursor += chunk;
+	getNext()->write(outbuf, chunk);
+    }
+}
+
+// Release the output buffer, after which write() refuses further
+// data, and propagate finish() to the next pipeline.
+void
+Pl_RC4::finish()
+{
+    if (this->outbuf)
+    {
+	delete [] this->outbuf;
+	this->outbuf = 0;
+    }
+    this->getNext()->finish();
+}
diff --git a/libqpdf/Pl_StdioFile.cc b/libqpdf/Pl_StdioFile.cc
new file mode 100644
index 00000000..c0f42afd
--- /dev/null
+++ b/libqpdf/Pl_StdioFile.cc
@@ -0,0 +1,48 @@
+
+#include <qpdf/Pl_StdioFile.hh>
+
+#include <errno.h>
+
+// Create a pipeline with no next pipeline that writes to the stdio
+// stream f.
+Pl_StdioFile::Pl_StdioFile(char const* identifier, FILE* f) :
+    Pipeline(identifier, 0),
+    file(f)
+{
+}
+
+// The stream is not closed here.
+Pl_StdioFile::~Pl_StdioFile()
+{
+}
+
+// Write all len bytes of buf to the file, retrying after short
+// writes.  Throws QEXC::System carrying errno if fwrite makes no
+// progress.
+void
+Pl_StdioFile::write(unsigned char* buf, int len)
+{
+    while (len > 0)
+    {
+	size_t written = fwrite(buf, 1, len, this->file);
+	if (written == 0)
+	{
+	    throw QEXC::System(this->identifier + ": Pl_StdioFile::write",
+			       errno);
+	}
+	buf += written;
+	len -= written;
+    }
+}
+
+// Flush the stream.  Throws QEXC::Internal if fileno() reports that
+// the stream no longer has a valid descriptor.
+void
+Pl_StdioFile::finish()
+{
+    if (fileno(this->file) == -1)
+    {
+	throw QEXC::Internal(this->identifier +
+			     ": Pl_StdioFile::finish: stream already closed");
+    }
+    fflush(this->file);
+}
diff --git a/libqpdf/QEXC.cc b/libqpdf/QEXC.cc
new file mode 100644
index 00000000..c65afbb6
--- /dev/null
+++ b/libqpdf/QEXC.cc
@@ -0,0 +1,67 @@
+
+#include <qpdf/QEXC.hh>
+#include <string.h>
+#include <errno.h>
+
+// Construct an exception with an empty message.
+QEXC::Base::Base()
+{
+    // nothing needed
+}
+
+// Construct an exception carrying the given message.
+QEXC::Base::Base(std::string const& message) :
+    message(message)
+{
+    // nothing needed
+}
+
+// Return the stored message.  The reference refers to a member of
+// this object.
+std::string const&
+QEXC::Base::unparse() const
+{
+    return this->message;
+}
+
+// Replace the stored message.
+void
+QEXC::Base::setMessage(std::string const& message)
+{
+    this->message = message;
+}
+
+// Return the message produced by unparse() as a C string.
+const char*
+QEXC::Base::what() const throw()
+{
+    // Since unparse() returns a const string reference, its
+    // implementors must arrange to have it return a reference to a
+    // string that is not going to disappear.  It is therefore safe
+    // for us to return its c_str() pointer.
+    return this->unparse().c_str();
+}
+
+// Construct a general exception with an empty message.
+QEXC::General::General()
+{
+    // nothing needed
+}
+
+// Construct a general exception carrying the given message.
+QEXC::General::General(std::string const& message) :
+    Base(message)
+{
+    // nothing needed
+}
+
+// Construct an exception for a failed system call.  The message is
+// "<prefix>: <strerror(sys_errno)>", and the error number is kept
+// for getErrno().
+QEXC::System::System(std::string const& prefix, int sys_errno)
+{
+    // Note: using sys_errno in case errno is a macro.
+    this->sys_errno = sys_errno;
+    this->setMessage(prefix + ": " + strerror(sys_errno));
+}
+
+// Return the error number given at construction.
+int
+QEXC::System::getErrno() const
+{
+    return this->sys_errno;
+}
+
+// Construct an exception for an internal logic error; the message is
+// prefixed with "INTERNAL ERROR: ".
+QEXC::Internal::Internal(std::string const& message) :
+    Base("INTERNAL ERROR: " + message)
+{
+    // nothing needed
+}
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
new file mode 100644
index 00000000..6f51fa2c
--- /dev/null
+++ b/libqpdf/QPDF.cc
@@ -0,0 +1,1851 @@
+
+#include <qpdf/QPDF.hh>
+
+#include <vector>
+#include <map>
+#include <set>
+#include <string.h>
+#include <memory.h>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/PCRE.hh>
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QPDF_Null.hh>
+#include <qpdf/QPDF_Dictionary.hh>
+
+// Record offset as the value getLastOffset() will return.
+void
+QPDF::InputSource::setLastOffset(off_t offset)
+{
+    this->last_offset = offset;
+}
+
+// Return the most recently recorded offset.
+off_t
+QPDF::InputSource::getLastOffset() const
+{
+    return this->last_offset;
+}
+
+// Read one line from the current position and return it without its
+// terminator.  A line ends at any run of one or more \r or \n
+// characters, all of which are consumed; the first character after
+// the run is pushed back.  Reading also stops at end of input.  On
+// return, the last offset is the position where the line started.
+std::string
+QPDF::InputSource::readLine()
+{
+    off_t line_start = this->tell();
+    std::string line;
+    bool in_terminator = false;
+    char ch;
+    while (this->read(&ch, 1) != 0)
+    {
+	bool const is_newline = ((ch == '\012') || (ch == '\015'));
+	if (in_terminator)
+	{
+	    if (! is_newline)
+	    {
+		// First character of the next line: put it back.
+		this->unreadCh(ch);
+		break;
+	    }
+	}
+	else if (is_newline)
+	{
+	    in_terminator = true;
+	}
+	else
+	{
+	    line += ch;
+	}
+    }
+    // Each read() moved the last offset; report the start of the
+    // line instead.
+    this->last_offset = line_start;
+    return line;
+}
+
+// Create an input source with no file open.
+QPDF::FileInputSource::FileInputSource() :
+    file(0)
+{
+}
+
+// Close any file that is currently open and open filename for
+// binary reading.  The fopen result is passed through
+// QUtil::fopen_wrapper together with a description of the operation.
+void
+QPDF::FileInputSource::setFilename(char const* filename)
+{
+    destroy();
+    this->filename = filename;
+    this->file = QUtil::fopen_wrapper(std::string("open ") + this->filename,
+				      fopen(this->filename.c_str(), "rb"));
+}
+
+// Close the file if one is open.
+QPDF::FileInputSource::~FileInputSource()
+{
+    destroy();
+}
+
+// Close the file if one is open and forget the handle, so that
+// calling this again is harmless.
+void
+QPDF::FileInputSource::destroy()
+{
+    if (this->file)
+    {
+	fclose(this->file);
+	this->file = 0;
+    }
+}
+
+// Return the filename given to setFilename().
+std::string const&
+QPDF::FileInputSource::getName() const
+{
+    return this->filename;
+}
+
+// Return the current position in the file as reported by ftell().
+off_t
+QPDF::FileInputSource::tell()
+{
+    return ftell(this->file);
+}
+
+// Reposition the file with fseek(); offset and whence have their
+// fseek() meanings.  The fseek result is passed through
+// QUtil::os_wrapper together with a description of the operation.
+void
+QPDF::FileInputSource::seek(off_t offset, int whence)
+{
+    QUtil::os_wrapper(std::string("seek to ") + this->filename + ", offset " +
+		      QUtil::int_to_string(offset) + " (" +
+		      QUtil::int_to_string(whence) + ")",
+		      fseek(this->file, offset, whence));
+}
+
+// Reposition the file at its beginning using the C library's
+// rewind().
+void
+QPDF::FileInputSource::rewind()
+{
+    ::rewind(this->file);
+}
+
+// Read up to length bytes into buffer and return the number of
+// bytes actually read; 0 means end of file.  The last offset is set
+// to the file position before the read.  Throws QPDFExc if nothing
+// could be read because of a stream error.
+size_t
+QPDF::FileInputSource::read(char* buffer, int length)
+{
+    this->last_offset = ftell(this->file);
+    size_t nread = fread(buffer, 1, length, this->file);
+    bool const failed = ((nread == 0) && ferror(this->file));
+    if (! failed)
+    {
+	return nread;
+    }
+    throw QPDFExc(this->filename, this->last_offset,
+		  std::string("read ") +
+		  QUtil::int_to_string(length) + " bytes");
+}
+
+// Push ch back onto the stream with ungetc().  The cast to unsigned
+// char keeps a byte with the high bit set from being passed as a
+// negative value.  The ungetc result is passed through
+// QUtil::os_wrapper.
+void
+QPDF::FileInputSource::unreadCh(char ch)
+{
+    QUtil::os_wrapper(this->filename + ": unread character",
+		      ungetc((unsigned char)ch, this->file));
+}
+
+// Create an input source that reads from buf, starting at offset 0.
+// description is what getName() returns.
+QPDF::BufferInputSource::BufferInputSource(std::string const& description,
+					   Buffer* buf) :
+    description(description),
+    buf(buf),
+    cur_offset(0)
+{
+}
+
+// The buffer is not deleted here.
+QPDF::BufferInputSource::~BufferInputSource()
+{
+}
+
+// Return the description given at construction.
+std::string const&
+QPDF::BufferInputSource::getName() const
+{
+    return this->description;
+}
+
+// Return the current read position within the buffer.
+off_t
+QPDF::BufferInputSource::tell()
+{
+    return this->cur_offset;
+}
+
+// Reposition the read offset.  whence has the same meaning as for
+// fseek(): SEEK_SET is relative to the start of the buffer, SEEK_CUR
+// to the current position, and SEEK_END to the end of the buffer, so
+// a negative offset with SEEK_END addresses bytes before the end.
+// This matches FileInputSource::seek, which delegates to fseek().
+// Throws QEXC::Internal for any other value of whence.
+void
+QPDF::BufferInputSource::seek(off_t offset, int whence)
+{
+    switch (whence)
+    {
+      case SEEK_SET:
+	this->cur_offset = offset;
+	break;
+
+      case SEEK_END:
+	// The offset is added to the size, not subtracted from it.
+	// The cast keeps the arithmetic signed.
+	this->cur_offset = (off_t) this->buf->getSize() + offset;
+	break;
+
+      case SEEK_CUR:
+	this->cur_offset += offset;
+	break;
+
+      default:
+	throw QEXC::Internal("invalid argument to BufferInputSource::seek");
+	break;
+    }
+}
+
+// Move the read position back to the start of the buffer.
+void
+QPDF::BufferInputSource::rewind()
+{
+    this->cur_offset = 0;
+}
+
+// Copy up to length bytes from the current position into buffer,
+// advance the position, and return the number of bytes copied.  At
+// or past the end of the buffer nothing is copied, 0 is returned,
+// and the last offset is set to the buffer size; otherwise the last
+// offset is the position before the read.
+size_t
+QPDF::BufferInputSource::read(char* buffer, int length)
+{
+    off_t end_pos = this->buf->getSize();
+    bool const at_end = (this->cur_offset >= end_pos);
+    this->last_offset = (at_end ? end_pos : this->cur_offset);
+    if (at_end)
+    {
+	return 0;
+    }
+
+    // Never read past the end of the buffer.
+    int available = (int)(end_pos - this->cur_offset);
+    size_t len = (length < available ? length : available);
+    memcpy(buffer, buf->getBuffer() + this->cur_offset, len);
+    this->cur_offset += len;
+    return len;
+}
+
+// Step the read position back by one byte, unless it is already at
+// the start.  ch itself is ignored: the buffer contents are not
+// modified.
+void
+QPDF::BufferInputSource::unreadCh(char ch)
+{
+    if (this->cur_offset > 0)
+    {
+	--this->cur_offset;
+    }
+}
+
+// Construct an object number / generation number pair.
+// NOTE(review): the default arguments appear here on the
+// out-of-class definition; confirm the in-class declaration does not
+// supply them as well.
+QPDF::ObjGen::ObjGen(int o = 0, int g = 0) :
+    obj(o),
+    gen(g)
+{
+}
+
+// Order pairs by object number first and by generation number among
+// equal object numbers.
+bool
+QPDF::ObjGen::ObjGen::operator<(ObjGen const& rhs) const
+{
+    if (this->obj != rhs.obj)
+    {
+	return (this->obj < rhs.obj);
+    }
+    return (this->gen < rhs.gen);
+}
+
+// Create a QPDF object with no file loaded.  Recovery of damaged
+// files is enabled by default; warnings are not suppressed and xref
+// streams are not ignored.
+QPDF::QPDF() :
+    encrypted(false),
+    encryption_initialized(false),
+    ignore_xref_streams(false),
+    suppress_warnings(false),
+    attempt_recovery(true),
+    cached_key_objid(0),
+    cached_key_generation(0),
+    first_xref_item_offset(0),
+    uncompressed_after_compressed(false)
+{
+}
+
+// Nothing to release explicitly.
+QPDF::~QPDF()
+{
+}
+
+// Open filename, remember the password to use for encrypted files,
+// and parse the file.  A null password is treated the same as an
+// empty one rather than being assigned to the stored password
+// directly.
+void
+QPDF::processFile(char const* filename, char const* password)
+{
+    this->file.setFilename(filename);
+    this->provided_password = (password ? password : "");
+    parse();
+}
+
+// Set the ignore_xref_streams flag.
+void
+QPDF::setIgnoreXRefStreams(bool val)
+{
+    this->ignore_xref_streams = val;
+}
+
+// When val is true, warn() still records warnings but no longer
+// prints them to standard error.
+void
+QPDF::setSuppressWarnings(bool val)
+{
+    this->suppress_warnings = val;
+}
+
+// When val is false, parse() rethrows errors found while locating
+// or reading the cross-reference data instead of attempting to
+// reconstruct it.
+void
+QPDF::setAttemptRecovery(bool val)
+{
+    this->attempt_recovery = val;
+}
+
+// Return all warnings recorded since the last call and clear the
+// internal list.
+std::vector<std::string>
+QPDF::getWarnings()
+{
+    std::vector<std::string> collected;
+    // Trading contents with an empty vector hands the warnings to
+    // the caller and leaves the member empty in one step.
+    collected.swap(this->warnings);
+    return collected;
+}
+
+// Parse the basic structure of the input file: validate the header,
+// locate the startxref pointer near the end of the file, and read
+// the cross-reference data it points to.  If locating or reading the
+// cross-reference data throws QPDFExc, the table is reconstructed by
+// scanning the file when recovery is enabled; otherwise the original
+// exception is rethrown.  Encryption is initialized last.  Throws
+// QPDFExc if the file does not start with a PDF header.
+void
+QPDF::parse()
+{
+    static PCRE header_re("^%PDF-(1.\\d+)\\b");
+    static PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
+
+    std::string line = this->file.readLine();
+    PCRE::Match m1 = header_re.match(line.c_str());
+    if (m1)
+    {
+	this->pdf_version = m1.getMatch(1);
+	if (atof(this->pdf_version.c_str()) < 1.2)
+	{
+	    this->tokenizer.allowPoundAnywhereInName();
+	}
+    }
+    else
+    {
+	QTC::TC("qpdf", "QPDF not a pdf file");
+	throw QPDFExc(this->file.getName(), 0, "not a PDF file");
+    }
+
+    // PDF spec says %%EOF must be found within the last 1024 bytes of
+    // the file.  We add an extra 30 characters to leave room for the
+    // startxref stuff.
+    static int const tbuf_size = 1054;
+    this->file.seek(0, SEEK_END);
+    if (this->file.tell() > tbuf_size)
+    {
+	this->file.seek(-tbuf_size, SEEK_END);
+    }
+    else
+    {
+	this->file.rewind();
+    }
+    // A vector owns the scratch buffer so that it is released with
+    // the matching array deallocation on every exit path.  The extra
+    // byte is never overwritten, so the buffer is always
+    // null-terminated.
+    std::vector<char> tbuf(tbuf_size + 1, '\0');
+    char* buf = &tbuf[0];
+    this->file.read(buf, tbuf_size);
+
+    // Since buf may contain null characters, we can't do a regexp
+    // search on buf directly.  Find the last occurrence within buf
+    // where the regexp matches.
+    char* p = buf;
+    char const* candidate = "";
+    while ((p = (char*)memchr(p, 's', tbuf_size - (p - buf))) != 0)
+    {
+	if (eof_re.match(p))
+	{
+	    candidate = p;
+	}
+	++p;
+    }
+
+    try
+    {
+	PCRE::Match m2 = eof_re.match(candidate);
+	if (! m2)
+	{
+	    QTC::TC("qpdf", "QPDF can't find startxref");
+	    throw QPDFExc(this->file.getName() + ": can't find startxref");
+	}
+	off_t xref_offset = atoi(m2.getMatch(1).c_str());
+	read_xref(xref_offset);
+    }
+    catch (QPDFExc& e)
+    {
+	if (this->attempt_recovery)
+	{
+	    reconstruct_xref(e);
+	    QTC::TC("qpdf", "QPDF reconstructed xref table");
+	}
+	else
+	{
+	    // Rethrow the exception object that was caught rather
+	    // than a copy of it.
+	    throw;
+	}
+    }
+
+    initializeEncryption();
+}
+
+// Record the text of e as a warning and, unless warnings are
+// suppressed, print it to standard error prefixed with "WARNING: ".
+void
+QPDF::warn(QPDFExc const& e)
+{
+    this->warnings.push_back(e.unparse());
+    if (this->suppress_warnings)
+    {
+	return;
+    }
+    std::cerr << "WARNING: " << this->warnings.back() << std::endl;
+}
+
+// Store obj as the trailer, but only if no trailer has been set
+// yet; later calls leave the first trailer in place.
+void
+QPDF::setTrailer(QPDFObjectHandle obj)
+{
+    if (! this->trailer.isInitialized())
+    {
+	this->trailer = obj;
+    }
+}
+
+// Rebuild the cross-reference table by scanning the whole file line
+// by line.  Each line of the form "N G obj" found outside an object
+// is inserted into the xref table at the line's offset, and the
+// first "trailer" line followed by a dictionary supplies the trailer
+// if none is set yet.  e is the error that triggered recovery; it is
+// reported as a warning.  Throws QPDFExc if no trailer dictionary
+// is available after the scan.
+void
+QPDF::reconstruct_xref(QPDFExc& e)
+{
+    static PCRE obj_re("^(\\d+) (\\d+) obj\\b");
+    static PCRE endobj_re("^endobj\\b");
+    static PCRE trailer_re("^trailer\\b");
+
+    warn(QPDFExc(this->file.getName(), 0, "file is damaged"));
+    warn(e);
+    warn(QPDFExc("Attempting to reconstruct cross-reference table"));
+
+    // Find the end of the file, then scan from the beginning.
+    this->file.seek(0, SEEK_END);
+    off_t eof = this->file.tell();
+    this->file.seek(0, SEEK_SET);
+    bool in_obj = false;
+    while (this->file.tell() < eof)
+    {
+	std::string line = this->file.readLine();
+	if (in_obj)
+	{
+	    // Skip everything up to the matching endobj.
+	    if (endobj_re.match(line.c_str()))
+	    {
+		in_obj = false;
+	    }
+	}
+	else
+	{
+	    PCRE::Match m = obj_re.match(line.c_str());
+	    if (m)
+	    {
+		in_obj = true;
+		int obj = atoi(m.getMatch(1).c_str());
+		int gen = atoi(m.getMatch(2).c_str());
+		// readLine() leaves the last offset at the start of
+		// the line it just read.
+		int offset = this->file.getLastOffset();
+		insertXrefEntry(obj, 1, offset, gen);
+	    }
+	    else if ((! this->trailer.isInitialized()) &&
+		     trailer_re.match(line.c_str()))
+	    {
+		// read "trailer"
+		this->file.seek(this->file.getLastOffset(), SEEK_SET);
+		readToken(&this->file);
+		QPDFObjectHandle t = readObject(&this->file, 0, 0, false);
+		if (! t.isDictionary())
+		{
+		    // Oh well.  It was worth a try.
+		}
+		else
+		{
+		    setTrailer(t);
+		}
+	    }
+	}
+    }
+
+    if (! this->trailer.isInitialized())
+    {
+	// We could check the last encountered object to see if it was
+	// an xref stream.  If so, we could try to get the trailer
+	// from there.  This may make it possible to recover files
+	// with bad startxref pointers even when they have object
+	// streams.
+
+	// NOTE(review): "damanged" in the message below is a typo for
+	// "damaged"; it is left alone here because it is runtime
+	// output.
+	throw QPDFExc(this->file.getName() + ": unable to find trailer "
+		      "dictionary while recovering damanged file");
+    }
+
+    // We could iterate through the objects looking for streams and
+    // try to find objects inside of them, but it's probably not worth
+    // the trouble.  Acrobat can't recover files with any errors in an
+    // xref stream, and this would be a real long shot anyway.  If we
+    // wanted to do anything that involved looking at stream contents,
+    // we'd also have to call initializeEncryption() here.  It's safe
+    // to call it more than once.
+}
+
+// Read the chain of cross-reference sections starting at
+// xref_offset.  Each section is read as a traditional table if it
+// starts with a line containing just "xref" and as a
+// cross-reference stream otherwise; the readers return the offset of
+// the next section, and 0 ends the chain.  Afterwards the trailer's
+// /Size is checked against the highest object number seen, and a
+// warning is issued if they disagree.  Throws QPDFExc if the chain
+// of sections refers back to an offset that was already read.
+void
+QPDF::read_xref(off_t xref_offset)
+{
+    // Offsets already processed.  A damaged or malicious file can
+    // make the sections point at each other in a cycle, which would
+    // otherwise keep this loop running forever.
+    std::set<off_t> visited;
+    while (xref_offset)
+    {
+	if (! visited.insert(xref_offset).second)
+	{
+	    throw QPDFExc(this->file.getName(), xref_offset,
+			  "loop detected following xref tables");
+	}
+	this->file.seek(xref_offset, SEEK_SET);
+	std::string line = this->file.readLine();
+	if (line == "xref")
+	{
+	    xref_offset = read_xrefTable(this->file.tell());
+	}
+	else
+	{
+	    xref_offset = read_xrefStream(xref_offset);
+	}
+    }
+
+    int size = this->trailer.getKey("/Size").getIntValue();
+    // The xref table can be empty if every entry read was a deleted
+    // object, so don't look at its last element unconditionally.
+    // Starting from -1 makes "max_obj + 1" come out as zero objects
+    // when nothing at all was seen.
+    int max_obj = -1;
+    if (! this->xref_table.empty())
+    {
+	max_obj = (*(this->xref_table.rbegin())).first.obj;
+    }
+    if (! this->deleted_objects.empty())
+    {
+	max_obj = std::max(max_obj, *(this->deleted_objects.rbegin()));
+    }
+    if (size != max_obj + 1)
+    {
+	QTC::TC("qpdf", "QPDF xref size mismatch");
+	warn(QPDFExc(this->file.getName() +
+		     std::string(": reported number of objects (") +
+		     QUtil::int_to_string(size) +
+		     ") inconsistent with actual number of objects (" +
+		     QUtil::int_to_string(max_obj + 1) + ")"));
+    }
+
+    // We no longer need the deleted_objects table, so go ahead and
+    // clear it out to make sure we never depend on its being set.
+    this->deleted_objects.clear();
+}
+
+// Parse a traditional (non-stream) cross-reference table and the
+// trailer dictionary that follows it.  Parsing begins at xref_offset;
+// read_xref() passes the file position just after the line
+// containing "xref".
+//
+// The table is a sequence of subsections.  Each subsection starts
+// with a header line "first count" and is followed by count
+// fixed-width 20-byte entries.  In-use ('n') entries are registered
+// immediately through insertXrefEntry(); free ('f') entries are
+// collected and registered only after any /XRefStm referenced by
+// the trailer has been read.
+//
+// If no trailer has been set yet, the trailer found here becomes
+// this->trailer and must contain an integer /Size.
+//
+// Returns the value of the trailer's /Prev key, or 0 if the trailer
+// has no /Prev.  Throws QPDFExc on any syntax problem.
+int
+QPDF::read_xrefTable(off_t xref_offset)
+{
+    // Subsection header: first object number and number of entries.
+    static PCRE xref_first_re("^(\\d+)\\s+(\\d+)");
+    // A single entry: a 10-digit field, a 5-digit field, 'f' or 'n',
+    // and two trailing characters drawn from space, CR and LF.
+    static PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
+
+    // Free entries seen in this table, held back until the /XRefStm
+    // (if any) has been processed.
+    std::vector<ObjGen> deleted_items;
+
+    this->file.seek(xref_offset, SEEK_SET);
+    bool done = false;
+    while (! done)
+    {
+	// Each pass through this loop handles one subsection.
+	std::string line = this->file.readLine();
+	PCRE::Match m1 = xref_first_re.match(line.c_str());
+	if (! m1)
+	{
+	    QTC::TC("qpdf", "QPDF invalid xref");
+	    throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+			  "xref syntax invalid");
+	}
+	int obj = atoi(m1.getMatch(1).c_str());
+	int num = atoi(m1.getMatch(2).c_str());
+	static int const xref_entry_size = 20;
+	// One extra byte so the buffer is always null-terminated when
+	// handed to the regular expression matcher.
+	char xref_entry[xref_entry_size + 1];
+	for (int i = obj; i < obj + num; ++i)
+	{
+	    if (i == 0)
+	    {
+		// This is needed by checkLinearization()
+		this->first_xref_item_offset = this->file.tell();
+	    }
+	    // Zero the buffer first so that a short read leaves a
+	    // string that fails to match rather than stale data.
+	    memset(xref_entry, 0, sizeof(xref_entry));
+	    this->file.read(xref_entry, xref_entry_size);
+	    PCRE::Match m2 = xref_entry_re.match(xref_entry);
+	    if (! m2)
+	    {
+		QTC::TC("qpdf", "QPDF invalid xref entry");
+		throw QPDFExc(
+		    this->file.getName(), this->file.getLastOffset(),
+		    "invalid xref entry (obj=" +
+		    QUtil::int_to_string(i) + ")");
+	    }
+
+	    // f1 is the 10-digit field and f2 the 5-digit field; for
+	    // 'n' entries they are passed on as the second and third
+	    // fields of a type 1 xref entry.
+	    int f1 = atoi(m2.getMatch(1).c_str());
+	    int f2 = atoi(m2.getMatch(2).c_str());
+	    char type = m2.getMatch(3)[0];
+	    if (type == 'f')
+	    {
+		// Save deleted items until after we've checked the
+		// XRefStm, if any.
+		deleted_items.push_back(ObjGen(i, f2));
+	    }
+	    else
+	    {
+		insertXrefEntry(i, 1, f1, f2);
+	    }
+	}
+	// Peek at the next token: "trailer" ends the table; anything
+	// else is taken to be the header of another subsection, so
+	// rewind and let the next iteration read it as a line.
+	off_t pos = this->file.tell();
+	QPDFTokenizer::Token t = readToken(&this->file);
+	if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))
+	{
+	    done = true;
+	}
+	else
+	{
+	    this->file.seek(pos, SEEK_SET);
+	}
+    }
+
+    // Set offset to previous xref table if any
+    QPDFObjectHandle cur_trailer = readObject(&this->file, 0, 0, false);
+    if (! cur_trailer.isDictionary())
+    {
+	QTC::TC("qpdf", "QPDF missing trailer");
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "expected trailer dictionary");
+    }
+
+    // Only the first trailer encountered is kept as the document
+    // trailer; trailers of sections read afterwards are used only
+    // for their /XRefStm and /Prev keys.
+    if (! this->trailer.isInitialized())
+    {
+	setTrailer(cur_trailer);
+
+	if (! this->trailer.hasKey("/Size"))
+	{
+	    QTC::TC("qpdf", "QPDF trailer lacks size");
+	    throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+			  "trailer dictionary lacks /Size key");
+	}
+	if (! this->trailer.getKey("/Size").isInteger())
+	{
+	    QTC::TC("qpdf", "QPDF trailer size not integer");
+	    throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+			  "/Size key in trailer dictionary is not "
+			  "an integer");
+	}
+    }
+
+    if (cur_trailer.hasKey("/XRefStm"))
+    {
+	if (this->ignore_xref_streams)
+	{
+	    QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
+	}
+	else
+	{
+	    if (cur_trailer.getKey("/XRefStm").isInteger())
+	    {
+		// Read the xref stream but disregard any return value
+		// -- we'll use our trailer's /Prev key instead of the
+		// xref stream's.
+		(void) read_xrefStream(
+		    cur_trailer.getKey("/XRefStm").getIntValue());
+	    }
+	    else
+	    {
+		throw QPDFExc(this->file.getName(), xref_offset,
+			      "invalid /XRefStm");
+	    }
+	}
+    }
+
+    // Handle any deleted items now that we've read the /XRefStm.
+    for (std::vector<ObjGen>::iterator iter = deleted_items.begin();
+	 iter != deleted_items.end(); ++iter)
+    {
+	ObjGen& og = *iter;
+	insertXrefEntry(og.obj, 0, 0, og.gen);
+    }
+
+    // The return value tells read_xref() where to continue: the
+    // /Prev offset, or 0 to stop.
+    if (cur_trailer.hasKey("/Prev"))
+    {
+	if (! cur_trailer.getKey("/Prev").isInteger())
+	{
+	    QTC::TC("qpdf", "QPDF trailer prev not integer");
+	    throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+			  "/Prev key in trailer dictionary is not "
+			  "an integer");
+	}
+	QTC::TC("qpdf", "QPDF prev key in trailer dictionary");
+	xref_offset = cur_trailer.getKey("/Prev").getIntValue();
+    }
+    else
+    {
+	xref_offset = 0;
+    }
+
+    return xref_offset;
+}
+
+// Read the cross-reference stream expected at xref_offset and
+// return whatever processXRefStream() returns for it.  Throws
+// QPDFExc ("xref not found") if xref streams are being ignored or
+// if the object at xref_offset cannot be read as a stream whose
+// /Type is /XRef.
+int
+QPDF::read_xrefStream(off_t xref_offset)
+{
+    if (! this->ignore_xref_streams)
+    {
+	int xobj;
+	int xgen;
+	QPDFObjectHandle candidate;
+	try
+	{
+	    candidate = readObjectAtOffset(xref_offset, 0, 0, xobj, xgen);
+	}
+	catch (QPDFExc&)
+	{
+	    // A read failure leaves candidate uninitialized, which is
+	    // reported as "xref not found" below.
+	}
+
+	bool is_xref_stream =
+	    (candidate.isInitialized() && candidate.isStream());
+	if (is_xref_stream)
+	{
+	    QPDFObjectHandle type = candidate.getDict().getKey("/Type");
+	    is_xref_stream = (type.isName() && (type.getName() == "/XRef"));
+	}
+	if (is_xref_stream)
+	{
+	    QTC::TC("qpdf", "QPDF found xref stream");
+	    return processXRefStream(xref_offset, candidate);
+	}
+    }
+
+    QTC::TC("qpdf", "QPDF can't find xref");
+    throw QPDFExc(this->file.getName(), xref_offset, "xref not found");
+}
+
+// Decode the cross-reference stream xref_obj, which was read from
+// xref_offset, and register each of its entries through
+// insertXrefEntry().
+//
+// The stream dictionary must contain /W (an array whose first three
+// items are integers giving the byte width of each of the three
+// fields of an entry) and an integer /Size.  /Index, if present,
+// must be an array of (first object number, count) pairs; when it
+// is absent a single range (0, /Size) is used.  The decoded stream
+// data must be exactly (sum of widths) * (total count) bytes long.
+//
+// All widths and counts are validated before any stream data is
+// decoded so that a malformed dictionary cannot cause reads outside
+// of the stream buffer or integer overflow in the size arithmetic.
+//
+// If no trailer has been set yet, the stream dictionary becomes
+// this->trailer.  Returns the dictionary's /Prev value, or 0 if it
+// has none.  Throws QPDFExc on malformed input.
+int
+QPDF::processXRefStream(off_t xref_offset, QPDFObjectHandle& xref_obj)
+{
+    QPDFObjectHandle dict = xref_obj.getDict();
+    QPDFObjectHandle W_obj = dict.getKey("/W");
+    QPDFObjectHandle Index_obj = dict.getKey("/Index");
+    if (! (W_obj.isArray() &&
+	   (W_obj.getArrayNItems() >= 3) &&
+	   W_obj.getArrayItem(0).isInteger() &&
+	   W_obj.getArrayItem(1).isInteger() &&
+	   W_obj.getArrayItem(2).isInteger() &&
+	   dict.getKey("/Size").isInteger() &&
+	   (Index_obj.isArray() || Index_obj.isNull())))
+    {
+	throw QPDFExc(this->file.getName(), xref_offset,
+		      "Cross-reference stream does not have"
+		      " proper /W and /Index keys");
+    }
+
+    // indx holds alternating (first object number, count) values.
+    std::vector<int> indx;
+    if (Index_obj.isArray())
+    {
+	int n_index = Index_obj.getArrayNItems();
+	if ((n_index % 2) || (n_index < 2))
+	{
+	    throw QPDFExc(this->file.getName(), xref_offset,
+			  "Cross-reference stream's /Index has an"
+			  " invalid number of values");
+	}
+	for (int i = 0; i < n_index; ++i)
+	{
+	    QPDFObjectHandle item = Index_obj.getArrayItem(i);
+	    if (! item.isInteger())
+	    {
+		throw QPDFExc(this->file.getName(), xref_offset,
+			      "Cross-reference stream's /Index's item " +
+			      QUtil::int_to_string(i) +
+			      " is not an integer");
+	    }
+	    int value = item.getIntValue();
+	    if (value < 0)
+	    {
+		// Neither an object number nor a count can be negative.
+		throw QPDFExc(this->file.getName(), xref_offset,
+			      "Cross-reference stream's /Index's item " +
+			      QUtil::int_to_string(i) +
+			      " is negative");
+	    }
+	    indx.push_back(value);
+	}
+	QTC::TC("qpdf", "QPDF xref /Index is array",
+		n_index == 2 ? 0 : 1);
+    }
+    else
+    {
+	QTC::TC("qpdf", "QPDF xref /Index is null");
+	int size = dict.getKey("/Size").getIntValue();
+	if (size < 0)
+	{
+	    throw QPDFExc(this->file.getName(), xref_offset,
+			  "Cross-reference stream's /Size is negative");
+	}
+	indx.push_back(0);
+	indx.push_back(size);
+    }
+
+    // Field widths.  A negative width would make the decoding loop
+    // below consume a different number of bytes than entry_size
+    // accounts for, and very large widths would overflow the sum.
+    // No field can meaningfully be wider than 8 bytes.
+    static int const max_field_width = 8;
+    int W[3];
+    int entry_size = 0;
+    for (int i = 0; i < 3; ++i)
+    {
+	W[i] = W_obj.getArrayItem(i).getIntValue();
+	if ((W[i] < 0) || (W[i] > max_field_width))
+	{
+	    throw QPDFExc(this->file.getName(), xref_offset,
+			  "Cross-reference stream's /W contains an"
+			  " invalid field width");
+	}
+	entry_size += W[i];
+    }
+    if (entry_size == 0)
+    {
+	throw QPDFExc(this->file.getName(), xref_offset,
+		      "Cross-reference stream's /W indicates"
+		      " entry size of 0");
+    }
+
+    PointerHolder<Buffer> bp = xref_obj.getStreamData();
+    int actual_size = bp.getPointer()->getSize();
+
+    // Add up the counts.  The running total is never allowed to
+    // exceed the number of entries the stream data can hold, which
+    // keeps both the sum and the product below from overflowing.
+    int max_entries = actual_size / entry_size;
+    int num_entries = 0;
+    for (unsigned int i = 1; i < indx.size(); i += 2)
+    {
+	if (indx[i] > max_entries - num_entries)
+	{
+	    throw QPDFExc(this->file.getName(), xref_offset,
+			  "Cross-reference stream data has the wrong size;"
+			  " /Index describes more entries than fit in " +
+			  QUtil::int_to_string(actual_size) + " bytes");
+	}
+	num_entries += indx[i];
+    }
+
+    int expected_size = entry_size * num_entries;
+    if (expected_size != actual_size)
+    {
+	throw QPDFExc(this->file.getName(), xref_offset,
+		      "Cross-reference stream data has the wrong size;"
+		      " expected = " + QUtil::int_to_string(expected_size) +
+		      "; actual = " + QUtil::int_to_string(actual_size));
+    }
+
+    // cur_chunk indexes the current (first, count) pair in indx;
+    // chunk_count is the number of entries consumed from that pair.
+    int cur_chunk = 0;
+    int chunk_count = 0;
+
+    bool saw_first_compressed_object = false;
+
+    unsigned char const* data = bp.getPointer()->getBuffer();
+    for (int i = 0; i < num_entries; ++i)
+    {
+	// Read this entry
+	unsigned char const* entry = data + (entry_size * i);
+	int fields[3];
+	unsigned char const* p = entry;
+	for (int j = 0; j < 3; ++j)
+	{
+	    fields[j] = 0;
+	    if ((j == 0) && (W[0] == 0))
+	    {
+		// A zero-width type field means the type defaults to 1.
+		QTC::TC("qpdf", "QPDF default for xref stream field 0");
+		fields[0] = 1;
+	    }
+	    for (int k = 0; k < W[j]; ++k)
+	    {
+		// Fields are big-endian.  Accumulate using unsigned
+		// arithmetic so that a field too large for an int
+		// wraps instead of triggering signed overflow.
+		fields[j] = static_cast<int>(
+		    (static_cast<unsigned int>(fields[j]) << 8) +
+		    static_cast<unsigned int>(*p++));
+	    }
+	}
+
+	// Get the object and generation number.  The object number is
+	// based on /Index.  The generation number is 0 unless this is
+	// an uncompressed object record, in which case the generation
+	// number appears as the third field.
+	int obj = indx[cur_chunk] + chunk_count;
+	++chunk_count;
+	if (chunk_count >= indx[cur_chunk + 1])
+	{
+	    cur_chunk += 2;
+	    chunk_count = 0;
+	}
+
+	// Record whether any entry that is not type 2 follows a type
+	// 2 (compressed) entry.
+	if (saw_first_compressed_object)
+	{
+	    if (fields[0] != 2)
+	    {
+		this->uncompressed_after_compressed = true;
+	    }
+	}
+	else if (fields[0] == 2)
+	{
+	    saw_first_compressed_object = true;
+	}
+	if (obj == 0)
+	{
+	    // This is needed by checkLinearization()
+	    this->first_xref_item_offset = xref_offset;
+	}
+	insertXrefEntry(obj, fields[0], fields[1], fields[2]);
+    }
+
+    if (! this->trailer.isInitialized())
+    {
+	setTrailer(dict);
+    }
+
+    if (dict.hasKey("/Prev"))
+    {
+	if (! dict.getKey("/Prev").isInteger())
+	{
+	    throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+			  "/Prev key in xref stream dictionary is not "
+			  "an integer");
+	}
+	QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
+	xref_offset = dict.getKey("/Prev").getIntValue();
+    }
+    else
+    {
+	xref_offset = 0;
+    }
+
+    return xref_offset;
+}
+
+// Record one cross-reference entry for object number obj.  f0 is
+// the entry type: 0 marks the object as deleted, 1 stores an
+// uncompressed-object entry keyed by (obj, f2), and 2 stores a
+// compressed-object entry keyed by (obj, 0).  Any other type throws
+// QPDFExc.
+//
+// Sections are read newest first, so the first entry seen for an
+// object is the one that counts: if the object is already present
+// in the xref table or has already been marked deleted, this call
+// does nothing.
+void
+QPDF::insertXrefEntry(int obj, int f0, int f1, int f2)
+{
+    // For type 2 entries the generation is always 0; otherwise it
+    // is carried in f2.
+    ObjGen const key(obj, ((f0 == 2) ? 0 : f2));
+
+    if (this->xref_table.count(key))
+    {
+	// A later xref section already registered this object.
+	QTC::TC("qpdf", "QPDF xref reused object");
+	return;
+    }
+    if (this->deleted_objects.count(obj))
+    {
+	QTC::TC("qpdf", "QPDF xref deleted object");
+	return;
+    }
+
+    if (f0 == 0)
+    {
+	this->deleted_objects.insert(obj);
+    }
+    else if (f0 == 1)
+    {
+	// f2 is generation
+	QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
+	this->xref_table[key] = QPDFXRefEntry(f0, f1, f2);
+    }
+    else if (f0 == 2)
+    {
+	this->xref_table[key] = QPDFXRefEntry(f0, f1, f2);
+    }
+    else
+    {
+	throw QPDFExc(this->file.getName(), 0,
+		      "unknown xref stream entry type " +
+		      QUtil::int_to_string(f0));
+    }
+}
+
+// Write every entry of the xref table to standard output, one line
+// per object, in the form "obj/gen: description".  Throws
+// QEXC::Internal if an entry has a type other than 1 or 2.
+void
+QPDF::showXRefTable()
+{
+    typedef std::map<ObjGen, QPDFXRefEntry>::iterator xref_iterator;
+
+    xref_iterator const last = this->xref_table.end();
+    for (xref_iterator cur = this->xref_table.begin(); cur != last; ++cur)
+    {
+	QPDFXRefEntry const& xe = cur->second;
+	std::cout << cur->first.obj << "/" << cur->first.gen << ": ";
+
+	int const type = xe.getType();
+	if (type == 1)
+	{
+	    std::cout << "uncompressed; offset = " << xe.getOffset();
+	}
+	else if (type == 2)
+	{
+	    std::cout << "compressed; stream = " << xe.getObjStreamNumber()
+		      << ", index = " << xe.getObjStreamIndex();
+	}
+	else
+	{
+	    throw QEXC::Internal("unknown cross-reference table type while"
+				 " showing xref_table");
+	}
+	std::cout << std::endl;
+    }
+}
+
+// Read one complete object from input at its current position and
+// return it.  On return, input's last offset is set to the position
+// at which the object started.
+QPDFObjectHandle
+QPDF::readObject(InputSource* input, int objid, int generation,
+		 bool in_object_stream)
+{
+    off_t const start = input->tell();
+
+    // At the top level we are inside neither an array nor a
+    // dictionary.
+    bool const in_array = false;
+    bool const in_dictionary = false;
+    QPDFObjectHandle result = readObjectInternal(
+	input, objid, generation, in_object_stream,
+	in_array, in_dictionary);
+
+    // Reading the object moved last_offset along with each token;
+    // point it back at the beginning of the object.
+    input->setLastOffset(start);
+    return result;
+}
+
+// Recursive worker behind readObject().  Reads tokens from input and
+// builds one object from them.
+//
+// When in_array or in_dictionary is true, the opening token of the
+// container has already been consumed by the caller: objects are
+// collected until the matching close token and returned as an array
+// or dictionary.  Otherwise the function returns as soon as it has
+// read one complete object.
+//
+// After a dictionary has been read outside of an object stream, the
+// next token is examined; if it is the "stream" keyword, the result
+// is a stream object whose data starts after the next newline and
+// whose length comes from /Length.  If the length is missing,
+// invalid, or not followed by "endstream", recoverStreamLength() is
+// tried when attempt_recovery is set; otherwise the error is
+// propagated.
+//
+// objid and generation identify the object being read; they are
+// used for decrypting strings and when constructing streams.
+// Throws QPDFExc on syntax errors and QEXC::Internal on internal
+// inconsistencies.
+QPDFObjectHandle
+QPDF::readObjectInternal(InputSource* input,
+			 int objid, int generation,
+			 bool in_object_stream,
+			 bool in_array, bool in_dictionary)
+{
+    if (in_dictionary && in_array)
+    {
+	// Although dictionaries and arrays arbitrarily nest, these
+	// variables indicate what is at the top of the stack right
+	// now, so they can, by definition, never both be true.
+	throw QEXC::Internal("readObjectInternal: in_dict && in_array");
+    }
+
+    QPDFObjectHandle object;
+
+    // Position at which this object (or container body) starts; used
+    // in some error messages.
+    off_t offset = input->tell();
+    // Items collected so far for the enclosing array or dictionary.
+    std::vector<QPDFObjectHandle> olist;
+    bool done = false;
+    while (! done)
+    {
+	object = QPDFObjectHandle();
+
+	QPDFTokenizer::Token token = readToken(input);
+
+	switch (token.getType())
+	{
+	  case QPDFTokenizer::tt_brace_open:
+	  case QPDFTokenizer::tt_brace_close:
+	    // Don't know what to do with these for now
+	    QTC::TC("qpdf", "QPDF bad brace");
+	    throw QPDFExc(input->getName(), input->getLastOffset(),
+			  "unexpected brace token");
+	    break;
+
+	  case QPDFTokenizer::tt_array_close:
+	    if (in_array)
+	    {
+		done = true;
+	    }
+	    else
+	    {
+		QTC::TC("qpdf", "QPDF bad array close");
+		throw QPDFExc(input->getName(), input->getLastOffset(),
+			      "unexpected array close token");
+	    }
+	    break;
+
+	  case QPDFTokenizer::tt_dict_close:
+	    if (in_dictionary)
+	    {
+		done = true;
+	    }
+	    else
+	    {
+		QTC::TC("qpdf", "QPDF bad dictionary close");
+		throw QPDFExc(input->getName(), input->getLastOffset(),
+			      "unexpected dictionary close token");
+	    }
+	    break;
+
+	  case QPDFTokenizer::tt_array_open:
+	    object = readObjectInternal(
+		input, objid, generation, in_object_stream, true, false);
+	    break;
+
+	  case QPDFTokenizer::tt_dict_open:
+	    object = readObjectInternal(
+		input, objid, generation, in_object_stream, false, true);
+	    break;
+
+	  case QPDFTokenizer::tt_bool:
+	    object = QPDFObjectHandle::newBool(
+		(token.getValue() == "true"));
+	    break;
+
+	  case QPDFTokenizer::tt_null:
+	    object = QPDFObjectHandle::newNull();
+	    break;
+
+	  case QPDFTokenizer::tt_integer:
+	    object = QPDFObjectHandle::newInteger(
+		atoi(token.getValue().c_str()));
+	    break;
+
+	  case QPDFTokenizer::tt_real:
+	    object = QPDFObjectHandle::newReal(token.getValue());
+	    break;
+
+	  case QPDFTokenizer::tt_name:
+	    object = QPDFObjectHandle::newName(token.getValue());
+	    break;
+
+	  case QPDFTokenizer::tt_word:
+	    {
+		std::string const& value = token.getValue();
+		// "n g R" inside a container: the two integers
+		// already collected are replaced by a single
+		// indirect reference.
+		if ((value == "R") && (in_array || in_dictionary) &&
+		    (olist.size() >= 2) &&
+		    (olist[olist.size() - 1].isInteger()) &&
+		    (olist[olist.size() - 2].isInteger()))
+		{
+		    // Try to resolve indirect objects
+		    object = QPDFObjectHandle::Factory::newIndirect(
+			this,
+			olist[olist.size() - 2].getIntValue(),
+			olist[olist.size() - 1].getIntValue());
+		    olist.pop_back();
+		    olist.pop_back();
+		}
+		else
+		{
+		    throw QPDFExc(input->getName(), input->getLastOffset(),
+				  "unknown token while reading object (" +
+				  value + ")");
+		}
+	    }
+	    break;
+
+	  case QPDFTokenizer::tt_string:
+	    {
+		std::string val = token.getValue();
+		// Strings inside object streams are not decrypted
+		// here.
+		if (this->encrypted && (! in_object_stream))
+		{
+		    decryptString(val, objid, generation);
+		}
+		object = QPDFObjectHandle::newString(val);
+	    }
+	    break;
+
+	  default:
+	    throw QPDFExc(input->getName(), input->getLastOffset(),
+			  "unknown token type while reading object");
+	    break;
+	}
+
+	if (in_dictionary || in_array)
+	{
+	    // Inside a container: keep collecting until the close
+	    // token, which itself is not an item.
+	    if (! done)
+	    {
+		olist.push_back(object);
+	    }
+	}
+	else if (! object.isInitialized())
+	{
+	    throw QEXC::Internal(std::string("uninitialized object (token = ") +
+				 QUtil::int_to_string(token.getType()) +
+				 ", " + token.getValue() + ")");
+	}
+	else
+	{
+	    // Not in a container: one object is all we read.
+	    done = true;
+	}
+    }
+
+    if (in_array)
+    {
+	object = QPDFObjectHandle::newArray(olist);
+    }
+    else if (in_dictionary)
+    {
+	// Convert list to map.  Alternating elements are keys.
+	std::map<std::string, QPDFObjectHandle> dict;
+	if (olist.size() % 2)
+	{
+	    QTC::TC("qpdf", "QPDF dictionary odd number of elements");
+	    throw QPDFExc(
+		input->getName(), input->getLastOffset(),
+		"dictionary ending here has an odd number of elements");
+	}
+	for (unsigned int i = 0; i < olist.size(); i += 2)
+	{
+	    QPDFObjectHandle key_obj = olist[i];
+	    QPDFObjectHandle val = olist[i + 1];
+	    if (! key_obj.isName())
+	    {
+		throw QPDFExc(
+		    input->getName(), offset,
+		    std::string("dictionary key not name (") +
+		    key_obj.unparse() + ")");
+	    }
+	    dict[key_obj.getName()] = val;
+	}
+	object = QPDFObjectHandle::newDictionary(dict);
+
+	if (! in_object_stream)
+	{
+	    // check for stream
+	    off_t cur_offset = input->tell();
+	    if (readToken(input) ==
+		QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
+	    {
+		// Kill to next actual newline.  Do not use readLine()
+		// here -- streams are a special case.  The next
+		// single newline character marks the end of the
+		// stream token.  It is incorrect to strip subsequent
+		// carriage returns or newlines as they may be part of
+		// the stream.
+		{
+		    char ch;
+		    do
+		    {
+			if (input->read(&ch, 1) == 0)
+			{
+			    // A premature EOF here will result in
+			    // some other problem that will get
+			    // reported at another time.
+			    ch = '\n';
+			}
+		    } while (ch != '\n');
+		}
+
+		// Must get offset before accessing any additional
+		// objects since resolving a previously unresolved
+		// indirect object will change file position.
+		off_t stream_offset = input->tell();
+		int length = 0;
+
+		try
+		{
+		    if (dict.count("/Length") == 0)
+		    {
+			QTC::TC("qpdf", "QPDF stream without length");
+			throw QPDFExc(input->getName(), offset,
+				      "stream dictionary lacks /Length key");
+		    }
+
+		    QPDFObjectHandle length_obj = dict["/Length"];
+		    if (! length_obj.isInteger())
+		    {
+			QTC::TC("qpdf", "QPDF stream length not integer");
+			throw QPDFExc(input->getName(), offset,
+				      "/Length key in stream dictionary is not "
+				      "an integer");
+		    }
+
+		    length = length_obj.getIntValue();
+		    if (length < 0)
+		    {
+			// A negative length would seek backwards to
+			// before the stream data; treat it like any
+			// other bad length.
+			throw QPDFExc(input->getName(), offset,
+				      "/Length key in stream dictionary is"
+				      " negative");
+		    }
+		    input->seek(stream_offset + length, SEEK_SET);
+		    if (! (readToken(input) ==
+			   QPDFTokenizer::Token(
+			       QPDFTokenizer::tt_word, "endstream")))
+		    {
+			QTC::TC("qpdf", "QPDF missing endstream");
+			throw QPDFExc(input->getName(), input->getLastOffset(),
+				      "expected endstream");
+		    }
+		}
+		catch (QPDFExc&)
+		{
+		    if (this->attempt_recovery)
+		    {
+			// may throw an exception
+			length = recoverStreamLength(
+			    input, objid, generation, stream_offset);
+		    }
+		    else
+		    {
+			// Rethrow the original exception object
+			// rather than a copy of it.
+			throw;
+		    }
+		}
+		object = QPDFObjectHandle::Factory::newStream(
+		    this, objid, generation, object, stream_offset, length);
+	    }
+	    else
+	    {
+		// Not a stream: put back whatever token we peeked at.
+		input->seek(cur_offset, SEEK_SET);
+	    }
+	}
+    }
+
+    return object;
+}
+
+// Try to determine the length of a stream whose /Length could not
+// be used.  Starting at stream_offset, the input is scanned line by
+// line for a line beginning with "endobj" that directly follows a
+// line ending in "endstream"; the stream is then taken to end just
+// before that "endstream".  A warning is always issued first.
+//
+// Returns the recovered length.  Throws QPDFExc if no such pair of
+// lines is found or if the computed length is 0.
+int
+QPDF::recoverStreamLength(InputSource* input,
+			  int objid, int generation, off_t stream_offset)
+{
+    static PCRE endobj_re("^endobj\\b");
+
+    // Try to reconstruct stream length by looking for
+    // endstream(\r\n?|\n)endobj
+    warn(QPDFExc(input->getName(), stream_offset,
+		 "attempting to recover stream length"));
+
+    // Find the end of the input so the scan knows where to stop.
+    input->seek(0, SEEK_END);
+    off_t eof = input->tell();
+    input->seek(stream_offset, SEEK_SET);
+    // The previously read line and the offset recorded for it.
+    std::string last_line;
+    off_t last_line_offset = 0;
+    int length = 0;
+    while (input->tell() < eof)
+    {
+	std::string line = input->readLine();
+	// Can't use regexp last_line since it might contain nulls
+	if (endobj_re.match(line.c_str()) &&
+	    (last_line.length() >= 9) &&
+	    (last_line.substr(last_line.length() - 9, 9) == "endstream"))
+	{
+	    // Stream probably ends right before "endstream", which
+	    // contains 9 characters.
+	    length = last_line_offset + last_line.length() - 9 - stream_offset;
+	    // Go back to where we would have been if we had just read
+	    // the endstream.
+	    input->seek(input->getLastOffset(), SEEK_SET);
+	    break;
+	}
+	last_line = line;
+	last_line_offset = input->getLastOffset();
+    }
+
+    if (length)
+    {
+	// The outcome of this block does not change the value
+	// returned; it only decides whether the QTC::TC call in the
+	// else branch below is made.
+	int this_obj_offset = 0;
+	ObjGen this_obj(0, 0);
+
+	// Make sure this is inside this object
+	// NOTE(review): the loop selects the type 1 entry with the
+	// smallest offset greater than stream_offset and then
+	// compares that entry's object/generation with
+	// objid/generation -- confirm this is the intended check.
+	for (std::map<ObjGen, QPDFXRefEntry>::iterator iter =
+		 this->xref_table.begin();
+	     iter != this->xref_table.end(); ++iter)
+	{
+	    ObjGen const& og = (*iter).first;
+	    QPDFXRefEntry const& entry = (*iter).second;
+	    if (entry.getType() == 1)
+	    {
+		int obj_offset = entry.getOffset();
+		if ((obj_offset > stream_offset) &&
+		    ((this_obj_offset == 0) ||
+		     (this_obj_offset > obj_offset)))
+		{
+		    this_obj_offset = obj_offset;
+		    this_obj = og;
+		}
+	    }
+	}
+	if (this_obj_offset &&
+	    (this_obj.obj == objid) &&
+	    (this_obj.gen == generation))
+	{
+	    // Well, we found endstream\nendobj within the space
+	    // allowed for this object, so we're probably in good
+	    // shape.
+	}
+	else
+	{
+	    QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
+	}
+    }
+
+    // A length of 0 means either that nothing was found or that
+    // "endstream" was found immediately at stream_offset; both cases
+    // are reported as a failure.
+    if (length == 0)
+    {
+	throw QPDFExc(input->getName(), stream_offset,
+		      "unable to recover stream data");
+    }
+
+    QTC::TC("qpdf", "QPDF recovered stream length");
+    return length;
+}
+
+// Read the next token from input by feeding it one character at a
+// time to this->tokenizer until the tokenizer reports a complete
+// token.  If the tokenizer asks for a character to be pushed back,
+// it is returned to input.  On success, input's last offset is set
+// to the position of the token, skipping over any whitespace that
+// preceded it.
+//
+// Throws QPDFExc if the input ends before a token is complete or if
+// the tokenizer produces a bad token.
+QPDFTokenizer::Token
+QPDF::readToken(InputSource* input)
+{
+    off_t offset = input->tell();
+    QPDFTokenizer::Token token;
+    bool unread_char = false;
+    char char_to_unread = '\0';
+    while (! this->tokenizer.getToken(token, unread_char, char_to_unread))
+    {
+	char ch;
+	if (input->read(&ch, 1) == 0)
+	{
+	    throw QPDFExc(input->getName(), offset, "EOF while reading token");
+	}
+	else
+	{
+	    // As long as only whitespace has been seen, keep moving
+	    // the recorded start of the token forward.  The cast is
+	    // required: passing a negative char value (any byte
+	    // >= 0x80 where char is signed) to isspace is undefined.
+	    if (isspace(static_cast<unsigned char>(ch)) &&
+		(input->getLastOffset() == offset))
+	    {
+		++offset;
+	    }
+	    this->tokenizer.presentCharacter(ch);
+	}
+    }
+
+    if (unread_char)
+    {
+	input->unreadCh(char_to_unread);
+    }
+
+    if (token.getType() == QPDFTokenizer::tt_bad)
+    {
+	throw QPDFExc(input->getName(), offset, token.getErrorMessage());
+    }
+
+    input->setLastOffset(offset);
+
+    return token;
+}
+
+// Read the indirect object ("n g obj ... endobj") that begins at
+// offset in the main file.  The object and generation numbers found
+// in the file are returned through objid and generation.
+//
+// If exp_objid is non-zero, the numbers read must equal exp_objid
+// and exp_generation.  When that check or the "n g obj" header
+// check fails, exp_objid is non-zero and attempt_recovery is set,
+// the xref table is reconstructed and the object is read from its
+// new location; if reconstruction does not yield an uncompressed
+// entry for the expected object, the original error is rethrown.
+// In all other failure cases the error is propagated directly.
+//
+// A missing "endobj" only produces a warning.  If the object is not
+// yet in the object cache it is added, together with the file
+// positions immediately after the object and after any whitespace
+// that follows it.
+QPDFObjectHandle
+QPDF::readObjectAtOffset(off_t offset, int exp_objid, int exp_generation,
+			 int& objid, int& generation)
+{
+    this->file.seek(offset, SEEK_SET);
+
+    QPDFTokenizer::Token tobjid = readToken(&this->file);
+    QPDFTokenizer::Token tgen = readToken(&this->file);
+    QPDFTokenizer::Token tobj = readToken(&this->file);
+
+    bool objidok = (tobjid.getType() == QPDFTokenizer::tt_integer);
+    bool genok = (tgen.getType() == QPDFTokenizer::tt_integer);
+    bool objok = (tobj == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"));
+
+    QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
+    QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
+    QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0);
+
+    try
+    {
+	if (! (objidok && genok && objok))
+	{
+	    QTC::TC("qpdf", "QPDF expected n n obj");
+	    throw QPDFExc(this->file.getName(), offset, "expected n n obj");
+	}
+	objid = atoi(tobjid.getValue().c_str());
+	generation = atoi(tgen.getValue().c_str());
+
+	if (exp_objid &&
+	    (! ((objid == exp_objid) && (generation == exp_generation))))
+	{
+	    QTC::TC("qpdf", "QPDF err wrong objid/generation");
+	    throw QPDFExc(this->file.getName(), offset,
+			  std::string("expected ") +
+			  QUtil::int_to_string(exp_objid) + " " +
+			  QUtil::int_to_string(exp_generation) + " obj");
+	}
+    }
+    catch (QPDFExc& e)
+    {
+	if (! (exp_objid && this->attempt_recovery))
+	{
+	    // Rethrow the original exception object, not a copy.
+	    throw;
+	}
+
+	// Try again after reconstructing xref table
+	reconstruct_xref(e);
+	ObjGen og(exp_objid, exp_generation);
+	if (this->xref_table.count(og) &&
+	    (this->xref_table[og].getType() == 1))
+	{
+	    off_t new_offset = this->xref_table[og].getOffset();
+	    // Call readObjectAtOffset with 0 for exp_objid to
+	    // avoid an infinite loop.
+	    QPDFObjectHandle result =
+		readObjectAtOffset(new_offset, 0, 0, objid, generation);
+	    QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
+	    return result;
+	}
+
+	// Reconstruction did not locate the object.  We must not
+	// fall through and parse from the current position: objid
+	// and generation may never have been assigned, and the
+	// header at this offset is known to be wrong.
+	throw;
+    }
+
+    QPDFObjectHandle oh = readObject(
+	&this->file, objid, generation, false);
+
+    if (! (readToken(&this->file) ==
+	   QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj")))
+    {
+	QTC::TC("qpdf", "QPDF err expected endobj");
+	warn(QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		     "expected endobj"));
+    }
+
+    ObjGen og(objid, generation);
+    if (! this->obj_cache.count(og))
+    {
+	// Store the object in the cache here so it gets cached
+	// whether we first know the offset or whether we first know
+	// the object ID and generation (in which case we would get
+	// here through resolve).
+
+	// Determine the end offset of this object before and after
+	// white space.  We use these numbers to validate
+	// linearization hint tables.  Offsets and lengths of objects
+	// may imply the end of an object to be anywhere between these
+	// values.
+	off_t end_before_space = this->file.tell();
+
+	// skip over spaces
+	while (true)
+	{
+	    char ch;
+	    if (this->file.read(&ch, 1))
+	    {
+		// Cast before calling isspace: a negative char value
+		// is undefined behavior there.
+		if (! isspace(static_cast<unsigned char>(ch)))
+		{
+		    this->file.seek(-1, SEEK_CUR);
+		    break;
+		}
+	    }
+	    else
+	    {
+		throw QPDFExc(this->file.getName(), offset,
+			      "EOF after endobj");
+	    }
+	}
+	off_t end_after_space = this->file.tell();
+
+	this->obj_cache[og] =
+	    ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh),
+		     end_before_space, end_after_space);
+    }
+
+    return oh;
+}
+
+// Return the object identified by objid and generation, loading it
+// from the file if it is not already in the object cache.  An
+// object that is neither cached nor listed in the xref table
+// resolves to a null object, as does an object that is listed but
+// is not found where the xref table says it is.  Throws QPDFExc if
+// the xref entry has a type other than 1 or 2.
+PointerHolder<QPDFObject>
+QPDF::resolve(int objid, int generation)
+{
+    // Check object cache before checking xref table.  This allows us
+    // to insert things into the object cache that don't actually
+    // exist in the file.
+    ObjGen og(objid, generation);
+    if (! this->obj_cache.count(og))
+    {
+	if (! this->xref_table.count(og))
+	{
+	    // PDF spec says unknown objects resolve to the null object.
+	    return new QPDF_Null;
+	}
+
+	// Work on a copy of the entry: reading the object may modify
+	// the xref table.
+	QPDFXRefEntry const entry = this->xref_table[og];
+	switch (entry.getType())
+	{
+	  case 1:
+	    {
+		off_t offset = entry.getOffset();
+		// Object stored in cache by readObjectAtOffset
+		int aobjid;
+		int ageneration;
+		QPDFObjectHandle oh =
+		    readObjectAtOffset(offset, objid, generation,
+				       aobjid, ageneration);
+	    }
+	    break;
+
+	  case 2:
+	    // Caches every object found in the object stream.
+	    resolveObjectsInStream(entry.getObjStreamNumber());
+	    break;
+
+	  default:
+	    throw QPDFExc(this->file.getName(), 0,
+			  "object " +
+			  QUtil::int_to_string(objid) + "/" +
+			  QUtil::int_to_string(generation) +
+			  " has unexpected xref entry type");
+	}
+
+	if (! this->obj_cache.count(og))
+	{
+	    // The steps above cache whatever object they actually
+	    // find, which is not necessarily the one requested.
+	    // Using operator[] here would insert an empty cache
+	    // entry and hand back an empty holder; return a null
+	    // object instead.
+	    return new QPDF_Null;
+	}
+    }
+
+    return this->obj_cache[og].object;
+}
+
+// Load every object contained in object stream number
+// obj_stream_number into the object cache.  The stream must be a
+// stream whose dictionary has /Type /ObjStm and integer /N and
+// /First keys; otherwise QPDFExc is thrown.  The stream data begins
+// with /N pairs of integers (object number, offset); each offset is
+// relative to /First.
+void
+QPDF::resolveObjectsInStream(int obj_stream_number)
+{
+    std::string const stream_number_str =
+	QUtil::int_to_string(obj_stream_number);
+
+    // Force resolution of object stream
+    QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0);
+    if (! obj_stream.isStream())
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "supposed object stream " +
+		      stream_number_str +
+		      " is not a stream");
+    }
+
+    // Objects inside the stream inherit the stream's own end
+    // offsets for the purposes of linearization data.
+    ObjGen const stream_og(obj_stream_number, 0);
+    off_t const end_before_space =
+	this->obj_cache[stream_og].end_before_space;
+    off_t const end_after_space =
+	this->obj_cache[stream_og].end_after_space;
+
+    QPDFObjectHandle stream_dict = obj_stream.getDict();
+
+    QPDFObjectHandle type_obj = stream_dict.getKey("/Type");
+    bool const type_ok =
+	(type_obj.isName() && (type_obj.getName() == "/ObjStm"));
+    if (! type_ok)
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "supposed object stream " +
+		      stream_number_str +
+		      " has wrong type");
+    }
+
+    QPDFObjectHandle n_obj = stream_dict.getKey("/N");
+    QPDFObjectHandle first_obj = stream_dict.getKey("/First");
+    if (! (n_obj.isInteger() && first_obj.isInteger()))
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "object stream " +
+		      stream_number_str +
+		      " has incorrect keys");
+    }
+    int const n = n_obj.getIntValue();
+    int const first = first_obj.getIntValue();
+
+    PointerHolder<Buffer> bp = obj_stream.getStreamData();
+    BufferInputSource input(
+	"object stream " + stream_number_str,
+	bp.getPointer());
+
+    // First pass: read the header and note where each object
+    // starts within the stream data.
+    std::map<int, int> offsets;
+    for (int remaining = n; remaining > 0; --remaining)
+    {
+	QPDFTokenizer::Token tnum = readToken(&input);
+	QPDFTokenizer::Token toffset = readToken(&input);
+	bool const both_integers =
+	    ((tnum.getType() == QPDFTokenizer::tt_integer) &&
+	     (toffset.getType() == QPDFTokenizer::tt_integer));
+	if (! both_integers)
+	{
+	    throw QPDFExc(input.getName(), input.getLastOffset(),
+			  "expected integer in object stream header");
+	}
+
+	int const num = atoi(tnum.getValue().c_str());
+	offsets[num] = atoi(toffset.getValue().c_str()) + first;
+    }
+
+    // Second pass: read each object and store it in the cache with
+    // generation 0.
+    std::map<int, int>::iterator cur = offsets.begin();
+    while (cur != offsets.end())
+    {
+	int const obj = cur->first;
+	input.seek(cur->second, SEEK_SET);
+	QPDFObjectHandle oh = readObject(&input, obj, 0, true);
+
+	this->obj_cache[ObjGen(obj, 0)] =
+	    ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh),
+		     end_before_space, end_after_space);
+	++cur;
+    }
+}
+
+// Store oh in the object cache under a new object number, one
+// greater than the highest object number present in either the
+// object cache or the xref table, with generation 0, and return an
+// indirect reference to it.  The cache entry is created with -1
+// for both end offsets.
+QPDFObjectHandle
+QPDF::makeIndirectObject(QPDFObjectHandle oh)
+{
+    // Either container may be empty, in which case rbegin() must
+    // not be dereferenced; an empty container contributes 0.
+    int cache_max = 0;
+    if (! this->obj_cache.empty())
+    {
+	cache_max = (*(this->obj_cache.rbegin())).first.obj;
+    }
+    int xref_max = 0;
+    if (! this->xref_table.empty())
+    {
+	xref_max = (*(this->xref_table.rbegin())).first.obj;
+    }
+    QTC::TC("qpdf", "QPDF indirect last obj from xref",
+	    (xref_max > cache_max) ? 1 : 0);
+    int max_objid = std::max(cache_max, xref_max);
+    ObjGen next(max_objid + 1, 0);
+    this->obj_cache[next] =
+	ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1);
+    return QPDFObjectHandle::Factory::newIndirect(this, next.obj, next.gen);
+}
+
+// Return an indirect object handle that refers to the given object
+// ID and generation in this QPDF.
+QPDFObjectHandle
+QPDF::getObjectByID(int objid, int generation)
+{
+    QPDFObjectHandle handle =
+	QPDFObjectHandle::Factory::newIndirect(this, objid, generation);
+    return handle;
+}
+
+// Remove from the trailer dictionary the keys that should not be
+// carried over when the file is written.
+void
+QPDF::trimTrailerForWrite()
+{
+    // Note that removing the encryption dictionary does not interfere
+    // with reading encrypted files.  QPDF loads all the information
+    // it needs from the encryption dictionary at the beginning and
+    // never looks at it again.
+    //
+    // The keys from /Index onward are those that potentially come
+    // from a cross-reference stream.
+    static char const* const keys_to_remove[] = {
+	"/ID",
+	"/Encrypt",
+	"/Prev",
+	"/Index",
+	"/W",
+	"/Length",
+	"/Filter",
+	"/DecodeParms",
+	"/Type",
+	"/XRefStm"
+    };
+    static unsigned int const n_keys =
+	sizeof(keys_to_remove) / sizeof(keys_to_remove[0]);
+
+    for (unsigned int i = 0; i < n_keys; ++i)
+    {
+	this->trailer.removeKey(keys_to_remove[i]);
+    }
+}
+
+// Return the name reported by the underlying input file.
+std::string
+QPDF::getFilename() const
+{
+    std::string const& name = this->file.getName();
+    return name;
+}
+
+// Return a copy of the stored PDF version string.
+std::string
+QPDF::getPDFVersion() const
+{
+    std::string version(this->pdf_version);
+    return version;
+}
+
+// Return a handle to the trailer dictionary.
+QPDFObjectHandle
+QPDF::getTrailer()
+{
+    QPDFObjectHandle result(this->trailer);
+    return result;
+}
+
+// Return the value of the trailer's /Root key.
+QPDFObjectHandle
+QPDF::getRoot()
+{
+    QPDFObjectHandle root = this->trailer.getKey("/Root");
+    return root;
+}
+
+// For every type 2 (compressed) entry in the xref table, set
+// omap[object number] to the number of the object stream that
+// contains the object.  Entries already present in omap for other
+// object numbers are left untouched.
+void
+QPDF::getObjectStreamData(std::map<int, int>& omap)
+{
+    typedef std::map<ObjGen, QPDFXRefEntry>::iterator xref_iterator;
+
+    xref_iterator cur = this->xref_table.begin();
+    while (cur != this->xref_table.end())
+    {
+	if (cur->second.getType() == 2)
+	{
+	    omap[cur->first.obj] = cur->second.getObjStreamNumber();
+	}
+	++cur;
+    }
+}
+
+// Return, in depth-first traversal order starting at the trailer,
+// the object IDs of every indirect, non-stream object reachable from
+// the trailer, excluding the encryption dictionary (when it is
+// indirect).  Each object ID appears at most once.
+std::vector<int>
+QPDF::getCompressibleObjects()
+{
+    // Return a set of object numbers of objects that are allowed to
+    // be in object streams.  We disregard generation numbers here
+    // since this is a helper function for QPDFWriter which is going
+    // to renumber objects anyway.  This code will do weird things if
+    // we have two objects with the same object number and different
+    // generations, but so do virtually all PDF consumers,
+    // particularly since this is not a permitted condition.
+
+    // We walk through the objects by traversing the document from the
+    // root, including a traversal of the pages tree.  This makes it
+    // more likely that objects on the same page end up in the same
+    // object stream, which is slightly more efficient, particularly
+    // with linearized files.  This is better than iterating through
+    // the xref table since it avoids preserving orphaned items.
+
+    // Exclude encryption dictionary, if any
+    int encryption_dict_id = 0;
+    QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
+    if (encryption_dict.isIndirect())
+    {
+	encryption_dict_id = encryption_dict.getObjectID();
+    }
+
+    // The list is used as a stack: items are pushed and popped at
+    // the front, which yields a depth-first traversal.
+    std::set<int> visited;
+    std::list<QPDFObjectHandle> queue;
+    queue.push_front(this->trailer);
+    std::vector<int> result;
+    while (! queue.empty())
+    {
+	QPDFObjectHandle obj = queue.front();
+	queue.pop_front();
+	if (obj.isIndirect())
+	{
+	    int objid = obj.getObjectID();
+	    if (visited.count(objid))
+	    {
+		QTC::TC("qpdf", "QPDF loop detected traversing objects");
+		continue;
+	    }
+	    if (objid == encryption_dict_id)
+	    {
+		QTC::TC("qpdf", "QPDF exclude encryption dictionary");
+	    }
+	    else if (! obj.isStream())
+	    {
+		// Streams themselves are never reported, but their
+		// dictionaries are still traversed below.
+		result.push_back(objid);
+	    }
+	    visited.insert(objid);
+	}
+	if (obj.isStream())
+	{
+	    // Keys are pushed in reverse so that they are popped (and
+	    // therefore visited) in forward key order.
+	    QPDFObjectHandle dict = obj.getDict();
+	    std::set<std::string> keys = dict.getKeys();
+	    for (std::set<std::string>::reverse_iterator iter = keys.rbegin();
+		 iter != keys.rend(); ++iter)
+	    {
+		std::string const& key = *iter;
+		QPDFObjectHandle value = dict.getKey(key);
+		if (key == "/Length")
+		{
+		    // omit stream lengths
+		    if (value.isIndirect())
+		    {
+			QTC::TC("qpdf", "QPDF exclude indirect length");
+		    }
+		}
+		else
+		{
+		    queue.push_front(value);
+		}
+	    }
+	}
+	else if (obj.isDictionary())
+	{
+	    // Same reverse push as for stream dictionaries, but no
+	    // key is skipped.
+	    std::set<std::string> keys = obj.getKeys();
+	    for (std::set<std::string>::reverse_iterator iter = keys.rbegin();
+		 iter != keys.rend(); ++iter)
+	    {
+		queue.push_front(obj.getKey(*iter));
+	    }
+	}
+	else if (obj.isArray())
+	{
+	    // Push items last-to-first so that item 0 is popped first.
+	    int n = obj.getArrayNItems();
+	    for (int i = 1; i <= n; ++i)
+	    {
+		queue.push_front(obj.getArrayItem(n - i));
+	    }
+	}
+    }
+
+    return result;
+}
+
+// Seek to offset in the input file, copy length bytes from there to
+// pipeline in chunks of at most 10240 bytes, and then call
+// pipeline->finish().  When the file is encrypted, decryptStream()
+// is called first unless the stream dictionary's /Type is the name
+// /XRef.  Throws QPDFExc if a read returns no data before length
+// bytes have been copied.
+void
+QPDF::pipeStreamData(int objid, int generation,
+		     off_t offset, size_t length,
+		     QPDFObjectHandle stream_dict,
+		     Pipeline* pipeline)
+{
+    // Passed to decryptStream(); destroyed when this function returns.
+    std::vector<PointerHolder<Pipeline> > to_delete;
+    if (this->encrypted)
+    {
+	bool const is_xref_stream =
+	    (stream_dict.getKey("/Type").isName() &&
+	     (stream_dict.getKey("/Type").getName() == "/XRef"));
+	if (is_xref_stream)
+	{
+	    QTC::TC("qpdf", "QPDF piping xref stream from encrypted file");
+	}
+	else
+	{
+	    decryptStream(pipeline, objid, generation, to_delete);
+	}
+    }
+
+    this->file.seek(offset, SEEK_SET);
+    char buf[10240];
+    size_t remaining = length;
+    while (remaining > 0)
+    {
+	size_t want = remaining;
+	if (sizeof(buf) < want)
+	{
+	    want = sizeof(buf);
+	}
+	size_t const got = this->file.read(buf, want);
+	if (got == 0)
+	{
+	    throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+			  "unexpected EOF reading stream data");
+	}
+	remaining -= got;
+	pipeline->write((unsigned char*)buf, got);
+    }
+    pipeline->finish();
+}
+
+// Return a reference to the all_pages vector.  If the vector is
+// empty on entry, it is first filled by walking the page tree that
+// starts at the /Pages key of the trailer's /Root.
+std::vector<QPDFObjectHandle> const&
+QPDF::getAllPages()
+{
+    if (! this->all_pages.empty())
+    {
+	return this->all_pages;
+    }
+    QPDFObjectHandle pages_root =
+	this->trailer.getKey("/Root").getKey("/Pages");
+    getAllPagesInternal(pages_root, this->all_pages);
+    return this->all_pages;
+}
+
+// Recursively collect page objects into result.  A node whose /Type
+// is /Pages has every item of its /Kids array visited in order; a
+// node whose /Type is /Page is appended to result; any other /Type
+// value causes a QPDFExc to be thrown.
+void
+QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
+			  std::vector<QPDFObjectHandle>& result)
+{
+    std::string const node_type = cur_pages.getKey("/Type").getName();
+    if (node_type == "/Page")
+    {
+	result.push_back(cur_pages);
+	return;
+    }
+    if (node_type != "/Pages")
+    {
+	throw QPDFExc(this->file.getName() + ": invalid Type in page tree");
+    }
+
+    QPDFObjectHandle kids = cur_pages.getKey("/Kids");
+    int const n_kids = kids.getArrayNItems();
+    for (int idx = 0; idx < n_kids; ++idx)
+    {
+	getAllPagesInternal(kids.getArrayItem(idx), result);
+    }
+}
diff --git a/libqpdf/QPDFExc.cc b/libqpdf/QPDFExc.cc
new file mode 100644
index 00000000..c7270677
--- /dev/null
+++ b/libqpdf/QPDFExc.cc
@@ -0,0 +1,20 @@
+
+#include <qpdf/QPDFExc.hh>
+
+#include <qpdf/QUtil.hh>
+
+// Construct an exception; message is passed unchanged to the
+// QEXC::General base class.
+QPDFExc::QPDFExc(std::string const& message)
+    : QEXC::General(message)
+{
+}
+
+// Construct an exception whose text passed to QEXC::General has the
+// form "<filename>: offset <offset>: <message>".
+QPDFExc::QPDFExc(std::string const& filename, int offset,
+		 std::string const& message)
+    : QEXC::General(
+	filename + ": offset " + QUtil::int_to_string(offset) +
+	": " + message)
+{
+}
+
+// Empty destructor, declared with an empty exception specification.
+QPDFExc::~QPDFExc() throw()
+{
+}
diff --git a/libqpdf/QPDFObject.cc b/libqpdf/QPDFObject.cc
new file mode 100644
index 00000000..6c4963e2
--- /dev/null
+++ b/libqpdf/QPDFObject.cc
@@ -0,0 +1,2 @@
+
+#include <qpdf/QPDFObject.hh>
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
new file mode 100644
index 00000000..9fba7b43
--- /dev/null
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -0,0 +1,637 @@
+
+#include <qpdf/QPDFObjectHandle.hh>
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDF_Bool.hh>
+#include <qpdf/QPDF_Null.hh>
+#include <qpdf/QPDF_Integer.hh>
+#include <qpdf/QPDF_Real.hh>
+#include <qpdf/QPDF_Name.hh>
+#include <qpdf/QPDF_String.hh>
+#include <qpdf/QPDF_Array.hh>
+#include <qpdf/QPDF_Dictionary.hh>
+#include <qpdf/QPDF_Stream.hh>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QUtil.hh>
+
+// Construct an uninitialized handle; isInitialized() returns false
+// for it.  The qpdf pointer is explicitly set to null, as in the
+// other constructors, so that copying or inspecting an uninitialized
+// handle never reads an indeterminate pointer value.
+QPDFObjectHandle::QPDFObjectHandle() :
+    initialized(false),
+    qpdf(0),
+    objid(0),
+    generation(0)
+{
+}
+
+// Construct an initialized handle that records the owning QPDF and
+// the given object ID and generation.  The obj member is left
+// default-constructed.
+QPDFObjectHandle::QPDFObjectHandle(QPDF* qpdf, int objid, int generation)
+    : initialized(true),
+      qpdf(qpdf),
+      objid(objid),
+      generation(generation)
+{
+}
+
+// Construct an initialized handle that holds data directly: no
+// owning QPDF, and object ID and generation both zero.
+QPDFObjectHandle::QPDFObjectHandle(QPDFObject* data)
+    : initialized(true),
+      qpdf(0),
+      objid(0),
+      generation(0),
+      obj(data)
+{
+}
+
+// Report the value of the initialized flag set at construction.
+bool
+QPDFObjectHandle::isInitialized() const
+{
+    bool const flag = this->initialized;
+    return flag;
+}
+
+// Helper whose check() reports whether a QPDFObject pointer is
+// non-null and can be dynamic_cast to T*.
+template <class T>
+class QPDFObjectTypeAccessor
+{
+  public:
+    static bool check(QPDFObject* o)
+    {
+	if (o == 0)
+	{
+	    return false;
+	}
+	return (dynamic_cast<T*>(o) != 0);
+    }
+};
+
+// True if, after dereference(), the held object is a QPDF_Bool.
+bool
+QPDFObjectHandle::isBool()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Bool>::check(resolved);
+}
+
+// True if, after dereference(), the held object is a QPDF_Null.
+bool
+QPDFObjectHandle::isNull()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Null>::check(resolved);
+}
+
+// True if, after dereference(), the held object is a QPDF_Integer.
+bool
+QPDFObjectHandle::isInteger()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Integer>::check(resolved);
+}
+
+// True if, after dereference(), the held object is a QPDF_Real.
+bool
+QPDFObjectHandle::isReal()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Real>::check(resolved);
+}
+
+// True if the object is an integer or a real.
+bool
+QPDFObjectHandle::isNumber()
+{
+    if (isInteger())
+    {
+	return true;
+    }
+    return isReal();
+}
+
+// Return the value of an integer or real object as a double.  A
+// real's string representation is converted with atof().  Throws
+// QEXC::Internal if the object is neither an integer nor a real.
+double
+QPDFObjectHandle::getNumericValue()
+{
+    if (isInteger())
+    {
+	double const from_int = getIntValue();
+	return from_int;
+    }
+    if (isReal())
+    {
+	double const from_real = atof(getRealValue().c_str());
+	return from_real;
+    }
+    throw QEXC::Internal("getNumericValue called for non-numeric object");
+}
+
+// True if, after dereference(), the held object is a QPDF_Name.
+bool
+QPDFObjectHandle::isName()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Name>::check(resolved);
+}
+
+// True if, after dereference(), the held object is a QPDF_String.
+bool
+QPDFObjectHandle::isString()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_String>::check(resolved);
+}
+
+// True if, after dereference(), the held object is a QPDF_Array.
+bool
+QPDFObjectHandle::isArray()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Array>::check(resolved);
+}
+
+// True if, after dereference(), the held object is a
+// QPDF_Dictionary.
+bool
+QPDFObjectHandle::isDictionary()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Dictionary>::check(resolved);
+}
+
+// True if, after dereference(), the held object is a QPDF_Stream.
+bool
+QPDFObjectHandle::isStream()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Stream>::check(resolved);
+}
+
+// True if this handle carries a non-zero object ID.  Throws
+// QEXC::Internal (via assertInitialized) for an uninitialized handle.
+bool
+QPDFObjectHandle::isIndirect()
+{
+    assertInitialized();
+    bool const has_object_id = (this->objid != 0);
+    return has_object_id;
+}
+
+// True if the object is not an array, not a dictionary, and not a
+// stream.
+bool
+QPDFObjectHandle::isScalar()
+{
+    return ((! isArray()) && (! isDictionary()) && (! isStream()));
+}
+
+// Bool accessors
+
+// Return the value of a Boolean object.  Throws QEXC::Internal (via
+// assertType) if the object is not a Boolean.
+bool
+QPDFObjectHandle::getBoolValue()
+{
+    assertType("Boolean", isBool());
+    QPDF_Bool* const as_bool =
+	dynamic_cast<QPDF_Bool*>(obj.getPointer());
+    return as_bool->getVal();
+}
+
+// Integer accessors
+
+// Return the value of an Integer object.  Throws QEXC::Internal (via
+// assertType) if the object is not an Integer.
+int
+QPDFObjectHandle::getIntValue()
+{
+    assertType("Integer", isInteger());
+    QPDF_Integer* const as_integer =
+	dynamic_cast<QPDF_Integer*>(obj.getPointer());
+    return as_integer->getVal();
+}
+
+// Real accessors
+
+// Return the value of a Real object as a string.  Throws
+// QEXC::Internal (via assertType) if the object is not a Real.
+std::string
+QPDFObjectHandle::getRealValue()
+{
+    assertType("Real", isReal());
+    QPDF_Real* const as_real =
+	dynamic_cast<QPDF_Real*>(obj.getPointer());
+    return as_real->getVal();
+}
+
+// Name accessors
+
+// Return the name held by a Name object.  Throws QEXC::Internal (via
+// assertType) if the object is not a Name.
+std::string
+QPDFObjectHandle::getName()
+{
+    assertType("Name", isName());
+    QPDF_Name* const as_name =
+	dynamic_cast<QPDF_Name*>(obj.getPointer());
+    return as_name->getName();
+}
+
+// String accessors
+
+// Return the result of getVal() on a String object.  Throws
+// QEXC::Internal (via assertType) if the object is not a String.
+std::string
+QPDFObjectHandle::getStringValue()
+{
+    assertType("String", isString());
+    QPDF_String* const as_string =
+	dynamic_cast<QPDF_String*>(obj.getPointer());
+    return as_string->getVal();
+}
+
+// Return the result of getUTF8Val() on a String object.  Throws
+// QEXC::Internal (via assertType) if the object is not a String.
+std::string
+QPDFObjectHandle::getUTF8Value()
+{
+    assertType("String", isString());
+    QPDF_String* const as_string =
+	dynamic_cast<QPDF_String*>(obj.getPointer());
+    return as_string->getUTF8Val();
+}
+
+// Array accessors
+
+// Return the number of items in an Array object.  Throws
+// QEXC::Internal (via assertType) if the object is not an Array.
+int
+QPDFObjectHandle::getArrayNItems()
+{
+    assertType("Array", isArray());
+    QPDF_Array* const as_array =
+	dynamic_cast<QPDF_Array*>(obj.getPointer());
+    return as_array->getNItems();
+}
+
+// Return item n of an Array object.  Throws QEXC::Internal (via
+// assertType) if the object is not an Array.
+QPDFObjectHandle
+QPDFObjectHandle::getArrayItem(int n)
+{
+    assertType("Array", isArray());
+    QPDF_Array* const as_array =
+	dynamic_cast<QPDF_Array*>(obj.getPointer());
+    return as_array->getItem(n);
+}
+
+// Array mutators
+
+// Replace item n of an Array object with item.  Throws
+// QEXC::Internal (via assertType) if the object is not an Array.
+void
+QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item)
+{
+    assertType("Array", isArray());
+    QPDF_Array* const as_array =
+	dynamic_cast<QPDF_Array*>(obj.getPointer());
+    as_array->setItem(n, item);
+}
+
+// Dictionary accessors
+
+// Report whether a Dictionary object has the given key.  Throws
+// QEXC::Internal (via assertType) if the object is not a Dictionary.
+bool
+QPDFObjectHandle::hasKey(std::string const& key)
+{
+    assertType("Dictionary", isDictionary());
+    QPDF_Dictionary* const as_dict =
+	dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    return as_dict->hasKey(key);
+}
+
+// Return the value stored under key in a Dictionary object.  Throws
+// QEXC::Internal (via assertType) if the object is not a Dictionary.
+QPDFObjectHandle
+QPDFObjectHandle::getKey(std::string const& key)
+{
+    assertType("Dictionary", isDictionary());
+    QPDF_Dictionary* const as_dict =
+	dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    return as_dict->getKey(key);
+}
+
+// Return the set of keys of a Dictionary object.  Throws
+// QEXC::Internal (via assertType) if the object is not a Dictionary.
+std::set<std::string>
+QPDFObjectHandle::getKeys()
+{
+    assertType("Dictionary", isDictionary());
+    QPDF_Dictionary* const as_dict =
+	dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    return as_dict->getKeys();
+}
+
+// Dictionary mutators
+
+// Forward (key, value) to replaceKey() on a Dictionary object.
+// Throws QEXC::Internal (via assertType) if the object is not a
+// Dictionary.
+void
+QPDFObjectHandle::replaceKey(std::string const& key,
+			    QPDFObjectHandle const& value)
+{
+    assertType("Dictionary", isDictionary());
+    QPDF_Dictionary* const as_dict =
+	dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    as_dict->replaceKey(key, value);
+}
+
+// Forward key to removeKey() on a Dictionary object.  Throws
+// QEXC::Internal (via assertType) if the object is not a Dictionary.
+void
+QPDFObjectHandle::removeKey(std::string const& key)
+{
+    assertType("Dictionary", isDictionary());
+    QPDF_Dictionary* const as_dict =
+	dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    as_dict->removeKey(key);
+}
+
+// Stream accessors
+// Return the dictionary of a Stream object.  Throws QEXC::Internal
+// (via assertType) if the object is not a Stream.
+QPDFObjectHandle
+QPDFObjectHandle::getDict()
+{
+    assertType("Stream", isStream());
+    QPDF_Stream* const as_stream =
+	dynamic_cast<QPDF_Stream*>(obj.getPointer());
+    return as_stream->getDict();
+}
+
+// Return the result of getStreamData() on a Stream object.  Throws
+// QEXC::Internal (via assertType) if the object is not a Stream.
+PointerHolder<Buffer>
+QPDFObjectHandle::getStreamData()
+{
+    assertType("Stream", isStream());
+    QPDF_Stream* const as_stream =
+	dynamic_cast<QPDF_Stream*>(obj.getPointer());
+    return as_stream->getStreamData();
+}
+
+// Forward all arguments to pipeStreamData() on a Stream object and
+// return its result.  Throws QEXC::Internal (via assertType) if the
+// object is not a Stream.
+bool
+QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
+				 bool normalize, bool compress)
+{
+    assertType("Stream", isStream());
+    QPDF_Stream* const as_stream =
+	dynamic_cast<QPDF_Stream*>(obj.getPointer());
+    return as_stream->pipeStreamData(p, filter, normalize, compress);
+}
+
+// Return the stored object ID.
+int
+QPDFObjectHandle::getObjectID() const
+{
+    int const id = this->objid;
+    return id;
+}
+
+// Return the stored generation number.
+int
+QPDFObjectHandle::getGeneration() const
+{
+    int const gen = this->generation;
+    return gen;
+}
+
+// Return a map from XObject name to stream handle for every entry of
+// this page's /Resources /XObject dictionary that is a stream whose
+// dictionary has /Subtype equal to /Image and has no /ImageMask key.
+// The map is empty if the page has no /Resources key or the
+// resources have no /XObject key.  Throws QEXC::Internal (via
+// assertPageObject) if this is not a page object.
+std::map<std::string, QPDFObjectHandle>
+QPDFObjectHandle::getPageImages()
+{
+    assertPageObject();
+
+    // Note: this code doesn't handle inherited resources.  If this
+    // page dictionary doesn't have a /Resources key or has one whose
+    // value is null or an empty dictionary, you are supposed to walk
+    // up the page tree until you find a /Resources dictionary.  As of
+    // this writing, I don't have any test files that use inherited
+    // resources, and hand-generating one won't be a good test because
+    // any mistakes in my understanding would be present in both the
+    // code and the test file.
+
+    // NOTE: If support of inherited resources (see above comment) is
+    // implemented, edit comment in QPDFObjectHandle.hh for this
+    // function.
+
+    std::map<std::string, QPDFObjectHandle> images;
+    if (! this->hasKey("/Resources"))
+    {
+	return images;
+    }
+    QPDFObjectHandle resources = this->getKey("/Resources");
+    if (! resources.hasKey("/XObject"))
+    {
+	return images;
+    }
+
+    QPDFObjectHandle xobject = resources.getKey("/XObject");
+    std::set<std::string> names = xobject.getKeys();
+    for (std::set<std::string>::iterator cur = names.begin();
+	 cur != names.end(); ++cur)
+    {
+	std::string const& name = *cur;
+	QPDFObjectHandle candidate = xobject.getKey(name);
+	if (! candidate.isStream())
+	{
+	    continue;
+	}
+	QPDFObjectHandle dict = candidate.getDict();
+	bool const is_image =
+	    (dict.hasKey("/Subtype") &&
+	     (dict.getKey("/Subtype").getName() == "/Image") &&
+	     (! dict.hasKey("/ImageMask")));
+	if (is_image)
+	{
+	    images[name] = candidate;
+	}
+    }
+
+    return images;
+}
+
+// Return the content streams of this page.  If /Contents is an
+// array, every item must be a stream and the items are returned in
+// array order; if /Contents is a stream, it is returned as the only
+// element.  Anything else causes QEXC::General to be thrown.  Throws
+// QEXC::Internal (via assertPageObject) if this is not a page
+// object.
+std::vector<QPDFObjectHandle>
+QPDFObjectHandle::getPageContents()
+{
+    assertPageObject();
+
+    std::vector<QPDFObjectHandle> streams;
+    QPDFObjectHandle contents = this->getKey("/Contents");
+    if (contents.isArray())
+    {
+	int const count = contents.getArrayNItems();
+	for (int idx = 0; idx < count; ++idx)
+	{
+	    QPDFObjectHandle element = contents.getArrayItem(idx);
+	    if (! element.isStream())
+	    {
+		throw QEXC::General("unknown item type while inspecting "
+				    "element of /Contents array in page "
+				    "dictionary");
+	    }
+	    streams.push_back(element);
+	}
+	return streams;
+    }
+    if (! contents.isStream())
+    {
+	throw QEXC::General("unknown object type inspecting /Contents "
+			    "key in page dictionary");
+    }
+    streams.push_back(contents);
+    return streams;
+}
+
+// Return a textual form of this handle.  An indirect handle yields
+// "<objid> <generation> R"; a direct one yields unparseResolved().
+std::string
+QPDFObjectHandle::unparse()
+{
+    if (! this->isIndirect())
+    {
+	return unparseResolved();
+    }
+    std::string reference = QUtil::int_to_string(this->objid);
+    reference += " ";
+    reference += QUtil::int_to_string(this->generation);
+    reference += " R";
+    return reference;
+}
+
+// Call dereference() and return the held object's own unparse()
+// result.
+std::string
+QPDFObjectHandle::unparseResolved()
+{
+    dereference();
+    QPDFObject* const resolved = this->obj.getPointer();
+    return resolved->unparse();
+}
+
+// Create a handle that refers to object (objid, generation) in qpdf.
+QPDFObjectHandle
+QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
+{
+    QPDFObjectHandle handle(qpdf, objid, generation);
+    return handle;
+}
+
+// Create a direct handle holding a new QPDF_Bool built from value.
+QPDFObjectHandle
+QPDFObjectHandle::newBool(bool value)
+{
+    QPDFObject* const created = new QPDF_Bool(value);
+    return QPDFObjectHandle(created);
+}
+
+// Create a direct handle holding a new QPDF_Null.
+QPDFObjectHandle
+QPDFObjectHandle::newNull()
+{
+    QPDFObject* const created = new QPDF_Null();
+    return QPDFObjectHandle(created);
+}
+
+// Create a direct handle holding a new QPDF_Integer built from value.
+QPDFObjectHandle
+QPDFObjectHandle::newInteger(int value)
+{
+    QPDFObject* const created = new QPDF_Integer(value);
+    return QPDFObjectHandle(created);
+}
+
+// Create a direct handle holding a new QPDF_Real built from the
+// string value.
+QPDFObjectHandle
+QPDFObjectHandle::newReal(std::string const& value)
+{
+    QPDFObject* const created = new QPDF_Real(value);
+    return QPDFObjectHandle(created);
+}
+
+// Create a direct handle holding a new QPDF_Name built from name.
+QPDFObjectHandle
+QPDFObjectHandle::newName(std::string const& name)
+{
+    QPDFObject* const created = new QPDF_Name(name);
+    return QPDFObjectHandle(created);
+}
+
+// Create a direct handle holding a new QPDF_String built from str.
+QPDFObjectHandle
+QPDFObjectHandle::newString(std::string const& str)
+{
+    QPDFObject* const created = new QPDF_String(str);
+    return QPDFObjectHandle(created);
+}
+
+// Create a direct handle holding a new QPDF_Array built from items.
+QPDFObjectHandle
+QPDFObjectHandle::newArray(std::vector<QPDFObjectHandle> const& items)
+{
+    QPDFObject* const created = new QPDF_Array(items);
+    return QPDFObjectHandle(created);
+}
+
+// Create a direct handle holding a new QPDF_Dictionary built from
+// items.
+QPDFObjectHandle
+QPDFObjectHandle::newDictionary(
+    std::map<std::string, QPDFObjectHandle> const& items)
+{
+    QPDFObject* const created = new QPDF_Dictionary(items);
+    return QPDFObjectHandle(created);
+}
+
+
+// Create a handle holding a new QPDF_Stream constructed from the
+// given arguments, which are passed through unchanged.
+QPDFObjectHandle
+QPDFObjectHandle::newStream(QPDF* qpdf, int objid, int generation,
+			    QPDFObjectHandle stream_dict,
+			    off_t offset, int length)
+{
+    QPDFObject* const created =
+	new QPDF_Stream(qpdf, objid, generation,
+			stream_dict, offset, length);
+    return QPDFObjectHandle(created);
+}
+
+// Convert this handle, and recursively every array item and
+// dictionary value reachable from it, into direct objects by
+// replacing the held object with a freshly allocated copy and
+// clearing the object ID and generation.
+//
+// visited holds the object IDs of the indirect objects currently
+// being converted higher up in the recursion; an ID is removed again
+// once its conversion completes, so the same indirect object may be
+// copied more than once as long as it does not contain itself.
+//
+// Throws QEXC::Internal if the handle is uninitialized or the object
+// has an unknown type, and QEXC::General if a stream is encountered
+// or if an indirect object refers back to itself.
+void
+QPDFObjectHandle::makeDirectInternal(std::set<int>& visited)
+{
+    assertInitialized();
+
+    if (isStream())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle ERR clone stream");
+	throw QEXC::General("attempt to make a stream into a direct object");
+    }
+
+    int cur_objid = this->objid;
+    if (cur_objid != 0)
+    {
+	if (visited.count(cur_objid))
+	{
+	    QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
+	    throw QEXC::General("loop detected while converting object from "
+				"indirect to direct");
+	}
+	visited.insert(cur_objid);
+    }
+
+    // Resolve the object before dropping the ID that identifies it.
+    dereference();
+    this->objid = 0;
+    this->generation = 0;
+
+    QPDFObject* new_obj = 0;
+
+    if (isBool())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone bool");
+	new_obj = new QPDF_Bool(getBoolValue());
+    }
+    else if (isNull())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone null");
+	new_obj = new QPDF_Null();
+    }
+    else if (isInteger())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone integer");
+	new_obj = new QPDF_Integer(getIntValue());
+    }
+    else if (isReal())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone real");
+	new_obj = new QPDF_Real(getRealValue());
+    }
+    else if (isName())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone name");
+	new_obj = new QPDF_Name(getName());
+    }
+    else if (isString())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone string");
+	new_obj = new QPDF_String(getStringValue());
+    }
+    else if (isArray())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone array");
+	std::vector<QPDFObjectHandle> items;
+	int n = getArrayNItems();
+	for (int i = 0; i < n; ++i)
+	{
+	    // Convert the copy stored in items, not the original item.
+	    items.push_back(getArrayItem(i));
+	    items.back().makeDirectInternal(visited);
+	}
+	new_obj = new QPDF_Array(items);
+    }
+    else if (isDictionary())
+    {
+	QTC::TC("qpdf", "QPDFObjectHandle clone dictionary");
+	std::set<std::string> keys = getKeys();
+	std::map<std::string, QPDFObjectHandle> items;
+	for (std::set<std::string>::iterator iter = keys.begin();
+	     iter != keys.end(); ++iter)
+	{
+	    // Convert the copy stored in items, not the original value.
+	    items[*iter] = getKey(*iter);
+	    items[*iter].makeDirectInternal(visited);
+	}
+	new_obj = new QPDF_Dictionary(items);
+    }
+    else
+    {
+	throw QEXC::Internal("QPDFObjectHandle::makeDirect: "
+			     "unknown object type");
+    }
+
+    this->obj = new_obj;
+
+    if (cur_objid)
+    {
+	visited.erase(cur_objid);
+    }
+}
+
+// Convert this object into a direct object by calling
+// makeDirectInternal() with an initially empty set of visited
+// object IDs.
+void
+QPDFObjectHandle::makeDirect()
+{
+    std::set<int> in_progress;
+    makeDirectInternal(in_progress);
+}
+
+// Throw QEXC::Internal unless this handle is initialized.
+void
+QPDFObjectHandle::assertInitialized() const
+{
+    if (this->initialized)
+    {
+	return;
+    }
+    throw QEXC::Internal("operation attempted on uninitialized "
+			 "QPDFObjectHandle");
+}
+
+// Throw QEXC::Internal, mentioning type_name in the message, unless
+// istype is true.
+void
+QPDFObjectHandle::assertType(char const* type_name, bool istype)
+{
+    if (istype)
+    {
+	return;
+    }
+    std::string complaint("operation for ");
+    complaint += type_name;
+    complaint += " object attempted on object of wrong type";
+    throw QEXC::Internal(complaint);
+}
+
+// Throw QEXC::Internal unless this object is a dictionary that has a
+// /Type key whose name is /Page.
+void
+QPDFObjectHandle::assertPageObject()
+{
+    bool const is_page =
+	(this->isDictionary() && this->hasKey("/Type") &&
+	 (this->getKey("/Type").getName() == "/Page"));
+    if (is_page)
+    {
+	return;
+    }
+    throw QEXC::Internal("page operation called on non-Page object");
+}
+
+// Make sure obj holds an object.  If it is empty, ask
+// QPDF::Resolver::resolve for the object identified by the stored
+// qpdf, object ID, and generation; if that also yields nothing, a
+// new QPDF_Null is stored instead.
+void
+QPDFObjectHandle::dereference()
+{
+    if (this->obj.getPointer() != 0)
+    {
+	return;
+    }
+    this->obj = QPDF::Resolver::resolve(
+	this->qpdf, this->objid, this->generation);
+    if (this->obj.getPointer() != 0)
+    {
+	return;
+    }
+    QTC::TC("qpdf", "QPDFObjectHandle indirect to unknown");
+    this->obj = new QPDF_Null();
+}
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
new file mode 100644
index 00000000..4eed6f16
--- /dev/null
+++ b/libqpdf/QPDFTokenizer.cc
@@ -0,0 +1,458 @@
+
+#include <qpdf/QPDFTokenizer.hh>
+
+// DO NOT USE ctype -- it is locale dependent for some things, and
+// it's not worth the risk of including it in case it may accidentally
+// be used.
+
+#include <qpdf/PCRE.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+
+// See note above about ctype.  Return true only for the ASCII
+// characters 0-9, a-f, and A-F.  Explicit range comparisons are used
+// rather than strchr() on a digit string because strchr() also
+// matches the string's terminating NUL, which would make '\0' count
+// as a hex digit.
+static bool is_hex_digit(char ch)
+{
+    return (((ch >= '0') && (ch <= '9')) ||
+	    ((ch >= 'a') && (ch <= 'f')) ||
+	    ((ch >= 'A') && (ch <= 'F')));
+}
+
+// Construct a tokenizer in which '#' is special inside names, then
+// put it into its initial state with reset().
+QPDFTokenizer::QPDFTokenizer()
+    : pound_special_in_name(true)
+{
+    this->reset();
+}
+
+// Turn off special handling of '#' inside name tokens.
+void
+QPDFTokenizer::allowPoundAnywhereInName()
+{
+    this->pound_special_in_name = false;
+    QTC::TC("qpdf", "QPDFTokenizer allow pound anywhere in name");
+}
+
+// Return the tokenizer to its initial state: top-level state, a
+// token of type tt_bad with empty values, no pending unread
+// character, and cleared string-parsing flags.  bs_num_register and
+// pound_special_in_name are not touched here.
+void
+QPDFTokenizer::reset()
+{
+    // State machine position and string-parsing bookkeeping
+    this->state = st_top;
+    this->string_depth = 0;
+    this->string_ignoring_newline = false;
+    this->last_char_was_bs = false;
+
+    // Pending unread character
+    this->unread_char = false;
+    this->char_to_unread = '\0';
+
+    // Token under construction
+    this->type = tt_bad;
+    this->val = "";
+    this->raw_val = "";
+    this->error_message = "";
+}
+
+// Feed one input character to the tokenizer's state machine.
+//
+// Depending on the current state, the character is discarded,
+// accumulated into val, or completes a token, in which case state
+// becomes st_token_ready and type/val/error_message describe the
+// token.  When the character that ends a token is not part of it,
+// it is recorded in char_to_unread and unread_char is set.  Throws
+// QEXC::Internal if called while a token is already waiting, or if
+// the state machine reaches an unexpected state.
+void
+QPDFTokenizer::presentCharacter(char ch)
+{
+    // Matches an optional sign followed by either ".digits" or
+    // "digits" with an optional ".digits" suffix.
+    static PCRE num_re("^[\\+\\-]?(?:\\.\\d+|\\d+(?:\\.\\d+)?)$");
+
+    if (state == st_token_ready)
+    {
+	throw QEXC::Internal("QPDF tokenizer presented character "
+			     "while token is waiting");
+    }
+
+    char orig_ch = ch;
+
+    // State machine is implemented such that some characters may be
+    // handled more than once.  This happens whenever you have to use
+    // the character that caused a state change in the new state.
+
+    bool handled = true;
+    if (state == st_top)
+    {
+	// Note: we specifically do not use ctype here.  It is
+	// locale-dependent.
+	if (strchr(" \t\n\v\f\r", ch))
+	{
+	    // ignore
+	}
+	else if (ch == '%')
+	{
+	    // Discard comments
+	    state = st_in_comment;
+	}
+	else if (ch == '(')
+	{
+	    string_depth = 1;
+	    string_ignoring_newline = false;
+	    memset(bs_num_register, '\0', sizeof(bs_num_register));
+	    last_char_was_bs = false;
+	    state = st_in_string;
+	}
+	else if (ch == '<')
+	{
+	    state = st_lt;
+	}
+	else if (ch == '>')
+	{
+	    state = st_gt;
+	}
+	else
+	{
+	    val += ch;
+	    if (ch == ')')
+	    {
+		type = tt_bad;
+		QTC::TC("qpdf", "QPDF_Tokenizer bad )");
+		error_message = "unexpected )";
+		state = st_token_ready;
+	    }
+	    else if (ch == '[')
+	    {
+		type = tt_array_open;
+		state = st_token_ready;
+	    }
+	    else if (ch == ']')
+	    {
+		type = tt_array_close;
+		state = st_token_ready;
+	    }
+	    else if (ch == '{')
+	    {
+		type = tt_brace_open;
+		state = st_token_ready;
+	    }
+	    else if (ch == '}')
+	    {
+		type = tt_brace_close;
+		state = st_token_ready;
+	    }
+	    else
+	    {
+		state = st_literal;
+	    }
+	}
+    }
+    else if (state == st_in_comment)
+    {
+	// A comment runs to the end of the line.
+	if ((ch == '\r') || (ch == '\n'))
+	{
+	    state = st_top;
+	}
+    }
+    else if (state == st_lt)
+    {
+	if (ch == '<')
+	{
+	    val = "<<";
+	    type = tt_dict_open;
+	    state = st_token_ready;
+	}
+	else
+	{
+	    // Not "<<", so this is a hex string.  Leave the character
+	    // unhandled so that the st_in_hexstring code below
+	    // processes it.
+	    handled = false;
+	    state = st_in_hexstring;
+	}
+    }
+    else if (state == st_gt)
+    {
+	if (ch == '>')
+	{
+	    val = ">>";
+	    type = tt_dict_close;
+	    state = st_token_ready;
+	}
+	else
+	{
+	    val = ">";
+	    type = tt_bad;
+	    QTC::TC("qpdf", "QPDF_Tokenizer bad >");
+	    error_message = "unexpected >";
+	    unread_char = true;
+	    char_to_unread = ch;
+	    state = st_token_ready;
+	}
+    }
+    else if (state == st_in_string)
+    {
+	if (string_ignoring_newline && (! ((ch == '\r') || (ch == '\n'))))
+	{
+	    string_ignoring_newline = false;
+	}
+
+	// bs_num_register accumulates the octal digits of a \ddd
+	// escape; its string length is the number of digits seen so
+	// far.
+	unsigned int bs_num_count = strlen(bs_num_register);
+	bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
+	if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal)))
+	{
+	    // We've accumulated \ddd.  PDF Spec says to ignore
+	    // high-order overflow.
+	    val += (char) strtol(bs_num_register, 0, 8);
+	    memset(bs_num_register, '\0', sizeof(bs_num_register));
+	    bs_num_count = 0;
+	}
+
+	if (string_ignoring_newline && ((ch == '\r') || (ch == '\n')))
+	{
+	    // ignore
+	}
+	else if (ch_is_octal && (last_char_was_bs || (bs_num_count > 0)))
+	{
+	    bs_num_register[bs_num_count++] = ch;
+	}
+	else if (last_char_was_bs)
+	{
+	    switch (ch)
+	    {
+	      case 'n':
+		val += '\n';
+		break;
+
+	      case 'r':
+		val += '\r';
+		break;
+
+	      case 't':
+		val += '\t';
+		break;
+
+	      case 'b':
+		val += '\b';
+		break;
+
+	      case 'f':
+		val += '\f';
+		break;
+
+	      case '\r':
+	      case '\n':
+		string_ignoring_newline = true;
+		break;
+
+	      default:
+		// PDF spec says backslash is ignored before anything else
+		val += ch;
+		break;
+	    }
+	}
+	else if (ch == '\\')
+	{
+	    // last_char_was_bs is set/cleared below as appropriate
+	    if (bs_num_count)
+	    {
+		throw QEXC::Internal("QPDFTokenizer: bs_num_count != 0 "
+				     "when ch == '\\'");
+	    }
+	}
+	else if (ch == '(')
+	{
+	    val += ch;
+	    ++string_depth;
+	}
+	else if ((ch == ')') && (--string_depth == 0))
+	{
+	    type = tt_string;
+	    state = st_token_ready;
+	}
+	else
+	{
+	    val += ch;
+	}
+
+	// A backslash that itself follows a backslash does not start
+	// a new escape.
+	last_char_was_bs = ((! last_char_was_bs) && (ch == '\\'));
+    }
+    else if (state == st_literal)
+    {
+	if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0)
+	{
+	    // A C-locale whitespace character or delimiter terminates
+	    // token.  It is important to unread the whitespace
+	    // character even though it is ignored since it may be the
+	    // newline after a stream keyword.  Removing it here could
+	    // make the stream-reading code break on some files,
+	    // though not on any files in the test suite as of this
+	    // writing.
+
+	    type = tt_word;
+	    unread_char = true;
+	    char_to_unread = ch;
+	    state = st_token_ready;
+	}
+	else
+	{
+	    val += ch;
+	}
+    }
+    else
+    {
+	handled = false;
+    }
+
+
+    if (handled)
+    {
+	// okay
+    }
+    else if (state == st_in_hexstring)
+    {
+	if (ch == '>')
+	{
+	    type = tt_string;
+	    state = st_token_ready;
+	    if (val.length() % 2)
+	    {
+		// PDF spec says odd hexstrings have implicit
+		// trailing 0.
+		val += '0';
+	    }
+	    // Convert each pair of hex digits into one character.
+	    char num[3];
+	    num[2] = '\0';
+	    std::string nval;
+	    for (unsigned int i = 0; i < val.length(); i += 2)
+	    {
+		num[0] = val[i];
+		num[1] = val[i+1];
+		char nch = (char)(strtol(num, 0, 16));
+		nval += nch;
+	    }
+	    val = nval;
+	}
+	else if (is_hex_digit(ch))
+	{
+	    val += ch;
+	}
+	else if (strchr(" \t\n\v\f\r", ch))
+	{
+	    // ignore
+	}
+	else
+	{
+	    type = tt_bad;
+	    QTC::TC("qpdf", "QPDF_Tokenizer bad (");
+	    error_message = std::string("invalid character (") +
+		ch + ") in hexstring";
+	    state = st_token_ready;
+	}
+    }
+    else
+    {
+	throw QEXC::Internal("invalid state while reading token");
+    }
+
+    // A completed word is classified further as a name, number,
+    // boolean, or null; anything else stays tt_word.
+    if ((state == st_token_ready) && (type == tt_word))
+    {
+	if ((val.length() > 0) && (val[0] == '/'))
+	{
+	    type = tt_name;
+	    // Deal with # in name token.  Note: '/' by itself is a
+	    // valid name, so don't strip leading /.  That way we
+	    // don't have to deal with the empty string as a name.
+	    std::string nval = "/";
+	    char const* valstr = val.c_str() + 1;
+	    for (char const* p = valstr; *p; ++p)
+	    {
+		if ((*p == '#') && this->pound_special_in_name)
+		{
+		    if (p[1] && p[2] &&
+			is_hex_digit(p[1]) && is_hex_digit(p[2]))
+		    {
+			char num[3];
+			num[0] = p[1];
+			num[1] = p[2];
+			num[2] = '\0';
+			// Note: this local ch shadows the parameter.
+			char ch = (char)(strtol(num, 0, 16));
+			if (ch == '\0')
+			{
+			    type = tt_bad;
+			    QTC::TC("qpdf", "QPDF_Tokenizer null in name");
+			    error_message =
+				"null character not allowed in name token";
+			    nval += "#00";
+			}
+			else
+			{
+			    nval += ch;
+			}
+			p += 2;
+		    }
+		    else
+		    {
+			QTC::TC("qpdf", "QPDF_Tokenizer bad name");
+			type = tt_bad;
+			error_message = "invalid name token";
+			nval += *p;
+		    }
+		}
+		else
+		{
+		    nval += *p;
+		}
+	    }
+	    val = nval;
+	}
+	else if (num_re.match(val.c_str()))
+	{
+	    if (val.find('.') != std::string::npos)
+	    {
+		type = tt_real;
+	    }
+	    else
+	    {
+		type = tt_integer;
+	    }
+	}
+	else if ((val == "true") || (val == "false"))
+	{
+	    type = tt_bool;
+	}
+	else if (val == "null")
+	{
+	    type = tt_null;
+	}
+	else
+	{
+	    // I don't really know what it is, so leave it as tt_word.
+	    // Lots of cases ($, #, etc.) other than actual words fall
+	    // into this category, but that's okay at least for now.
+	    type = tt_word;
+	}
+    }
+
+    // Record the original character in raw_val unless we are between
+    // tokens or the character is the one being unread.
+    if (! (betweenTokens() || ((state == st_token_ready) && unread_char)))
+    {
+	this->raw_val += orig_ch;
+    }
+}
+
+// Tell the tokenizer that the input has ended.  Nothing changes if a
+// token is already waiting or the tokenizer is at top level; an
+// unterminated comment simply ends; in any other state a tt_bad
+// token with an EOF error message becomes ready.
+void
+QPDFTokenizer::presentEOF()
+{
+    if ((state == st_token_ready) || (state == st_top))
+    {
+	// okay
+	return;
+    }
+    if (state == st_in_comment)
+    {
+	state = st_top;
+	return;
+    }
+    type = tt_bad;
+    error_message = "EOF while reading token";
+    state = st_token_ready;
+}
+
+// Report whether a token is ready.  unread_char and ch always
+// receive the tokenizer's current unread flag and character.  If a
+// token is ready, it is copied into token and the tokenizer is
+// reset; otherwise token is left untouched.
+bool
+QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
+{
+    unread_char = this->unread_char;
+    ch = this->char_to_unread;
+    if (this->state != st_token_ready)
+    {
+	return false;
+    }
+    token = Token(this->type, this->val, this->raw_val,
+		  this->error_message);
+    reset();
+    return true;
+}
+
+// True if the tokenizer is at top level or inside a comment.
+bool
+QPDFTokenizer::betweenTokens()
+{
+    if (state == st_top)
+    {
+	return true;
+    }
+    return (state == st_in_comment);
+}
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
new file mode 100644
index 00000000..0a611eb9
--- /dev/null
+++ b/libqpdf/QPDFWriter.cc
@@ -0,0 +1,2021 @@
+
+#include <qpdf/QPDFWriter.hh>
+
+#include <assert.h>
+#include <qpdf/Pl_StdioFile.hh>
+#include <qpdf/Pl_Count.hh>
+#include <qpdf/Pl_Discard.hh>
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/Pl_RC4.hh>
+#include <qpdf/Pl_Flate.hh>
+#include <qpdf/Pl_PNGFilter.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/MD5.hh>
+#include <qpdf/RC4.hh>
+#include <qpdf/QTC.hh>
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/QPDF_Name.hh>
+#include <qpdf/QPDF_String.hh>
+
+// Open the output (stdout when filename is null) and build the base
+// pipeline: a Pl_Count byte counter feeding a Pl_StdioFile.
+QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
+    pdf(pdf),
+    filename(filename),
+    file(0),
+    close_file(false),
+    normalize_content_set(false), normalize_content(false),
+    stream_data_mode_set(false), stream_data_mode(s_compress),
+    qdf_mode(false),
+    static_id(false),
+    direct_stream_lengths(true),
+    encrypted(false),
+    preserve_encryption(true),
+    linearized(false),
+    object_stream_mode(o_preserve),
+    encryption_dict_objid(0),
+    next_objid(1),
+    cur_stream_length_id(0), cur_stream_length(0),
+    added_newline(false),
+    max_ostream_index(0)
+{
+    if (filename != 0)
+    {
+	QTC::TC("qpdf", "QPDFWriter write to file");
+	this->file = QUtil::fopen_wrapper(
+	    std::string("open ") + filename, fopen(filename, "wb+"));
+	this->close_file = true;
+    }
+    else
+    {
+	// Null filename: write to stdout; close_file stays false.
+	this->filename = "standard output";
+	QTC::TC("qpdf", "QPDFWriter write to stdout");
+	this->file = stdout;
+    }
+    Pipeline* sink = new Pl_StdioFile("qdf output", this->file);
+    this->to_delete.push_back(sink);
+    this->pipeline = new Pl_Count("qdf count", sink);
+    this->to_delete.push_back(this->pipeline);
+    this->pipeline_stack.push_back(this->pipeline);
+}
+
+QPDFWriter::~QPDFWriter()
+{
+    if (file && close_file)	// stdout is borrowed (close_file false): never close it
+    {
+	fclose(file);
+    }
+}
+
+// Choose the object stream mode that write() dispatches on.
+void QPDFWriter::setObjectStreamMode(object_stream_e mode)
+{
+    object_stream_mode = mode;
+}
+
+// Set the stream data mode and note that the caller chose it explicitly.
+void QPDFWriter::setStreamDataMode(stream_data_e mode)
+{
+    stream_data_mode = mode;
+    stream_data_mode_set = true;
+}
+
+// Set content normalization and note that the caller chose it explicitly.
+void QPDFWriter::setContentNormalization(bool val)
+{
+    normalize_content = val;
+    normalize_content_set = true;
+}
+
+// Turn QDF mode on or off (write() turns it off when linearizing).
+void QPDFWriter::setQDFMode(bool val)
+{
+    qdf_mode = val;
+}
+
+// When true, generateID() uses its fixed test-suite value for the new ID.
+void QPDFWriter::setStaticID(bool val)
+{
+    static_id = val;
+}
+
+// When still true in write(), the input's encryption parameters are copied.
+void QPDFWriter::setPreserveEncryption(bool val)
+{
+    preserve_encryption = val;
+}
+
+// Request (or cancel) linearized output.
+void QPDFWriter::setLinearization(bool val)
+{
+    linearized = val;
+}
+
+// Configure V=1, R=2 encryption with a 5-byte key.  Every operation
+// that is not allowed adds its permission bit (numbered from 1) to
+// the set handed to setEncryptionParameters() for clearing.
+void QPDFWriter::setR2EncryptionParameters(
+    char const* user_password, char const* owner_password,
+    bool allow_print, bool allow_modify,
+    bool allow_extract, bool allow_annotate)
+{
+    // Each flag lines up with the permission bit at the same index.
+    bool const allowed[] = {
+	allow_print, allow_modify, allow_extract, allow_annotate };
+    int const permission_bit[] = { 3, 4, 5, 6 };
+
+    std::set<int> bits_to_clear;
+    for (int i = 0; i < 4; ++i)
+    {
+	if (! allowed[i])
+	{
+	    bits_to_clear.insert(permission_bit[i]);
+	}
+    }
+
+    this->min_pdf_version = "1.3";
+    // Arguments after the passwords: V, R and key length in bytes.
+    setEncryptionParameters(
+	user_password, owner_password, 1, 2, 5, bits_to_clear);
+}
+
+void
+QPDFWriter::setR3EncryptionParameters(
+    char const* user_password, char const* owner_password,
+    bool allow_accessibility, bool allow_extract,
+    r3_print_e print, r3_modify_e modify)
+{
+    // Configure V=2, R=3 encryption with a 16-byte key.  The parameters
+    // mirror the Acrobat 5 security options:
+    // Checkboxes:
+    //   Enable Content Access for the Visually Impaired
+    //   Allow Content Copying and Extraction
+
+    // Allowed changes menu:
+    //   None
+    //   Only Document Assembly
+    //   Only Form Field Fill-in or Signing
+    //   Comment Authoring, Form Field Fill-in or Signing
+    //   General Editing, Comment and Form Field Authoring
+
+    // Allowed printing menu:
+    //   None
+    //   Low Resolution
+    //   Full printing
+
+    std::set<int> clear;	// permission bits (numbered from 1) to turn off
+    if (! allow_accessibility)
+    {
+	clear.insert(10);
+    }
+    if (! allow_extract)
+    {
+	clear.insert(5);
+    }
+
+    // Note: these switch statements all "fall through" (no break
+    // statements).  Each option clears successively more access bits.
+    switch (print)
+    {
+      case r3p_none:
+	clear.insert(3);	// any printing
+
+      case r3p_low:
+	clear.insert(12);	// high resolution printing
+
+      case r3p_full:
+	break;
+
+	// no default so gcc warns for missing cases
+    }
+
+    switch (modify)
+    {
+      case r3m_none:
+	clear.insert(11);	// document assembly
+
+      case r3m_assembly:
+	clear.insert(9);	// filling in form fields
+
+      case r3m_form:
+	clear.insert(6);	// modify annotations, fill in form fields
+
+      case r3m_annotate:
+	clear.insert(4);	// other modifications
+
+      case r3m_all:
+	break;
+
+	// no default so gcc warns for missing cases
+    }
+
+    this->min_pdf_version = "1.4";
+    setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear);
+}
+
+// Derive /P, /O and /U for the requested permissions and install them.
+void QPDFWriter::setEncryptionParameters(
+    char const* user_password, char const* owner_password,
+    int V, int R, int key_len, std::set<int>& bits_to_clear)
+{
+    // PDF specification refers to bits with the low bit numbered 1.
+    // We have to convert this into a bit field.
+    // Specification always requires bits 1 and 2 to be cleared.
+    bits_to_clear.insert(1);
+    bits_to_clear.insert(2);
+
+    unsigned long P = 0;
+    // Create the complement of P, then invert.
+    for (std::set<int>::iterator iter = bits_to_clear.begin();
+	 iter != bits_to_clear.end(); ++iter)
+    {
+	// Shift an unsigned long so that high bit numbers stay well defined.
+	P |= (1UL << ((*iter) - 1));
+    }
+    P = ~P;
+
+    generateID();
+    std::string O;
+    std::string U;
+    QPDF::compute_encryption_O_U(
+	user_password, owner_password, V, R, key_len, P, this->id1, O, U);
+    setEncryptionParametersInternal(
+	V, R, key_len, P, O, U, this->id1, user_password);
+}
+
+// Carry the input file's /Encrypt settings, if any, over to the output.
+void QPDFWriter::copyEncryptionParameters()
+{
+    generateID();
+    QPDFObjectHandle trailer = this->pdf.getTrailer();
+    if (! trailer.hasKey("/Encrypt"))
+    {
+	return;
+    }
+
+    QPDFObjectHandle enc = trailer.getKey("/Encrypt");
+    int const V = enc.getKey("/V").getIntValue();
+    // /Length (in bits) is only consulted for V > 1; otherwise the key
+    // is 5 bytes long.
+    int const key_bytes =
+	(V > 1) ? (enc.getKey("/Length").getIntValue() / 8) : 5;
+    setEncryptionParametersInternal(
+	V, enc.getKey("/R").getIntValue(),
+	key_bytes,
+	enc.getKey("/P").getIntValue(),
+	enc.getKey("/O").getStringValue(),
+	enc.getKey("/U").getStringValue(),
+	this->id1,		// this->id1 == the other file's id1
+	pdf.getUserPassword());
+}
+
+// Record the /Encrypt dictionary entries and derive the encryption key.
+void QPDFWriter::setEncryptionParametersInternal(
+    int V, int R, int key_len, long P,
+    std::string const& O, std::string const& U,
+    std::string const& id1, std::string const& user_password)
+{
+    this->encryption_dictionary["/Filter"] = "/Standard";
+    this->encryption_dictionary["/V"] = QUtil::int_to_string(V);
+    this->encryption_dictionary["/Length"] = QUtil::int_to_string(key_len * 8);
+    this->encryption_dictionary["/R"] = QUtil::int_to_string(R);
+    this->encryption_dictionary["/P"] = QUtil::int_to_string(P);
+    this->encryption_dictionary["/O"] = QPDF_String(O).unparse(true);
+    this->encryption_dictionary["/U"] = QPDF_String(U).unparse(true);
+    this->encrypted = true;
+    // NOTE: the key is derived from this->id1; the id1 argument is unused.
+    QPDF::EncryptionData enc_data(V, R, key_len, P, O, U, this->id1);
+    this->encryption_key = QPDF::compute_encryption_key(user_password, enc_data);
+}
+
+// Derive cur_data_key for object objid from the file encryption key
+// (the third argument to compute_data_key is fixed at 0).
+void QPDFWriter::setDataKey(int objid)
+{
+    cur_data_key = QPDF::compute_data_key(encryption_key, objid, 0);
+}
+
+// Count the bytes needed to represent n without leading zero bytes.
+// Zero needs no bytes at all, so the result for n == 0 is 0.
+int QPDFWriter::bytesNeeded(unsigned long n)
+{
+    int count = 0;
+    for (unsigned long rest = n; rest != 0; rest >>= 8)
+    {
+	++count;
+    }
+    return count;
+}
+
+// Write the low `bytes` bytes of val, most significant byte first.
+void QPDFWriter::writeBinary(unsigned long val, unsigned int bytes)
+{
+    assert(bytes <= sizeof(unsigned long));
+    unsigned char buf[sizeof(unsigned long)];
+    for (unsigned int pos = bytes; pos > 0; --pos)
+    {
+	buf[pos - 1] = (unsigned char)(val & 0xff);
+	val >>= 8;
+    }
+    this->pipeline->write(buf, bytes);
+}
+
+// Send the bytes of str to the current pipeline.
+void QPDFWriter::writeString(std::string const& str)
+{
+    pipeline->write((unsigned char*)str.c_str(), str.length());
+}
+
+// Send the whole contents of the held Buffer to the current pipeline.
+void QPDFWriter::writeBuffer(PointerHolder<Buffer>& b)
+{
+    this->pipeline->write(
+	b.getPointer()->getBuffer(), b.getPointer()->getSize());
+}
+
+// Write str only when QDF mode is on; otherwise write nothing.
+void QPDFWriter::writeStringQDF(std::string const& str)
+{
+    if (qdf_mode)
+    {
+	this->writeString(str);
+    }
+}
+
+// Write str only when QDF mode is off; otherwise write nothing.
+void QPDFWriter::writeStringNoQDF(std::string const& str)
+{
+    if (! qdf_mode)
+    {
+	this->writeString(str);
+    }
+}
+
+// Stack p (never a Pl_Count: those mark activation points) and return it.
+Pipeline* QPDFWriter::pushPipeline(Pipeline* p)
+{
+    assert(! dynamic_cast<Pl_Count*>(p));
+    pipeline_stack.push_back(p);
+    return p;
+}
+
+// Cap the stack with a fresh Pl_Count and make it the active pipeline.
+void QPDFWriter::activatePipelineStack()
+{
+    Pl_Count* counter = new Pl_Count("count", pipeline_stack.back());
+    pipeline_stack.push_back(counter);
+    pipeline = counter;
+}
+
+void
+QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp)
+{   // Undo the latest activatePipelineStack() and the pushes before it.
+    assert(this->pipeline_stack.size() >= 2);
+    this->pipeline->finish();	// finish the active chain before deleting it
+    assert(dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) ==
+	   this->pipeline);
+    delete this->pipeline_stack.back();
+    this->pipeline_stack.pop_back();
+    while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0)
+    {	// delete each pipeline stacked above the previous Pl_Count
+	Pipeline* p = this->pipeline_stack.back();
+	this->pipeline_stack.pop_back();
+	Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p);
+	if (bp && buf)
+	{   // give the buffered data to the caller before p is deleted
+	    *bp = buf->getBuffer();
+	}
+	delete p;
+    }
+    this->pipeline = dynamic_cast<Pl_Count*>(this->pipeline_stack.back());
+}
+
+// Start a pipeline segment that RC4-encrypts with cur_data_key when
+// encryption is on and a data key is set; otherwise it adds no filter.
+void QPDFWriter::pushEncryptionFilter()
+{
+    if (this->encrypted && (! this->cur_data_key.empty()))
+    {
+	pushPipeline(
+	    new Pl_RC4("stream encryption", this->pipeline,
+		       (unsigned char*) this->cur_data_key.c_str(),
+		       this->cur_data_key.length()));
+    }
+    // Always activate, so that every call is balanced by exactly one
+    // popPipelineStack() whether or not a filter was added.
+    activatePipelineStack();
+}
+
+// Start a segment that ends in a Pl_Discard; pair with popPipelineStack().
+void QPDFWriter::pushDiscardFilter()
+{
+    this->pushPipeline(new Pl_Discard());
+    this->activatePipelineStack();
+}
+
+// Start writing object objid; 0 means "allocate the next free id".
+// Records the current pipeline count as the object's xref entry,
+// writes the "<id> 0 obj" header and returns the id actually used.
+int QPDFWriter::openObject(int objid)
+{
+    int const id = (objid != 0) ? objid : this->next_objid++;
+
+    this->xref[id] = QPDFXRefEntry(1, this->pipeline->getCount(), 0);
+    writeString(QUtil::int_to_string(id));
+    writeString(" 0 obj\n");
+    return id;
+}
+
+// End object objid and record its length since the offset in its xref entry.
+void QPDFWriter::closeObject(int objid)
+{
+    // The newline ahead of endobj makes damaged files easier to repair.
+    writeString("\nendobj\n");
+    writeStringQDF("\n");
+    this->lengths[objid] =
+	this->pipeline->getCount() - this->xref[objid].getOffset();
+}
+
+// If objid is an object stream, reserve the next free output object
+// numbers for the objects it contains.
+void QPDFWriter::assignCompressedObjectNumbers(int objid)
+{
+    if (this->object_stream_to_objects.count(objid) == 0)
+    {
+	// Not an object stream: nothing to reserve.
+	return;
+    }
+
+    // Members are numbered in the iteration order of the stream's set.
+    std::set<int>& members = this->object_stream_to_objects[objid];
+    for (std::set<int>::iterator m = members.begin();
+	 m != members.end(); ++m)
+    {
+	this->obj_renumber[*m] = this->next_objid++;
+    }
+}
+
+void
+QPDFWriter::enqueueObject(QPDFObjectHandle object)
+{   // Queue indirect objects for output; search direct containers for more.
+    if (object.isIndirect())
+    {
+	if (object.isNull())
+	{
+	    // This is a place-holder object for an object stream
+	}
+	else if (object.isScalar())
+	{
+	    throw QEXC::Internal(
+		"QPDFWriter::enqueueObject: indirect scalar: " +
+		std::string(this->filename) + " " +
+		QUtil::int_to_string(object.getObjectID()) + " " +
+		QUtil::int_to_string(object.getGeneration()));
+	}
+	int objid = object.getObjectID();
+
+	if (obj_renumber.count(objid) == 0)	// not numbered yet, so not queued yet
+	{
+	    if (this->object_to_object_stream.count(objid))
+	    {
+		// This is in an object stream.  Don't process it
+		// here.  Instead, enqueue the object stream.
+		int stream_id = this->object_to_object_stream[objid];
+		enqueueObject(this->pdf.getObjectByID(stream_id, 0));
+	    }
+	    else
+	    {
+		object_queue.push_back(object);
+		obj_renumber[objid] = next_objid++;	// output object number
+
+		if (this->object_stream_to_objects.count(objid))
+		{
+		    // For linearized files, uncompressed objects go
+		    // at end, and we take care of assigning numbers
+		    // to them elsewhere.
+		    if (! this->linearized)
+		    {
+			assignCompressedObjectNumbers(objid);
+		    }
+		}
+		else if ((! this->direct_stream_lengths) && object.isStream())
+		{
+		    // reserve next object ID for length
+		    ++next_objid;
+		}
+	    }
+	}
+    }
+    else if (object.isArray())
+    {
+	int n = object.getArrayNItems();
+	for (int i = 0; i < n; ++i)
+	{
+	    if (! this->linearized)	// no recursion into children when linearizing
+	    {
+		enqueueObject(object.getArrayItem(i));
+	    }
+	}
+    }
+    else if (object.isDictionary())
+    {
+	std::set<std::string> keys = object.getKeys();
+	for (std::set<std::string>::iterator iter = keys.begin();
+	     iter != keys.end(); ++iter)
+	{
+	    if (! this->linearized)	// no recursion into children when linearizing
+	    {
+		enqueueObject(object.getKey(*iter));
+	    }
+	}
+    }
+    else
+    {
+	// ignore
+    }
+}
+
+// Write a child of an array, dictionary or trailer: direct objects are
+// written inline, indirect ones as "N 0 R" using their renumbered id.
+void QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
+{
+    // Except when linearizing, make sure the child is queued for output.
+    if (! this->linearized)
+    {
+	enqueueObject(child);
+    }
+    if (! child.isIndirect())
+    {
+	unparseObject(child, level, flags);
+	return;
+    }
+    if (child.isScalar())
+    {
+	throw QEXC::Internal(
+	    "QPDFWriter::unparseChild: indirect scalar: " +
+	    QUtil::int_to_string(child.getObjectID()) + " " +
+	    QUtil::int_to_string(child.getGeneration()));
+    }
+
+    int const renumbered = this->obj_renumber[child.getObjectID()];
+    writeString(QUtil::int_to_string(renumbered));
+    writeString(" 0 R");
+}
+
+void
+QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, int prev)
+{   // Write the trailer entries and closing ">>" (plus "trailer <<" if no xref stream).
+    QPDFObjectHandle trailer = pdf.getTrailer();
+    if (! xref_stream)
+    {
+	writeString("trailer <<");
+    }
+    writeStringQDF("\n");
+    if (which == t_lin_second)	// this trailer gets only /Size and /ID
+    {
+	writeString(" /Size ");
+	writeString(QUtil::int_to_string(size));
+    }
+    else
+    {
+	std::set<std::string> keys = trailer.getKeys();
+	for (std::set<std::string>::iterator iter = keys.begin();
+	     iter != keys.end(); ++iter)
+	{
+	    std::string const& key = *iter;
+	    writeStringQDF("  ");
+	    writeStringNoQDF(" ");
+	    writeString(QPDF_Name::normalizeName(key));
+	    writeString(" ");
+	    if (key == "/Size")	// value comes from the size argument
+	    {
+		writeString(QUtil::int_to_string(size));
+		if (which == t_lin_first)
+		{
+		    writeString(" /Prev ");
+		    int pos = this->pipeline->getCount();
+		    writeString(QUtil::int_to_string(prev));
+		    int nspaces = pos + 11 - this->pipeline->getCount();	// pad value to 11 chars
+		    assert(nspaces >= 0);
+		    for (int i = 0; i < nspaces; ++i)
+		    {
+			writeString(" ");
+		    }
+		}
+	    }
+	    else
+	    {
+		unparseChild(trailer.getKey(key), 1, 0);
+	    }
+	    writeStringQDF("\n");
+	}
+    }
+
+    // Write ID
+    writeStringQDF(" ");	// with the next line: two-space indent in QDF mode
+    writeString(" /ID [");
+    writeString(QPDF_String(this->id1).unparse(true));
+    writeString(QPDF_String(this->id2).unparse(true));
+    writeString("]");
+
+    if (which != t_lin_second)
+    {
+	// Write reference to encryption dictionary
+	if (this->encrypted)
+	{
+	    writeString(" /Encrypt ");
+	    writeString(QUtil::int_to_string(this->encryption_dict_objid));
+	    writeString(" 0 R");
+	}
+    }
+
+    writeStringQDF("\n");
+    writeStringNoQDF(" ");
+    writeString(">>");
+}
+
+// Convenience overload: stream length 0 and no compression.
+void QPDFWriter::unparseObject(
+    QPDFObjectHandle object, int level, unsigned int flags)
+{
+    this->unparseObject(object, level, flags, 0, false);
+}
+
+void
+QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
+			  unsigned int flags, int stream_length, bool compress)
+{   // Write object; stream_length and compress only affect stream dictionaries.
+    unsigned int child_flags = flags & ~f_stream;	// children never get f_stream
+
+    std::string indent;	// two spaces per level; only written in QDF mode
+    for (int i = 0; i < level; ++i)
+    {
+	indent += "  ";
+    }
+
+    if (object.isArray())
+    {
+	// Note: PDF spec 1.4 implementation note 121 states that
+	// Acrobat requires a space after the [ in the /H key of the
+	// linearization parameter dictionary.  We'll do this
+	// unconditionally for all arrays because it looks nicer and
+	// doesn't make the files that much bigger.
+	writeString("[");
+	writeStringQDF("\n");
+	int n = object.getArrayNItems();
+	for (int i = 0; i < n; ++i)
+	{
+	    writeStringQDF(indent);
+	    writeStringQDF("  ");
+	    writeStringNoQDF(" ");
+	    unparseChild(object.getArrayItem(i), level + 1, child_flags);
+	    writeStringQDF("\n");
+	}
+	writeStringQDF(indent);
+	writeStringNoQDF(" ");
+	writeString("]");
+    }
+    else if (object.isDictionary())
+    {
+	writeString("<<");
+	writeStringQDF("\n");
+	std::set<std::string> keys = object.getKeys();
+	for (std::set<std::string>::iterator iter = keys.begin();
+	     iter != keys.end(); ++iter)
+	{
+	    std::string const& key = *iter;
+	    if ((flags & f_filtered) &&	// filtered data: drop the old filter keys
+		((key == "/Filter") ||
+		 (key == "/DecodeParms")))
+	    {
+		continue;
+	    }
+	    if ((flags & f_stream) && (key == "/Length"))	// rewritten below
+	    {
+		continue;
+	    }
+	    writeStringQDF(indent);
+	    writeStringQDF("  ");
+	    writeStringNoQDF(" ");
+	    writeString(QPDF_Name::normalizeName(key));
+	    writeString(" ");
+	    unparseChild(object.getKey(key), level + 1, child_flags);
+	    writeStringQDF("\n");
+	}
+
+	if (flags & f_stream)	// stream dictionary: add /Length and maybe /Filter
+	{
+	    writeStringQDF(indent);
+	    writeStringQDF(" ");
+	    writeString(" /Length ");
+
+	    if (this->direct_stream_lengths)
+	    {
+		writeString(QUtil::int_to_string(stream_length));
+	    }
+	    else
+	    {
+		writeString(
+		    QUtil::int_to_string(this->cur_stream_length_id));
+		writeString(" 0 R");
+	    }
+	    writeStringQDF("\n");
+	    if (compress && (flags & f_filtered))
+	    {
+		writeStringQDF(indent);
+		writeStringQDF(" ");
+		writeString(" /Filter /FlateDecode");
+		writeStringQDF("\n");
+	    }
+	}
+
+	writeStringQDF(indent);
+	writeStringNoQDF(" ");
+	writeString(">>");
+    }
+    else if (object.isStream())
+    {
+	// Write stream data to a buffer.
+	int old_id = object.getObjectID();
+	int new_id = obj_renumber[old_id];
+	if (! this->direct_stream_lengths)
+	{
+	    this->cur_stream_length_id = new_id + 1;	// id reserved in enqueueObject
+	}
+	QPDFObjectHandle stream_dict = object.getDict();
+
+	bool filter = (this->stream_data_mode != s_preserve);
+	if (this->stream_data_mode == s_compress)
+	{
+	    // Don't filter if the stream is already compressed with
+	    // FlateDecode.  We don't want to make it worse by getting
+	    // rid of a predictor or otherwising messing with it.  We
+	    // should also avoid messing with anything that's
+	    // compressed with a lossy compression scheme, but we
+	    // don't support any of those right now.
+	    QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
+	    if (filter_obj.isName() && (filter_obj.getName() == "/FlateDecode"))
+	    {
+		QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
+		filter = false;
+	    }
+	}
+	bool normalize = false;
+	bool compress = false;
+	if (this->normalize_content && normalized_streams.count(old_id))
+	{
+	    normalize = true;
+	    filter = true;
+	}
+	else if (filter && (this->stream_data_mode == s_compress))
+	{
+	    compress = true;
+	    QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
+	}
+
+	flags |= f_stream;
+
+	pushPipeline(new Pl_Buffer("stream data"));
+	activatePipelineStack();
+	bool filtered =
+	    object.pipeStreamData(this->pipeline, filter, normalize, compress);
+	PointerHolder<Buffer> stream_data;
+	popPipelineStack(&stream_data);
+	if (filtered)
+	{
+	    flags |= f_filtered;
+	}
+	else
+	{
+	    compress = false;
+	}
+
+	this->cur_stream_length = stream_data.getPointer()->getSize();
+	unparseObject(stream_dict, 0, flags, this->cur_stream_length, compress);
+	writeString("\nstream\n");
+	pushEncryptionFilter();
+	writeBuffer(stream_data);
+	popPipelineStack();
+
+	if (this->qdf_mode)
+	{
+	    if (this->pipeline->getLastChar() != '\n')
+	    {
+		writeString("\n");
+		this->added_newline = true;
+	    }
+	    else
+	    {
+		this->added_newline = false;
+	    }
+	}
+	writeString("endstream");
+    }
+    else if (object.isString())
+    {
+	std::string val;
+	if (this->encrypted &&
+	    (! (flags & f_in_ostream)) &&	// not done for strings inside object streams
+	    (! this->cur_data_key.empty()))
+	{
+	    val = object.getStringValue();
+	    char* tmp = QUtil::copy_string(val);
+	    unsigned int vlen = val.length();
+	    RC4 rc4((unsigned char const*)this->cur_data_key.c_str(),
+		    this->cur_data_key.length());
+	    rc4.process((unsigned char*)tmp, vlen);
+	    val = QPDF_String(std::string(tmp, vlen)).unparse();
+	    delete [] tmp;
+	}
+	else
+	{
+	    val = object.unparseResolved();
+	}
+	writeString(val);
+    }
+    else
+    {
+	writeString(object.unparseResolved());
+    }
+}
+
+// Write one "object-number offset" pair per entry, then a final newline.
+void QPDFWriter::writeObjectStreamOffsets(
+    std::vector<int>& offsets, int first_obj)
+{
+    for (unsigned int idx = 0; idx < offsets.size(); ++idx)
+    {
+	if (idx > 0)
+	{
+	    writeStringQDF("\n");
+	    writeStringNoQDF(" ");
+	}
+	writeString(QUtil::int_to_string(idx + first_obj));
+	writeString(" ");
+	writeString(QUtil::int_to_string(offsets[idx]));
+    }
+    writeString("\n");
+}
+
+void
+QPDFWriter::writeObjectStream(QPDFObjectHandle object)
+{   // Write an object stream and the xref entries of the objects inside it.
+    // Note: object might be null if this is a place-holder for an
+    // object stream that we are generating from scratch.
+
+    int old_id = object.getObjectID();
+    int new_id = obj_renumber[old_id];
+
+    std::vector<int> offsets;	// pass 1: pipeline count at the start of each object
+    int first = 0;	// becomes the value written for /First
+
+    // Generate stream itself.  We have to do this in two passes so we
+    // can calculate offsets in the first pass.
+    PointerHolder<Buffer> stream_buffer;
+    int first_obj = -1;
+    bool compressed = false;
+    for (int pass = 1; pass <= 2; ++pass)
+    {
+	if (pass == 1)
+	{
+	    pushDiscardFilter();
+	}
+	else
+	{
+	    // Adjust offsets to skip over comment before first object
+
+	    first = offsets[0];
+	    for (std::vector<int>::iterator iter = offsets.begin();
+		 iter != offsets.end(); ++iter)
+	    {
+		*iter -= first;
+	    }
+
+	    // Take one pass at writing pairs of numbers so we can get
+	    // their size information
+	    pushDiscardFilter();
+	    writeObjectStreamOffsets(offsets, first_obj);
+	    first += this->pipeline->getCount();	// add the size of the pairs
+	    popPipelineStack();
+
+	    // Set up a stream to write the stream data into a buffer.
+	    Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
+	    if (! ((this->stream_data_mode == s_uncompress) || this->qdf_mode))
+	    {
+		compressed = true;
+		next = pushPipeline(
+		    new Pl_Flate("compress object stream", next,
+				 Pl_Flate::a_deflate));
+	    }
+	    activatePipelineStack();
+	    writeObjectStreamOffsets(offsets, first_obj);
+	}
+
+	int count = 0;	// index of the current object within the stream
+	for (std::set<int>::iterator iter =
+		 this->object_stream_to_objects[old_id].begin();
+	     iter != this->object_stream_to_objects[old_id].end();
+	     ++iter, ++count)
+	{
+	    int obj = *iter;
+	    int new_obj = this->obj_renumber[obj];
+	    if (first_obj == -1)
+	    {
+		first_obj = new_obj;
+	    }
+	    if (this->qdf_mode)
+	    {
+		writeString("%% Object stream: object " +
+			    QUtil::int_to_string(new_obj) + ", index " +
+			    QUtil::int_to_string(count) + "\n");
+	    }
+	    if (pass == 1)
+	    {
+		offsets.push_back(this->pipeline->getCount());
+	    }
+	    writeObject(this->pdf.getObjectByID(obj, 0), count);
+
+	    this->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
+	}
+
+	// stream_buffer will be initialized only for pass 2
+	popPipelineStack(&stream_buffer);
+    }
+
+    // Write the object
+    openObject(new_id);
+    setDataKey(new_id);
+    writeString("<<");
+    writeStringQDF("\n ");
+    writeString(" /Type /ObjStm");
+    writeStringQDF("\n ");
+    writeString(" /Length " +
+		QUtil::int_to_string(stream_buffer.getPointer()->getSize()));
+    writeStringQDF("\n ");
+    if (compressed)
+    {
+	writeString(" /Filter /FlateDecode");
+    }
+    writeString(" /N " + QUtil::int_to_string(offsets.size()));
+    writeStringQDF("\n ");
+    writeString(" /First " + QUtil::int_to_string(first));
+    if (! object.isNull())
+    {
+	// If the original object has an /Extends key, preserve it.
+	QPDFObjectHandle dict = object.getDict();
+	QPDFObjectHandle extends = dict.getKey("/Extends");
+	if (extends.isIndirect())
+	{
+	    QTC::TC("qpdf", "QPDFWriter copy Extends");
+	    writeStringQDF("\n ");
+	    writeString(" /Extends ");
+	    unparseChild(extends, 1, f_in_ostream);
+	}
+    }
+    writeStringQDF("\n");
+    writeStringNoQDF(" ");
+    writeString(">>\nstream\n");
+    if (this->encrypted)
+    {
+	QTC::TC("qpdf", "QPDFWriter encrypt object stream");
+    }
+    pushEncryptionFilter();
+    writeBuffer(stream_buffer);
+    popPipelineStack();
+    writeString("endstream");
+    this->cur_data_key.clear();
+    closeObject(new_id);
+}
+
+void
+QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
+{   // Write one object: standalone (index -1) or as a member of an object stream.
+    int old_id = object.getObjectID();
+
+    if ((object_stream_index == -1) &&
+	(this->object_stream_to_objects.count(old_id)))
+    {   // object is itself an object stream
+	writeObjectStream(object);
+	return;
+    }
+
+    int new_id = obj_renumber[old_id];
+    if (this->qdf_mode)
+    {   // QDF mode: precede page and page-content objects with comments
+	if (this->page_object_to_seq.count(old_id))
+	{
+	    writeString("%% Page ");
+	    writeString(
+		QUtil::int_to_string(
+		    this->page_object_to_seq[old_id]));
+	    writeString("\n");
+	}
+	if (this->contents_to_page_seq.count(old_id))
+	{
+	    writeString("%% Contents for page ");
+	    writeString(
+		QUtil::int_to_string(
+		    this->contents_to_page_seq[old_id]));
+	    writeString("\n");
+	}
+    }
+    if (object_stream_index == -1)
+    {
+	openObject(new_id);
+	setDataKey(new_id);	// data key is set only while this object is written
+	unparseObject(object, 0, 0);
+	this->cur_data_key.clear();
+	closeObject(new_id);
+    }
+    else
+    {
+	unparseObject(object, 0, f_in_ostream);
+	writeString("\n");
+    }
+
+    if ((! this->direct_stream_lengths) && object.isStream())
+    {   // write the stream's length as a separate object numbered new_id + 1
+	if (this->qdf_mode)
+	{
+	    if (this->added_newline)
+	    {
+		writeString("%QDF: ignore_newline\n");
+	    }
+	}
+	openObject(new_id + 1);
+	writeString(QUtil::int_to_string(this->cur_stream_length));
+	closeObject(new_id + 1);
+    }
+}
+
+// Compute id1 and id2 for the output's /ID entry; does nothing once set.
+void QPDFWriter::generateID()
+{
+    // Note: we can't call generateID() at the time of construction
+    // since the caller hasn't yet had a chance to call setStaticID(),
+    // but we need to generate it before computing encryption
+    // dictionary parameters.  This is why we call this function both
+    // from setEncryptionParameters() and from write() and return
+    // immediately if the ID has already been generated.
+
+    if (! this->id2.empty())
+    {
+	return;
+    }
+
+    QPDFObjectHandle trailer = pdf.getTrailer();
+    std::string result;
+
+    if (this->static_id)
+    {
+	// For test suite use only...
+	static char tmp[] = {0x31, 0x41, 0x59, 0x26,
+			     0x53, 0x58, 0x97, 0x93,
+			     0x23, 0x84, 0x62, 0x64,
+			     0x33, 0x83, 0x27, 0x95,
+			     0x00};
+	result = tmp;
+    }
+    else
+    {
+	// The PDF specification has guidelines for creating IDs, but it
+	// states clearly that the only thing that's really important is
+	// that it is very likely to be unique.  We can't really follow
+	// the guidelines in the spec exactly because we haven't written
+	// the file yet.  This scheme should be fine though.
+	std::string seed;
+	seed += QUtil::int_to_string((int)time(0));
+	seed += " QPDF ";
+	seed += filename;
+	seed += " ";
+	// Mix in the string values found in the document's /Info dictionary.
+	QPDFObjectHandle info = trailer.getKey("/Info");
+	if (info.isDictionary())
+	{
+	    std::set<std::string> keys = info.getKeys();
+	    for (std::set<std::string>::iterator iter = keys.begin();
+		 iter != keys.end(); ++iter)
+	    {
+		QPDFObjectHandle obj = info.getKey(*iter);
+		if (obj.isString())
+		{
+		    seed += " ";
+		    seed += obj.getStringValue();
+		}
+	    }
+	}
+
+	MD5 m;
+	m.encodeString(seed.c_str());
+	MD5::Digest digest;
+	m.digest(digest);
+	result = std::string((char*)digest, sizeof(MD5::Digest));
+    }
+
+    // If /ID already exists, follow the spec: use the original first
+    // word and generate a new second word.  Otherwise, we'll use the
+    // generated ID for both.
+
+    this->id2 = result;
+    if (trailer.hasKey("/ID"))
+    {
+	// Note: keep /ID from old file even if --static-id was given.
+	this->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
+    }
+    else
+    {
+	this->id1 = this->id2;
+    }
+}
+
+// Number the pages from 1 and remember which content stream objects
+// belong to which page; every content stream is marked for normalizing.
+void QPDFWriter::initializeSpecialStreams()
+{
+    std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+    int seq = 0;
+    for (std::vector<QPDFObjectHandle>::iterator page_iter = pages.begin();
+	 page_iter != pages.end(); ++page_iter)
+    {
+	++seq;
+	this->page_object_to_seq[page_iter->getObjectID()] = seq;
+	QPDFObjectHandle contents = page_iter->getKey("/Contents");
+
+	// /Contents is handled as an array of streams or a single stream.
+	std::vector<int> stream_ids;
+	if (contents.isArray())
+	{
+	    int const n_items = contents.getArrayNItems();
+	    for (int i = 0; i < n_items; ++i)
+	    {
+		stream_ids.push_back(
+		    contents.getArrayItem(i).getObjectID());
+	    }
+	}
+	else if (contents.isStream())
+	{
+	    stream_ids.push_back(contents.getObjectID());
+	}
+	for (std::vector<int>::iterator id = stream_ids.begin();
+	     id != stream_ids.end(); ++id)
+	{
+	    this->contents_to_page_seq[*id] = seq;
+	    this->normalized_streams.insert(*id);
+	}
+    }
+}
+
+// Presumably loads the input file's object -> object-stream mapping.
+void QPDFWriter::preserveObjectStreams()
+{
+    pdf.getObjectStreamData(object_to_object_stream);
+}
+
+// Put every compressible object into a newly created object stream.
+void QPDFWriter::generateObjectStreams()
+{
+    // Basic strategy: make a list of objects that can go into an
+    // object stream, then figure out how many object streams are
+    // needed to distribute them approximately evenly without any
+    // stream exceeding 100 members.  Linearized files need no care
+    // here: things that aren't allowed are excluded later.
+    // This code doesn't do anything with /Extends.
+
+    std::vector<int> const& eligible = this->pdf.getCompressibleObjects();
+    if (eligible.empty())
+    {
+	// Nothing to do; also keeps the division below from dividing by 0.
+	return;
+    }
+    unsigned int n_object_streams = (eligible.size() + 99) / 100;
+    // Ceiling division: the largest number of objects in any one stream.
+    unsigned int n_per =
+	(eligible.size() + n_object_streams - 1) / n_object_streams;
+    unsigned int n = 0;
+    int cur_ostream = 0;
+    for (std::vector<int>::const_iterator iter = eligible.begin();
+	 iter != eligible.end(); ++iter)
+    {
+	if ((n % n_per) == 0)
+	{
+	    if (n > 0)
+	    {
+		QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
+	    }
+	    n = 0;
+	}
+	if (n == 0)
+	{
+	    // Construct a new null object as the "original" object
+	    // stream.  The rest of the code knows that this means
+	    // we're creating the object stream from scratch.
+	    cur_ostream = this->pdf.makeIndirectObject(
+		QPDFObjectHandle::newNull()).getObjectID();
+	}
+	this->object_to_object_stream[*iter] = cur_ostream;
+	++n;
+    }
+}
+
+// Top-level entry point for producing the output file.  Reconciles
+// the option flags, prepares encryption parameters and object stream
+// assignments, generates the ID, prepares the trailer, and then
+// dispatches to writeLinearized() or writeStandard().  Afterwards the
+// pipeline is finished and the output file is closed if close_file is
+// set.
+void
+QPDFWriter::write()
+{
+    // Do preliminary setup
+
+    // Linearization takes precedence over qdf mode.
+    if (this->linearized)
+    {
+	this->qdf_mode = false;
+    }
+
+    // qdf mode supplies defaults for content normalization and stream
+    // data mode unless the corresponding *_set flag is already set.
+    if (this->qdf_mode)
+    {
+	if (! this->normalize_content_set)
+	{
+	    this->normalize_content = true;
+	}
+	if (! this->stream_data_mode_set)
+	{
+	    this->stream_data_mode = s_uncompress;
+	}
+    }
+
+    if (this->encrypted)
+    {
+	// Encryption has been explicitly set
+	this->preserve_encryption = false;
+    }
+    else if (this->normalize_content ||
+	     (this->stream_data_mode == s_uncompress) ||
+	     this->qdf_mode)
+    {
+	// Encryption makes looking at contents pretty useless.  If
+	// the user explicitly encrypted though, we still obey that.
+	this->preserve_encryption = false;
+    }
+
+    if (preserve_encryption)
+    {
+	copyEncryptionParameters();
+    }
+
+    if (this->qdf_mode || this->normalize_content ||
+	(this->stream_data_mode == s_uncompress))
+    {
+	initializeSpecialStreams();
+    }
+
+    if (this->qdf_mode)
+    {
+	// Generate indirect stream lengths for qdf mode since fix-qdf
+	// uses them for storing recomputed stream length data.
+	// Certain streams such as object streams, xref streams, and
+	// hint streams always get direct stream lengths.
+	this->direct_stream_lengths = false;
+    }
+
+    // Populate object_to_object_stream according to the requested
+    // object stream mode.
+    switch (this->object_stream_mode)
+    {
+      case o_disable:
+	// no action required
+	break;
+
+      case o_preserve:
+	preserveObjectStreams();
+	break;
+
+      case o_generate:
+	generateObjectStreams();
+	break;
+
+	// no default so gcc will warn for missing case tag
+    }
+
+    if (this->linearized)
+    {
+	// Page dictionaries are not allowed to be compressed objects.
+	std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+	for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
+	     iter != pages.end(); ++iter)
+	{
+	    QPDFObjectHandle& page = *iter;
+	    int objid = page.getObjectID();
+	    if (this->object_to_object_stream.count(objid))
+	    {
+		QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
+		this->object_to_object_stream.erase(objid);
+	    }
+	}
+    }
+
+    if (this->linearized || this->encrypted)
+    {
+    	// The document catalog is not allowed to be compressed in
+    	// linearized files either.  It also appears that Adobe Reader
+    	// 8.0.0 has a bug that prevents it from being able to handle
+    	// encrypted files with compressed document catalogs, so we
+    	// disable them in that case as well.
+	int objid = pdf.getRoot().getObjectID();
+	if (this->object_to_object_stream.count(objid))
+	{
+	    QTC::TC("qpdf", "QPDFWriter uncompressing root");
+	    this->object_to_object_stream.erase(objid);
+	}
+    }
+
+    // Generate reverse mapping from object stream to objects
+    for (std::map<int, int>::iterator iter =
+	     this->object_to_object_stream.begin();
+	 iter != this->object_to_object_stream.end(); ++iter)
+    {
+	int obj = (*iter).first;
+	int stream = (*iter).second;
+	this->object_stream_to_objects[stream].insert(obj);
+	// Track the largest index any object can have within its
+	// object stream (set size - 1).
+	this->max_ostream_index =
+	    std::max(this->max_ostream_index,
+		     (int)this->object_stream_to_objects[stream].size() - 1);
+    }
+
+    // Any use of object streams raises the minimum version to 1.5.
+    if (! this->object_stream_to_objects.empty())
+    {
+	this->min_pdf_version = "1.5";
+    }
+
+    generateID();
+
+    pdf.trimTrailerForWrite();
+    pdf.flattenScalarReferences();
+
+    if (this->linearized)
+    {
+	writeLinearized();
+    }
+    else
+    {
+	writeStandard();
+    }
+
+    this->pipeline->finish();
+    if (this->close_file)
+    {
+	fclose(this->file);
+    }
+    // The file pointer is cleared whether or not it was closed here.
+    this->file = 0;
+}
+
+// Pass every object handle in part, in order, to enqueueObject.
+void
+QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
+{
+    std::vector<QPDFObjectHandle>::iterator cur = part.begin();
+    while (cur != part.end())
+    {
+	enqueueObject(*cur);
+	++cur;
+    }
+}
+
+// Write the encryption dictionary as its own object.  Each entry of
+// encryption_dictionary is emitted as " key value" between "<<" and
+// " >>".  The object id returned by openObject is stored back into
+// encryption_dict_objid.
+void
+QPDFWriter::writeEncryptionDictionary()
+{
+    this->encryption_dict_objid = openObject(this->encryption_dict_objid);
+    writeString("<<");
+    std::map<std::string, std::string>::iterator entry =
+	this->encryption_dictionary.begin();
+    while (entry != this->encryption_dictionary.end())
+    {
+	std::string const& key = entry->first;
+	std::string const& value = entry->second;
+	writeString(" ");
+	writeString(key);
+	writeString(" ");
+	writeString(value);
+	++entry;
+    }
+    writeString(" >>");
+    closeObject(this->encryption_dict_objid);
+}
+
+// Write the file header: "%PDF-" plus the version, a comment line
+// containing four high-bit bytes, and (through writeStringQDF) the
+// QDF marker line.  The version written is the input file's version
+// unless min_pdf_version is set and compares numerically greater.
+void
+QPDFWriter::writeHeader()
+{
+    std::string version = pdf.getPDFVersion();
+    if (! this->min_pdf_version.empty())
+    {
+	// Versions are compared as floating point numbers.
+	float ov = atof(version.c_str());
+	float mv = atof(this->min_pdf_version.c_str());
+	if (mv > ov)
+	{
+	    version = this->min_pdf_version;
+	}
+    }
+
+    writeString("%PDF-");
+    writeString(version);
+    // This string of binary characters would not be valid UTF-8, so
+    // it is spelled with hex escapes rather than raw bytes.  That
+    // keeps the source file valid text and protects the marker from
+    // being damaged by editors or encoding conversions.
+    writeString("\n%\xbf\xf7\xa2\xfe\n");
+    writeStringQDF("%QDF-1.0\n\n");
+}
+
+// Generate the linearization hint stream and write it as object
+// hint_id.  The stream data comes from pdf.generateHintStream, which
+// also supplies the /S value and, when non-zero, the /O value written
+// into the stream dictionary.  The data is written through the
+// encryption filter pushed by pushEncryptionFilter.
+void
+QPDFWriter::writeHintStream(int hint_id)
+{
+    PointerHolder<Buffer> hint_buffer;
+    int S = 0;
+    int O = 0;
+    pdf.generateHintStream(
+	this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O);
+
+    openObject(hint_id);
+    setDataKey(hint_id);
+
+    unsigned char* hs = hint_buffer.getPointer()->getBuffer();
+    unsigned long hlen = hint_buffer.getPointer()->getSize();
+
+    writeString("<< /Filter /FlateDecode /S ");
+    writeString(QUtil::int_to_string(S));
+    if (O)
+    {
+	writeString(" /O ");
+	writeString(QUtil::int_to_string(O));
+    }
+    writeString(" /Length ");
+    writeString(QUtil::int_to_string(hlen));
+    writeString(" >>\nstream\n");
+
+    if (this->encrypted)
+    {
+	QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
+    }
+    pushEncryptionFilter();
+    writeBuffer(hint_buffer);
+    popPipelineStack();
+
+    // Make sure "endstream" starts on its own line.  hlen is
+    // unsigned, so guard against an empty buffer: hlen - 1 would wrap
+    // and index far outside the buffer.  With no data, the newline
+    // after "stream" already puts us at the start of a line.
+    if ((hlen > 0) && (hs[hlen - 1] != '\n'))
+    {
+	writeString("\n");
+    }
+    writeString("endstream");
+    closeObject(hint_id);
+}
+
+// Convenience overload: write an xref table with no previous xref
+// offset, offsets not suppressed, and no hint stream adjustment.
+int
+QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
+{
+    int const prev = 0;
+    bool const suppress_offsets = false;
+    int const hint_id = 0;
+    int const hint_offset = 0;
+    int const hint_length = 0;
+    return writeXRefTable(which, first, last, size, prev, suppress_offsets,
+			  hint_id, hint_offset, hint_length);
+}
+
+// Write a traditional cross-reference table covering object numbers
+// first through last, followed by the trailer.  Object 0 gets the
+// fixed free entry.  Every other object gets its offset from xref, or
+// 0 when suppress_offsets is true.  When hint_id is non-zero, offsets
+// of objects other than hint_id that are at or beyond hint_offset are
+// increased by hint_length.  Returns the pipeline count captured just
+// before the newline that precedes the first entry.
+int
+QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
+			   int prev, bool suppress_offsets,
+			   int hint_id, int hint_offset, int hint_length)
+{
+    writeString("xref\n");
+    writeString(QUtil::int_to_string(first));
+    writeString(" ");
+    writeString(QUtil::int_to_string(last - first + 1));
+    int space_before_zero = this->pipeline->getCount();
+    writeString("\n");
+    for (int objid = first; objid <= last; ++objid)
+    {
+	if (objid == 0)
+	{
+	    writeString("0000000000 65535 f \n");
+	    continue;
+	}
+
+	int offset = 0;
+	if (! suppress_offsets)
+	{
+	    offset = this->xref[objid].getOffset();
+	    bool const after_hint =
+		((hint_id != 0) &&
+		 (objid != hint_id) &&
+		 (offset >= hint_offset));
+	    if (after_hint)
+	    {
+		offset += hint_length;
+	    }
+	}
+	writeString(QUtil::int_to_string(offset, 10));
+	writeString(" 00000 n \n");
+    }
+    writeTrailer(which, size, false, prev);
+    writeString("\n");
+    return space_before_zero;
+}
+
+// Convenience overload: write an xref stream with no previous xref
+// offset and no hint stream adjustment.
+int
+QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset,
+			    trailer_e which, int first, int last, int size)
+{
+    int const prev = 0;
+    int const hint_id = 0;
+    int const hint_offset = 0;
+    int const hint_length = 0;
+    return writeXRefStream(objid, max_id, max_offset,
+			   which, first, last, size,
+			   prev, hint_id, hint_offset, hint_length);
+}
+
+// Write a cross-reference stream as object xref_id covering object
+// numbers first through last.  Each entry has three fields: a
+// one-byte type, a field wide enough for max_offset and max_id, and a
+// field wide enough for max_ostream_index.  Unless stream data is
+// being uncompressed or qdf mode is on, the data goes through a PNG
+// filter and is flate compressed.  When hint_id is non-zero, offsets
+// of objects other than hint_id that are at or beyond hint_offset are
+// increased by hint_length.  Returns the pipeline count at entry
+// minus one.
+int
+QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
+			    trailer_e which, int first, int last, int size,
+			    int prev, int hint_id,
+			    int hint_offset, int hint_length)
+{
+    int xref_offset = this->pipeline->getCount();
+    int space_before_zero = xref_offset - 1;
+
+    // field 1 contains offsets and object stream identifiers
+    int f1_size = std::max(bytesNeeded(max_offset),
+			   bytesNeeded(max_id));
+
+    // field 2 contains object stream indices
+    int f2_size = bytesNeeded(this->max_ostream_index);
+
+    // Total width of one entry; also used as the PNG filter's column
+    // count and written as /Columns below.
+    unsigned int esize = 1 + f1_size + f2_size;
+
+    // Must store in xref table in advance of writing the actual data
+    // rather than waiting for openObject to do it.
+    this->xref[xref_id] = QPDFXRefEntry(1, pipeline->getCount(), 0);
+
+    // Collect the entry data in a buffer so its length is known
+    // before the stream dictionary is written.
+    Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
+    bool compressed = false;
+    if (! ((this->stream_data_mode == s_uncompress) || this->qdf_mode))
+    {
+	compressed = true;
+	p = pushPipeline(
+	    new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
+	p = pushPipeline(
+	    new Pl_PNGFilter(
+		"pngify xref", p, Pl_PNGFilter::a_encode, esize, 0));
+    }
+    activatePipelineStack();
+    for (int i = first; i <= last; ++i)
+    {
+	QPDFXRefEntry& e = this->xref[i];
+	switch (e.getType())
+	{
+	  case 0:
+	    // Type 0 entry: all fields are written as zero.
+	    writeBinary(0, 1);
+	    writeBinary(0, f1_size);
+	    writeBinary(0, f2_size);
+	    break;
+
+	  case 1:
+	    // Type 1 entry: field 1 is the (possibly adjusted) offset.
+	    {
+		int offset = e.getOffset();
+		if ((hint_id != 0) &&
+		    (i != hint_id) &&
+		    (offset >= hint_offset))
+		{
+		    offset += hint_length;
+		}
+		writeBinary(1, 1);
+		writeBinary(offset, f1_size);
+		writeBinary(0, f2_size);
+	    }
+	    break;
+
+	  case 2:
+	    // Type 2 entry: object stream number and index within it.
+	    writeBinary(2, 1);
+	    writeBinary(e.getObjStreamNumber(), f1_size);
+	    writeBinary(e.getObjStreamIndex(), f2_size);
+	    break;
+
+	  default:
+	    throw QEXC::Internal("invalid type writing xref stream");
+	    break;
+	}
+    }
+    PointerHolder<Buffer> xref_data;
+    popPipelineStack(&xref_data);
+
+    openObject(xref_id);
+    writeString("<<");
+    writeStringQDF("\n ");
+    writeString(" /Type /XRef");
+    writeStringQDF("\n ");
+    writeString(" /Length " +
+		QUtil::int_to_string(xref_data.getPointer()->getSize()));
+    if (compressed)
+    {
+	writeStringQDF("\n ");
+	writeString(" /Filter /FlateDecode");
+	writeStringQDF("\n ");
+	writeString(" /DecodeParms << /Columns " +
+		    QUtil::int_to_string(esize) + " /Predictor 12 >>");
+    }
+    writeStringQDF("\n ");
+    writeString(" /W [ 1 " +
+		QUtil::int_to_string(f1_size) + " " +
+		QUtil::int_to_string(f2_size) + " ]");
+    // /Index is written only when the stream does not cover the full
+    // range 0 through size - 1.
+    if (! ((first == 0) && (last == size - 1)))
+    {
+	writeString(" /Index [ " +
+		    QUtil::int_to_string(first) + " " +
+		    QUtil::int_to_string(last - first + 1) + " ]");
+    }
+    writeTrailer(which, size, true, prev);
+    writeString("\nstream\n");
+    writeBuffer(xref_data);
+    writeString("\nendstream");
+    closeObject(xref_id);
+    return space_before_zero;
+}
+
+// Write the file in linearized form.  Object numbers are assigned up
+// front (second half first, then first half), the parts are enqueued
+// in output order, and the file is then written twice.  Pass 1 goes
+// through a discard filter and exists only to learn offsets and
+// sizes; after it, the hint stream is generated into a buffer.  Pass
+// 2 writes the real output, adjusting offsets by the hint stream's
+// length and padding so that objects land where pass 1 put them.
+void
+QPDFWriter::writeLinearized()
+{
+    // Optimize file and enqueue objects in order
+
+    bool need_xref_stream = (! this->object_to_object_stream.empty());
+    pdf.optimize(this->object_to_object_stream);
+
+    std::vector<QPDFObjectHandle> part4;
+    std::vector<QPDFObjectHandle> part6;
+    std::vector<QPDFObjectHandle> part7;
+    std::vector<QPDFObjectHandle> part8;
+    std::vector<QPDFObjectHandle> part9;
+    pdf.getLinearizedParts(this->object_to_object_stream,
+			   part4, part6, part7, part8, part9);
+
+    // Object number sequence:
+    //
+    //  second half
+    //    second half uncompressed objects
+    //    second half xref stream, if any
+    //    second half compressed objects
+    //  first half
+    //    linearization dictionary
+    //    first half xref stream, if any
+    //    part 4 uncompressed objects
+    //    encryption dictionary, if any
+    //    hint stream
+    //    part 6 uncompressed objects
+    //    first half compressed objects
+    //
+
+    // Second half objects
+    int second_half_uncompressed = part7.size() + part8.size() + part9.size();
+    int second_half_first_obj = 1;
+    int after_second_half = 1 + second_half_uncompressed;
+    this->next_objid = after_second_half;
+    int second_half_xref = 0;
+    if (need_xref_stream)
+    {
+	second_half_xref = this->next_objid++;
+    }
+    // Assign numbers to all compressed objects in the second half.
+    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
+    for (int i = 0; i < 3; ++i)
+    {
+	for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
+	     iter != (*vecs2[i]).end(); ++iter)
+	{
+	    assignCompressedObjectNumbers((*iter).getObjectID());
+	}
+    }
+    int second_half_end = this->next_objid - 1;
+    int second_trailer_size = this->next_objid;
+
+    // First half objects
+    int first_half_start = this->next_objid;
+    int lindict_id = this->next_objid++;
+    int first_half_xref = 0;
+    if (need_xref_stream)
+    {
+	first_half_xref = this->next_objid++;
+    }
+    int part4_first_obj = this->next_objid;
+    this->next_objid += part4.size();
+    int after_part4 = this->next_objid;
+    if (this->encrypted)
+    {
+	this->encryption_dict_objid = this->next_objid++;
+    }
+    int hint_id = this->next_objid++;
+    int part6_first_obj = this->next_objid;
+    this->next_objid += part6.size();
+    int after_part6 = this->next_objid;
+    // Assign numbers to all compressed objects in the first half
+    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
+    for (int i = 0; i < 2; ++i)
+    {
+	for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
+	     iter != (*vecs1[i]).end(); ++iter)
+	{
+	    assignCompressedObjectNumbers((*iter).getObjectID());
+	}
+    }
+    int first_half_end = this->next_objid - 1;
+    int first_trailer_size = this->next_objid;
+
+    // The last objects of parts 4 and 6 are used as markers while
+    // walking the queue below.  This relies on both parts being
+    // non-empty.
+    int part4_end_marker = part4.back().getObjectID();
+    int part6_end_marker = part6.back().getObjectID();
+    int space_before_zero = 0;
+    int file_size = 0;
+    int part6_end_offset = 0;
+    int first_half_max_obj_offset = 0;
+    int second_xref_offset = 0;
+    int first_xref_end = 0;
+    int second_xref_end = 0;
+
+    // Enqueue each part with next_objid reset to the start of the
+    // range reserved for it above, then verify that enqueuing used
+    // exactly the reserved range.
+    this->next_objid = part4_first_obj;
+    enqueuePart(part4);
+    // This must be a comparison; an assignment here would overwrite
+    // next_objid and make the check meaningless.
+    assert(this->next_objid == after_part4);
+    this->next_objid = part6_first_obj;
+    enqueuePart(part6);
+    assert(this->next_objid == after_part6);
+    this->next_objid = second_half_first_obj;
+    enqueuePart(part7);
+    enqueuePart(part8);
+    enqueuePart(part9);
+    assert(this->next_objid == after_second_half);
+
+    int hint_length = 0;
+    PointerHolder<Buffer> hint_buffer;
+
+    // Write file in two passes.  Part numbers refer to PDF spec 1.4.
+
+    for (int pass = 1; pass <= 2; ++pass)
+    {
+	if (pass == 1)
+	{
+	    pushDiscardFilter();
+	}
+
+	// Part 1: header
+
+	writeHeader();
+
+	// Part 2: linearization parameter dictionary.  Save enough
+	// space to write real dictionary.  150 characters is enough
+	// space if all numerical values in the parameter dictionary
+	// are 10 digits long plus a few extra characters for safety.
+
+	int pos = this->pipeline->getCount();
+	openObject(lindict_id);
+	writeString("<<");
+	if (pass == 2)
+	{
+	    std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
+	    int first_page_object = obj_renumber[pages[0].getObjectID()];
+	    int npages = pages.size();
+
+	    writeString(" /Linearized 1 /L ");
+	    writeString(QUtil::int_to_string(file_size + hint_length));
+	    // Implementation note 121 states that a space is
+	    // mandatory after this open bracket.
+	    writeString(" /H [ ");
+	    writeString(QUtil::int_to_string(this->xref[hint_id].getOffset()));
+	    writeString(" ");
+	    writeString(QUtil::int_to_string(hint_length));
+	    writeString(" ] /O ");
+	    writeString(QUtil::int_to_string(first_page_object));
+	    writeString(" /E ");
+	    writeString(QUtil::int_to_string(part6_end_offset + hint_length));
+	    writeString(" /N ");
+	    writeString(QUtil::int_to_string(npages));
+	    writeString(" /T ");
+	    writeString(QUtil::int_to_string(space_before_zero + hint_length));
+	}
+	writeString(" >>");
+	closeObject(lindict_id);
+	static int const pad = 150;
+	int spaces = (pos + pad - this->pipeline->getCount());
+	assert(spaces >= 0);
+	for (int i = 0; i < spaces; ++i)
+	{
+	    writeString(" ");
+	}
+	writeString("\n");
+
+	// Part 3: first page cross reference table and trailer.
+
+	int first_xref_offset = this->pipeline->getCount();
+	int hint_offset = 0;
+	if (pass == 2)
+	{
+	    hint_offset = this->xref[hint_id].getOffset();
+	}
+	if (need_xref_stream)
+	{
+	    // Must pad here too.
+	    if (pass == 1)
+	    {
+		// first_half_max_obj_offset is very likely to fall
+		// within the first 64K of the document (thus
+		// requiring two bytes for offsets) since it is the
+		// offset of the last uncompressed object in page 1.
+		// We allow for it to do otherwise though.
+		first_half_max_obj_offset = 65535;
+	    }
+	    pos = this->pipeline->getCount();
+	    writeXRefStream(first_half_xref, first_half_end,
+			    first_half_max_obj_offset,
+			    t_lin_first, first_half_start, first_half_end,
+			    first_trailer_size,
+			    hint_length + second_xref_offset,
+			    hint_id, hint_offset, hint_length);
+	    int endpos = this->pipeline->getCount();
+	    if (pass == 1)
+	    {
+		// Pad so we have enough room for the real xref
+		// stream.  In an extremely unlikely worst case,
+		// first_half_max_obj_offset could be enough larger to
+		// require two extra bytes beyond what we calculated
+		// in pass 1.  This means we need to save two extra
+		// bytes for each xref entry.  To that, we'll add 10
+		// extra bytes for number length increases.
+		int possible_extra =
+		    10 + (2 * (first_half_end - first_half_start + 1));
+		for (int i = 0; i < possible_extra; ++i)
+		{
+		    writeString(" ");
+		}
+		first_xref_end = this->pipeline->getCount();
+	    }
+	    else
+	    {
+		// Pad so that the next object starts at the same
+		// place as in pass 1.
+		for (int i = 0; i < first_xref_end - endpos; ++i)
+		{
+		    writeString(" ");
+		}
+		assert(this->pipeline->getCount() == first_xref_end);
+	    }
+	    writeString("\n");
+	}
+	else
+	{
+	    // Offsets are suppressed in pass 1 since they are not
+	    // known yet.
+	    writeXRefTable(t_lin_first, first_half_start, first_half_end,
+			   first_trailer_size, hint_length + second_xref_offset,
+			   (pass == 1), hint_id, hint_offset, hint_length);
+	    writeString("startxref\n0\n%%EOF\n");
+	}
+
+	// Parts 4 through 9
+
+	for (std::list<QPDFObjectHandle>::iterator iter =
+		 this->object_queue.begin();
+	     iter != this->object_queue.end(); ++iter)
+	{
+	    QPDFObjectHandle cur_object = (*iter);
+	    if (cur_object.getObjectID() == part6_end_marker)
+	    {
+		first_half_max_obj_offset = this->pipeline->getCount();
+	    }
+	    writeObject(cur_object);
+	    if (cur_object.getObjectID() == part4_end_marker)
+	    {
+		if (this->encrypted)
+		{
+		    writeEncryptionDictionary();
+		}
+		if (pass == 1)
+		{
+		    // Record where the hint stream will go; it is
+		    // not written during pass 1.
+		    this->xref[hint_id] =
+			QPDFXRefEntry(1, this->pipeline->getCount(), 0);
+		}
+		else
+		{
+		    // Part 5: hint stream
+		    writeBuffer(hint_buffer);
+		}
+	    }
+	    if (cur_object.getObjectID() == part6_end_marker)
+	    {
+		part6_end_offset = this->pipeline->getCount();
+	    }
+	}
+
+	// Part 10: overflow hint stream -- not used
+
+	// Part 11: main cross reference table and trailer
+
+	second_xref_offset = this->pipeline->getCount();
+	if (need_xref_stream)
+	{
+	    space_before_zero =
+		writeXRefStream(second_half_xref,
+				second_half_end, second_xref_offset,
+				t_lin_second, 0, second_half_end,
+				second_trailer_size);
+	    if (pass == 1)
+	    {
+		// Add some padding -- we need an accurate file_size
+		// number, and this could change if the pass 2 xref
+		// stream compresses differently.  There shouldn't be
+		// much difference, so we'll just pad 100 characters.
+		// This is unscientific though, and may not always
+		// work.  The only way we could really get around this
+		// would be to seek back to the beginning of the file
+		// and update /L in the linearization dictionary, but
+		// that would be the only thing in the design that
+		// would require the output file to be seekable.
+		for (int i = 0; i < 99; ++i)
+		{
+		    writeString(" ");
+		}
+		writeString("\n");
+		second_xref_end = this->pipeline->getCount();
+	    }
+	    else
+	    {
+		// Make the file size the same.
+		int pos = this->pipeline->getCount();
+		while (pos < second_xref_end + hint_length - 1)
+		{
+		    ++pos;
+		    writeString(" ");
+		}
+		writeString("\n");
+		// If this assertion fails, maybe we didn't have
+		// enough padding above.
+		assert(this->pipeline->getCount() ==
+		       second_xref_end + hint_length);
+	    }
+	}
+	else
+	{
+	    space_before_zero =
+		writeXRefTable(t_lin_second, 0, second_half_end,
+			       second_trailer_size);
+	}
+	writeString("startxref\n");
+	writeString(QUtil::int_to_string(first_xref_offset));
+	writeString("\n%%EOF\n");
+
+	if (pass == 1)
+	{
+	    // Close first pass pipeline
+	    file_size = this->pipeline->getCount();
+	    popPipelineStack();
+
+	    // Save hint offset since it will be set to zero by
+	    // calling openObject.
+	    int hint_offset = this->xref[hint_id].getOffset();
+
+	    // Write hint stream to a buffer
+	    pushPipeline(new Pl_Buffer("hint buffer"));
+	    activatePipelineStack();
+	    writeHintStream(hint_id);
+	    popPipelineStack(&hint_buffer);
+	    hint_length = hint_buffer.getPointer()->getSize();
+
+	    // Restore hint offset
+	    this->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);
+	}
+    }
+}
+
+// Write the file in standard (non-linearized) form: header, all
+// objects reachable from the trailer starting with the root, the
+// encryption dictionary if any, and finally either a cross-reference
+// table or, when object streams are in use, a cross-reference stream,
+// followed by startxref and the end-of-file marker.
+void
+QPDFWriter::writeStandard()
+{
+    // Start writing
+
+    writeHeader();
+
+    // Put root first on queue.
+    QPDFObjectHandle trailer = pdf.getTrailer();
+    enqueueObject(trailer.getKey("/Root"));
+
+    // Next place any other objects referenced from the trailer
+    // dictionary into the queue, handling direct objects recursively.
+    // Root is already there, so enqueuing it a second time is a
+    // no-op.
+    std::set<std::string> keys = trailer.getKeys();
+    for (std::set<std::string>::iterator iter = keys.begin();
+	 iter != keys.end(); ++iter)
+    {
+	enqueueObject(trailer.getKey(*iter));
+    }
+
+    // Now start walking queue, output each object.  Use empty()
+    // rather than size() for the loop test: size() on a std::list is
+    // not guaranteed to be constant time in older library
+    // implementations.
+    while (! this->object_queue.empty())
+    {
+	QPDFObjectHandle cur_object = this->object_queue.front();
+	this->object_queue.pop_front();
+	writeObject(cur_object);
+    }
+
+    // Write out the encryption dictionary, if any
+    if (this->encrypted)
+    {
+	writeEncryptionDictionary();
+    }
+
+    // Now write out xref.  next_objid is now the number of objects.
+    off_t xref_offset = this->pipeline->getCount();
+    if (this->object_stream_to_objects.empty())
+    {
+	// Write regular cross-reference table
+	writeXRefTable(t_normal, 0, this->next_objid - 1, this->next_objid);
+    }
+    else
+    {
+	// Write cross-reference stream.  The stream takes the next
+	// object number, so it is also the highest object id.
+	int xref_id = this->next_objid++;
+	writeXRefStream(xref_id, xref_id, xref_offset, t_normal,
+			0, this->next_objid - 1, this->next_objid);
+    }
+    writeString("startxref\n");
+    writeString(QUtil::int_to_string(xref_offset));
+    writeString("\n%%EOF\n");
+}
diff --git a/libqpdf/QPDFXRefEntry.cc b/libqpdf/QPDFXRefEntry.cc
new file mode 100644
index 00000000..669a2f13
--- /dev/null
+++ b/libqpdf/QPDFXRefEntry.cc
@@ -0,0 +1,61 @@
+
+#include <qpdf/QPDFXRefEntry.hh>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QUtil.hh>
+
+// Default constructor: creates an entry of type 0 with both fields
+// set to zero.
+QPDFXRefEntry::QPDFXRefEntry() :
+    type(0),
+    field1(0),
+    field2(0)
+{
+}
+
+// Construct an entry of the given type with the given field values.
+// Only types 1 and 2 may be constructed this way; any other type
+// throws QPDFExc.
+QPDFXRefEntry::QPDFXRefEntry(int type, int field1, int field2) :
+    type(type),
+    field1(field1),
+    field2(field2)
+{
+    bool const valid_type = ((type == 1) || (type == 2));
+    if (! valid_type)
+    {
+	throw QPDFExc("invalid xref type " + QUtil::int_to_string(type));
+    }
+}
+
+// Return this entry's type.
+int
+QPDFXRefEntry::getType() const
+{
+    return this->type;
+}
+
+// Return field 1 as an offset.  Only valid for type 1 entries; any
+// other type throws QPDFExc.
+int
+QPDFXRefEntry::getOffset() const
+{
+    if (this->type == 1)
+    {
+	return this->field1;
+    }
+    throw QPDFExc(
+	"getOffset called for xref entry of type != 1");
+}
+
+// Return field 1 as an object stream number.  Only valid for type 2
+// entries; any other type throws QPDFExc.
+int
+QPDFXRefEntry::getObjStreamNumber() const
+{
+    if (this->type == 2)
+    {
+	return this->field1;
+    }
+    throw QPDFExc(
+	"getObjStreamNumber called for xref entry of type != 2");
+}
+
+// Return field 2 as an index within an object stream.  Only valid
+// for type 2 entries; any other type throws QPDFExc.
+int
+QPDFXRefEntry::getObjStreamIndex() const
+{
+    if (this->type == 2)
+    {
+	return this->field2;
+    }
+    throw QPDFExc(
+	"getObjStreamIndex called for xref entry of type != 2");
+}
diff --git a/libqpdf/QPDF_Array.cc b/libqpdf/QPDF_Array.cc
new file mode 100644
index 00000000..d1edbfdd
--- /dev/null
+++ b/libqpdf/QPDF_Array.cc
@@ -0,0 +1,51 @@
+
+#include <qpdf/QPDF_Array.hh>
+
+#include <qpdf/QEXC.hh>
+
+// Construct an array object holding a copy of the given items.
+QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& items) :
+    items(items)
+{
+}
+
+// Destructor; no explicit cleanup is performed here.
+QPDF_Array::~QPDF_Array()
+{
+}
+
+// Return the textual form of the array: "[ " followed by each item's
+// unparsed form and a space, then "]".
+std::string
+QPDF_Array::unparse()
+{
+    std::string text = "[ ";
+    std::vector<QPDFObjectHandle>::size_type const count =
+	this->items.size();
+    for (std::vector<QPDFObjectHandle>::size_type idx = 0;
+	 idx < count; ++idx)
+    {
+	text += this->items[idx].unparse();
+	text += " ";
+    }
+    text += "]";
+    return text;
+}
+
+// Return the number of items in the array.
+int
+QPDF_Array::getNItems() const
+{
+    return this->items.size();
+}
+
+// Return the item at index n.  Throws QEXC::Internal if n is
+// negative or not less than the number of items.
+QPDFObjectHandle
+QPDF_Array::getItem(int n) const
+{
+    bool const in_range = ((n >= 0) && (n < (int)this->items.size()));
+    if (in_range)
+    {
+	return this->items[n];
+    }
+    throw QEXC::Internal("bounds array accessing QPDF_Array element");
+}
+
+// Replace the item at index n with oh.  An out-of-range index causes
+// getItem to throw before anything is modified.
+void
+QPDF_Array::setItem(int n, QPDFObjectHandle const& oh)
+{
+    // Call getItem for bounds checking
+    (void) getItem(n);
+    this->items[n] = oh;
+}
diff --git a/libqpdf/QPDF_Bool.cc b/libqpdf/QPDF_Bool.cc
new file mode 100644
index 00000000..2b50c4c2
--- /dev/null
+++ b/libqpdf/QPDF_Bool.cc
@@ -0,0 +1,23 @@
+
+#include <qpdf/QPDF_Bool.hh>
+
+// Construct a boolean object with the given value.
+QPDF_Bool::QPDF_Bool(bool val) :
+    val(val)
+{
+}
+
+// Destructor; no explicit cleanup is performed here.
+QPDF_Bool::~QPDF_Bool()
+{
+}
+
+// Return "true" or "false" according to the stored value.
+std::string
+QPDF_Bool::unparse()
+{
+    if (this->val)
+    {
+	return "true";
+    }
+    return "false";
+}
+
+// Return the stored boolean value.
+bool
+QPDF_Bool::getVal() const
+{
+    return this->val;
+}
diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc
new file mode 100644
index 00000000..654df688
--- /dev/null
+++ b/libqpdf/QPDF_Dictionary.cc
@@ -0,0 +1,84 @@
+
+#include <qpdf/QPDF_Dictionary.hh>
+
+#include <qpdf/QPDF_Null.hh>
+#include <qpdf/QPDF_Name.hh>
+
+// Construct a dictionary object holding a copy of the given
+// key/value map.
+QPDF_Dictionary::QPDF_Dictionary(
+    std::map<std::string, QPDFObjectHandle> const& items) :
+    items(items)
+{
+}
+
+// Destructor; no explicit cleanup is performed here.
+QPDF_Dictionary::~QPDF_Dictionary()
+{
+}
+
+// Return the textual form of the dictionary: "<< " followed by each
+// entry as "normalized-key value " in map order, then ">>".
+std::string
+QPDF_Dictionary::unparse()
+{
+    std::string text = "<< ";
+    std::map<std::string, QPDFObjectHandle>::iterator entry =
+	this->items.begin();
+    while (entry != this->items.end())
+    {
+	text += QPDF_Name::normalizeName(entry->first);
+	text += " ";
+	text += entry->second.unparse();
+	text += " ";
+	++entry;
+    }
+    text += ">>";
+    return text;
+}
+
+// Return true if key is present and its value is not the null
+// object.
+bool
+QPDF_Dictionary::hasKey(std::string const& key)
+{
+    std::map<std::string, QPDFObjectHandle>::iterator found =
+	this->items.find(key);
+    if (found == this->items.end())
+    {
+	return false;
+    }
+    return (! found->second.isNull());
+}
+
+// Return the value stored under key, which may itself be a null
+// object.  A missing key yields a new null object, since the PDF
+// spec says fetching a non-existent key returns null.
+QPDFObjectHandle
+QPDF_Dictionary::getKey(std::string const& key)
+{
+    std::map<std::string, QPDFObjectHandle>::iterator found =
+	this->items.find(key);
+    if (found == this->items.end())
+    {
+	return QPDFObjectHandle::newNull();
+    }
+    return found->second;
+}
+
+// Return the set of keys for which hasKey is true, i.e. keys whose
+// values are not null.
+std::set<std::string>
+QPDF_Dictionary::getKeys()
+{
+    std::set<std::string> keys;
+    std::map<std::string, QPDFObjectHandle>::const_iterator entry =
+	this->items.begin();
+    while (entry != this->items.end())
+    {
+	std::string const& key = entry->first;
+	if (hasKey(key))
+	{
+	    keys.insert(key);
+	}
+	++entry;
+    }
+    return keys;
+}
+
+// Store value under key, adding the key if it is not already
+// present.
+void
+QPDF_Dictionary::replaceKey(std::string const& key,
+			    QPDFObjectHandle const& value)
+{
+    // add or replace value
+    this->items[key] = value;
+}
+
+// Remove key and its value from the dictionary.
+void
+QPDF_Dictionary::removeKey(std::string const& key)
+{
+    // no-op if key does not exist
+    this->items.erase(key);
+}
diff --git a/libqpdf/QPDF_Integer.cc b/libqpdf/QPDF_Integer.cc
new file mode 100644
index 00000000..988519d0
--- /dev/null
+++ b/libqpdf/QPDF_Integer.cc
@@ -0,0 +1,25 @@
+
+#include <qpdf/QPDF_Integer.hh>
+
+#include <qpdf/QUtil.hh>
+
+// Construct an integer object with the given value.
+QPDF_Integer::QPDF_Integer(int val) :
+    val(val)
+{
+}
+
+// Destructor; no explicit cleanup is performed here.
+QPDF_Integer::~QPDF_Integer()
+{
+}
+
+// Return the stored value converted to a string by
+// QUtil::int_to_string.
+std::string
+QPDF_Integer::unparse()
+{
+    return QUtil::int_to_string(this->val);
+}
+
+// Return the stored integer value.
+int
+QPDF_Integer::getVal() const
+{
+    return this->val;
+}
diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc
new file mode 100644
index 00000000..f57ced04
--- /dev/null
+++ b/libqpdf/QPDF_Name.cc
@@ -0,0 +1,46 @@
+
+#include <qpdf/QPDF_Name.hh>
+
+// Construct a name object holding a copy of the given name.
+QPDF_Name::QPDF_Name(std::string const& name) :
+    name(name)
+{
+}
+
+// Destructor; no explicit cleanup is performed here.
+QPDF_Name::~QPDF_Name()
+{
+}
+
+// Return name with every character after the first replaced by
+// "#xx" (two lowercase hex digits) when it is one of "#()<>[]{}/%",
+// has a value below 33, or has a value above 126.  The first
+// character is copied unchanged.  An empty name yields an empty
+// result.
+std::string
+QPDF_Name::normalizeName(std::string const& name)
+{
+    std::string result;
+    if (name.empty())
+    {
+	// Without this check, name[0] would append a stray NUL
+	// character to the result.
+	return result;
+    }
+    // Room for '#', two hex digits, and the terminating NUL.
+    char num[4];
+    result += name[0];
+    for (unsigned int i = 1; i < name.length(); ++i)
+    {
+	char ch = name[i];
+	// Don't use locale/ctype here; follow PDF spec guidelines.
+	if (strchr("#()<>[]{}/%", ch) || (ch < 33) || (ch > 126))
+	{
+	    sprintf(num, "#%02x", (unsigned char) ch);
+	    result += num;
+	}
+	else
+	{
+	    result += ch;
+	}
+    }
+    return result;
+}
+
+// Return the stored name in normalized form (see normalizeName).
+std::string
+QPDF_Name::unparse()
+{
+    return normalizeName(this->name);
+}
+
+// Return the stored name exactly as given, without normalization.
+std::string
+QPDF_Name::getName() const
+{
+    return this->name;
+}
diff --git a/libqpdf/QPDF_Null.cc b/libqpdf/QPDF_Null.cc
new file mode 100644
index 00000000..57a78b7e
--- /dev/null
+++ b/libqpdf/QPDF_Null.cc
@@ -0,0 +1,12 @@
+
+#include <qpdf/QPDF_Null.hh>
+
+// Destructor; no explicit cleanup is performed here.
+QPDF_Null::~QPDF_Null()
+{
+}
+
+// Return the textual form of the null object.
+std::string
+QPDF_Null::unparse()
+{
+    return "null";
+}
diff --git a/libqpdf/QPDF_Real.cc b/libqpdf/QPDF_Real.cc
new file mode 100644
index 00000000..87a19cb2
--- /dev/null
+++ b/libqpdf/QPDF_Real.cc
@@ -0,0 +1,23 @@
+
+#include <qpdf/QPDF_Real.hh>
+
+// Construct a real object from its string representation, which is
+// stored as given.
+QPDF_Real::QPDF_Real(std::string const& val) :
+    val(val)
+{
+}
+
+// Destructor; no explicit cleanup is performed here.
+QPDF_Real::~QPDF_Real()
+{
+}
+
+// Return the stored string representation unchanged.
+std::string
+QPDF_Real::unparse()
+{
+    return this->val;
+}
+
+// Return the stored string representation of the value.
+std::string
+QPDF_Real::getVal()
+{
+    return this->val;
+}
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
new file mode 100644
index 00000000..9694f837
--- /dev/null
+++ b/libqpdf/QPDF_Stream.cc
@@ -0,0 +1,309 @@
+
+#include <qpdf/QPDF_Stream.hh>
+
+#include <qpdf/QEXC.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/Pipeline.hh>
+#include <qpdf/Pl_Flate.hh>
+#include <qpdf/Pl_PNGFilter.hh>
+#include <qpdf/Pl_RC4.hh>
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/Pl_ASCII85Decoder.hh>
+#include <qpdf/Pl_ASCIIHexDecoder.hh>
+#include <qpdf/Pl_LZWDecoder.hh>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/Pl_QPDFTokenizer.hh>
+
+// Record where a stream lives (owning QPDF, object id/generation,
+// offset and length) together with its dictionary.  Throws
+// QEXC::Internal if the supplied dictionary handle is not actually
+// a dictionary.
+QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
+			 QPDFObjectHandle stream_dict,
+			 off_t offset, int length) :
+    qpdf(qpdf),
+    objid(objid),
+    generation(generation),
+    stream_dict(stream_dict),
+    offset(offset),
+    length(length)
+{
+    bool const have_dictionary = stream_dict.isDictionary();
+    if (have_dictionary)
+    {
+	return;
+    }
+    throw QEXC::Internal("stream object instantiated with non-dictionary "
+			 "object for dictionary");
+}
+
+// Empty destructor; no cleanup is performed here.
+QPDF_Stream::~QPDF_Stream()
+{
+}
+
+// Streams are never written inline: the textual form is the
+// indirect reference "<objid> <generation> R".
+std::string
+QPDF_Stream::unparse()
+{
+    std::string reference = QUtil::int_to_string(this->objid);
+    reference += " ";
+    reference += QUtil::int_to_string(this->generation);
+    reference += " R";
+    return reference;
+}
+
+// Return the handle to this stream's dictionary.
+QPDFObjectHandle
+QPDF_Stream::getDict() const
+{
+    QPDFObjectHandle dict(this->stream_dict);
+    return dict;
+}
+
+// Collect the filtered (decoded) stream data into a buffer.  Throws
+// QPDFExc if pipeStreamData() reports that the stream could not be
+// filtered.
+PointerHolder<Buffer>
+QPDF_Stream::getStreamData()
+{
+    Pl_Buffer collector("stream data buffer");
+    bool const filtered = pipeStreamData(&collector, true, false, false);
+    if (filtered)
+    {
+	return collector.getBuffer();
+    }
+    throw QPDFExc("getStreamData called on unfilterable stream");
+}
+
+// Decide whether this stream's filters and decode parameters are all
+// ones we know how to undo.  On return `filters' has the stream's
+// filter names appended in order, and predictor, columns and
+// early_code_change hold the values from /DecodeParms (or the
+// defaults).  Throws QPDFExc when /DecodeParms or /Filter has an
+// object type that is not handled here.
+bool
+QPDF_Stream::filterable(std::vector<std::string>& filters,
+			int& predictor, int& columns,
+			bool& early_code_change)
+{
+    // Defaults used when /DecodeParms does not say otherwise
+    predictor = 1;
+    columns = 0;
+    early_code_change = true;
+
+    bool can_filter = true;
+
+    // Step 1: decode parameters.  Any key or value we do not
+    // understand makes the stream unfilterable.
+
+    QPDFObjectHandle parms = this->stream_dict.getKey("/DecodeParms");
+    if (! parms.isNull())
+    {
+	if (! parms.isDictionary())
+	{
+	    throw QPDFExc(qpdf->getFilename(), this->offset,
+			  "invalid decode parameters object type for this stream");
+	}
+
+	std::set<std::string> parm_keys = parms.getKeys();
+	for (std::set<std::string>::iterator k = parm_keys.begin();
+	     k != parm_keys.end(); ++k)
+	{
+	    std::string const& key = *k;
+	    if (key == "/Predictor")
+	    {
+		QPDFObjectHandle value = parms.getKey(key);
+		if (! value.isInteger())
+		{
+		    can_filter = false;
+		}
+		else
+		{
+		    predictor = value.getIntValue();
+		    if ((predictor != 1) && (predictor != 12))
+		    {
+			can_filter = false;
+		    }
+		}
+	    }
+	    else if (key == "/EarlyChange")
+	    {
+		QPDFObjectHandle value = parms.getKey(key);
+		if (! value.isInteger())
+		{
+		    can_filter = false;
+		}
+		else
+		{
+		    int const early = value.getIntValue();
+		    early_code_change = (early == 1);
+		    if ((early != 0) && (early != 1))
+		    {
+			can_filter = false;
+		    }
+		}
+	    }
+	    else if (key == "/Columns")
+	    {
+		QPDFObjectHandle value = parms.getKey(key);
+		if (! value.isInteger())
+		{
+		    can_filter = false;
+		}
+		else
+		{
+		    columns = value.getIntValue();
+		}
+	    }
+	    else
+	    {
+		// unrecognized decode parameter
+		can_filter = false;
+	    }
+	}
+    }
+
+    // A predictor needs a column count to work with.
+    if ((predictor > 1) && (columns == 0))
+    {
+	can_filter = false;
+    }
+
+    if (! can_filter)
+    {
+	return false;
+    }
+
+    // Step 2: gather the filter names.
+
+    QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
+    bool filter_types_ok = true;
+
+    if (! filter_obj.isNull())
+    {
+	if (filter_obj.isName())
+	{
+	    // exactly one filter
+	    filters.push_back(filter_obj.getName());
+	}
+	else if (filter_obj.isArray())
+	{
+	    // zero or more filters; every element must be a name
+	    int const count = filter_obj.getArrayNItems();
+	    int idx = 0;
+	    while (idx < count)
+	    {
+		QPDFObjectHandle entry = filter_obj.getArrayItem(idx);
+		if (entry.isName())
+		{
+		    filters.push_back(entry.getName());
+		}
+		else
+		{
+		    filter_types_ok = false;
+		}
+		++idx;
+	    }
+	}
+	else
+	{
+	    filter_types_ok = false;
+	}
+    }
+
+    if (! filter_types_ok)
+    {
+	QTC::TC("qpdf", "QPDF_Stream invalid filter");
+	throw QPDFExc(qpdf->getFilename(), this->offset,
+		      "invalid filter object type for this stream");
+    }
+
+    // Step 3: every listed filter must be one we implement.
+
+    for (std::vector<std::string>::iterator f = filters.begin();
+	 f != filters.end(); ++f)
+    {
+	std::string const& name = *f;
+	bool const supported =
+	    (name == "/FlateDecode") ||
+	    (name == "/LZWDecode") ||
+	    (name == "/ASCII85Decode") ||
+	    (name == "/ASCIIHexDecode");
+	if (! supported)
+	{
+	    can_filter = false;
+	}
+    }
+
+    return can_filter;
+}
+
+// Send this stream's data down `pipeline'.
+//
+//   filter:    try to undo the stream's filters; silently turned off
+//              if filterable() says the stream cannot be decoded
+//   normalize: when filtering, pass decoded data through
+//              Pl_QPDFTokenizer
+//   compress:  when filtering, deflate the data again on the way out
+//
+// Returns whether the data was actually filtered.  If `pipeline' is
+// null, nothing is written and only that answer is returned.
+bool
+QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
+			    bool normalize, bool compress)
+{
+    std::vector<std::string> filters;
+    int predictor = 1;
+    int columns = 0;
+    bool early_code_change = true;
+    if (filter)
+    {
+	filter = filterable(filters, predictor, columns, early_code_change);
+    }
+
+    if (pipeline == 0)
+    {
+	QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
+	return filter;
+    }
+
+    // Construct the pipeline in reverse order.  Force pipelines we
+    // create to be deleted when this function finishes.
+    std::vector<PointerHolder<Pipeline> > to_delete;
+
+    if (filter)
+    {
+	if (compress)
+	{
+	    pipeline = new Pl_Flate("compress object stream", pipeline,
+				    Pl_Flate::a_deflate);
+	    to_delete.push_back(pipeline);
+	}
+
+	if (normalize)
+	{
+	    pipeline = new Pl_QPDFTokenizer("normalizer", pipeline);
+	    to_delete.push_back(pipeline);
+	}
+
+	for (std::vector<std::string>::reverse_iterator iter = filters.rbegin();
+	     iter != filters.rend(); ++iter)
+	{
+	    std::string const& filter_name = *iter;
+	    bool const is_flate = (filter_name == "/FlateDecode");
+	    bool const is_lzw = (filter_name == "/LZWDecode");
+	    if (is_flate || is_lzw)
+	    {
+		// filterable() accepts /Predictor 12 regardless of
+		// which of these two filters is in use, so the PNG
+		// predictor has to be undone after either
+		// decompressor, not just after inflate.
+		if (predictor == 12)
+		{
+		    QTC::TC("qpdf", "QPDF_Stream PNG filter");
+		    pipeline = new Pl_PNGFilter(
+			"png decode", pipeline, Pl_PNGFilter::a_decode,
+			columns, 0 /* not used */);
+		    to_delete.push_back(pipeline);
+		}
+
+		if (is_flate)
+		{
+		    pipeline = new Pl_Flate("stream inflate",
+					    pipeline, Pl_Flate::a_inflate);
+		}
+		else
+		{
+		    pipeline = new Pl_LZWDecoder("lzw decode", pipeline,
+						 early_code_change);
+		}
+		to_delete.push_back(pipeline);
+	    }
+	    else if (filter_name == "/ASCII85Decode")
+	    {
+		pipeline = new Pl_ASCII85Decoder("ascii85 decode", pipeline);
+		to_delete.push_back(pipeline);
+	    }
+	    else if (filter_name == "/ASCIIHexDecode")
+	    {
+		pipeline = new Pl_ASCIIHexDecoder("asciiHex decode", pipeline);
+		to_delete.push_back(pipeline);
+	    }
+	    else
+	    {
+		// filterable() only lets the four names above through
+		throw QEXC::Internal("QPDFStream: unknown filter "
+				     "encountered after check");
+	    }
+	}
+    }
+
+    QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
+			       this->offset, this->length,
+			       this->stream_dict, pipeline);
+
+    return filter;
+}
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
new file mode 100644
index 00000000..cc8ca042
--- /dev/null
+++ b/libqpdf/QPDF_String.cc
@@ -0,0 +1,178 @@
+
+#include <qpdf/QPDF_String.hh>
+
+#include <qpdf/QUtil.hh>
+// DO NOT USE ctype -- it is locale dependent for some things, and
+// it's not worth the risk of including it in case it may accidentally
+// be used.
+#include <string.h>
+
+// Hand-coded, locale-independent printability test (ctype is
+// deliberately avoided in this file): true for 32..126 and 160..255.
+static bool is_iso_latin1_printable(unsigned char ch)
+{
+    if (ch < 32)
+    {
+	return false;
+    }
+    return ((ch <= 126) || (ch >= 160));
+}
+
+// Construct a string object holding a copy of the given bytes.
+QPDF_String::QPDF_String(std::string const& val) :
+    val(val)
+{
+}
+
+// Empty destructor; no cleanup is performed here.
+QPDF_String::~QPDF_String()
+{
+}
+
+// Default textual form: hex notation is not forced, so the choice
+// between "(...)" and "<...>" is made from the content.
+std::string
+QPDF_String::unparse()
+{
+    bool const force_binary = false;
+    return unparse(force_binary);
+}
+
+// Produce the textual form of the string: either a hex string
+// "<...>" or a literal string "(...)" with escapes.  Hex is used
+// when force_binary is set or when the content looks binary (see
+// the heuristic below).
+std::string
+QPDF_String::unparse(bool force_binary)
+{
+    bool as_hex = force_binary;
+    if (! as_hex)
+    {
+	// Count non-printable bytes.  Printability is decided by hand
+	// (ISO-Latin-1 printable, or one of the characters that have
+	// a standard escape) rather than by locale, since the data is
+	// arbitrary binary and some locales imply multibyte
+	// characters.
+	unsigned int binary_count = 0;
+	int printable_run = 0;
+	for (unsigned int i = 0; i < this->val.length(); ++i)
+	{
+	    char ch = this->val[i];
+	    bool const readable =
+		(ch != 0) && (is_iso_latin1_printable(ch) ||
+			      strchr("\n\r\t\b\f", ch));
+	    if (readable)
+	    {
+		++printable_run;
+		if (printable_run > 5)
+		{
+		    // More than 5 printable characters in a row:
+		    // show the string as text no matter what else
+		    // it contains.
+		    binary_count = 0;
+		    break;
+		}
+	    }
+	    else
+	    {
+		++binary_count;
+		printable_run = 0;
+	    }
+	}
+
+	// Switch to hex when more than 20% of the bytes are not
+	// printable by the rule above.
+	if (5 * binary_count > val.length())
+	{
+	    as_hex = true;
+	}
+    }
+
+    std::string result;
+    if (as_hex)
+    {
+	char hex[3];	// two hex digits plus terminating null
+	result += "<";
+	for (unsigned int i = 0; i < this->val.length(); ++i)
+	{
+	    sprintf(hex, "%02x", (unsigned char) this->val[i]);
+	    result += hex;
+	}
+	result += ">";
+	return result;
+    }
+
+    char octal[5];	// backslash, three octal digits, terminating null
+    result += "(";
+    for (unsigned int i = 0; i < this->val.length(); ++i)
+    {
+	char ch = this->val[i];
+
+	// Characters that have a dedicated two-character escape
+	char const* escape = 0;
+	switch (ch)
+	{
+	  case '\n': escape = "\\n"; break;
+	  case '\r': escape = "\\r"; break;
+	  case '\t': escape = "\\t"; break;
+	  case '\b': escape = "\\b"; break;
+	  case '\f': escape = "\\f"; break;
+	  case '(': escape = "\\("; break;
+	  case ')': escape = "\\)"; break;
+	  case '\\': escape = "\\\\"; break;
+	  default: break;
+	}
+
+	if (escape != 0)
+	{
+	    result += escape;
+	}
+	else if (is_iso_latin1_printable(ch))
+	{
+	    result += this->val[i];
+	}
+	else
+	{
+	    // anything else goes out as a three-digit octal escape
+	    sprintf(octal, "\\%03o", (unsigned char)ch);
+	    result += octal;
+	}
+    }
+    result += ")";
+
+    return result;
+}
+
+// Return a copy of the raw stored bytes, with no escaping or
+// conversion.
+std::string
+QPDF_String::getVal() const
+{
+    std::string copy(this->val);
+    return copy;
+}
+
+// Convert the stored bytes to UTF-8.  A value of even length that
+// starts with the bytes FE FF is treated as big-endian UTF-16;
+// anything else is converted one byte at a time.
+std::string
+QPDF_String::getUTF8Val() const
+{
+    std::string result;
+    unsigned int len = this->val.length();
+    bool const utf16_be =
+	(len >= 2) && (len % 2 == 0) &&
+	(this->val[0] == '\xfe') && (this->val[1] == '\xff');
+
+    if (! utf16_be)
+    {
+	// Each byte is passed to toUTF8 as its own code point.
+	for (unsigned int i = 0; i < len; ++i)
+	{
+	    result += QUtil::toUTF8((unsigned char) this->val[i]);
+	}
+	return result;
+    }
+
+    // Skip the two-byte marker, then combine byte pairs.  This is
+    // known to be incomplete: each 16-bit unit is converted
+    // independently, so characters past 0xffff are not handled
+    // properly.
+    unsigned int i = 2;
+    while (i < len)
+    {
+	unsigned char const hi = (unsigned char) this->val[i];
+	unsigned char const lo = (unsigned char) this->val[i+1];
+	result += QUtil::toUTF8((hi << 8) + lo);
+	i += 2;
+    }
+    return result;
+}
diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc
new file mode 100644
index 00000000..e5e2d8be
--- /dev/null
+++ b/libqpdf/QPDF_encryption.cc
@@ -0,0 +1,441 @@
+// This file implements methods from the QPDF class that involve
+// encryption.
+
+#include <qpdf/QPDF.hh>
+
+#include <qpdf/QPDFExc.hh>
+
+#include <qpdf/QUtil.hh>
+#include <qpdf/Pl_RC4.hh>
+#include <qpdf/RC4.hh>
+#include <qpdf/MD5.hh>
+
+// Fill bytes appended to short passwords by pad_or_truncate_password()
+// and searched for by trim_user_password().
+// NOTE(review): presumably the standard 32-byte password padding
+// string from the PDF Reference -- confirm against the spec.
+static char const padding_string[] = {
+    0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41,
+    0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
+    0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
+    0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a
+};
+
+// Size of the buffer filled by compute_O_rc4_key(): one MD5 digest.
+static unsigned int const O_key_bytes = sizeof(MD5::Digest);
+// Required length of the first /ID string in the trailer.
+static unsigned int const id_bytes = 16;
+// Length of a padded password and of the /O and /U strings.
+static unsigned int const key_bytes = 32;
+
+// Copy at most key_bytes bytes of `password' into k1 and fill the
+// remainder of k1 with the leading bytes of padding_string, so that
+// k1 always receives exactly key_bytes bytes.  k1 is not
+// null-terminated.
+void
+pad_or_truncate_password(std::string const& password, char k1[key_bytes])
+{
+    // std::min requires both arguments to have the same type;
+    // key_bytes is unsigned int while length() returns size_t, so
+    // convert explicitly.
+    size_t password_bytes = std::min((size_t) key_bytes, password.length());
+    size_t pad_bytes = key_bytes - password_bytes;
+    memcpy(k1, password.c_str(), password_bytes);
+    memcpy(k1 + password_bytes, padding_string, pad_bytes);
+}
+
+// Although unnecessary, this routine trims the padding string from
+// the end of a user password.  Its only purpose is for recovery of
+// user passwords which is done in the test suite.
+//
+// Passwords shorter than key_bytes are left alone.  Otherwise the
+// password is cut at the first 0x28 byte from which the rest of the
+// password matches the beginning of padding_string.
+void
+QPDF::trim_user_password(std::string& user_password)
+{
+    char const* cstr = user_password.c_str();
+    size_t len = user_password.length();
+    if (len < key_bytes)
+    {
+	return;
+    }
+
+    // Scan with memchr and an explicit length rather than strchr:
+    // padding_string itself contains null bytes.
+    char const* search = cstr;
+    char const* p = 0;
+    while ((p = (char const*) memchr(
+		search, '\x28', len - (search - cstr))) != 0)
+    {
+	size_t remaining = len - (p - cstr);
+	// A tail longer than the padding string cannot be padding;
+	// checking first also keeps memcmp inside padding_string.
+	if ((remaining <= key_bytes) &&
+	    (memcmp(p, padding_string, remaining) == 0))
+	{
+	    user_password = user_password.substr(0, p - cstr);
+	    return;
+	}
+	// This 0x28 belongs to the password itself.  Resume after it;
+	// otherwise the same byte is found again forever.
+	search = p + 1;
+    }
+}
+
+// Convenience overload: return the padded/truncated password as a
+// std::string of exactly key_bytes bytes.
+static std::string
+pad_or_truncate_password(std::string const& password)
+{
+    char padded[key_bytes];
+    pad_or_truncate_password(password, padded);
+    std::string result(padded, key_bytes);
+    return result;
+}
+
+// Finish `md5' into `digest', then replace the digest with the MD5
+// of itself `iterations' more times.
+static void
+iterate_md5_digest(MD5& md5, MD5::Digest& digest, int iterations)
+{
+    md5.digest(digest);
+
+    int rounds_done = 0;
+    while (rounds_done < iterations)
+    {
+	MD5 rehash;
+	rehash.encodeDataIncrementally((char*)digest, sizeof(MD5::Digest));
+	rehash.digest(digest);
+	++rounds_done;
+    }
+}
+
+
+// Run RC4 over `data' in place `iterations' times.  The key for each
+// pass is okey with every byte XORed by the pass number: counting up
+// from 0 normally, or down from iterations - 1 when `reverse' is
+// set.
+static void
+iterate_rc4(unsigned char* data, int data_len,
+	    unsigned char* okey, int key_len,
+	    int iterations, bool reverse)
+{
+    unsigned char* pass_key = new unsigned char[key_len];
+    for (int pass = 0; pass < iterations; ++pass)
+    {
+	int xor_value = pass;
+	if (reverse)
+	{
+	    xor_value = iterations - 1 - pass;
+	}
+	for (int k = 0; k < key_len; ++k)
+	{
+	    pass_key[k] = okey[k] ^ xor_value;
+	}
+	RC4 cipher(pass_key, key_len);
+	cipher.process(data, data_len);
+    }
+    delete [] pass_key;
+}
+
+// Algorithm 3.1 from the PDF 1.4 Reference Manual: derive the key
+// for one object from the file encryption key plus the object's id
+// and generation.
+std::string
+QPDF::compute_data_key(std::string const& encryption_key,
+		       int objid, int generation)
+{
+    // Low three bytes of the object ID followed by the low two bytes
+    // of the generation, least significant byte first
+    char suffix[5];
+    suffix[0] = (char) (objid & 0xff);
+    suffix[1] = (char) ((objid >> 8) & 0xff);
+    suffix[2] = (char) ((objid >> 16) & 0xff);
+    suffix[3] = (char) (generation & 0xff);
+    suffix[4] = (char) ((generation >> 8) & 0xff);
+
+    std::string material = encryption_key;
+    material.append(suffix, 5);
+
+    MD5 md5;
+    md5.encodeDataIncrementally(material.c_str(), material.length());
+    MD5::Digest digest;
+    md5.digest(digest);
+
+    // The key is as long as the hashed input, capped at 16 bytes.
+    size_t const key_length = std::min(material.length(), (size_t) 16);
+    return std::string((char*) digest, key_length);
+}
+
+// Algorithm 3.2 from the PDF 1.4 Reference Manual: compute the file
+// encryption key from a password and the encryption parameters.
+//
+// The MD5 input is, in order: the padded password, the /O value, the
+// four bytes of /P (least significant first) and the first /ID
+// string.  For R == 3 the digest is re-hashed 50 times.  The result
+// is the first data.Length_bytes bytes of the digest.
+std::string
+QPDF::compute_encryption_key(
+    std::string const& password, EncryptionData const& data)
+{
+    MD5 md5;
+    md5.encodeDataIncrementally(
+	pad_or_truncate_password(password).c_str(), key_bytes);
+    md5.encodeDataIncrementally(data.O.c_str(), key_bytes);
+    char pbytes[4];
+    pbytes[0] = (char) (data.P & 0xff);
+    pbytes[1] = (char) ((data.P >> 8) & 0xff);
+    pbytes[2] = (char) ((data.P >> 16) & 0xff);
+    pbytes[3] = (char) ((data.P >> 24) & 0xff);
+    md5.encodeDataIncrementally(pbytes, 4);
+    // Hash exactly the bytes id1 holds, as compute_U_value_R3 does.
+    // Using a fixed count would read past the end of the string
+    // when id1 is shorter than id_bytes.
+    md5.encodeDataIncrementally(data.id1.c_str(), data.id1.length());
+    MD5::Digest digest;
+    iterate_md5_digest(md5, digest, ((data.R == 3) ? 50 : 0));
+    return std::string((char*)digest, data.Length_bytes);
+}
+
+// Compute the RC4 key used for the /O value: the (iterated, for
+// R == 3) MD5 of the padded owner password.  When the owner password
+// is empty the user password is used in its place.
+static void
+compute_O_rc4_key(std::string const& user_password,
+		  std::string const& owner_password,
+		  QPDF::EncryptionData const& data,
+		  unsigned char key[O_key_bytes])
+{
+    std::string const password =
+	(owner_password.empty() ? user_password : owner_password);
+    std::string const padded = pad_or_truncate_password(password);
+
+    MD5 md5;
+    md5.encodeDataIncrementally(padded.c_str(), key_bytes);
+
+    int const extra_rounds = ((data.R == 3) ? 50 : 0);
+    MD5::Digest digest;
+    iterate_md5_digest(md5, digest, extra_rounds);
+    memcpy(key, digest, O_key_bytes);
+}
+
+// Algorithm 3.3 from the PDF 1.4 Reference Manual: the /O value is
+// the padded user password encrypted with the key derived from the
+// owner password (20 RC4 passes for R == 3, otherwise one).
+static std::string
+compute_O_value(std::string const& user_password,
+		std::string const& owner_password,
+		QPDF::EncryptionData const& data)
+{
+    unsigned char rc4_key[O_key_bytes];
+    compute_O_rc4_key(user_password, owner_password, data, rc4_key);
+
+    char padded_user[key_bytes];
+    pad_or_truncate_password(user_password, padded_user);
+
+    int const passes = (data.R == 3) ? 20 : 1;
+    iterate_rc4((unsigned char*) padded_user, key_bytes,
+		rc4_key, data.Length_bytes, passes, false);
+    return std::string(padded_user, key_bytes);
+}
+
+// Algorithm 3.4 from the PDF 1.4 Reference Manual: the /U value for
+// revision 2 is the padding string encrypted once with the file
+// encryption key.
+static
+std::string
+compute_U_value_R2(std::string const& user_password,
+		   QPDF::EncryptionData const& data)
+{
+    std::string file_key = QPDF::compute_encryption_key(user_password, data);
+
+    // Padding an empty password yields the bare padding string.
+    char buffer[key_bytes];
+    pad_or_truncate_password("", buffer);
+
+    iterate_rc4((unsigned char*) buffer, key_bytes,
+		(unsigned char*)file_key.c_str(), data.Length_bytes,
+		1, false);
+    return std::string(buffer, key_bytes);
+}
+
+// Algorithm 3.5 from the PDF 1.4 Reference Manual: the /U value for
+// revision 3 is the MD5 of (padding string + first /ID string),
+// RC4-encrypted 20 times with the file encryption key, followed by
+// filler bytes up to key_bytes.
+static
+std::string
+compute_U_value_R3(std::string const& user_password,
+		   QPDF::EncryptionData const& data)
+{
+    std::string file_key = QPDF::compute_encryption_key(user_password, data);
+
+    std::string const padding = pad_or_truncate_password("");
+    MD5 md5;
+    md5.encodeDataIncrementally(padding.c_str(), key_bytes);
+    md5.encodeDataIncrementally(data.id1.c_str(), data.id1.length());
+    MD5::Digest digest;
+    md5.digest(digest);
+
+    iterate_rc4(digest, sizeof(MD5::Digest),
+		(unsigned char*) file_key.c_str(), data.Length_bytes,
+		20, false);
+
+    char result[key_bytes];
+    memcpy(result, digest, sizeof(MD5::Digest));
+    // The bytes after the digest are arbitrary; a fixed pattern is
+    // used so that output is reproducible for the sake of testing.
+    unsigned int pos = sizeof(MD5::Digest);
+    while (pos < key_bytes)
+    {
+	result[pos] = (char)((pos * pos) % 0xff);
+	++pos;
+    }
+    return std::string(result, key_bytes);
+}
+
+// Compute the /U value with the algorithm matching data.R:
+// revision 3 uses compute_U_value_R3, anything else uses
+// compute_U_value_R2.
+static std::string
+compute_U_value(std::string const& user_password,
+		QPDF::EncryptionData const& data)
+{
+    if (data.R != 3)
+    {
+	return compute_U_value_R2(user_password, data);
+    }
+
+    return compute_U_value_R3(user_password, data);
+}
+
+// Algorithm 3.6 from the PDF 1.4 Reference Manual: a user password
+// is right when the /U value computed from it matches the stored
+// one.  For R == 3 only the first sizeof(MD5::Digest) bytes are
+// compared; otherwise all key_bytes bytes are.
+static bool
+check_user_password(std::string const& user_password,
+		    QPDF::EncryptionData const& data)
+{
+    std::string expected = compute_U_value(user_password, data);
+    int compare_len = key_bytes;
+    if (data.R == 3)
+    {
+	compare_len = sizeof(MD5::Digest);
+    }
+    int const diff = memcmp(data.U.c_str(), expected.c_str(), compare_len);
+    return (diff == 0);
+}
+
+// Algorithm 3.7 from the PDF 1.4 Reference Manual: decrypt /O with
+// the key derived from the candidate owner password and test the
+// result as a user password.  On success user_password is replaced
+// by the recovered (still padded) user password and true is
+// returned; on failure user_password is left unchanged.
+static bool
+check_owner_password(std::string& user_password,
+		     std::string const& owner_password,
+		     QPDF::EncryptionData const& data)
+{
+    unsigned char rc4_key[O_key_bytes];
+    compute_O_rc4_key(user_password, owner_password, data, rc4_key);
+
+    unsigned char decrypted[key_bytes];
+    memcpy(decrypted, (unsigned char*) data.O.c_str(), key_bytes);
+    int const passes = (data.R == 3) ? 20 : 1;
+    iterate_rc4(decrypted, key_bytes, rc4_key, data.Length_bytes,
+		passes, true);
+
+    std::string candidate((char*)decrypted, key_bytes);
+    if (! check_user_password(candidate, data))
+    {
+	return false;
+    }
+    user_password = candidate;
+    return true;
+}
+
+// Read and validate the encryption parameters from the trailer (once
+// per object; later calls return immediately), check the provided
+// password, and store the resulting user password and file
+// encryption key.  Returns without doing anything further if the
+// trailer has no /Encrypt key.  Throws QPDFExc if the parameters are
+// malformed or unsupported, or if the password is neither the owner
+// nor the user password.
+void
+QPDF::initializeEncryption()
+{
+    if (this->encryption_initialized)
+    {
+	return;
+    }
+    this->encryption_initialized = true;
+
+    // After we initialize encryption parameters, we must use stored
+    // key information and never look at /Encrypt again.  Otherwise,
+    // things could go wrong if someone mutates the encryption
+    // dictionary.
+
+    if (! this->trailer.hasKey("/Encrypt"))
+    {
+	return;
+    }
+
+    // Only the first /ID string is used, so only it is type-checked.
+    QPDFObjectHandle id_obj = this->trailer.getKey("/ID");
+    if (! (id_obj.isArray() &&
+	   (id_obj.getArrayNItems() == 2) &&
+	   id_obj.getArrayItem(0).isString()))
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "invalid /ID in trailer dictionary");
+    }
+
+    std::string id1 = id_obj.getArrayItem(0).getStringValue();
+    if (id1.length() != id_bytes)
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "first /ID string in trailer dictionary has "
+		      "incorrect length");
+    }
+
+    QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt");
+    if (! encryption_dict.isDictionary())
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "/Encrypt in trailer dictionary is not a dictionary");
+    }
+
+    if (! (encryption_dict.getKey("/Filter").isName() &&
+	   (encryption_dict.getKey("/Filter").getName() == "/Standard")))
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "unsupported encryption filter");
+    }
+
+    if (! (encryption_dict.getKey("/V").isInteger() &&
+	   encryption_dict.getKey("/R").isInteger() &&
+	   encryption_dict.getKey("/O").isString() &&
+	   encryption_dict.getKey("/U").isString() &&
+	   encryption_dict.getKey("/P").isInteger()))
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "some encryption dictionary parameters are missing "
+		      "or the wrong type");
+    }
+
+    int V = encryption_dict.getKey("/V").getIntValue();
+    int R = encryption_dict.getKey("/R").getIntValue();
+    std::string O = encryption_dict.getKey("/O").getStringValue();
+    std::string U = encryption_dict.getKey("/U").getStringValue();
+    unsigned int P = (unsigned int) encryption_dict.getKey("/P").getIntValue();
+
+    if (! (((R == 2) || (R == 3)) &&
+	   ((V == 1) || (V == 2))))
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "Unsupported /R or /V in encryption dictionary");
+    }
+
+    // The key computations read exactly key_bytes bytes from each of
+    // these strings.  (The message names /U: that is the value
+    // checked here, not /P.)
+    if (! ((O.length() == key_bytes) && (U.length() == key_bytes)))
+    {
+	throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+		      "incorrect length for /O and/or /U in "
+		      "encryption dictionary");
+    }
+
+    // /Length is in bits: a multiple of 8 between 40 and 128, with
+    // 40 used when the key is absent or not an integer.
+    int Length = 40;
+    if (encryption_dict.getKey("/Length").isInteger())
+    {
+	Length = encryption_dict.getKey("/Length").getIntValue();
+	if ((Length % 8) || (Length < 40) || (Length > 128))
+	{
+	    throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+			  "invalid /Length value in encryption dictionary");
+	}
+    }
+
+    EncryptionData data(V, R, Length / 8, P, O, U, id1);
+    if (check_owner_password(this->user_password, this->provided_password, data))
+    {
+	// password supplied was owner password; user_password has
+	// been initialized
+    }
+    else if (check_user_password(this->provided_password, data))
+    {
+	this->user_password = this->provided_password;
+    }
+    else
+    {
+	throw QPDFExc(this->file.getName() + ": invalid password");
+    }
+
+    this->encrypted = true;
+    this->encryption_key = compute_encryption_key(this->user_password, data);
+}
+
+// Return the encryption key for the given object, recomputing it
+// only when the object differs from the one whose key was cached by
+// the previous call.  Throws QEXC::Internal if the file is not
+// encrypted.
+std::string
+QPDF::getKeyForObject(int objid, int generation)
+{
+    if (! this->encrypted)
+    {
+	throw QEXC::Internal("request for encryption key in non-encrypted PDF");
+    }
+
+    bool const cache_miss =
+	(objid != this->cached_key_objid) ||
+	(generation != this->cached_key_generation);
+    if (cache_miss)
+    {
+	this->cached_object_encryption_key =
+	    compute_data_key(this->encryption_key, objid, generation);
+	this->cached_key_objid = objid;
+	this->cached_key_generation = generation;
+    }
+
+    return this->cached_object_encryption_key;
+}
+
+// Decrypt `str' in place with the RC4 key of the object it belongs
+// to.  Strings with object id 0 are left untouched.
+void
+QPDF::decryptString(std::string& str, int objid, int generation)
+{
+    if (objid == 0)
+    {
+	return;
+    }
+    std::string key = getKeyForObject(objid, generation);
+
+    // RC4 works in place, so run it over a scratch copy.
+    char* scratch = QUtil::copy_string(str);
+    unsigned int nbytes = str.length();
+    RC4 cipher((unsigned char const*)key.c_str(), key.length());
+    cipher.process((unsigned char*)scratch, nbytes);
+    str = std::string(scratch, nbytes);
+    delete [] scratch;
+}
+
+// Put an RC4 decryption stage, keyed for the given object, in front
+// of `pipeline'.  `pipeline' is updated to point at the new stage,
+// which is also appended to `heap'.
+void
+QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
+		    std::vector<PointerHolder<Pipeline> >& heap)
+{
+    std::string key = getKeyForObject(objid, generation);
+    unsigned char* key_bytes_ptr = (unsigned char*) key.c_str();
+    Pipeline* decryptor = new Pl_RC4("stream decryption", pipeline,
+				     key_bytes_ptr, key.length());
+    pipeline = decryptor;
+    heap.push_back(pipeline);
+}
+
+// Compute the /O and /U strings for the given passwords and
+// parameters.  /O is computed first because the /U computation
+// reads it from the same EncryptionData.
+void
+QPDF::compute_encryption_O_U(
+    char const* user_password, char const* owner_password,
+    int V, int R, int key_len, unsigned long P,
+    std::string const& id1, std::string& O, std::string& U)
+{
+    EncryptionData params(V, R, key_len, P, "", "", id1);
+    params.O = compute_O_value(user_password, owner_password, params);
+    O = params.O;
+    U = compute_U_value(user_password, params);
+}
+
+// Return a reference to the stored user password.
+std::string const&
+QPDF::getUserPassword() const
+{
+    std::string const& password = this->user_password;
+    return password;
+}
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
new file mode 100644
index 00000000..6c0cf3be
--- /dev/null
+++ b/libqpdf/QPDF_linearization.cc
@@ -0,0 +1,2103 @@
+// See doc/linearization.
+
+#include <qpdf/QPDF.hh>
+
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QTC.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/PCRE.hh>
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/Pl_Flate.hh>
+#include <qpdf/Pl_Count.hh>
+#include <qpdf/BitWriter.hh>
+#include <qpdf/BitStream.hh>
+
+#include <iostream>
+#include <algorithm>
+#include <assert.h>
+#include <math.h>
+
+// Read `nitems' values of `bits_wanted' bits each from bit_stream
+// and store the ith value in the given int field of vec[i].  The
+// stream is then advanced to the next byte boundary.
+template <class T>
+static void
+load_vector_int(BitStream& bit_stream, int nitems, std::vector<T>& vec,
+		int bits_wanted, int T::*field)
+{
+    int idx = 0;
+    while (idx < nitems)
+    {
+	T& entry = vec[idx];
+	entry.*field = bit_stream.getBits(bits_wanted);
+	++idx;
+    }
+    // The PDF spec says that each hint table starts at a byte
+    // boundary.  Each "row" actually must start on a byte boundary.
+    bit_stream.skipToNextByte();
+}
+
+// For each of the first nitems1 entries of vec1, read as many
+// `bits_wanted'-bit values as that entry's nitems2 field says and
+// append them to the entry's vec2 vector.  The stream is then
+// advanced to the next byte boundary.
+template <class T>
+static void
+load_vector_vector(BitStream& bit_stream,
+		   int nitems1, std::vector<T>& vec1, int T::*nitems2,
+		   int bits_wanted, std::vector<int> T::*vec2)
+{
+    for (int outer = 0; outer < nitems1; ++outer)
+    {
+	int inner = 0;
+	while (inner < vec1[outer].*nitems2)
+	{
+	    int const value = bit_stream.getBits(bits_wanted);
+	    (vec1[outer].*vec2).push_back(value);
+	    ++inner;
+	}
+    }
+    bit_stream.skipToNextByte();
+}
+
+// Load the linearization data and run the internal checks.  A
+// QPDFExc raised along the way is printed to standard output and
+// reported as failure (false).
+bool
+QPDF::checkLinearization()
+{
+    try
+    {
+	readLinearizationData();
+	return checkLinearizationInternal();
+    }
+    catch (QPDFExc& e)
+    {
+	std::cout << e.what() << std::endl;
+    }
+    return false;
+}
+
+// Look for a linearization parameter dictionary at the start of the
+// file.  Returns true and sets this->lindict when one is found;
+// when /L is an integer it must equal the file size, in which case
+// this->linp.file_size is set as well.
+bool
+QPDF::isLinearized()
+{
+    // If the first object in the file is a dictionary with a suitable
+    // /Linearized key and has an /L key that accurately indicates the
+    // file size, initialize this->lindict and return true.
+
+    // A linearized PDF's first object will be contained within
+    // the first 1024 bytes of the file and will be a dictionary with
+    // a valid /Linearized key.  This routine looks for that and does
+    // no additional validation.
+
+    // The PDF spec says the linearization dictionary must be
+    // completely contained within the first 1024 bytes of the file.
+    // Add a byte for a null terminator.
+    static int const tbuf_size = 1025;
+
+    char* buf = new char[tbuf_size];
+    this->file.seek(0, SEEK_SET);
+    PointerHolder<char> b(buf);	// guarantee deletion
+    // Zero-fill so that the buffer is null-terminated even when the
+    // read below fills all tbuf_size - 1 bytes.
+    memset(buf, '\0', tbuf_size);
+    this->file.read(buf, tbuf_size - 1);
+
+    // Matches "<number> 0 obj <<"; capture 1 is the object number.
+    static PCRE lindict_re("(?s:(\\d+)\\s+0\\s+obj\\s*<<)");
+
+    // NOTE(review): offset is assigned below but not read again in
+    // this function.
+    off_t offset = -1;
+    int lindict_obj = 0;
+    char* p = buf;
+    while (lindict_obj == 0)
+    {
+	PCRE::Match m(lindict_re.match(p));
+	if (m)
+	{
+	    offset = m.getOffset(0) + (p - buf);
+	    lindict_obj = atoi(m.getMatch(1).c_str());
+	    if (m.getMatch(0).find('\n') != std::string::npos)
+	    {
+		QTC::TC("qpdf", "QPDF lindict found newline");
+	    }
+	}
+	else
+	{
+	    // The match is handed a plain char pointer, so it
+	    // presumably stops at the first null byte.  Skip past any
+	    // run of nulls and try again from there; give up once the
+	    // end of the buffer is reached.  The terminator at the
+	    // end of buf ensures memchr always finds a null.
+	    if ((p = (char*)memchr(p, '\0', tbuf_size - (p - buf))) != 0)
+	    {
+		QTC::TC("qpdf", "QPDF lindict null found");
+		while ((p - buf < tbuf_size) && (*p == 0))
+		{
+		    ++p;
+		}
+		if ((p - buf) == tbuf_size)
+		{
+		    break;
+		}
+		QTC::TC("qpdf", "QPDF lindict searching after null");
+	    }
+	}
+    }
+
+    if (lindict_obj == 0)
+    {
+	return false;
+    }
+
+    // The candidate is looked up as generation 0 of the object number
+    // that was matched.
+    QPDFObjectHandle candidate = QPDFObjectHandle::Factory::newIndirect(
+	this, lindict_obj, 0);
+    if (! candidate.isDictionary())
+    {
+	return false;
+    }
+
+    // /Linearized must be a number whose integer part is 1.
+    QPDFObjectHandle linkey = candidate.getKey("/Linearized");
+    if (! (linkey.isNumber() && ((int)floor(linkey.getNumericValue()) == 1)))
+    {
+	return false;
+    }
+
+    // /L, when it is an integer, must equal the size of the file.
+    QPDFObjectHandle L = candidate.getKey("/L");
+    if (L.isInteger())
+    {
+	int Li = L.getIntValue();
+	this->file.seek(0, SEEK_END);
+	if (Li != this->file.tell())
+	{
+	    QTC::TC("qpdf", "QPDF /L mismatch");
+	    return false;
+	}
+	else
+	{
+	    this->linp.file_size = Li;
+	}
+    }
+
+    this->lindict = candidate;
+
+    return true;
+}
+
+// Load the linearization parameter dictionary and the hint tables
+// into this->linp and the hint-table members.
+//
+// This function throws an exception (which is trapped by
+// checkLinearization()) for any errors that prevent loading: the
+// file is not linearized, dictionary entries have the wrong type or
+// count, or a hint table offset is missing or lies outside the
+// hint data.
+void
+QPDF::readLinearizationData()
+{
+    // Hint table parsing code needs at least 32 bits in a long.
+    assert(sizeof(long) >= 4);
+
+    if (! isLinearized())
+    {
+	throw QPDFExc(this->file.getName() + " is not linearized");
+    }
+
+    // /L is read and stored in linp by isLinearized()
+    QPDFObjectHandle H = lindict.getKey("/H");
+    QPDFObjectHandle O = lindict.getKey("/O");
+    QPDFObjectHandle E = lindict.getKey("/E");
+    QPDFObjectHandle N = lindict.getKey("/N");
+    QPDFObjectHandle T = lindict.getKey("/T");
+    QPDFObjectHandle P = lindict.getKey("/P");
+
+    if (! (H.isArray() &&
+	   O.isInteger() &&
+	   E.isInteger() &&
+	   N.isInteger() &&
+	   T.isInteger() &&
+	   (P.isInteger() || P.isNull())))
+    {
+	throw QPDFExc("some keys in linearization dictionary are of "
+		      "the wrong type");
+    }
+
+    // Hint table array: offset length [ offset length ]
+    unsigned int n_H_items = H.getArrayNItems();
+    if (! ((n_H_items == 2) || (n_H_items == 4)))
+    {
+	throw QPDFExc("H has the wrong number of items");
+    }
+
+    std::vector<int> H_items;
+    for (unsigned int i = 0; i < n_H_items; ++i)
+    {
+	QPDFObjectHandle oh(H.getArrayItem(i));
+	if (oh.isInteger())
+	{
+	    H_items.push_back(oh.getIntValue());
+	}
+	else
+	{
+	    throw QPDFExc("some H items are of the wrong type");
+	}
+    }
+
+    // H: hint table offset/length for primary and overflow hint tables
+    int H0_offset = H_items[0];
+    int H0_length = H_items[1];
+    int H1_offset = 0;
+    int H1_length = 0;
+    if (H_items.size() == 4)
+    {
+	// Acrobat doesn't read or write these (as PDF 1.4), so we
+	// don't have a way to generate a test case.
+	// QTC::TC("qpdf", "QPDF overflow hint table");
+	H1_offset = H_items[2];
+	H1_length = H_items[3];
+    }
+
+    // P: first page number
+    int first_page = 0;
+    if (P.isInteger())
+    {
+	QTC::TC("qpdf", "QPDF P present in lindict");
+	first_page = P.getIntValue();
+    }
+    else
+    {
+	QTC::TC("qpdf", "QPDF P absent in lindict");
+    }
+
+    // Store linearization parameter data
+
+    // file_size initialized by isLinearized()
+    this->linp.first_page_object = O.getIntValue();
+    this->linp.first_page_end = E.getIntValue();
+    this->linp.npages = N.getIntValue();
+    this->linp.xref_zero_offset = T.getIntValue();
+    this->linp.first_page = first_page;
+    this->linp.H_offset = H0_offset;
+    this->linp.H_length = H0_length;
+
+    // Read hint streams; both are written to the same buffer
+
+    Pl_Buffer pb("hint buffer");
+    QPDFObjectHandle H0 = readHintStream(pb, H0_offset, H0_length);
+    if (H1_offset)
+    {
+	(void) readHintStream(pb, H1_offset, H1_length);
+    }
+
+    // PDF 1.4 hint tables that we ignore:
+
+    //  /T    thumbnail
+    //  /A    thread information
+    //  /E    named destination
+    //  /V    interactive form
+    //  /I    information dictionary
+    //  /C    logical structure
+    //  /L    page label
+
+    // Individual hint table offsets
+    QPDFObjectHandle HS = H0.getKey("/S"); // shared object
+    QPDFObjectHandle HO = H0.getKey("/O"); // outline
+
+    PointerHolder<Buffer> hbp = pb.getBuffer();
+    Buffer* hb = hbp.getPointer();
+    unsigned char const* h_buf = hb->getBuffer();
+    int h_size = hb->getSize();
+
+    readHPageOffset(BitStream(h_buf, h_size));
+
+    // The offsets below come straight from the file and are used as
+    // a pointer offset and size into the hint buffer, so reject
+    // anything that is not an integer inside the buffer.
+    if (! HS.isInteger())
+    {
+	throw QPDFExc("/S (shared object) hint table offset is missing "
+		      "or of the wrong type");
+    }
+    int HSi = HS.getIntValue();
+    if ((HSi < 0) || (HSi >= h_size))
+    {
+	throw QPDFExc("/S (shared object) hint table offset is out "
+		      "of bounds");
+    }
+    readHSharedObject(BitStream(h_buf + HSi, h_size - HSi));
+
+    // The outline hint table is only read when /O is an integer.
+    if (HO.isInteger())
+    {
+	int HOi = HO.getIntValue();
+	if ((HOi < 0) || (HOi >= h_size))
+	{
+	    throw QPDFExc("/O (outline) hint table offset is out "
+			  "of bounds");
+	}
+	readHGeneric(BitStream(h_buf + HOi, h_size - HOi),
+		     this->outline_hints);
+    }
+}
+
+QPDFObjectHandle
+QPDF::readHintStream(Pipeline& pl, off_t offset, size_t length)
+{
+    // Read the object found at the given file offset, require it to
+    // be a stream, check that offset + length falls within the
+    // recorded end-of-object range, pipe the stream data to pl, and
+    // return the stream's dictionary.  Throws QPDFExc if the object
+    // is not a stream or if the length check fails.
+    int hint_obj;
+    int hint_gen;
+    QPDFObjectHandle hint_stream =
+        readObjectAtOffset(offset, 0, 0, hint_obj, hint_gen);
+    ObjCache& stream_cache = this->obj_cache[ObjGen(hint_obj, hint_gen)];
+    off_t earliest_end = stream_cache.end_before_space;
+    off_t latest_end = stream_cache.end_after_space;
+    if (! hint_stream.isStream())
+    {
+        throw QPDFExc("hint table is not a stream");
+    }
+
+    QPDFObjectHandle Hdict = hint_stream.getDict();
+
+    // The specification requires every object in the linearization
+    // parameter dictionary to be direct, but some versions of
+    // Acrobat make /Length indirect, place it right after the
+    // stream, and extend the advertised length to cover it.  When
+    // that happens, the end positions to compare against are those
+    // of the length object rather than those of the stream.
+    QPDFObjectHandle length_obj = Hdict.getKey("/Length");
+    if (! length_obj.isIndirect())
+    {
+        QTC::TC("qpdf", "QPDF hint table length direct");
+    }
+    else
+    {
+        QTC::TC("qpdf", "QPDF hint table length indirect");
+        // Force resolution before looking the object up in the cache
+        (void) length_obj.getIntValue();
+        ObjGen length_og(length_obj.getObjectID(),
+                         length_obj.getGeneration());
+        ObjCache& length_cache = this->obj_cache[length_og];
+        earliest_end = length_cache.end_before_space;
+        latest_end = length_cache.end_after_space;
+    }
+
+    off_t computed_end = offset + length;
+    bool const end_in_range =
+        (computed_end >= earliest_end) && (computed_end <= latest_end);
+    if (! end_in_range)
+    {
+        std::cout << "expected = " << computed_end
+                  << "; actual = " << earliest_end << ".."
+                  << latest_end << std::endl;
+        throw QPDFExc("hint table length mismatch");
+    }
+
+    hint_stream.pipeStreamData(&pl, true, false, false);
+    return Hdict;
+}
+
+void
+QPDF::readHPageOffset(BitStream h)
+{
+    // Decode the page offset hint table from the bit stream into
+    // this->page_offset_hints.  Item numbers in the comments refer
+    // to the PDF spec, version 1.4.  The reads must stay in exactly
+    // this order because each one consumes the next bits of h.
+
+    HPageOffset& hints = this->page_offset_hints;
+
+    // Header
+    hints.min_nobjects = h.getBits(32);                 // item 1
+    hints.first_page_offset = h.getBits(32);            // item 2
+    hints.nbits_delta_nobjects = h.getBits(16);         // item 3
+    hints.min_page_length = h.getBits(32);              // item 4
+    hints.nbits_delta_page_length = h.getBits(16);      // item 5
+    hints.min_content_offset = h.getBits(32);           // item 6
+    hints.nbits_delta_content_offset = h.getBits(16);   // item 7
+    hints.min_content_length = h.getBits(32);           // item 8
+    hints.nbits_delta_content_length = h.getBits(16);   // item 9
+    hints.nbits_nshared_objects = h.getBits(16);        // item 10
+    hints.nbits_shared_identifier = h.getBits(16);      // item 11
+    hints.nbits_shared_numerator = h.getBits(16);       // item 12
+    hints.shared_denominator = h.getBits(16);           // item 13
+
+    // One entry per page as counted by the linearization parameter
+    // dictionary.  Any previous entries are discarded.
+    unsigned int page_count = this->linp.npages;
+    hints.entries = std::vector<HPageOffsetEntry>(page_count);
+    std::vector<HPageOffsetEntry>& per_page = hints.entries;
+
+    // Each field is stored for all pages before the next field
+    // begins, using the bit widths read from the header.
+    load_vector_int(h, page_count, per_page,
+                    hints.nbits_delta_nobjects,
+                    &HPageOffsetEntry::delta_nobjects);
+    load_vector_int(h, page_count, per_page,
+                    hints.nbits_delta_page_length,
+                    &HPageOffsetEntry::delta_page_length);
+    load_vector_int(h, page_count, per_page,
+                    hints.nbits_nshared_objects,
+                    &HPageOffsetEntry::nshared_objects);
+    load_vector_vector(h, page_count, per_page,
+                       &HPageOffsetEntry::nshared_objects,
+                       hints.nbits_shared_identifier,
+                       &HPageOffsetEntry::shared_identifiers);
+    load_vector_vector(h, page_count, per_page,
+                       &HPageOffsetEntry::nshared_objects,
+                       hints.nbits_shared_numerator,
+                       &HPageOffsetEntry::shared_numerators);
+    load_vector_int(h, page_count, per_page,
+                    hints.nbits_delta_content_offset,
+                    &HPageOffsetEntry::delta_content_offset);
+    load_vector_int(h, page_count, per_page,
+                    hints.nbits_delta_content_length,
+                    &HPageOffsetEntry::delta_content_length);
+}
+
+void
+QPDF::readHSharedObject(BitStream h)
+{
+    // Decode the shared object hint table from the bit stream into
+    // this->shared_object_hints.  The reads must stay in exactly
+    // this order because each one consumes the next bits of h.
+
+    HSharedObject& hints = this->shared_object_hints;
+
+    // Header
+    hints.first_shared_obj = h.getBits(32);             // item 1
+    hints.first_shared_offset = h.getBits(32);          // item 2
+    hints.nshared_first_page = h.getBits(32);           // item 3
+    hints.nshared_total = h.getBits(32);                // item 4
+    hints.nbits_nobjects = h.getBits(16);               // item 5
+    hints.min_group_length = h.getBits(32);             // item 6
+    hints.nbits_delta_group_length = h.getBits(16);     // item 7
+
+    QTC::TC("qpdf", "QPDF lin nshared_total > nshared_first_page",
+            (hints.nshared_total > hints.nshared_first_page) ? 1 : 0);
+
+    // One entry per shared object group.  Any previous entries are
+    // discarded.
+    int group_count = hints.nshared_total;
+    hints.entries = std::vector<HSharedObjectEntry>(group_count);
+    std::vector<HSharedObjectEntry>& groups = hints.entries;
+
+    load_vector_int(h, group_count, groups,
+                    hints.nbits_delta_group_length,
+                    &HSharedObjectEntry::delta_group_length);
+    load_vector_int(h, group_count, groups,
+                    1, &HSharedObjectEntry::signature_present);
+
+    // Step over the 128-bit MD5 hash of every group that claims to
+    // have one.  Acrobat does not support these, so they should
+    // probably never be there.  We have no test case for this.
+    for (int g = 0; g < group_count; ++g)
+    {
+        if (! groups[g].signature_present)
+        {
+            continue;
+        }
+        int words_left = 4;
+        while (words_left > 0)
+        {
+            (void) h.getBits(32);
+            --words_left;
+        }
+    }
+
+    load_vector_int(h, group_count, groups,
+                    hints.nbits_nobjects,
+                    &HSharedObjectEntry::nobjects_minus_one);
+}
+
+// Read the four 32-bit header fields of a generic hint table from h
+// into t.  The reads are order-dependent: each getBits call consumes
+// the next 32 bits of the stream.
+void
+QPDF::readHGeneric(BitStream h, HGeneric& t)
+{
+    t.first_object = h.getBits(32);			    // item 1
+    t.first_object_offset = h.getBits(32);		    // item 2
+    t.nobjects = h.getBits(32);				    // item 3
+    t.group_length = h.getBits(32);			    // item 4
+}
+
+bool
+QPDF::checkLinearizationInternal()
+{
+    // Compare the linearization parameter dictionary and the hint
+    // tables that were read from the file against values computed
+    // from the file's actual contents.  Every problem found is
+    // printed to standard output as "ERROR: ..." or "WARNING: ...".
+    // Returns true only if there were neither errors nor warnings.
+    // Throws QPDFExc if the file has no pages, since nothing below
+    // can be checked in that case.
+
+    // All comments referring to the PDF spec refer to the spec for
+    // version 1.4.
+
+    std::list<std::string> errors;
+    std::list<std::string> warnings;
+
+    // Check all values in linearization parameter dictionary
+
+    LinParameters& p = this->linp;
+
+    // L: file size in bytes -- checked by isLinearized
+
+    // O: object number of first page
+    std::vector<QPDFObjectHandle> const& pages = getAllPages();
+    if (pages.empty())
+    {
+        // pages[0] is used below and by the hint table checks.
+        throw QPDFExc("linearized file has no pages");
+    }
+    if (p.first_page_object != pages[0].getObjectID())
+    {
+        QTC::TC("qpdf", "QPDF err /O mismatch");
+        errors.push_back("first page object (/O) mismatch");
+    }
+
+    // N: number of pages
+    int npages = pages.size();
+    if (p.npages != npages)
+    {
+        // Not tested in the test suite
+        errors.push_back("page count (/N) mismatch");
+    }
+
+    // Page dictionaries must not live inside object streams (xref
+    // entry type 2).
+    for (int i = 0; i < npages; ++i)
+    {
+        QPDFObjectHandle const& page = pages[i];
+        ObjGen og(page.getObjectID(), page.getGeneration());
+        if (this->xref_table[og].getType() == 2)
+        {
+            errors.push_back("page dictionary for page " +
+                             QUtil::int_to_string(i) + " is compressed");
+        }
+    }
+
+    // T: offset of whitespace character preceding xref entry for object 0
+    this->file.seek(p.xref_zero_offset, SEEK_SET);
+    while (1)
+    {
+        // Start from a known non-whitespace value so that a read that
+        // stores nothing (for example when /T points at or beyond
+        // the end of the file) ends the loop instead of testing an
+        // indeterminate character.
+        char ch = '\0';
+        this->file.read(&ch, 1);
+        if (! ((ch == ' ') || (ch == '\r') || (ch == '\n')))
+        {
+            this->file.seek(-1, SEEK_CUR);
+            break;
+        }
+    }
+    if (this->file.tell() != this->first_xref_item_offset)
+    {
+        QTC::TC("qpdf", "QPDF err /T mismatch");
+        errors.push_back("space before first xref item (/T) mismatch "
+                         "(computed = " +
+                         QUtil::int_to_string(this->first_xref_item_offset) +
+                         "; file = " + QUtil::int_to_string(this->file.tell()));
+    }
+
+    // P: first page number -- Implementation note 124 says Acrobat
+    // ignores this value, so we will too.
+
+    // Check numbering of compressed objects in each xref section.
+    // For linearized files, all compressed objects are supposed to be
+    // at the end of the containing xref section if any object streams
+    // are in use.
+
+    if (this->uncompressed_after_compressed)
+    {
+        errors.push_back("linearized file contains an uncompressed object"
+                         " after a compressed one in a cross-reference stream");
+    }
+
+    // Further checking requires optimization and order calculation.
+    // Don't allow optimization to make changes.  If it has to, then
+    // the file is not properly linearized.  We use the xref table to
+    // figure out which objects are compressed and which are
+    // uncompressed.
+    { // local scope
+        std::map<int, int> object_stream_data;
+        for (std::map<ObjGen, QPDFXRefEntry>::const_iterator iter =
+                 this->xref_table.begin();
+             iter != this->xref_table.end(); ++iter)
+        {
+            ObjGen const& og = (*iter).first;
+            QPDFXRefEntry const& entry = (*iter).second;
+            if (entry.getType() == 2)
+            {
+                object_stream_data[og.obj] = entry.getObjStreamNumber();
+            }
+        }
+        optimize(object_stream_data, false);
+        calculateLinearizationData(object_stream_data);
+    }
+
+    // E: offset of end of first page -- Implementation note 123 says
+    // Acrobat includes one extra object here by mistake.  pdlin fails
+    // to place thumbnail images in section 9, so when thumbnails are
+    // present, it also gets the wrong value for /E.  It also doesn't
+    // count outlines here when it should even though it places them
+    // in part 6.  This code fails to put thread information
+    // dictionaries in part 9, so it actually gets the wrong value for
+    // E when threads are present.  In that case, it would probably
+    // agree with pdlin.  As of this writing, the test suite doesn't
+    // contain any files with threads.
+
+    // min_E and max_E are the largest end_before_space and the
+    // largest end_after_space over all part 6 objects; /E is
+    // accepted anywhere in that range.
+    assert(! this->part6.empty());
+    int min_E = -1;
+    int max_E = -1;
+    for (std::vector<QPDFObjectHandle>::iterator iter = this->part6.begin();
+         iter != this->part6.end(); ++iter)
+    {
+        ObjGen og((*iter).getObjectID(), (*iter).getGeneration());
+        // All objects have to have been dereferenced to be classified.
+        assert(this->obj_cache.count(og) > 0);
+        ObjCache const& oc = this->obj_cache[og];
+        min_E = std::max(min_E, (int)oc.end_before_space);
+        max_E = std::max(max_E, (int)oc.end_after_space);
+    }
+    if ((p.first_page_end < min_E) || (p.first_page_end > max_E))
+    {
+        QTC::TC("qpdf", "QPDF warn /E mismatch");
+        warnings.push_back("end of first page section (/E) mismatch: /E = " +
+                           QUtil::int_to_string(p.first_page_end) +
+                           "; computed = " +
+                           QUtil::int_to_string(min_E) + ".." +
+                           QUtil::int_to_string(max_E));
+    }
+
+    // Check hint tables.  The shared object check fills in
+    // shared_idx_to_obj, which the page offset check then consumes,
+    // so the order of these calls matters.
+
+    std::map<int, int> shared_idx_to_obj;
+    checkHSharedObject(errors, warnings, pages, shared_idx_to_obj);
+    checkHPageOffset(errors, warnings, pages, shared_idx_to_obj);
+    checkHOutlines(warnings);
+
+    // Report errors
+
+    bool result = true;
+
+    if (! errors.empty())
+    {
+        result = false;
+        for (std::list<std::string>::iterator iter = errors.begin();
+             iter != errors.end(); ++iter)
+        {
+            std::cout << "ERROR: " << (*iter) << std::endl;
+        }
+    }
+
+    // Warnings also cause a false return value.
+    if (! warnings.empty())
+    {
+        result = false;
+        for (std::list<std::string>::iterator iter = warnings.begin();
+             iter != warnings.end(); ++iter)
+        {
+            std::cout << "WARNING: " << (*iter) << std::endl;
+        }
+    }
+
+    return result;
+}
+
+int
+QPDF::maxEnd(ObjUser const& ou)
+{
+    // Return the largest end_after_space value recorded in the object
+    // cache among the objects associated with the given user, or 0 if
+    // the user has no objects.  The user must already be present in
+    // obj_user_to_objects and each of its objects in obj_cache.
+    assert(this->obj_user_to_objects.count(ou) > 0);
+    std::set<ObjGen> const& members = this->obj_user_to_objects[ou];
+
+    int furthest = 0;
+    std::set<ObjGen>::const_iterator cur = members.begin();
+    while (cur != members.end())
+    {
+        assert(this->obj_cache.count(*cur) > 0);
+        int const candidate =
+            static_cast<int>(this->obj_cache[*cur].end_after_space);
+        if (furthest < candidate)
+        {
+            furthest = candidate;
+        }
+        ++cur;
+    }
+    return furthest;
+}
+
+int
+QPDF::getLinearizationOffset(ObjGen const& og)
+{
+    // Return the file offset to use for the given object based on
+    // its xref table entry.  A type 1 entry yields its own offset.
+    // A type 2 (compressed) entry yields the offset of the object
+    // stream that contains it, found by recursing on that stream's
+    // object number with generation 0.  Any other entry type causes
+    // a QPDFExc to be thrown.
+    QPDFXRefEntry entry = this->xref_table[og];
+    int const entry_type = entry.getType();
+
+    if (entry_type == 1)
+    {
+        return entry.getOffset();
+    }
+    if (entry_type == 2)
+    {
+        ObjGen const container(entry.getObjStreamNumber(), 0);
+        return getLinearizationOffset(container);
+    }
+
+    throw QPDFExc(
+        this->file.getName(), 0,
+        "getLinearizationOffset called for xref entry not of type 1 or 2");
+}
+
+QPDFObjectHandle
+QPDF::getUncompressedObject(QPDFObjectHandle& obj,
+                            std::map<int, int> const& object_stream_data)
+{
+    // object_stream_data maps an object number to another object
+    // number (callers in this file fill it with the containing
+    // object stream's number).  If obj is null or has no entry in
+    // the map, return obj itself; otherwise return an indirect
+    // handle to the mapped object with generation 0.
+    if (obj.isNull())
+    {
+        return obj;
+    }
+
+    std::map<int, int>::const_iterator found =
+        object_stream_data.find(obj.getObjectID());
+    if (found == object_stream_data.end())
+    {
+        return obj;
+    }
+
+    return objGenToIndirect(ObjGen((*found).second, 0));
+}
+
+int
+QPDF::lengthNextN(int first_object, int n,
+                  std::list<std::string>& errors)
+{
+    // Add up the lengths of n consecutively numbered objects
+    // (generation 0) starting with first_object.  An object's length
+    // is its cached end_after_space minus its linearization offset.
+    // Objects without an xref table entry contribute nothing and are
+    // reported through errors.
+    int total = 0;
+    int const past_last = first_object + n;
+    for (int objid = first_object; objid < past_last; ++objid)
+    {
+        ObjGen og(objid, 0);
+        if (this->xref_table.count(og) == 0)
+        {
+            errors.push_back(
+                "no xref table entry for " +
+                QUtil::int_to_string(objid) + " 0");
+            continue;
+        }
+
+        assert(this->obj_cache.count(og) > 0);
+        total += this->obj_cache[og].end_after_space -
+            getLinearizationOffset(og);
+    }
+    return total;
+}
+
+void
+QPDF::checkHPageOffset(std::list<std::string>& errors,
+                       std::list<std::string>& warnings,
+                       std::vector<QPDFObjectHandle> const& pages,
+                       std::map<int, int>& shared_idx_to_obj)
+{
+    // Compare the page offset hint table read from the file
+    // (page_offset_hints) with the computed data
+    // (c_page_offset_data) for each page, appending any
+    // discrepancies to errors or warnings.  shared_idx_to_obj maps a
+    // shared object index from the hint table to an object number;
+    // it is expected to have been filled in by checkHSharedObject.
+
+    // Implementation note 126 says Acrobat always sets
+    // delta_content_offset and delta_content_length in the page
+    // offset header dictionary to 0.  It also states that
+    // min_content_offset in the per-page information is always 0,
+    // which is an incorrect value.
+
+    // Implementation note 127 explains that Acrobat always sets item
+    // 8 (min_content_length) to zero, item 9
+    // (nbits_delta_content_length) to the value of item 5
+    // (nbits_delta_page_length), and item 7 of each per-page hint
+    // table (delta_content_length) to item 2 (delta_page_length) of
+    // that entry.  Acrobat ignores these values when reading files.
+
+    // Empirically, it also seems that Acrobat sometimes puts items
+    // under a page's /Resources dictionary in with shared objects
+    // even when they are private.
+
+    if (pages.empty())
+    {
+        // pages[0] is needed below; there is nothing to compare.
+        errors.push_back("page offset hint table: file has no pages");
+        return;
+    }
+
+    unsigned int npages = pages.size();
+
+    // The hint table holds one entry per page as declared by /N,
+    // which may be smaller than the real page count in a damaged
+    // file.  Never index past the entries that were actually read.
+    unsigned int const n_hint_entries =
+        static_cast<unsigned int>(this->page_offset_hints.entries.size());
+    if (n_hint_entries < npages)
+    {
+        errors.push_back(
+            "page offset hint table has " +
+            QUtil::int_to_string(n_hint_entries) +
+            " entries but the file has " +
+            QUtil::int_to_string(npages) + " pages");
+        npages = n_hint_entries;
+    }
+
+    int table_offset = adjusted_offset(
+        this->page_offset_hints.first_page_offset);
+    ObjGen first_page_og(pages[0].getObjectID(), pages[0].getGeneration());
+    assert(this->xref_table.count(first_page_og) > 0);
+    int offset = getLinearizationOffset(first_page_og);
+    if (table_offset != offset)
+    {
+        warnings.push_back("first page object offset mismatch");
+    }
+
+    for (unsigned int pageno = 0; pageno < npages; ++pageno)
+    {
+        ObjGen page_og(pages[pageno].getObjectID(),
+                       pages[pageno].getGeneration());
+        int first_object = page_og.obj;
+        assert(this->xref_table.count(page_og) > 0);
+        offset = getLinearizationOffset(page_og);
+
+        HPageOffsetEntry& he = this->page_offset_hints.entries[pageno];
+        // NOTE(review): assumes c_page_offset_data.entries has one
+        // entry per actual page -- confirm where it is computed.
+        CHPageOffsetEntry& ce = this->c_page_offset_data.entries[pageno];
+        int h_nobjects = he.delta_nobjects +
+            this->page_offset_hints.min_nobjects;
+        if (h_nobjects != ce.nobjects)
+        {
+            // This happens with pdlin when there are thumbnails.
+            warnings.push_back(
+                "object count mismatch for page " +
+                QUtil::int_to_string(pageno) + ": hint table = " +
+                QUtil::int_to_string(h_nobjects) + "; computed = " +
+                QUtil::int_to_string(ce.nobjects));
+        }
+
+        // Use value for number of objects in hint table rather than
+        // computed value if there is a discrepancy.
+        int length = lengthNextN(first_object, h_nobjects, errors);
+        int h_length = he.delta_page_length +
+            this->page_offset_hints.min_page_length;
+        if (length != h_length)
+        {
+            // This condition almost certainly indicates a bad hint
+            // table or a bug in this code.
+            errors.push_back(
+                "page length mismatch for page " +
+                QUtil::int_to_string(pageno) + ": hint table = " +
+                QUtil::int_to_string(h_length) + "; computed length = " +
+                QUtil::int_to_string(length) + " (offset = " +
+                QUtil::int_to_string(offset) + ")");
+        }
+
+        offset += h_length;
+
+        // Translate shared object indexes to object numbers.
+        std::set<int> hint_shared;
+        std::set<int> computed_shared;
+
+        if ((pageno == 0) && (he.nshared_objects > 0))
+        {
+            // pdlin and Acrobat both do this even though the spec
+            // states clearly and unambiguously that they should not.
+            warnings.push_back("page 0 has shared identifier entries");
+        }
+
+        for (int i = 0; i < he.nshared_objects; ++i)
+        {
+            int idx = he.shared_identifiers[i];
+            // The identifier comes from the file, so an unknown
+            // value is a data error rather than a program bug.
+            std::map<int, int>::const_iterator known =
+                shared_idx_to_obj.find(idx);
+            if (known == shared_idx_to_obj.end())
+            {
+                errors.push_back(
+                    "page " + QUtil::int_to_string(pageno) +
+                    ": shared object identifier " +
+                    QUtil::int_to_string(idx) +
+                    " is not in the shared object hint table");
+                continue;
+            }
+            hint_shared.insert((*known).second);
+        }
+
+        for (int i = 0; i < ce.nshared_objects; ++i)
+        {
+            int idx = ce.shared_identifiers[i];
+            assert(idx < this->c_shared_object_data.nshared_total);
+            int obj = this->c_shared_object_data.entries[idx].object;
+            computed_shared.insert(obj);
+        }
+
+        for (std::set<int>::iterator iter = hint_shared.begin();
+             iter != hint_shared.end(); ++iter)
+        {
+            if (! computed_shared.count(*iter))
+            {
+                // pdlin puts thumbnails here even though it shouldn't
+                warnings.push_back(
+                    "page " + QUtil::int_to_string(pageno) +
+                    ": shared object " + QUtil::int_to_string(*iter) +
+                    ": in hint table but not computed list");
+            }
+        }
+
+        for (std::set<int>::iterator iter = computed_shared.begin();
+             iter != computed_shared.end(); ++iter)
+        {
+            if (! hint_shared.count(*iter))
+            {
+                // Acrobat does not put some things including at least
+                // built-in fonts and procsets here, at least in some
+                // cases.
+                warnings.push_back(
+                    "page " + QUtil::int_to_string(pageno) +
+                    ": shared object " + QUtil::int_to_string(*iter) +
+                    ": in computed list but not hint table");
+            }
+        }
+    }
+}
+
+void
+QPDF::checkHSharedObject(std::list<std::string>& errors,
+                         std::list<std::string>& warnings,
+                         std::vector<QPDFObjectHandle> const& pages,
+                         std::map<int, int>& idx_to_obj)
+{
+    // Check the shared object hint table read from the file against
+    // the file's actual objects, appending problems to errors, and
+    // record in idx_to_obj the object number that each shared object
+    // index refers to.
+
+    // Implementation note 125 says shared object groups always
+    // contain only one object.  Implementation note 128 says that
+    // Acrobat always sets nbits_nobjects to zero.  Implementation
+    // note 130 says that Acrobat does not support more than one
+    // shared object per group.  These are all consistent.
+
+    // Implementation note 129 states that MD5 signatures are not
+    // implemented in Acrobat, so signature_present must always be
+    // zero.
+
+    // Implementation note 131 states that first_shared_obj and
+    // first_shared_offset have meaningless values for single-page
+    // files.
+
+    // Empirically, Acrobat and pdlin generate incorrect values for
+    // these whenever there are no shared objects not referenced by
+    // the first page (i.e., nshared_total == nshared_first_page).
+
+    HSharedObject& hints = this->shared_object_hints;
+    if (hints.nshared_total < hints.nshared_first_page)
+    {
+        errors.push_back("shared object hint table: ntotal < nfirst_page");
+        return;
+    }
+
+    // The first nshared_first_page groups are consecutive objects
+    // starting with the first page object.  The remaining groups are
+    // consecutive starting from the first_shared_obj object.
+    int next_object = pages[0].getObjectID();
+    for (int idx = 0; idx < hints.nshared_total; ++idx)
+    {
+        if (idx == hints.nshared_first_page)
+        {
+            // Crossing from the first page's groups to the rest:
+            // validate the starting object number and offset given
+            // by the hint table, then continue from there.
+            QTC::TC("qpdf", "QPDF lin check shared past first page");
+            if (! this->part8.empty())
+            {
+                int part8_first = this->part8[0].getObjectID();
+                if (part8_first != hints.first_shared_obj)
+                {
+                    errors.push_back(
+                        "first shared object number mismatch: "
+                        "hint table = " +
+                        QUtil::int_to_string(hints.first_shared_obj) +
+                        "; computed = " +
+                        QUtil::int_to_string(part8_first));
+                }
+            }
+            else
+            {
+                errors.push_back(
+                    "part 8 is empty but nshared_total > "
+                    "nshared_first_page");
+            }
+
+            next_object = hints.first_shared_obj;
+
+            ObjGen first_og(next_object, 0);
+            assert(this->xref_table.count(first_og) > 0);
+            int computed_offset = getLinearizationOffset(first_og);
+            int hinted_offset = adjusted_offset(hints.first_shared_offset);
+            if (computed_offset != hinted_offset)
+            {
+                errors.push_back(
+                    "first shared object offset mismatch: hint table = " +
+                    QUtil::int_to_string(hinted_offset) + "; computed = " +
+                    QUtil::int_to_string(computed_offset));
+            }
+        }
+
+        idx_to_obj[idx] = next_object;
+
+        HSharedObjectEntry& group = hints.entries[idx];
+        int group_size = group.nobjects_minus_one + 1;
+        int computed_length = lengthNextN(next_object, group_size, errors);
+        int hinted_length =
+            hints.min_group_length + group.delta_group_length;
+        if (computed_length != hinted_length)
+        {
+            errors.push_back(
+                "shared object " + QUtil::int_to_string(idx) +
+                " length mismatch: hint table = " +
+                QUtil::int_to_string(hinted_length) + "; computed = " +
+                QUtil::int_to_string(computed_length));
+        }
+
+        next_object += group_size;
+    }
+}
+
+void
+QPDF::checkHOutlines(std::list<std::string>& warnings)
+{
+    // Compare the outlines hint table read from the file
+    // (outline_hints) with the computed outline data
+    // (c_outline_data), appending any discrepancy to warnings.
+
+    // Empirically, Acrobat generates the correct value for the object
+    // number but incorrectly stores the next object number's offset
+    // as the offset, at least when outlines appear in part 6.  It
+    // also generates an incorrect value for length (specifically, the
+    // length that would cover the correct number of objects from the
+    // wrong starting place).  pdlin appears to generate correct
+    // values in those cases.
+
+    if (this->c_outline_data.nobjects != this->outline_hints.nobjects)
+    {
+        warnings.push_back("incorrect object count in outline hint table");
+        return;
+    }
+
+    if (this->c_outline_data.nobjects == 0)
+    {
+        // No outlines on either side: nothing more to compare.
+        return;
+    }
+
+    if (this->c_outline_data.first_object !=
+        this->outline_hints.first_object)
+    {
+        warnings.push_back("incorrect first object number in outline "
+                           "hints table.");
+        return;
+    }
+
+    // Check length and offset.  Acrobat gets these wrong.
+    QPDFObjectHandle outlines = getRoot().getKey("/Outlines");
+    ObjGen outlines_og(outlines.getObjectID(), outlines.getGeneration());
+    assert(this->xref_table.count(outlines_og) > 0);
+    int computed_offset = getLinearizationOffset(outlines_og);
+    ObjUser outlines_user(ObjUser::ou_root_key, "/Outlines");
+    int computed_length = maxEnd(outlines_user) - computed_offset;
+
+    int hinted_offset =
+        adjusted_offset(this->outline_hints.first_object_offset);
+    if (computed_offset != hinted_offset)
+    {
+        warnings.push_back(
+            "incorrect offset in outlines table: hint table = " +
+            QUtil::int_to_string(hinted_offset) +
+            "; computed = " + QUtil::int_to_string(computed_offset));
+    }
+
+    int hinted_length = this->outline_hints.group_length;
+    if (computed_length != hinted_length)
+    {
+        warnings.push_back(
+            "incorrect length in outlines table: hint table = " +
+            QUtil::int_to_string(hinted_length) +
+            "; computed = " + QUtil::int_to_string(computed_length));
+    }
+}
+
+void
+QPDF::showLinearizationData()
+{
+    // Read, check, and then dump the file's linearization data.  A
+    // QPDFExc raised by any of the three steps ends the sequence and
+    // has its message printed to standard output instead of being
+    // propagated to the caller.
+    try
+    {
+        readLinearizationData();
+        // The check prints its own diagnostics; its result is not
+        // used here.
+        checkLinearizationInternal();
+        dumpLinearizationDataInternal();
+    }
+    catch (QPDFExc& exc)
+    {
+        std::cout << exc.what() << std::endl;
+    }
+}
+
+void
+QPDF::dumpLinearizationDataInternal()
+{
+    // Print the linearization parameters followed by the page
+    // offset and shared object hint tables to standard output.  The
+    // outlines hint table is printed only when it has objects.
+    LinParameters const& lp = this->linp;
+
+    std::cout << this->file.getName() << ": linearization data:"
+              << std::endl;
+    std::cout << std::endl;
+
+    std::cout << "file_size: " << lp.file_size << std::endl;
+    std::cout << "first_page_object: " << lp.first_page_object << std::endl;
+    std::cout << "first_page_end: " << lp.first_page_end << std::endl;
+    std::cout << "npages: " << lp.npages << std::endl;
+    std::cout << "xref_zero_offset: " << lp.xref_zero_offset << std::endl;
+    std::cout << "first_page: " << lp.first_page << std::endl;
+    std::cout << "H_offset: " << lp.H_offset << std::endl;
+    std::cout << "H_length: " << lp.H_length << std::endl;
+    std::cout << std::endl;
+
+    std::cout << "Page Offsets Hint Table" << std::endl;
+    std::cout << std::endl;
+    dumpHPageOffset();
+
+    std::cout << std::endl;
+    std::cout << "Shared Objects Hint Table" << std::endl;
+    std::cout << std::endl;
+    dumpHSharedObject();
+
+    if (this->outline_hints.nobjects > 0)
+    {
+        std::cout << std::endl;
+        std::cout << "Outlines Hint Table" << std::endl;
+        std::cout << std::endl;
+        dumpHGeneric(this->outline_hints);
+    }
+}
+
+int
+QPDF::adjusted_offset(int offset)
+{
+    // Hint table location values disregard the hint table itself,
+    // so any offset at or beyond H_offset has to be increased by
+    // H_length.  Smaller offsets are returned unchanged.
+    bool const at_or_past_hints = (offset >= this->linp.H_offset);
+    return at_or_past_hints ? (offset + this->linp.H_length) : offset;
+}
+
+
+void
+QPDF::dumpHPageOffset()
+{
+    // Print the page offset hint table to standard output: first the
+    // header fields, then one section per page.  first_page_offset
+    // is shown after adjustment by adjusted_offset(); per-page
+    // values are shown as delta plus the header's minimum.
+    HPageOffset& hints = this->page_offset_hints;
+
+    std::cout << "min_nobjects: " << hints.min_nobjects << std::endl;
+    std::cout << "first_page_offset: "
+              << adjusted_offset(hints.first_page_offset) << std::endl;
+    std::cout << "nbits_delta_nobjects: "
+              << hints.nbits_delta_nobjects << std::endl;
+    std::cout << "min_page_length: " << hints.min_page_length << std::endl;
+    std::cout << "nbits_delta_page_length: "
+              << hints.nbits_delta_page_length << std::endl;
+    std::cout << "min_content_offset: "
+              << hints.min_content_offset << std::endl;
+    std::cout << "nbits_delta_content_offset: "
+              << hints.nbits_delta_content_offset << std::endl;
+    std::cout << "min_content_length: "
+              << hints.min_content_length << std::endl;
+    std::cout << "nbits_delta_content_length: "
+              << hints.nbits_delta_content_length << std::endl;
+    std::cout << "nbits_nshared_objects: "
+              << hints.nbits_nshared_objects << std::endl;
+    std::cout << "nbits_shared_identifier: "
+              << hints.nbits_shared_identifier << std::endl;
+    std::cout << "nbits_shared_numerator: "
+              << hints.nbits_shared_numerator << std::endl;
+    std::cout << "shared_denominator: "
+              << hints.shared_denominator << std::endl;
+
+    for (int pageno = 0; pageno < this->linp.npages; ++pageno)
+    {
+        HPageOffsetEntry& entry = hints.entries[pageno];
+        std::cout << "Page " << pageno << ":" << std::endl;
+        std::cout << "  nobjects: "
+                  << entry.delta_nobjects + hints.min_nobjects << std::endl;
+        std::cout << "  length: "
+                  << entry.delta_page_length + hints.min_page_length
+                  << std::endl;
+        // content offset is relative to page, not file
+        std::cout << "  content_offset: "
+                  << entry.delta_content_offset + hints.min_content_offset
+                  << std::endl;
+        std::cout << "  content_length: "
+                  << entry.delta_content_length + hints.min_content_length
+                  << std::endl;
+        std::cout << "  nshared_objects: "
+                  << entry.nshared_objects << std::endl;
+
+        for (int s = 0; s < entry.nshared_objects; ++s)
+        {
+            std::cout << "    identifier " << s << ": "
+                      << entry.shared_identifiers[s] << std::endl;
+            std::cout << "    numerator " << s << ": "
+                      << entry.shared_numerators[s] << std::endl;
+        }
+    }
+}
+
+void
+QPDF::dumpHSharedObject()
+{
+    // Print the shared object hint table to standard output: first
+    // the header fields, then one section per shared object group.
+    // first_shared_offset is shown after adjustment by
+    // adjusted_offset().
+    HSharedObject& hints = this->shared_object_hints;
+
+    std::cout << "first_shared_obj: " << hints.first_shared_obj
+              << std::endl;
+    std::cout << "first_shared_offset: "
+              << adjusted_offset(hints.first_shared_offset) << std::endl;
+    std::cout << "nshared_first_page: " << hints.nshared_first_page
+              << std::endl;
+    std::cout << "nshared_total: " << hints.nshared_total << std::endl;
+    std::cout << "nbits_nobjects: " << hints.nbits_nobjects << std::endl;
+    std::cout << "min_group_length: " << hints.min_group_length
+              << std::endl;
+    std::cout << "nbits_delta_group_length: "
+              << hints.nbits_delta_group_length << std::endl;
+
+    for (int idx = 0; idx < hints.nshared_total; ++idx)
+    {
+        HSharedObjectEntry& group = hints.entries[idx];
+        std::cout << "Shared Object " << idx << ":" << std::endl;
+        std::cout << "  group length: "
+                  << group.delta_group_length + hints.min_group_length
+                  << std::endl;
+        // The PDF spec says signature_present and nobjects_minus_one
+        // are always 0, so print them only if they have a non-zero
+        // value.
+        if (group.signature_present)
+        {
+            std::cout << "  signature present" << std::endl;
+        }
+        if (group.nobjects_minus_one != 0)
+        {
+            std::cout << "  nobjects: "
+                      << group.nobjects_minus_one + 1 << std::endl;
+        }
+    }
+}
+
+void
+QPDF::dumpHGeneric(HGeneric& t)
+{
+    // Print the four fields of a generic hint table to standard
+    // output, one per line.  first_object_offset is shown after
+    // adjustment by adjusted_offset().
+    std::cout << "first_object: " << t.first_object << std::endl;
+    std::cout << "first_object_offset: "
+              << adjusted_offset(t.first_object_offset) << std::endl;
+    std::cout << "nobjects: " << t.nobjects << std::endl;
+    std::cout << "group_length: " << t.group_length << std::endl;
+}
+
+QPDFObjectHandle
+QPDF::objGenToIndirect(ObjGen const& og)
+{
+    // Convenience wrapper: look up the object handle for an
+    // object/generation pair by passing its two members to
+    // getObjectByID.
+    int const objid = og.obj;
+    int const generation = og.gen;
+    return getObjectByID(objid, generation);
+}
+
+void
+QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
+{
+    // This function calculates the ordering of objects, divides them
+    // into the appropriate parts, and computes some values for the
+    // linearization parameter dictionary and hint tables.  The file
+    // must be optimized (via calling optimize()) prior to calling
+    // this function.  Note that actual offsets and lengths are not
+    // computed here, but anything related to object ordering is.
+
+    if (this->object_to_obj_users.empty())
+    {
+	// Note that we can't call optimize here because we don't know
+	// whether it should be called with or without allow changes.
+	throw QEXC::Internal("QPDF::calculateLinearizationData "
+			     "called before optimize()");
+    }
+
+    // Separate objects into the categories sufficient for us to
+    // determine which part of the linearized file should contain the
+    // object.  This categorization is useful for other purposes as
+    // well.  Part numbers refer to version 1.4 of the PDF spec.
+
+    // Parts 1, 3, 5, 10, and 11 don't contain any objects from the
+    // original file (except the trailer dictionary in part 11).
+
+    // Part 4 is the document catalog (root) and the following root
+    // keys: /ViewerPreferences, /PageMode, /Threads, /OpenAction,
+    // /AcroForm, /Encrypt.  Note that Thread information dictionaries
+    // are supposed to appear in part 9, but we are disregarding that
+    // recommendation for now.
+
+    // Part 6 is the first page section.  It includes all remaining
+    // objects referenced by the first page including shared objects
+    // but not including thumbnails.  Additionally, if /PageMode is
+    // /UseOutlines, then information from /Outlines also appears here.
+
+    // Part 7 contains remaining objects private to pages other than
+    // the first page.
+
+    // Part 8 contains all remaining shared objects except those that
+    // are shared only within thumbnails.
+
+    // Part 9 contains all remaining objects.
+
+    // We sort objects into the following categories:
+
+    //   * open_document: part 4
+
+    //   * first_page_private: part 6
+
+    //   * first_page_shared: part 6
+
+    //   * other_page_private: part 7
+
+    //   * other_page_shared: part 8
+
+    //   * thumbnail_private: part 9
+
+    //   * thumbnail_shared: part 9
+
+    //   * other: part 9
+
+    //   * outlines: part 6 or 9
+
+    QPDFObjectHandle root = getRoot();
+    bool outlines_in_first_page = false;
+    QPDFObjectHandle pagemode = root.getKey("/PageMode");
+    QTC::TC("qpdf", "QPDF categorize pagemode present",
+	    pagemode.isName() ? 1 : 0);
+    if (pagemode.isName())
+    {
+	if (pagemode.getName() == "/UseOutlines")
+	{
+	    if (root.hasKey("/Outlines"))
+	    {
+		outlines_in_first_page = true;
+	    }
+	    else
+	    {
+		QTC::TC("qpdf", "QPDF UseOutlines but no Outlines");
+	    }
+	}
+	QTC::TC("qpdf", "QPDF categorize pagemode outlines",
+		outlines_in_first_page ? 1 : 0);
+    }
+
+    std::set<std::string> open_document_keys;
+    open_document_keys.insert("/ViewerPreferences");
+    open_document_keys.insert("/PageMode");
+    open_document_keys.insert("/Threads");
+    open_document_keys.insert("/OpenAction");
+    open_document_keys.insert("/AcroForm");
+
+    std::set<ObjGen> lc_open_document;
+    std::set<ObjGen> lc_first_page_private;
+    std::set<ObjGen> lc_first_page_shared;
+    std::set<ObjGen> lc_other_page_private;
+    std::set<ObjGen> lc_other_page_shared;
+    std::set<ObjGen> lc_thumbnail_private;
+    std::set<ObjGen> lc_thumbnail_shared;
+    std::set<ObjGen> lc_other;
+    std::set<ObjGen> lc_outlines;
+    std::set<ObjGen> lc_root;
+
+    for (std::map<ObjGen, std::set<ObjUser> >::iterator oiter =
+	     this->object_to_obj_users.begin();
+	 oiter != this->object_to_obj_users.end(); ++oiter)
+    {
+	ObjGen const& og = (*oiter).first;
+
+	std::set<ObjUser>& ous = (*oiter).second;
+
+	bool in_open_document = false;
+	bool in_first_page = false;
+	int other_pages = 0;
+	int thumbs = 0;
+	int others = 0;
+	bool in_outlines = false;
+	bool is_root = false;
+
+	for (std::set<ObjUser>::iterator uiter = ous.begin();
+	     uiter != ous.end(); ++uiter)
+	{
+	    ObjUser const& ou = *uiter;
+	    switch (ou.ou_type)
+	    {
+	      case ObjUser::ou_trailer_key:
+		if (ou.key == "/Encrypt")
+		{
+		    in_open_document = true;
+		}
+		else
+		{
+		    ++others;
+		}
+		break;
+
+	      case ObjUser::ou_thumb:
+		++thumbs;
+		break;
+
+	      case ObjUser::ou_root_key:
+		if (open_document_keys.count(ou.key) > 0)
+		{
+		    in_open_document = true;
+		}
+		else if (ou.key == "/Outlines")
+		{
+		    in_outlines = true;
+		}
+		else
+		{
+		    ++others;
+		}
+		break;
+
+	      case ObjUser::ou_page:
+		if (ou.pageno == 0)
+		{
+		    in_first_page = true;
+		}
+		else
+		{
+		    ++other_pages;
+		}
+		break;
+
+	      case ObjUser::ou_root:
+		is_root = true;
+		break;
+
+	      case ObjUser::ou_bad:
+		throw QEXC::Internal("QPDF::calculateLinearizationData: "
+				     "invalid user type");
+		break;
+	    }
+	}
+
+	if (is_root)
+	{
+	    lc_root.insert(og);
+	}
+	else if (in_outlines)
+	{
+	    lc_outlines.insert(og);
+	}
+	else if (in_open_document)
+	{
+	    lc_open_document.insert(og);
+	}
+	else if ((in_first_page) &&
+		 (others == 0) && (other_pages == 0) && (thumbs == 0))
+	{
+	    lc_first_page_private.insert(og);
+	}
+	else if (in_first_page)
+	{
+	    lc_first_page_shared.insert(og);
+	}
+	else if ((other_pages == 1) && (others == 0) && (thumbs == 0))
+	{
+	    lc_other_page_private.insert(og);
+	}
+	else if (other_pages > 1)
+	{
+	    lc_other_page_shared.insert(og);
+	}
+	else if ((thumbs == 1) && (others == 0))
+	{
+	    lc_thumbnail_private.insert(og);
+	}
+	else if (thumbs > 1)
+	{
+	    lc_thumbnail_shared.insert(og);
+	}
+	else
+	{
+	    lc_other.insert(og);
+	}
+    }
+
+    // Generate ordering for objects in the output file.  Sometimes we
+    // just dump right from a set into a vector.  Rather than
+    // optimizing this by going straight into the vector, we'll leave
+    // these phases separate for now.  That way, this section can be
+    // concerned only with ordering, and the above section can be
+    // concerned only with categorization.  Note that sets of ObjGens
+    // are sorted by ObjGen.  In a linearized file, objects appear in
+    // sequence with the possible exception of hints tables which we
+    // won't see here anyway.  That means that running
+    // calculateLinearizationData() on a linearized file should give
+    // results identical to the original file ordering.
+
+    // We seem to traverse the page tree a lot in this code, but we
+    // can address this for a future code optimization if necessary.
+    // Premature optimization is the root of all evil.
+    std::vector<QPDFObjectHandle> pages;
+    { // local scope
+	// Map all page objects to the containing object stream.  This
+	// should be a no-op in a properly linearized file.
+	std::vector<QPDFObjectHandle> t = getAllPages();
+	for (std::vector<QPDFObjectHandle>::iterator iter = t.begin();
+	     iter != t.end(); ++iter)
+	{
+	    pages.push_back(getUncompressedObject(*iter, object_stream_data));
+	}
+    }
+    unsigned int npages = pages.size();
+
+    // We will be initializing some values of the computed hint
+    // tables.  Specifically, we can initialize any items that deal
+    // with object numbers or counts but not any items that deal with
+    // lengths or offsets.  The code that writes linearized files will
+    // have to fill in these values during the first pass.  The
+    // validation code can compute them relatively easily given the
+    // rest of the information.
+
+    this->c_linp.npages = npages;
+    this->c_page_offset_data.entries = 	std::vector<CHPageOffsetEntry>(npages);
+
+    // Part 4: open document objects.  We don't care about the order.
+
+    assert(lc_root.size() == 1);
+    this->part4.push_back(objGenToIndirect(*(lc_root.begin())));
+    for (std::set<ObjGen>::iterator iter = lc_open_document.begin();
+	 iter != lc_open_document.end(); ++iter)
+    {
+	this->part4.push_back(objGenToIndirect(*iter));
+    }
+
+    // Part 6: first page objects.  Note: implementation note 124
+    // states that Acrobat always treats page 0 as the first page for
+    // linearization regardless of /OpenAction.  pdlin doesn't provide
+    // any option to set this and also disregards /OpenAction.  We
+    // will do the same.
+
+    // First, place the actual first page object itself.
+    ObjGen first_page_og(pages[0].getObjectID(), pages[0].getGeneration());
+    if (! lc_first_page_private.count(first_page_og))
+    {
+	throw QEXC::Internal("QPDF::calculateLinearizationData: first page "
+			     "object not in lc_first_page_private");
+    }
+    lc_first_page_private.erase(first_page_og);
+    this->c_linp.first_page_object = pages[0].getObjectID();
+    this->part6.push_back(pages[0]);
+
+    // The PDF spec "recommends" an order for the rest of the objects,
+    // but we are going to disregard it except to the extent that it
+    // groups private and shared objects contiguously for the sake of
+    // hint tables.
+
+    for (std::set<ObjGen>::iterator iter = lc_first_page_private.begin();
+	 iter != lc_first_page_private.end(); ++iter)
+    {
+	this->part6.push_back(objGenToIndirect(*iter));
+    }
+
+    for (std::set<ObjGen>::iterator iter = lc_first_page_shared.begin();
+	 iter != lc_first_page_shared.end(); ++iter)
+    {
+	this->part6.push_back(objGenToIndirect(*iter));
+    }
+
+    // Place the outline dictionary if it goes in the first page section.
+    if (outlines_in_first_page)
+    {
+	pushOutlinesToPart(this->part6, lc_outlines, object_stream_data);
+    }
+
+    // Fill in page offset hint table information for the first page.
+    // The PDF spec says that nshared_objects should be zero for the
+    // first page.  pdlin does not appear to obey this, but it fills
+    // in garbage values for all the shared object identifiers on the
+    // first page.
+
+    this->c_page_offset_data.entries[0].nobjects = this->part6.size();
+
+    // Part 7: other pages' private objects
+
+    // For each page in order:
+    for (unsigned int i = 1; i < npages; ++i)
+    {
+	// Place this page's page object
+
+	ObjGen page_og(pages[i].getObjectID(), pages[i].getGeneration());
+	if (! lc_other_page_private.count(page_og))
+	{
+	    throw QEXC::Internal(
+		"QPDF::calculateLinearizationData: page object for page " +
+		QUtil::int_to_string(i) + " not in lc_other_page_private");
+	}
+	lc_other_page_private.erase(page_og);
+	this->part7.push_back(pages[i]);
+
+	// Place all non-shared objects referenced by this page,
+	// updating the page object count for the hint table.
+
+	this->c_page_offset_data.entries[i].nobjects = 1;
+
+	ObjUser ou(ObjUser::ou_page, i);
+	assert(this->obj_user_to_objects.count(ou) > 0);
+	std::set<ObjGen> ogs = this->obj_user_to_objects[ou];
+	for (std::set<ObjGen>::iterator iter = ogs.begin();
+	     iter != ogs.end(); ++iter)
+	{
+	    ObjGen const& og = (*iter);
+	    if (lc_other_page_private.count(og))
+	    {
+		lc_other_page_private.erase(og);
+		this->part7.push_back(objGenToIndirect(og));
+		++this->c_page_offset_data.entries[i].nobjects;
+	    }
+	}
+    }
+    // That should have covered all part7 objects.
+    if (! lc_other_page_private.empty())
+    {
+	throw QEXC::Internal(
+	    "QPDF::calculateLinearizationData: lc_other_page_private is "
+	    "not empty after generation of part7");
+    }
+
+    // Part 8: other pages' shared objects
+
+    // Order is unimportant.
+    for (std::set<ObjGen>::iterator iter = lc_other_page_shared.begin();
+	 iter != lc_other_page_shared.end(); ++iter)
+    {
+	this->part8.push_back(objGenToIndirect(*iter));
+    }
+
+    // Part 9: other objects
+
+    // The PDF specification makes recommendations on ordering here.
+    // We follow them only to a limited extent.  Specifically, we put
+    // the pages tree first, then private thumbnail objects in page
+    // order, then shared thumbnail objects, and then outlines (unless
+    // in part 6).  After that, we throw all remaining objects in
+    // arbitrary order.
+
+    // Place the pages tree.
+    std::set<ObjGen> pages_ogs =
+	this->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")];
+    assert(! pages_ogs.empty());
+    for (std::set<ObjGen>::iterator iter = pages_ogs.begin();
+	 iter != pages_ogs.end(); ++iter)
+    {
+	ObjGen const& og = *iter;
+	if (lc_other.count(og))
+	{
+	    lc_other.erase(og);
+	    this->part9.push_back(objGenToIndirect(og));
+	}
+    }
+
+    // Place private thumbnail images in page order.  Slightly more
+    // information would be required if we were going to bother with
+    // thumbnail hint tables.
+    for (unsigned int i = 0; i < npages; ++i)
+    {
+	QPDFObjectHandle thumb = pages[i].getKey("/Thumb");
+	thumb = getUncompressedObject(thumb, object_stream_data);
+	if (! thumb.isNull())
+	{
+	    // Output the thumbnail itself
+	    ObjGen thumb_og(thumb.getObjectID(), thumb.getGeneration());
+	    if (lc_thumbnail_private.count(thumb_og))
+	    {
+		lc_thumbnail_private.erase(thumb_og);
+		this->part9.push_back(thumb);
+	    }
+	    else
+	    {
+		// No internal error this time...there's nothing to
+		// stop this object from having been referred to
+		// somewhere else outside of a page's /Thumb, and if
+		// it had been, there's nothing to prevent it from
+		// having been in some set other than
+		// lc_thumbnail_private.
+	    }
+	    std::set<ObjGen>& ogs =
+		this->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, i)];
+	    for (std::set<ObjGen>::iterator iter = ogs.begin();
+		 iter != ogs.end(); ++iter)
+	    {
+		ObjGen const& og = *iter;
+		if (lc_thumbnail_private.count(og))
+		{
+		    lc_thumbnail_private.erase(og);
+		    this->part9.push_back(objGenToIndirect(og));
+		}
+	    }
+	}
+    }
+    if (! lc_thumbnail_private.empty())
+    {
+	throw QEXC::Internal(
+	    "QPDF::calculateLinearizationData: lc_thumbnail_private "
+	    "not empty after placing thumbnails");
+    }
+
+    // Place shared thumbnail objects
+    for (std::set<ObjGen>::iterator iter = lc_thumbnail_shared.begin();
+	 iter != lc_thumbnail_shared.end(); ++iter)
+    {
+	this->part9.push_back(objGenToIndirect(*iter));
+    }
+
+    // Place outlines unless in first page
+    if (! outlines_in_first_page)
+    {
+	pushOutlinesToPart(this->part9, lc_outlines, object_stream_data);
+    }
+
+    // Place all remaining objects
+    for (std::set<ObjGen>::iterator iter = lc_other.begin();
+	 iter != lc_other.end(); ++iter)
+    {
+	this->part9.push_back(objGenToIndirect(*iter));
+    }
+
+    // Make sure we got everything exactly once.
+
+    unsigned int num_placed = this->part4.size() + this->part6.size() +
+	this->part7.size() + this->part8.size() + this->part9.size();
+    unsigned int num_wanted = this->object_to_obj_users.size();
+    if (num_placed != num_wanted)
+    {
+	throw QEXC::Internal("QPDF::calculateLinearizationData: wrong "
+			     "number of objects placed (num_placed = " +
+			     QUtil::int_to_string(num_placed) +
+			     "; number of objects: " +
+			     QUtil::int_to_string(num_wanted));
+    }
+
+    // Calculate shared object hint table information including
+    // references to shared objects from page offset hint data.
+
+    // The shared object hint table consists of all part 6 (whether
+    // shared or not) in order followed by all part 8 objects in
+    // order.  Add the objects to shared object data keeping a map of
+    // object number to index.  Then populate the shared object
+    // information for the pages.
+
+    // Note that two objects never have the same object number, so we
+    // can map from object number only without regards to generation.
+    std::map<int, int> obj_to_index;
+
+    this->c_shared_object_data.nshared_first_page = this->part6.size();
+    this->c_shared_object_data.nshared_total =
+	this->c_shared_object_data.nshared_first_page +
+	this->part8.size();
+
+    std::vector<CHSharedObjectEntry>& shared =
+	this->c_shared_object_data.entries;
+    for (std::vector<QPDFObjectHandle>::iterator iter = this->part6.begin();
+	 iter != this->part6.end(); ++iter)
+    {
+	QPDFObjectHandle& oh = *iter;
+	int obj = oh.getObjectID();
+	obj_to_index[obj] = shared.size();
+	shared.push_back(CHSharedObjectEntry(obj));
+    }
+    QTC::TC("qpdf", "QPDF lin part 8 empty", this->part8.empty() ? 1 : 0);
+    if (! this->part8.empty())
+    {
+	this->c_shared_object_data.first_shared_obj =
+	    this->part8[0].getObjectID();
+	for (std::vector<QPDFObjectHandle>::iterator iter =
+		 this->part8.begin();
+	     iter != this->part8.end(); ++iter)
+	{
+	    QPDFObjectHandle& oh = *iter;
+	    int obj = oh.getObjectID();
+	    obj_to_index[obj] = shared.size();
+	    shared.push_back(CHSharedObjectEntry(obj));
+	}
+    }
+    assert(this->c_shared_object_data.nshared_total ==
+	   (int) this->c_shared_object_data.entries.size());
+
+    // Now compute the list of shared objects for each page after the
+    // first page.
+
+    for (unsigned int i = 1; i < npages; ++i)
+    {
+	CHPageOffsetEntry& pe = this->c_page_offset_data.entries[i];
+	ObjUser ou(ObjUser::ou_page, i);
+	assert(this->obj_user_to_objects.count(ou) > 0);
+	std::set<ObjGen> const& ogs = this->obj_user_to_objects[ou];
+	for (std::set<ObjGen>::const_iterator iter = ogs.begin();
+	     iter != ogs.end(); ++iter)
+	{
+	    ObjGen const& og = *iter;
+	    if ((this->object_to_obj_users[og].size() > 1) &&
+		(obj_to_index.count(og.obj) > 0))
+	    {
+		int idx = obj_to_index[og.obj];
+		++pe.nshared_objects;
+		pe.shared_identifiers.push_back(idx);
+	    }
+	}
+    }
+}
+
+void
+QPDF::pushOutlinesToPart(
+    std::vector<QPDFObjectHandle>& part,
+    std::set<ObjGen>& lc_outlines,
+    std::map<int, int> const& object_stream_data)
+{
+    // Append the document outlines to part: first the /Outlines
+    // object from the root dictionary (after mapping it through
+    // getUncompressedObject()), then every other member of
+    // lc_outlines in set order.  The first object number and the
+    // number of objects appended are recorded in c_outline_data.
+    // Nothing happens when the root has no /Outlines key.
+
+    QPDFObjectHandle outlines = getRoot().getKey("/Outlines");
+    if (! outlines.isNull())
+    {
+        outlines = getUncompressedObject(outlines, object_stream_data);
+        ObjGen top_og(outlines.getObjectID(), outlines.getGeneration());
+
+        // Coverage case: 0 when filling part 6, 1 when filling part 9.
+        int which_part = 9999;          // can't happen
+        if (&part == (&this->part6))
+        {
+            which_part = 0;
+        }
+        else if (&part == (&this->part9))
+        {
+            which_part = 1;
+        }
+        QTC::TC("qpdf", "QPDF lin outlines in part", which_part);
+
+        HGeneric& od = this->c_outline_data;
+        od.first_object = top_og.obj;
+        od.nobjects = 1;
+
+        // The top-level object is placed explicitly, so take it out
+        // of the set before walking the remaining members.
+        lc_outlines.erase(top_og);
+        part.push_back(outlines);
+
+        std::set<ObjGen>::iterator cur = lc_outlines.begin();
+        while (cur != lc_outlines.end())
+        {
+            part.push_back(objGenToIndirect(*cur));
+            ++od.nobjects;
+            ++cur;
+        }
+    }
+}
+
+// Run calculateLinearizationData() for the given object stream data
+// and copy the resulting part 4, 6, 7, 8, and 9 object lists into
+// the caller-supplied vectors.
+void
+QPDF::getLinearizedParts(
+    std::map<int, int> const& object_stream_data,
+    std::vector<QPDFObjectHandle>& part4,
+    std::vector<QPDFObjectHandle>& part6,
+    std::vector<QPDFObjectHandle>& part7,
+    std::vector<QPDFObjectHandle>& part8,
+    std::vector<QPDFObjectHandle>& part9)
+{
+    calculateLinearizationData(object_stream_data);
+    // Each output parameter shadows the member of the same name, so
+    // the members have to be reached through this->.
+    part4 = this->part4;
+    part6 = this->part6;
+    part7 = this->part7;
+    part8 = this->part8;
+    part9 = this->part9;
+}
+
+// Return the number of bits needed to hold val: 0 for 0, otherwise
+// one more than the position of the highest set bit (so 1 -> 1,
+// 255 -> 8, 256 -> 9).
+static inline int nbits(int val)
+{
+    // Shift the unsigned representation so that the loop is
+    // guaranteed to reach zero.  Shifting a negative int right
+    // typically keeps the sign bit set, which would never terminate;
+    // a negative argument now simply reports the full width of int.
+    int result = 0;
+    for (unsigned int rest = static_cast<unsigned int>(val);
+         rest != 0; rest >>= 1)
+    {
+        ++result;
+    }
+    return result;
+}
+
+int
+QPDF::outputLengthNextN(
+    int in_object, int n,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Figure out the length of a series of n consecutive objects in
+    // the output file starting with whatever object in_object from
+    // the input file mapped to.
+    //
+    // in_object is translated through obj_renumber to an output
+    // object number; lengths is then consulted for that number and
+    // the n - 1 numbers following it, and the sum is returned.
+    //
+    // A missing entry in either map is reported by throwing
+    // QEXC::Internal.  The check is made on the iterator returned by
+    // find() so that it is still in force when assertions are
+    // compiled out, and so that each key is looked up only once.
+
+    std::map<int, int>::const_iterator renumbered =
+        obj_renumber.find(in_object);
+    if (renumbered == obj_renumber.end())
+    {
+        throw QEXC::Internal(
+            "QPDF::outputLengthNextN: no output object number for "
+            "input object " + QUtil::int_to_string(in_object));
+    }
+    int const first = (*renumbered).second;
+
+    int length = 0;
+    for (int i = 0; i < n; ++i)
+    {
+        int const out_obj = first + i;
+        std::map<int, size_t>::const_iterator len = lengths.find(out_obj);
+        if (len == lengths.end())
+        {
+            throw QEXC::Internal(
+                "QPDF::outputLengthNextN: no length recorded for "
+                "output object " + QUtil::int_to_string(out_obj));
+        }
+        length += (*len).second;
+    }
+    return length;
+}
+
+void
+QPDF::calculateHPageOffset(
+    std::map<int, QPDFXRefEntry> const& xref,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Page Offset Hint Table
+
+    // Fill in page_offset_hints from the per-page counts already
+    // stored in c_page_offset_data.  obj_renumber maps input object
+    // numbers to output object numbers; xref and lengths are both
+    // looked up by output object number.
+
+    // Some values are purposely left at their initial zero values.
+
+    std::vector<QPDFObjectHandle> const& pages = getAllPages();
+    unsigned int const npages = pages.size();
+    std::vector<CHPageOffsetEntry>& computed =
+        this->c_page_offset_data.entries;
+    int const page0_id = pages[0].getObjectID();
+
+    // Seed the running minimum and maximum object counts and page
+    // lengths from the first page.
+    int fewest_objects = computed[0].nobjects;
+    int most_objects = fewest_objects;
+    int shortest = outputLengthNextN(
+        page0_id, fewest_objects, lengths, obj_renumber);
+    int longest = shortest;
+    int most_shared = computed[0].nshared_objects;
+
+    HPageOffset& hints = this->page_offset_hints;
+    std::vector<HPageOffsetEntry>& entries = hints.entries;
+    entries = std::vector<HPageOffsetEntry>(npages);
+
+    // First pass: store absolute values in the delta fields and
+    // track the extremes.  Page 0 is processed again here so that
+    // its entry is filled in by the same code as every other page.
+    for (unsigned int pageno = 0; pageno < npages; ++pageno)
+    {
+        CHPageOffsetEntry const& src = computed[pageno];
+        HPageOffsetEntry& dst = entries[pageno];
+
+        dst.delta_nobjects = src.nobjects;
+        dst.delta_page_length = outputLengthNextN(
+            pages[pageno].getObjectID(), src.nobjects,
+            lengths, obj_renumber);
+        dst.nshared_objects = src.nshared_objects;
+
+        if (dst.delta_nobjects < fewest_objects)
+        {
+            fewest_objects = dst.delta_nobjects;
+        }
+        if (dst.delta_nobjects > most_objects)
+        {
+            most_objects = dst.delta_nobjects;
+        }
+        if (dst.delta_page_length < shortest)
+        {
+            shortest = dst.delta_page_length;
+        }
+        if (dst.delta_page_length > longest)
+        {
+            longest = dst.delta_page_length;
+        }
+        if (dst.nshared_objects > most_shared)
+        {
+            most_shared = dst.nshared_objects;
+        }
+    }
+
+    // Table-wide values: minimums plus the bit widths needed to
+    // store each per-page quantity.
+    int const out_page0_id = obj_renumber.find(page0_id)->second;
+    hints.min_nobjects = fewest_objects;
+    hints.first_page_offset = xref.find(out_page0_id)->second.getOffset();
+    hints.nbits_delta_nobjects = nbits(most_objects - fewest_objects);
+    hints.min_page_length = shortest;
+    hints.nbits_delta_page_length = nbits(longest - shortest);
+    hints.nbits_nshared_objects = nbits(most_shared);
+    hints.nbits_shared_identifier =
+        nbits(this->c_shared_object_data.nshared_total);
+    hints.shared_denominator = 4;       // doesn't matter
+
+    // It isn't clear how to compute content offset and content
+    // length.  Page objects are not interleaved with the content
+    // stream, so content length simply reuses the page length
+    // values.  Content offset stays 0 because this is what Adobe
+    // does (implementation note 127) and pdlin as well.
+    hints.nbits_delta_content_length = hints.nbits_delta_page_length;
+    hints.min_content_length = hints.min_page_length;
+
+    // Second pass: turn the absolute values into deltas from the
+    // minimums and copy over each page's shared object identifiers.
+    for (unsigned int pageno = 0; pageno < npages; ++pageno)
+    {
+        CHPageOffsetEntry const& src = computed[pageno];
+        HPageOffsetEntry& dst = entries[pageno];
+
+        assert(dst.delta_nobjects >= fewest_objects);
+        assert(dst.delta_page_length >= shortest);
+        dst.delta_nobjects -= fewest_objects;
+        dst.delta_page_length -= shortest;
+        dst.delta_content_length = dst.delta_page_length;
+
+        for (int k = 0; k < src.nshared_objects; ++k)
+        {
+            dst.shared_identifiers.push_back(src.shared_identifiers[k]);
+            // Every numerator is written as zero.
+            dst.shared_numerators.push_back(0);
+        }
+    }
+}
+
+void
+QPDF::calculateHSharedObject(
+    std::map<int, QPDFXRefEntry> const& xref,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Fill in shared_object_hints from c_shared_object_data: one
+    // entry per shared object holding that object's output length
+    // as a delta from the smallest length, plus the table-wide
+    // counts, minimum length, and delta bit width.
+
+    CHSharedObject& computed = this->c_shared_object_data;
+    HSharedObject& hints = this->shared_object_hints;
+    std::vector<HSharedObjectEntry>& entries = hints.entries;
+    entries = std::vector<HSharedObjectEntry>(computed.nshared_total);
+
+    // Seed the extremes from the first shared object.
+    int shortest = outputLengthNextN(
+        computed.entries[0].object, 1, lengths, obj_renumber);
+    int longest = shortest;
+
+    // Store absolute lengths for now; they become deltas below once
+    // the minimum is known.
+    for (int i = 0; i < computed.nshared_total; ++i)
+    {
+        int const len = outputLengthNextN(
+            computed.entries[i].object, 1, lengths, obj_renumber);
+        if (len < shortest)
+        {
+            shortest = len;
+        }
+        if (len > longest)
+        {
+            longest = len;
+        }
+        entries[i].delta_group_length = len;
+    }
+
+    hints.nshared_total = computed.nshared_total;
+    hints.nshared_first_page = computed.nshared_first_page;
+    // The first shared object and its offset are only filled in
+    // when there are shared objects beyond those of the first page.
+    if (hints.nshared_total > hints.nshared_first_page)
+    {
+        hints.first_shared_obj =
+            obj_renumber.find(computed.first_shared_obj)->second;
+        hints.first_shared_offset =
+            xref.find(hints.first_shared_obj)->second.getOffset();
+    }
+    hints.min_group_length = shortest;
+    hints.nbits_delta_group_length = nbits(longest - shortest);
+
+    // Convert the stored lengths into deltas from the minimum.
+    for (int i = 0; i < computed.nshared_total; ++i)
+    {
+        HSharedObjectEntry& entry = entries[i];
+        assert(entry.delta_group_length >= shortest);
+        entry.delta_group_length -= shortest;
+    }
+}
+
+void
+QPDF::calculateHOutline(
+    std::map<int, QPDFXRefEntry> const& xref,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Fill in outline_hints from c_outline_data, translating the
+    // first outline object to its output object number and looking
+    // up its offset and the combined length of the outline objects.
+    // outline_hints is left untouched when c_outline_data records
+    // no objects.
+
+    HGeneric& computed = this->c_outline_data;
+    if (computed.nobjects != 0)
+    {
+        HGeneric& hints = this->outline_hints;
+
+        hints.first_object =
+            obj_renumber.find(computed.first_object)->second;
+        hints.first_object_offset =
+            xref.find(hints.first_object)->second.getOffset();
+        hints.nobjects = computed.nobjects;
+        // outputLengthNextN does its own renumbering, so it is given
+        // the input object number rather than hints.first_object.
+        hints.group_length = outputLengthNextN(
+            computed.first_object, hints.nobjects, lengths, obj_renumber);
+    }
+}
+
+// For each of the first nitems elements of vec, write the low
+// "bits" bits of the int member selected by field to w, then flush
+// w.  The PDF spec says that each hint table starts at a byte
+// boundary; each "row" actually must start on a byte boundary,
+// which is what the final flush provides.
+template <class T>
+static void
+write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec,
+                 int bits, int T::*field)
+{
+    int idx = 0;
+    while (idx < nitems)
+    {
+        T& item = vec[idx];
+        w.writeBits(item.*field, bits);
+        ++idx;
+    }
+    w.flush();
+}
+
+// For each of the first nitems1 elements of vec1, read that
+// element's count from the int member nitems2 and write that many
+// leading values of its vector member vec2 to w, "bits" bits each.
+// w is flushed once after all elements have been written.
+template <class T>
+static void
+write_vector_vector(BitWriter& w,
+                    int nitems1, std::vector<T>& vec1, int T::*nitems2,
+                    int bits, std::vector<int> T::*vec2)
+{
+    int outer = 0;
+    while (outer < nitems1)
+    {
+        T& item = vec1[outer];
+        std::vector<int>& values = item.*vec2;
+        // The count is re-read on every pass, as it would be with a
+        // plain for loop over item.*nitems2.
+        for (int inner = 0; inner < item.*nitems2; ++inner)
+        {
+            w.writeBits(values[inner], bits);
+        }
+        ++outer;
+    }
+    w.flush();
+}
+
+
+void
+QPDF::writeHPageOffset(BitWriter& w)
+{
+    // Write page_offset_hints to w: thirteen fixed-width header
+    // items followed by seven per-page arrays, each written with
+    // the bit width given by the corresponding header item.
+
+    HPageOffset& hints = this->page_offset_hints;
+
+    // Header items, numbered in the order they are written.
+    w.writeBits(hints.min_nobjects, 32);                // 1
+    w.writeBits(hints.first_page_offset, 32);           // 2
+    w.writeBits(hints.nbits_delta_nobjects, 16);        // 3
+    w.writeBits(hints.min_page_length, 32);             // 4
+    w.writeBits(hints.nbits_delta_page_length, 16);     // 5
+    w.writeBits(hints.min_content_offset, 32);          // 6
+    w.writeBits(hints.nbits_delta_content_offset, 16);  // 7
+    w.writeBits(hints.min_content_length, 32);          // 8
+    w.writeBits(hints.nbits_delta_content_length, 16);  // 9
+    w.writeBits(hints.nbits_nshared_objects, 16);       // 10
+    w.writeBits(hints.nbits_shared_identifier, 16);     // 11
+    w.writeBits(hints.nbits_shared_numerator, 16);      // 12
+    w.writeBits(hints.shared_denominator, 16);          // 13
+
+    // Per-page arrays.  There is one row per page; the two
+    // write_vector_vector calls emit nshared_objects values per row.
+    int const npages = getAllPages().size();
+    std::vector<HPageOffsetEntry>& rows = hints.entries;
+
+    write_vector_int(w, npages, rows,
+                     hints.nbits_delta_nobjects,
+                     &HPageOffsetEntry::delta_nobjects);
+    write_vector_int(w, npages, rows,
+                     hints.nbits_delta_page_length,
+                     &HPageOffsetEntry::delta_page_length);
+    write_vector_int(w, npages, rows,
+                     hints.nbits_nshared_objects,
+                     &HPageOffsetEntry::nshared_objects);
+    write_vector_vector(w, npages, rows,
+                        &HPageOffsetEntry::nshared_objects,
+                        hints.nbits_shared_identifier,
+                        &HPageOffsetEntry::shared_identifiers);
+    write_vector_vector(w, npages, rows,
+                        &HPageOffsetEntry::nshared_objects,
+                        hints.nbits_shared_numerator,
+                        &HPageOffsetEntry::shared_numerators);
+    write_vector_int(w, npages, rows,
+                     hints.nbits_delta_content_offset,
+                     &HPageOffsetEntry::delta_content_offset);
+    write_vector_int(w, npages, rows,
+                     hints.nbits_delta_content_length,
+                     &HPageOffsetEntry::delta_content_length);
+}
+
+void
+QPDF::writeHSharedObject(BitWriter& w)
+{
+    // Write shared_object_hints to w: seven fixed-width header
+    // items followed by three arrays holding one value per shared
+    // object entry.
+
+    HSharedObject& hints = this->shared_object_hints;
+
+    // Header items, numbered in the order they are written.
+    w.writeBits(hints.first_shared_obj, 32);            // 1
+    w.writeBits(hints.first_shared_offset, 32);         // 2
+    w.writeBits(hints.nshared_first_page, 32);          // 3
+    w.writeBits(hints.nshared_total, 32);               // 4
+    w.writeBits(hints.nbits_nobjects, 16);              // 5
+    w.writeBits(hints.min_group_length, 32);            // 6
+    w.writeBits(hints.nbits_delta_group_length, 16);    // 7
+
+    QTC::TC("qpdf", "QPDF lin write nshared_total > nshared_first_page",
+            (hints.nshared_total > hints.nshared_first_page) ? 1 : 0);
+
+    int const nentries = hints.nshared_total;
+    std::vector<HSharedObjectEntry>& rows = hints.entries;
+
+    write_vector_int(w, nentries, rows,
+                     hints.nbits_delta_group_length,
+                     &HSharedObjectEntry::delta_group_length);
+    write_vector_int(w, nentries, rows,
+                     1, &HSharedObjectEntry::signature_present);
+    // If signature were present, we'd have to write a 128-bit hash
+    // for that entry, which this code does not do.
+    int idx = 0;
+    while (idx < nentries)
+    {
+        assert(rows[idx].signature_present == 0);
+        ++idx;
+    }
+    write_vector_int(w, nentries, rows,
+                     hints.nbits_nobjects,
+                     &HSharedObjectEntry::nobjects_minus_one);
+}
+
+// Write the four items of the generic hint table t to w, 32 bits
+// each, in this order: first object, first object offset, number
+// of objects, group length.
+void
+QPDF::writeHGeneric(BitWriter& w, HGeneric& t)
+{
+    w.writeBits(t.first_object, 32);			    // 1
+    w.writeBits(t.first_object_offset, 32);		    // 2
+    w.writeBits(t.nobjects, 32);			    // 3
+    w.writeBits(t.group_length, 32);			    // 4
+}
+
+void
+QPDF::generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
+                         std::map<int, size_t> const& lengths,
+                         std::map<int, int> const& obj_renumber,
+                         PointerHolder<Buffer>& hint_buffer,
+                         int& S, int& O)
+{
+    // Compute the final hint table values and serialize them.  On
+    // return, hint_buffer holds the buffer produced by the pipeline
+    // chain below, S is the count reached after the page offset
+    // table, and O is the count reached just before the outline
+    // table, or 0 when no outline table is written.
+
+    // Populate actual hint table values
+    calculateHPageOffset(xref, lengths, obj_renumber);
+    calculateHSharedObject(xref, lengths, obj_renumber);
+    calculateHOutline(xref, lengths, obj_renumber);
+
+    // Write the hint stream itself into a compressed memory buffer.
+    // Write through a counter so we can get offsets.  The counter
+    // feeds the compressor, which in turn feeds the memory buffer.
+    Pl_Buffer compressed("hint stream");
+    Pl_Flate deflater("compress hint stream", &compressed,
+                      Pl_Flate::a_deflate);
+    Pl_Count counter("count", &deflater);
+    BitWriter bits(&counter);
+
+    writeHPageOffset(bits);
+    S = counter.getCount();
+    writeHSharedObject(bits);
+    if (this->outline_hints.nobjects > 0)
+    {
+        O = counter.getCount();
+        writeHGeneric(bits, this->outline_hints);
+    }
+    else
+    {
+        O = 0;
+    }
+    counter.finish();
+
+    hint_buffer = compressed.getBuffer();
+}
diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc
new file mode 100644
index 00000000..8797445c
--- /dev/null
+++ b/libqpdf/QPDF_optimization.cc
@@ -0,0 +1,490 @@
+// See doc/optimization.
+
+#include <qpdf/QPDF.hh>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QPDF_Dictionary.hh>
+#include <qpdf/QPDF_Array.hh>
+#include <assert.h>
+
+// Default construction yields a placeholder user of type ou_bad.
+QPDF::ObjUser::ObjUser() :
+    ou_type(ou_bad), pageno(0)
+{
+}
+
+// Construct the user that stands for the document root itself.  Only
+// ou_root is accepted here.
+QPDF::ObjUser::ObjUser(user_e type) :
+    ou_type(type), pageno(0)
+{
+    assert(this->ou_type == ou_root);
+}
+
+// Construct a user tied to a page number: either the page itself or
+// its thumbnail.
+QPDF::ObjUser::ObjUser(user_e type, int pageno) :
+    ou_type(type), pageno(pageno)
+{
+    assert((this->ou_type == ou_page) || (this->ou_type == ou_thumb));
+}
+
+// Construct a user identified by a dictionary key, taken either from
+// the trailer or from the root.
+QPDF::ObjUser::ObjUser(user_e type, std::string const& key) :
+    ou_type(type), pageno(0), key(key)
+{
+    assert((this->ou_type == ou_trailer_key) ||
+	   (this->ou_type == ou_root_key));
+}
+
+// Strict ordering so that ObjUser can be used as a std::map/std::set
+// key: compare ou_type first, then pageno, then key.
+bool
+QPDF::ObjUser::operator<(ObjUser const& rhs) const
+{
+    if (this->ou_type != rhs.ou_type)
+    {
+	return (this->ou_type < rhs.ou_type);
+    }
+    if (this->pageno != rhs.pageno)
+    {
+	return (this->pageno < rhs.pageno);
+    }
+    return (this->key < rhs.key);
+}
+
+// Walk everything reachable from the trailer and replace each scalar
+// array item or dictionary value with a direct object, so that only
+// arrays, dictionaries, and streams are left as indirect objects.
+void
+QPDF::flattenScalarReferences()
+{
+    // Breadth-first traversal.  "seen" records indirect containers
+    // that have already been expanded so that reference loops
+    // terminate.
+    std::set<ObjGen> seen;
+    std::list<QPDFObjectHandle> pending;
+    pending.push_back(this->trailer);
+
+    while (! pending.empty())
+    {
+	QPDFObjectHandle cur = pending.front();
+	pending.pop_front();
+
+	if (cur.isIndirect())
+	{
+	    // Scalars are flattened by their container before they
+	    // could be queued, so one showing up here is a logic
+	    // error.
+	    if (cur.isScalar())
+	    {
+		throw QEXC::Internal(
+		    "flattenScalarReferences landed at indirect scalar");
+	    }
+	    ObjGen og(cur.getObjectID(), cur.getGeneration());
+	    if (! seen.insert(og).second)
+	    {
+		// Already handled through another reference.
+		continue;
+	    }
+	}
+
+	if (cur.isArray())
+	{
+	    int const count = cur.getArrayNItems();
+	    for (int idx = 0; idx < count; ++idx)
+	    {
+		QPDFObjectHandle item = cur.getArrayItem(idx);
+		if (! item.isScalar())
+		{
+		    pending.push_back(item);
+		    continue;
+		}
+		QTC::TC("qpdf", "QPDF opt flatten array scalar");
+		item.makeDirect();
+		cur.setArrayItem(idx, item);
+	    }
+	    continue;
+	}
+
+	if (! (cur.isDictionary() || cur.isStream()))
+	{
+	    continue;
+	}
+
+	// For a stream, the keys live in its dictionary.
+	QPDFObjectHandle dict = cur.isStream() ? cur.getDict() : cur;
+
+	std::set<std::string> const names = dict.getKeys();
+	for (std::set<std::string>::const_iterator it = names.begin();
+	     it != names.end(); ++it)
+	{
+	    QPDFObjectHandle val = dict.getKey(*it);
+	    if (val.isNull())
+	    {
+		// QPDF_Dictionary.getKeys() never returns null keys.
+		throw QEXC::Internal("dictionary with null key found");
+	    }
+	    if (val.isScalar())
+	    {
+		QTC::TC("qpdf", "QPDF opt flatten dict scalar");
+		val.makeDirect();
+		dict.replaceKey(*it, val);
+	    }
+	    else
+	    {
+		pending.push_back(val);
+	    }
+	}
+    }
+}
+
+// Build obj_user_to_objects and object_to_obj_users for the whole
+// document.  Inherited page attributes are pushed down to the pages
+// first (an exception is thrown from optimizePagesTree if any are
+// found while allow_changes is false).  object_stream_data is passed
+// to filterCompressedObjects once the maps have been built.
+void
+QPDF::optimize(std::map<int, int> const& object_stream_data,
+	       bool allow_changes)
+{
+    // A populated user map means an earlier call already did the
+    // work.
+    if (! this->obj_user_to_objects.empty())
+    {
+	return;
+    }
+
+    // Traverse the pages tree, pushing all inherited resources down
+    // to the page level.  key_ancestors maps each page attribute key
+    // to a stack of values supplied by enclosing Pages nodes; pageno
+    // is the current page sequence number, counted from 0.
+    int pageno = 0;
+    std::map<std::string, std::vector<QPDFObjectHandle> > key_ancestors;
+    optimizePagesTree(this->trailer.getKey("/Root").getKey("/Pages"),
+		      key_ancestors, pageno, allow_changes);
+    assert(key_ancestors.empty());
+
+    // Document-level items: every trailer key except /Root, which is
+    // handled key by key below.
+    std::set<std::string> const trailer_keys = this->trailer.getKeys();
+    for (std::set<std::string>::const_iterator tk = trailer_keys.begin();
+	 tk != trailer_keys.end(); ++tk)
+    {
+	if (*tk != "/Root")
+	{
+	    updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, *tk),
+			     this->trailer.getKey(*tk));
+	}
+    }
+
+    // Technically, /I keys from /Thread dictionaries are supposed to
+    // be handled separately, but we are going to disregard that
+    // specification for now.  There is loads of evidence that pdlin
+    // and Acrobat both disregard things like this from time to time,
+    // so this is almost certain not to cause any problems.
+    QPDFObjectHandle root = getRoot();
+    std::set<std::string> const root_keys = root.getKeys();
+    for (std::set<std::string>::const_iterator rk = root_keys.begin();
+	 rk != root_keys.end(); ++rk)
+    {
+	updateObjectMaps(ObjUser(ObjUser::ou_root_key, *rk),
+			 root.getKey(*rk));
+    }
+
+    // The root object itself is used by the special ou_root user.
+    ObjUser root_ou(ObjUser::ou_root);
+    ObjGen root_og(root.getObjectID(), root.getGeneration());
+    obj_user_to_objects[root_ou].insert(root_og);
+    object_to_obj_users[root_og].insert(root_ou);
+
+    filterCompressedObjects(object_stream_data);
+}
+
+// Recursively walk the pages tree rooted at cur_pages.  At each
+// /Pages node, inheritable attributes are removed from the node and
+// pushed onto per-key stacks in key_ancestors; at each /Page leaf,
+// attributes that the page does not define itself are copied from
+// the top of those stacks, and the object maps are updated for that
+// page.  pageno is the zero-based sequence number of the next page
+// and is incremented once per /Page visited.  If allow_changes is
+// false, finding any inheritable attribute throws QPDFExc.  A node
+// whose /Type is neither /Pages nor /Page also throws QPDFExc.
+void
+QPDF::optimizePagesTree(
+    QPDFObjectHandle cur_pages,
+    std::map<std::string, std::vector<QPDFObjectHandle> >& key_ancestors,
+    int& pageno, bool allow_changes)
+{
+    // Determine whether this node is an interior /Pages node or a
+    // /Page leaf.
+    std::string type = cur_pages.getKey("/Type").getName();
+
+    if (type == "/Pages")
+    {
+	// Make a list of inheritable keys.  Any key other than /Type,
+	// /Parent, /Kids, or /Count is an inheritable attribute.  Push
+	// this object onto the stack of pages nodes that have values
+	// for this attribute.
+
+	std::set<std::string> inheritable_keys;
+	// keys is a copy, so removing keys from cur_pages inside the
+	// loop does not disturb the iteration.
+	std::set<std::string> keys = cur_pages.getKeys();
+	for (std::set<std::string>::iterator iter = keys.begin();
+	     iter != keys.end(); ++iter)
+	{
+	    std::string const& key = *iter;
+	    if (! ((key == "/Type") || (key == "/Parent") ||
+		   (key == "/Kids") || (key == "/Count")))
+	    {
+		if (! allow_changes)
+		{
+		    throw QPDFExc(this->file.getName() +
+				  ": optimize detected an "
+				  "inheritable resource");
+		}
+
+		// This is an inheritable resource
+		inheritable_keys.insert(key);
+		QPDFObjectHandle oh = cur_pages.getKey(key);
+		QTC::TC("qpdf", "QPDF opt direct pages resource",
+			oh.isIndirect() ? 0 : 1);
+		if (! oh.isIndirect())
+		{
+		    if (! oh.isScalar())
+		    {
+			// Replace shared direct object non-scalar
+			// resources with indirect objects to avoid
+			// copying large structures around.
+			cur_pages.replaceKey(key, makeIndirectObject(oh));
+			// Re-read the key so that oh now refers to the
+			// indirect object that was just stored.
+			oh = cur_pages.getKey(key);
+		    }
+		    else
+		    {
+			// Don't defeat flattenScalarReferences which
+			// would have already been called by this
+			// time.
+			QTC::TC("qpdf", "QPDF opt inherited scalar");
+		    }
+		}
+		key_ancestors[key].push_back(oh);
+		if (key_ancestors[key].size() > 1)
+		{
+		    QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
+		}
+		// Remove this resource from this node.  It will be
+		// reattached at the page level.
+		cur_pages.removeKey(key);
+	    }
+	}
+
+	// Visit descendant nodes.
+	QPDFObjectHandle kids = cur_pages.getKey("/Kids");
+	int n = kids.getArrayNItems();
+	for (int i = 0; i < n; ++i)
+	{
+	    optimizePagesTree(kids.getArrayItem(i), key_ancestors, pageno,
+			      allow_changes);
+	}
+
+	// For each inheritable key, pop the stack.  If the stack
+	// becomes empty, remove it from the map.  That way, we
+	// maintain the invariant that the list of keys in
+	// key_ancestors is exactly those keys for which inheritable
+	// attributes are available.
+
+	if (! inheritable_keys.empty())
+	{
+	    QTC::TC("qpdf", "QPDF opt inheritable keys");
+	    for (std::set<std::string>::iterator iter =
+		     inheritable_keys.begin();
+		 iter != inheritable_keys.end(); ++iter)
+	    {
+		std::string const& key = (*iter);
+		key_ancestors[key].pop_back();
+		if (key_ancestors[key].empty())
+		{
+		    QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
+		    key_ancestors.erase(key);
+		}
+	    }
+	}
+	else
+	{
+	    QTC::TC("qpdf", "QPDF opt no inheritable keys");
+	}
+    }
+    else if (type == "/Page")
+    {
+	// Add all available inheritable attributes not present in
+	// this object to this object.
+	for (std::map<std::string, std::vector<QPDFObjectHandle> >::iterator
+		 iter = key_ancestors.begin();
+	     iter != key_ancestors.end(); ++iter)
+	{
+	    std::string const& key = (*iter).first;
+	    if (! cur_pages.hasKey(key))
+	    {
+		QTC::TC("qpdf", "QPDF opt resource inherited");
+		// The back of the stack is the value from the nearest
+		// enclosing Pages node that supplied this key.
+		cur_pages.replaceKey(key, (*iter).second.back());
+	    }
+	    else
+	    {
+		QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
+	    }
+	}
+
+	// Traverse from this point, updating the mappings of object
+	// users to objects and objects to object users.
+
+	updateObjectMaps(ObjUser(ObjUser::ou_page, pageno), cur_pages);
+
+	// Increment pageno so that its value will be correct for the
+	// next page.
+	++pageno;
+    }
+    else
+    {
+	throw QPDFExc(this->file.getName() + ": invalid Type in page tree");
+    }
+}
+
+// Record every indirect object reachable from oh as being used by
+// ou.  Each call starts with its own empty visited set and treats oh
+// as the top of the traversal.
+void
+QPDF::updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh)
+{
+    bool const at_top = true;
+    std::set<ObjGen> seen;
+    updateObjectMapsInternal(ou, oh, seen, at_top);
+}
+
+// Recursive worker for updateObjectMaps.  Registers oh (when it is
+// indirect) as used by ou and then descends into array items and
+// dictionary or stream-dictionary values.  visited holds the
+// indirect objects already seen during this traversal; top is true
+// only for the object the traversal started from.
+void
+QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
+			       std::set<ObjGen>& visited, bool top)
+{
+    // Traverse the object tree from this point taking care to avoid
+    // crossing page boundaries: a /Page dictionary is only entered
+    // when it is the starting point of the traversal.
+    bool const is_page_node =
+	(oh.isDictionary() && oh.hasKey("/Type") &&
+	 (oh.getKey("/Type").getName() == "/Page"));
+    if (is_page_node && (! top))
+    {
+	return;
+    }
+
+    if (oh.isIndirect())
+    {
+	ObjGen og(oh.getObjectID(), oh.getGeneration());
+	if (! visited.insert(og).second)
+	{
+	    QTC::TC("qpdf", "QPDF opt loop detected");
+	    return;
+	}
+	this->obj_user_to_objects[ou].insert(og);
+	this->object_to_obj_users[og].insert(ou);
+    }
+
+    if (oh.isArray())
+    {
+	int const count = oh.getArrayNItems();
+	for (int idx = 0; idx < count; ++idx)
+	{
+	    updateObjectMapsInternal(ou, oh.getArrayItem(idx),
+				     visited, false);
+	}
+	return;
+    }
+
+    if (! (oh.isDictionary() || oh.isStream()))
+    {
+	return;
+    }
+
+    // For a stream, the keys live in its dictionary.
+    QPDFObjectHandle dict = oh.isStream() ? oh.getDict() : oh;
+
+    std::set<std::string> const names = dict.getKeys();
+    for (std::set<std::string>::const_iterator it = names.begin();
+	 it != names.end(); ++it)
+    {
+	std::string const& name = *it;
+	if (is_page_node && (name == "/Parent"))
+	{
+	    // Don't traverse back up the page tree
+	    continue;
+	}
+	if (is_page_node && (name == "/Thumb"))
+	{
+	    // Page thumbnails are a special case: they are traversed
+	    // as a separate user of type ou_thumb for the same page.
+	    updateObjectMaps(ObjUser(ObjUser::ou_thumb, ou.pageno),
+			     dict.getKey(name));
+	    continue;
+	}
+	updateObjectMapsInternal(ou, dict.getKey(name), visited, false);
+    }
+}
+
+// Rewrite obj_user_to_objects and object_to_obj_users so that they
+// refer only to uncompressed objects.  object_stream_data maps the
+// object number of each compressed object to the object number of
+// the object stream that contains it; a user of a compressed object
+// is really a user of that object stream.  Object streams are always
+// recorded with generation 0.  Nothing is changed when
+// object_stream_data is empty.
+void
+QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data)
+{
+    if (object_stream_data.empty())
+    {
+	return;
+    }
+
+    std::map<ObjUser, std::set<ObjGen> > t_obj_user_to_objects;
+    std::map<ObjGen, std::set<ObjUser> > t_object_to_obj_users;
+
+    for (std::map<ObjUser, std::set<ObjGen> >::const_iterator i1 =
+	     this->obj_user_to_objects.begin();
+	 i1 != this->obj_user_to_objects.end(); ++i1)
+    {
+	std::set<ObjGen> const& objects = (*i1).second;
+	if (objects.empty())
+	{
+	    // Don't create an entry for a user with no objects.
+	    continue;
+	}
+
+	// The destination set depends only on the user, so look it
+	// up once rather than once per object.
+	std::set<ObjGen>& filtered = t_obj_user_to_objects[(*i1).first];
+	for (std::set<ObjGen>::const_iterator i2 = objects.begin();
+	     i2 != objects.end(); ++i2)
+	{
+	    ObjGen const& og = (*i2);
+	    std::map<int, int>::const_iterator i3 =
+		object_stream_data.find(og.obj);
+	    if (i3 == object_stream_data.end())
+	    {
+		filtered.insert(og);
+	    }
+	    else
+	    {
+		filtered.insert(ObjGen((*i3).second, 0));
+	    }
+	}
+    }
+
+    for (std::map<ObjGen, std::set<ObjUser> >::const_iterator i1 =
+	     this->object_to_obj_users.begin();
+	 i1 != this->object_to_obj_users.end(); ++i1)
+    {
+	ObjGen const& og = (*i1).first;
+	std::set<ObjUser> const& objusers = (*i1).second;
+	if (objusers.empty())
+	{
+	    // Don't create an entry for an object with no users.
+	    continue;
+	}
+
+	// Whether og is compressed does not depend on the user, so
+	// resolve the target object once and merge all of its users
+	// in a single range insert.
+	std::map<int, int>::const_iterator i3 =
+	    object_stream_data.find(og.obj);
+	ObjGen const target =
+	    (i3 == object_stream_data.end())
+	    ? og
+	    : ObjGen((*i3).second, 0);
+	t_object_to_obj_users[target].insert(
+	    objusers.begin(), objusers.end());
+    }
+
+    // Swap rather than assign: the temporaries are discarded on
+    // return, so there is no need to deep-copy both maps.
+    this->obj_user_to_objects.swap(t_obj_user_to_objects);
+    this->object_to_obj_users.swap(t_object_to_obj_users);
+}
diff --git a/libqpdf/QTC.cc b/libqpdf/QTC.cc
new file mode 100644
index 00000000..b8328b2e
--- /dev/null
+++ b/libqpdf/QTC.cc
@@ -0,0 +1,46 @@
+
+#include <qpdf/QTC.hh>
+
+#include <set>
+#include <stdio.h>
+#include <qpdf/QUtil.hh>
+
+// Test coverage is active for a scope only when the TC_SCOPE
+// environment variable is set and equal to that scope.
+static bool tc_active(char const* const scope)
+{
+    std::string active_scope;
+    if (! QUtil::get_env("TC_SCOPE", &active_scope))
+    {
+	return false;
+    }
+    return (active_scope == scope);
+}
+
+// Record that coverage case ccase, with numeric qualifier n, was
+// reached.  Nothing happens unless coverage is active for scope and
+// the environment names a coverage file (TC_WIN_FILENAME on Windows,
+// TC_FILENAME elsewhere).  Each distinct (ccase, n) pair is appended
+// to that file at most once per process as "<ccase> <n>".
+void QTC::TC(char const* const scope, char const* const ccase, int n)
+{
+    static std::set<std::pair<std::string, int> > cache;
+
+#ifdef _WIN32
+    static char const* const filename_var = "TC_WIN_FILENAME";
+#else
+    static char const* const filename_var = "TC_FILENAME";
+#endif
+
+    if (! tc_active(scope))
+    {
+	return;
+    }
+
+    std::string filename;
+    if (! QUtil::get_env(filename_var, &filename))
+    {
+	return;
+    }
+
+    // insert() reports through .second whether the pair was new; a
+    // pair that was already present has been logged before.
+    if (! cache.insert(std::make_pair(std::string(ccase), n)).second)
+    {
+	return;
+    }
+
+    FILE* tc =
+	QUtil::fopen_wrapper("open test coverage file (" + filename + ")",
+			     fopen(filename.c_str(), "ab"));
+    fprintf(tc, "%s %d\n", ccase, n);
+    fclose(tc);
+}
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
new file mode 100644
index 00000000..c0de95f7
--- /dev/null
+++ b/libqpdf/QUtil.cc
@@ -0,0 +1,198 @@
+
+#include <qpdf/QUtil.hh>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <stdlib.h>
+#ifdef _WIN32
+#include <Windows.h>
+#include <direct.h>
+#else
+#include <unistd.h>
+#endif
+
+// Format num in decimal.  A non-zero fullpad is used as the field
+// width of a zero-padded conversion ("%0*d").  Throws QEXC::Internal
+// if the magnitude of fullpad exceeds what the internal buffer can
+// hold.
+std::string
+QUtil::int_to_string(int num, int fullpad)
+{
+    // This routine will need to be recompiled if an int can be longer
+    // than 49 digits.
+    char buf[50];
+
+    // Reserve one byte for the NUL, plus one more for the sign when
+    // the number is negative.
+    int const reserved = (num < 0) ? 2 : 1;
+    int const max_pad = (int)sizeof(buf) - reserved;
+    if (abs(fullpad) > max_pad)
+    {
+	throw QEXC::Internal("Util::int_to_string has been called with "
+			     "a padding value greater than its internal "
+			     "limit");
+    }
+
+    if (fullpad == 0)
+    {
+	sprintf(buf, "%d", num);
+    }
+    else
+    {
+	sprintf(buf, "%0*d", fullpad, num);
+    }
+
+    return std::string(buf);
+}
+
+// Format num in fixed-point notation.  A non-zero decimal_places is
+// passed to printf as the precision ("%.*f"); zero selects plain
+// "%f".  Throws QEXC::Internal if the requested precision could not
+// be guaranteed to fit in the internal buffer.
+//
+// The buffer is sized for the worst case rather than for the value at
+// hand.  "%f" prints every integral digit, so a large magnitude such
+// as 1e300 produces hundreds of characters; estimating the length
+// from (int)num both underestimates that and is undefined when num is
+// outside the range of int.
+std::string
+QUtil::double_to_string(double num, int decimal_places)
+{
+    // The largest finite IEEE 754 double has 309 digits before the
+    // decimal point; one more character allows for a minus sign.
+    int const max_integral_chars = 310;
+    char t[512];
+
+    // printf uses a precision of 6 when none is given, and treats a
+    // negative precision argument as if it had been omitted.
+    int const places = (decimal_places > 0) ? decimal_places : 6;
+
+    // Room needed: integral part + dot + fractional digits + NUL.
+    // Written as a comparison against the remaining space so that a
+    // huge decimal_places cannot overflow the arithmetic.
+    int const max_places = (int)sizeof(t) - 1 - 1 - max_integral_chars;
+    if (places > max_places)
+    {
+	throw QEXC::Internal("Util::double_to_string has been called with "
+			     "a number and a decimal places specification "
+			     "that would break an internal limit");
+    }
+
+    if (decimal_places)
+    {
+	sprintf(t, "%.*f", decimal_places, num);
+    }
+    else
+    {
+	sprintf(t, "%f", num);
+    }
+    return std::string(t);
+}
+
+// Pass through the status of a system call, converting the
+// conventional failure value -1 into a QEXC::System exception built
+// from description and the current errno.
+int
+QUtil::os_wrapper(std::string const& description, int status) throw (QEXC::System)
+{
+    if (status != -1)
+    {
+	return status;
+    }
+    throw QEXC::System(description, errno);
+}
+
+// Pass through the result of fopen, converting a null FILE* into a
+// QEXC::System exception built from description and the current
+// errno.
+FILE*
+QUtil::fopen_wrapper(std::string const& description, FILE* f) throw (QEXC::System)
+{
+    if (f != 0)
+    {
+	return f;
+    }
+    throw QEXC::System(description, errno);
+}
+
+// Return a newly allocated, NUL-terminated copy of str.  The copy is
+// made with new[], so the caller must release it with delete [].
+// All str.length() characters are copied, including any embedded
+// NULs.
+char*
+QUtil::copy_string(std::string const& str)
+{
+    std::string::size_type const len = str.length();
+    char* result = new char[len + 1];
+    // std::string::copy transfers exactly len characters and does not
+    // append a terminator.  Unlike memcpy, it needs no header beyond
+    // <string>, and this file does not include <string.h>.
+    str.copy(result, len);
+    result[len] = '\0';
+    return result;
+}
+
+// Look up environment variable var.  Returns false if it is not
+// found.  Otherwise returns true and, when value is non-null, stores
+// the variable's contents in *value.
+bool
+QUtil::get_env(std::string const& var, std::string* value)
+{
+    // This was basically ripped out of wxWindows.
+#ifdef _WIN32
+    // Ask for the required buffer size first; 0 is taken to mean
+    // that there is no such variable.
+    DWORD len = ::GetEnvironmentVariable(var.c_str(), NULL, 0);
+    if (len == 0)
+    {
+	return false;
+    }
+    if (value == 0)
+    {
+	return true;
+    }
+
+    char* t = new char[len + 1];
+    ::GetEnvironmentVariable(var.c_str(), t, len);
+    *value = t;
+    delete [] t;
+    return true;
+#else
+    char const* found = getenv(var.c_str());
+    if (found == 0)
+    {
+	return false;
+    }
+    if (value != 0)
+    {
+	*value = found;
+    }
+    return true;
+#endif
+}
+
+// Return the UTF-8 encoding of the value uval.  Values below 128
+// produce a single byte; larger values produce a multi-byte sequence.
+// Throws QEXC::General if uval is greater than 0x7fffffff.
+std::string
+QUtil::toUTF8(unsigned long uval)
+{
+    std::string result;
+
+    // A UTF-8 encoding of a Unicode value is a single byte for
+    // Unicode values <= 127.  For larger values, the first byte of
+    // the UTF-8 encoding has '1' as each of its n highest bits and
+    // '0' for its (n+1)th highest bit where n is the total number of
+    // bytes required.  Subsequent bytes start with '10' and have the
+    // remaining 6 bits free for encoding.  For example, an 11-bit
+    // unicode value can be stored in two bytes where the first is
+    // 110zzzzz, the second is 10zzzzzz, and the z's represent the
+    // remaining bits.
+
+    if (uval > 0x7fffffff)
+    {
+	throw QEXC::General("bounds error in QUtil::toUTF8");
+    }
+    else if (uval < 128)
+    {
+	result += (char)(uval);
+    }
+    else
+    {
+	// The sequence is built right to left in bytes[0..5];
+	// bytes[6] is a NUL terminator so the finished sequence can
+	// be appended to result as a C string.
+	unsigned char bytes[7];
+	bytes[6] = '\0';
+	unsigned char* cur_byte = &bytes[5];
+
+	// maximum value that will fit in the current number of bytes
+	unsigned char maxval = 0x3f; // six bits
+
+	while (uval > maxval)
+	{
+	    // Assign low six bits plus 10000000 to lowest unused
+	    // byte position, then shift
+	    *cur_byte = (unsigned char) (0x80 + (uval & 0x3f));
+	    uval >>= 6;
+	    // Maximum that will fit in high byte now shrinks by one bit
+	    maxval >>= 1;
+	    // Slide to the left one byte
+	    --cur_byte;
+	    // Defensive check against running off the front of the
+	    // array.
+	    if (cur_byte < bytes)
+	    {
+		throw QEXC::Internal("QUtil::toUTF8: overflow error");
+	    }
+	}
+	// If maxval is k bits long, the high (7 - k) bits of the
+	// resulting byte must be high.
+	*cur_byte = (unsigned char)((0xff - (1 + (maxval << 1))) + uval);
+
+	// Every byte written above is non-zero (continuation bytes
+	// are at least 0x80), so the string ends at bytes[6].
+	result += (char*)cur_byte;
+    }
+
+    return result;
+}
diff --git a/libqpdf/RC4.cc b/libqpdf/RC4.cc
new file mode 100644
index 00000000..74b538b5
--- /dev/null
+++ b/libqpdf/RC4.cc
@@ -0,0 +1,56 @@
+
+#include <qpdf/RC4.hh>
+
+#include <string.h>
+
+// Exchange the two bytes.  A temporary is used, so this is safe when
+// a and b refer to the same byte.
+static void swap_byte(unsigned char &a, unsigned char &b)
+{
+    unsigned char const held = a;
+    a = b;
+    b = held;
+}
+
+// Initialize the cipher state from key_data.  A key_len of -1 means
+// that key_data is a NUL-terminated string whose length is to be
+// measured with strlen.
+RC4::RC4(unsigned char const* key_data, int key_len)
+{
+    if (key_len == -1)
+    {
+	key_len = strlen(reinterpret_cast<char const*>(key_data));
+    }
+
+    // Start from the identity permutation with both indices at zero.
+    key.x = 0;
+    key.y = 0;
+    for (int n = 0; n < 256; ++n)
+    {
+	key.state[n] = n;
+    }
+
+    // Mix the key into the permutation, cycling through the key
+    // bytes as many times as needed.
+    int key_pos = 0;
+    int mixed = 0;
+    for (int n = 0; n < 256; ++n)
+    {
+	mixed = (key_data[key_pos] + key.state[n] + mixed) % 256;
+	swap_byte(key.state[n], key.state[mixed]);
+	key_pos = (key_pos + 1) % key_len;
+    }
+}
+
+// Run len bytes of in_data through the cipher, advancing the key
+// state.  Output goes to out_data, or back into in_data when
+// out_data is null.
+void
+RC4::process(unsigned char *in_data, int len, unsigned char* out_data)
+{
+    // A null output pointer requests in-place conversion.
+    unsigned char* const dest = (out_data == 0) ? in_data : out_data;
+
+    for (int n = 0; n < len; ++n)
+    {
+	key.x = (key.x + 1) % 256;
+	key.y = (key.state[key.x] + key.y) % 256;
+	swap_byte(key.state[key.x], key.state[key.y]);
+	int const pick = (key.state[key.x] + key.state[key.y]) % 256;
+	dest[n] = in_data[n] ^ key.state[pick];
+    }
+}
diff --git a/libqpdf/bits.icc b/libqpdf/bits.icc
new file mode 100644
index 00000000..465bf5b9
--- /dev/null
+++ b/libqpdf/bits.icc
@@ -0,0 +1,149 @@
+
+#ifndef __BITS_CC__
+#define __BITS_CC__
+
+#include <algorithm>
+#include <qpdf/QTC.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/Pipeline.hh>
+
+// These functions may be run at places where the function call
+// overhead from test coverage testing would be too high.  Therefore,
+// we make the test coverage cases conditional upon the preprocessor
+// symbol BITS_TESTING.  BitStream.cc and BitWriter.cc include this
+// file without defining that symbol; the specially designed test
+// code that fully exercises this code includes it with the symbol
+// defined.  Define BITS_READ and/or BITS_WRITE before including this
+// file to select which of read_bits and write_bits is compiled.
+
+#ifdef BITS_READ
+// Read bits_wanted bits (at most 32) from the bit stream at p and
+// return them right-justified, with the first bit read as the most
+// significant bit of the result.  p, bit_offset, and bits_available
+// are updated in place to describe the position just past the bits
+// consumed.  Throws QEXC::General if fewer than bits_wanted bits are
+// available and QEXC::Internal if more than 32 bits are requested.
+static unsigned long
+read_bits(unsigned char const*& p, unsigned int& bit_offset,
+	  unsigned int& bits_available, unsigned int bits_wanted)
+{
+    // View p as a stream of bits:
+
+    // 76543210 76543210 ....
+
+    // bit_offset is the bit number within the first byte that marks
+    // the first bit that we would read.
+
+    if (bits_wanted > bits_available)
+    {
+	throw QEXC::General("overflow reading bit stream");
+    }
+    if (bits_wanted > 32)
+    {
+	throw QEXC::Internal("read_bits: too many bits requested");
+    }
+
+    unsigned long result = 0;
+#ifdef BITS_TESTING
+    if (bits_wanted == 0)
+    {
+	QTC::TC("libtests", "bits zero bits wanted");
+    }
+#endif
+    // Each iteration consumes bits from one byte only: either the
+    // rest of the current byte or the number of bits still wanted,
+    // whichever is smaller.
+    while (bits_wanted > 0)
+    {
+	// Grab bits from the first byte clearing anything before
+	// bit_offset.
+	unsigned char byte = *p & ((1 << (bit_offset + 1)) - 1);
+
+	// There are bit_offset + 1 bits available in the first byte.
+	unsigned int to_copy = std::min(bits_wanted, bit_offset + 1);
+	// leftover is the number of unread bits that remain in this
+	// byte after this iteration.
+	unsigned int leftover = (bit_offset + 1) - to_copy;
+
+#ifdef BITS_TESTING
+	QTC::TC("libtests", "bits bit_offset",
+		((bit_offset == 0) ? 0 :
+		 (bit_offset == 7) ? 1 :
+		 2));
+	QTC::TC("libtests", "bits leftover", (leftover > 0) ? 1 : 0);
+#endif
+
+	// Right shift so that all the bits we want are right justified.
+	byte >>= leftover;
+
+	// Copy the bits into result
+	result <<= to_copy;
+	result |= byte;
+
+	// Update pointers
+	if (leftover)
+	{
+	    // Stay in this byte; the next unread bit is leftover - 1.
+	    bit_offset = leftover - 1;
+	}
+	else
+	{
+	    // This byte is used up; move to the top bit of the next.
+	    bit_offset = 7;
+	    ++p;
+	}
+	bits_wanted -= to_copy;
+	bits_available -= to_copy;
+
+#ifdef BITS_TESTING
+	QTC::TC("libtests", "bits iterations",
+		((bits_wanted > 8) ? 0 :
+		 (bits_wanted > 0) ? 1 :
+		 2));
+#endif
+    }
+
+    return result;
+}
+#endif
+
+#ifdef BITS_WRITE
+// Write the low "bits" bits of val (at most 32), most significant
+// bit first.  ch accumulates the byte being assembled and bit_offset
+// is the number of the next free bit within it; both are updated in
+// place.  Each time ch fills up it is written to pipeline as a single
+// byte and reset to 0.  A partial byte left over at the end stays in
+// ch and is not written here.  Throws QEXC::Internal if more than 32
+// bits are requested.
+static void
+write_bits(unsigned char& ch, unsigned int& bit_offset,
+	   unsigned long val, unsigned bits, Pipeline* pipeline)
+{
+    if (bits > 32)
+    {
+	throw QEXC::Internal("write_bits: too many bits requested");
+    }
+
+    // bit_offset + 1 is the number of bits left in ch
+#ifdef BITS_TESTING
+    if (bits == 0)
+    {
+	QTC::TC("libtests", "bits write zero bits");
+    }
+#endif
+    // Each iteration places as many of the remaining bits as fit in
+    // the free part of ch.
+    while (bits > 0)
+    {
+	int bits_to_write = std::min(bits, bit_offset + 1);
+	// Take the top bits_to_write bits of the part of val that
+	// has not been written yet.
+	unsigned char newval =
+	    (val >> (bits - bits_to_write)) & ((1 << bits_to_write) - 1);
+	int bits_left_in_ch = bit_offset + 1 - bits_to_write;
+	// Line the new bits up with the free position in ch.
+	newval <<= bits_left_in_ch;
+	ch |= newval;
+	if (bits_left_in_ch == 0)
+	{
+#ifdef BITS_TESTING
+	    QTC::TC("libtests", "bits write pipeline");
+#endif
+	    // The byte is complete: send it on and start a new one.
+	    pipeline->write(&ch, 1);
+	    bit_offset = 7;
+	    ch = 0;
+	}
+	else
+	{
+#ifdef BITS_TESTING
+	    QTC::TC("libtests", "bits write leftover");
+#endif
+	    bit_offset -= bits_to_write;
+	}
+	bits -= bits_to_write;
+#ifdef BITS_TESTING
+	QTC::TC("libtests", "bits write iterations",
+		((bits > 8) ? 0 :
+		 (bits > 0) ? 1 :
+		 2));
+#endif
+    }
+
+}
+#endif
+
+
+#endif // __BITS_CC__
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
new file mode 100644
index 00000000..9733cb9f
--- /dev/null
+++ b/libqpdf/build.mk
@@ -0,0 +1,73 @@
+# Build rules for the libqpdf libtool library.  The helper macros
+# called below (src_to_lobj, lobj_to_dep, libcompile, makelib) and
+# OUTPUT_DIR are not defined in this file -- presumably they come
+# from the top-level make rules; confirm there.
+
+# The single target produced from this directory.
+TARGETS_libqpdf = \
+	libqpdf/$(OUTPUT_DIR)/libqpdf.la
+
+# Directories passed to libcompile as include paths.
+INCLUDES_libqpdf = include libqpdf
+
+# Every source file that goes into the library.
+SRCS_libqpdf = \
+	libqpdf/BitStream.cc \
+	libqpdf/BitWriter.cc \
+	libqpdf/Buffer.cc \
+	libqpdf/MD5.cc \
+	libqpdf/PCRE.cc \
+	libqpdf/Pipeline.cc \
+	libqpdf/Pl_ASCII85Decoder.cc \
+	libqpdf/Pl_ASCIIHexDecoder.cc \
+	libqpdf/Pl_Buffer.cc \
+	libqpdf/Pl_Count.cc \
+	libqpdf/Pl_Discard.cc \
+	libqpdf/Pl_Flate.cc \
+	libqpdf/Pl_LZWDecoder.cc \
+	libqpdf/Pl_MD5.cc \
+	libqpdf/Pl_PNGFilter.cc \
+	libqpdf/Pl_QPDFTokenizer.cc \
+	libqpdf/Pl_RC4.cc \
+	libqpdf/Pl_StdioFile.cc \
+	libqpdf/QEXC.cc \
+	libqpdf/QPDF.cc \
+	libqpdf/QPDFExc.cc \
+	libqpdf/QPDFObject.cc \
+	libqpdf/QPDFObjectHandle.cc \
+	libqpdf/QPDFTokenizer.cc \
+	libqpdf/QPDFWriter.cc \
+	libqpdf/QPDFXRefEntry.cc \
+	libqpdf/QPDF_Array.cc \
+	libqpdf/QPDF_Bool.cc \
+	libqpdf/QPDF_Dictionary.cc \
+	libqpdf/QPDF_Integer.cc \
+	libqpdf/QPDF_Name.cc \
+	libqpdf/QPDF_Null.cc \
+	libqpdf/QPDF_Real.cc \
+	libqpdf/QPDF_Stream.cc \
+	libqpdf/QPDF_String.cc \
+	libqpdf/QPDF_encryption.cc \
+	libqpdf/QPDF_linearization.cc \
+	libqpdf/QPDF_optimization.cc \
+	libqpdf/QTC.cc \
+	libqpdf/QUtil.cc \
+	libqpdf/RC4.cc
+
+# -----
+
+# Object list derived from the source list.
+OBJS_libqpdf = $(call src_to_lobj,$(SRCS_libqpdf))
+
+# Pull in generated dependency files only when GENDEPS is 1.  The
+# leading "-" keeps make from failing if they do not exist yet.
+ifeq ($(GENDEPS),1)
+-include $(call lobj_to_dep,$(OBJS_libqpdf))
+endif
+
+# Static pattern rule: each .lo object under OUTPUT_DIR is compiled
+# from the .cc file of the same base name in libqpdf.
+$(OBJS_libqpdf): libqpdf/$(OUTPUT_DIR)/%.lo: libqpdf/%.cc
+	$(call libcompile,$<,$(INCLUDES_libqpdf))
+
+# Last three arguments to makelib are CURRENT,REVISION,AGE.
+#
+# * If any interfaces have been removed or changed, we are not binary
+#   compatible.  Increment CURRENT, and set AGE and REVISION to 0.
+#
+# * Otherwise, if any interfaces have been added since the last
+#   public release, then increment CURRENT and AGE, and set REVISION
+#   to 0.
+#
+# * Otherwise, increment REVISION
+
+libqpdf/$(OUTPUT_DIR)/libqpdf.la: $(OBJS_libqpdf)
+	$(call makelib,$(OBJS_libqpdf),$@,1,0,0)
+
diff --git a/libqpdf/qpdf/BitStream.hh b/libqpdf/qpdf/BitStream.hh
new file mode 100644
index 00000000..d02eea42
--- /dev/null
+++ b/libqpdf/qpdf/BitStream.hh
@@ -0,0 +1,23 @@
+// Read bits from a bit stream.  See BitWriter for writing.
+
+#ifndef __BITSTREAM_HH__
+#define __BITSTREAM_HH__
+
+// Reads an in-memory byte array as a sequence of bits, starting with
+// the most significant bit of the first byte.  The object keeps only
+// a pointer to the data; it does not copy it.
+class BitStream
+{
+  public:
+    // p points to nbytes bytes of data to read from.
+    BitStream(unsigned char const* p, int nbytes);
+    // Return to the first bit of the data.
+    void reset();
+    // Read the next nbits bits and return them as a number.
+    unsigned long getBits(int nbits);
+    // If the read position is in the middle of a byte, skip the rest
+    // of that byte; otherwise do nothing.
+    void skipToNextByte();
+
+  private:
+    // The data as supplied to the constructor; used by reset().
+    unsigned char const* start;
+    int nbytes;
+
+    // Current read position: the byte being read, the number of the
+    // next bit to read within it (7 is the first bit of a byte), and
+    // the total number of bits not yet consumed.
+    unsigned char const* p;
+    unsigned int bit_offset;
+    unsigned int bits_available;
+};
+
+#endif // __BITSTREAM_HH__
diff --git a/libqpdf/qpdf/BitWriter.hh b/libqpdf/qpdf/BitWriter.hh
new file mode 100644
index 00000000..1efd498a
--- /dev/null
+++ b/libqpdf/qpdf/BitWriter.hh
@@ -0,0 +1,24 @@
+// Write bits into a bit stream.  See BitStream for reading.
+
+// The guard is named after this header, like the one in
+// BitStream.hh.  A generic placeholder name would silently suppress
+// this header's contents if any other header used the same
+// placeholder.
+#ifndef __BITWRITER_HH__
+#define __BITWRITER_HH__
+
+class Pipeline;
+
+// Packs values of arbitrary bit width into bytes, most significant
+// bit first, and writes each completed byte to a pipeline.
+class BitWriter
+{
+  public:
+    // Write bits to the pipeline.  It is the caller's responsibility
+    // to eventually call finish on the pipeline.
+    BitWriter(Pipeline* pl);
+    // Append the low "bits" bits of val to the output.
+    void writeBits(unsigned long val, int bits);
+    // Force any partial byte to be written to the pipeline.  The
+    // unused low-order bits of that byte are written as zeros.
+    void flush();
+
+  private:
+    // Destination for completed bytes.
+    Pipeline* pl;
+    // The byte currently being assembled.
+    unsigned char ch;
+    // Number of the next free bit in ch; 7 means ch is empty.
+    unsigned int bit_offset;
+};
+
+#endif // __BITWRITER_HH__
diff --git a/libqpdf/qpdf/MD5.hh b/libqpdf/qpdf/MD5.hh
new file mode 100644
index 00000000..0ae15da9
--- /dev/null
+++ b/libqpdf/qpdf/MD5.hh
@@ -0,0 +1,73 @@
+
+#ifndef __MD5_HH__
+#define __MD5_HH__
+
+#include <string>
+#include <qpdf/QEXC.hh>
+
+// Computes MD5 digests of strings, files, or arbitrary data supplied
+// incrementally, and provides static convenience functions for
+// computing and checking checksums.
+class MD5
+{
+  public:
+    // A raw digest: 16 bytes.
+    typedef unsigned char Digest[16];
+
+    MD5();
+    void reset();
+
+    // encodes string and finalizes
+    void encodeString(char const* input_string);
+
+    // encodes file and finalizes
+    // NOTE(review): up_to_size presumably limits how many bytes of
+    // the file are read, with -1 meaning the whole file -- confirm
+    // against MD5.cc.
+    void encodeFile(char const* filename, int up_to_size = -1)
+	throw(QEXC::System);
+
+    // appends string to current md5 object
+    void appendString(char const* input_string);
+
+    // appends arbitrary data to current md5 object
+    void encodeDataIncrementally(char const* input_data, int len);
+
+    // computes a raw digest
+    void digest(Digest);
+
+    // prints the digest to stdout terminated with \r\n (primarily for
+    // testing)
+    void print();
+
+    // returns the digest as a hexadecimal string
+    std::string unparse();
+
+    // Convenience functions
+    static std::string getDataChecksum(char const* buf, int len);
+    static std::string getFileChecksum(char const* filename, int up_to_size = -1);
+    static bool checkDataChecksum(char const* const checksum,
+				  char const* buf, int len);
+    static bool checkFileChecksum(char const* const checksum,
+				  char const* filename, int up_to_size = -1);
+
+  private:
+    // POINTER defines a generic pointer type
+    typedef void *POINTER;
+
+    // UINT2 defines a two byte word
+    typedef unsigned short int UINT2;
+
+    // UINT4 defines a four byte word
+    // NOTE(review): unsigned long is eight bytes on LP64 platforms,
+    // so this is not always a four byte word -- confirm that MD5.cc
+    // does not depend on the exact width.
+    typedef unsigned long int UINT4;
+
+    void init();
+    void update(unsigned char *, unsigned int);
+    void final();
+
+    static void transform(UINT4 [4], unsigned char [64]);
+    static void encode(unsigned char *, UINT4 *, unsigned int);
+    static void decode(UINT4 *, unsigned char *, unsigned int);
+
+    UINT4 state[4];		// state (ABCD)
+    UINT4 count[2];		// number of bits, modulo 2^64 (lsb first)
+    unsigned char buffer[64];	// input buffer
+
+    // NOTE(review): finalized and digest_val presumably record
+    // whether the digest has been computed and hold its value --
+    // confirm against MD5.cc.
+    bool finalized;
+    Digest digest_val;
+};
+
+#endif // __MD5_HH__
diff --git a/libqpdf/qpdf/PCRE.hh b/libqpdf/qpdf/PCRE.hh
new file mode 100644
index 00000000..a226aa19
--- /dev/null
+++ b/libqpdf/qpdf/PCRE.hh
@@ -0,0 +1,107 @@
+// This is a C++ wrapper class around Philip Hazel's perl-compatible
+// regular expressions library.
+//
+
+#ifndef __PCRE_HH__
+#define __PCRE_HH__
+
+#include <pcre.h>
+#include <string>
+
+#include <qpdf/QEXC.hh>
+
+// Note: this class does not encapsulate all features of the PCRE
+// package -- only those that I actually need right now are here.
+
+// A compiled regular expression.  match() applies it to a subject
+// string and returns a Match object through which the matched text
+// and back references can be retrieved.  PCRE objects cannot be
+// copied or assigned.
+class PCRE
+{
+  public:
+    // Base class for errors reported by this wrapper.
+    class Exception: public QEXC::General
+    {
+      public:
+	Exception(std::string const& message);
+	virtual ~Exception() throw() {}
+    };
+
+    // This is thrown when an attempt is made to access a non-existent
+    // back reference.
+    class NoBackref: public Exception
+    {
+      public:
+	NoBackref();
+	virtual ~NoBackref() throw() {}
+    };
+
+    // The result of one match attempt.  Converts to bool; presumably
+    // true when the expression matched -- confirm against PCRE.cc.
+    class Match
+    {
+	friend class PCRE;
+      public:
+	Match(int nbackrefs, char const* subject);
+	Match(Match const&);
+	Match& operator=(Match const&);
+	~Match();
+	operator bool();
+
+	// All the back reference accessing routines may throw the
+	// special exception NoBackref (derived from Exception) if the
+	// back reference does not exist.  Exception will be thrown
+	// for other error conditions.  This allows callers to trap
+	// this condition explicitly when they care about the
+	// difference between a backreference matching an empty string
+	// and not matching at all.
+
+	// see getMatch flags below
+	std::string getMatch(int n, int flags = 0)
+	    throw(QEXC::General, Exception);
+	void getOffsetLength(int n, int& offset, int& length) throw(Exception);
+	int getOffset(int n) throw(Exception);
+	int getLength(int n) throw(Exception);
+
+	// nMatches returns the number of available matches including
+	// match 0 which is the whole string.  In other words, if you
+	// have one backreference in your expression and the
+	// expression matches, nMatches() will return 2, getMatch(0)
+	// will return the whole string, getMatch(1) will return the
+	// text that matched the backreference, and getMatch(2) will
+	// throw an exception because it is out of range.
+	int nMatches() const;
+
+	// Flags for getMatch
+
+	// getMatch on a substring that didn't match should return
+	// empty string instead of throwing an exception
+	static int const gm_no_substring_returns_empty = (1 << 0);
+
+      private:
+	// Helpers shared by the constructors, assignment operator,
+	// and destructor.
+	void init(int nmatches, int nbackrefs, char const* subject);
+	void copy(Match const&);
+	void destroy();
+
+	int nbackrefs;
+	// The subject is stored as a pointer only.  NOTE(review): this
+	// suggests the subject string must outlive the Match -- confirm
+	// against PCRE.cc.
+	char const* subject;
+	// NOTE(review): ovector/ovecsize are presumably the offset
+	// vector handed to pcre_exec and its size -- confirm.
+	int* ovector;
+	int ovecsize;
+	int nmatches;
+    };
+
+    // The value passed in as options is passed to pcre_exec.  See man
+    // pcreapi for details.
+    // NOTE(review): for the constructor, the options presumably go to
+    // pcre_compile rather than pcre_exec -- confirm against PCRE.cc.
+    PCRE(char const* pattern, int options = 0) throw(Exception);
+    ~PCRE();
+
+    Match match(char const* subject, int options = 0, int startoffset = 0,
+		int size = -1)
+	throw(QEXC::General, Exception);
+
+    static void test(int n = 0);
+
+  private:
+    // prohibit copying and assignment
+    PCRE(PCRE const&);
+    PCRE& operator=(PCRE const&);
+
+    // The compiled expression.
+    pcre* code;
+    int nbackrefs;
+};
+
+#endif // __PCRE_HH__
diff --git a/libqpdf/qpdf/Pl_ASCII85Decoder.hh b/libqpdf/qpdf/Pl_ASCII85Decoder.hh
new file mode 100644
index 00000000..9883a58e
--- /dev/null
+++ b/libqpdf/qpdf/Pl_ASCII85Decoder.hh
@@ -0,0 +1,23 @@
+
+#ifndef __PL_ASCII85DECODER_HH__
+#define __PL_ASCII85DECODER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_ASCII85Decoder: public Pipeline
+{
+  public:
+    Pl_ASCII85Decoder(char const* identifier, Pipeline* next);
+    virtual ~Pl_ASCII85Decoder();
+    virtual void write(unsigned char* buf, int len);
+    virtual void finish();
+
+  private:
+    void flush();
+
+    char inbuf[5];
+    int pos;
+    int eod;
+};
+
+#endif // __PL_ASCII85DECODER_HH__
diff --git a/libqpdf/qpdf/Pl_ASCIIHexDecoder.hh b/libqpdf/qpdf/Pl_ASCIIHexDecoder.hh
new file mode 100644
index 00000000..36272328
--- /dev/null
+++ b/libqpdf/qpdf/Pl_ASCIIHexDecoder.hh
@@ -0,0 +1,23 @@
+
+#ifndef __PL_ASCIIHEXDECODER_HH__
+#define __PL_ASCIIHEXDECODER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_ASCIIHexDecoder: public Pipeline
+{
+  public:
+    Pl_ASCIIHexDecoder(char const* identifier, Pipeline* next);
+    virtual ~Pl_ASCIIHexDecoder();
+    virtual void write(unsigned char* buf, int len);
+    virtual void finish();
+
+  private:
+    void flush();
+
+    char inbuf[3];
+    int pos;
+    bool eod;
+};
+
+#endif // __PL_ASCIIHEXDECODER_HH__
diff --git a/libqpdf/qpdf/Pl_LZWDecoder.hh b/libqpdf/qpdf/Pl_LZWDecoder.hh
new file mode 100644
index 00000000..95ec55b3
--- /dev/null
+++ b/libqpdf/qpdf/Pl_LZWDecoder.hh
@@ -0,0 +1,40 @@
+
+#ifndef __PL_LZWDECODER_HH__
+#define __PL_LZWDECODER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/Buffer.hh>
+#include <vector>
+
+class Pl_LZWDecoder: public Pipeline
+{
+  public:
+    Pl_LZWDecoder(char const* identifier, Pipeline* next,
+		  bool early_code_change);
+    virtual ~Pl_LZWDecoder();
+    virtual void write(unsigned char* buf, int len);
+    virtual void finish();
+
+  private:
+    void sendNextCode();
+    void handleCode(int code);
+    unsigned char getFirstChar(int code);
+    void addToTable(unsigned char next);
+
+    // members used for converting bits to codes
+    unsigned char buf[3];
+    int code_size;
+    int next;
+    int byte_pos;
+    int bit_pos;		// left to right: 01234567
+    int bits_available;
+
+    // members used for handling LZW decompression
+    bool code_change_delta;
+    bool eod;
+    std::vector<Buffer> table;
+    int last_code;
+};
+
+#endif // __PL_LZWDECODER_HH__
diff --git a/libqpdf/qpdf/Pl_MD5.hh b/libqpdf/qpdf/Pl_MD5.hh
new file mode 100644
index 00000000..2d9d11fd
--- /dev/null
+++ b/libqpdf/qpdf/Pl_MD5.hh
@@ -0,0 +1,30 @@
+
+#ifndef __PL_MD5_HH__
+#define __PL_MD5_HH__
+
+// This pipeline sends its output to its successor unmodified.  After
+// calling finish, the MD5 checksum of the data that passed through
+// the pipeline is available.
+
+// This pipeline is reusable; i.e., it is safe to call write() after
+// calling finish().  The first call to write() after a call to
+// finish() initializes a new MD5 object.
+
+#include <qpdf/Pipeline.hh>
+#include <qpdf/MD5.hh>
+
+class Pl_MD5: public Pipeline
+{
+  public:
+    Pl_MD5(char const* identifier, Pipeline* next);
+    virtual ~Pl_MD5();
+    virtual void write(unsigned char*, int);
+    virtual void finish();
+    std::string getHexDigest();
+
+  private:
+    bool in_progress;
+    MD5 md5;
+};
+
+#endif // __PL_MD5_HH__
diff --git a/libqpdf/qpdf/Pl_PNGFilter.hh b/libqpdf/qpdf/Pl_PNGFilter.hh
new file mode 100644
index 00000000..1ecc7060
--- /dev/null
+++ b/libqpdf/qpdf/Pl_PNGFilter.hh
@@ -0,0 +1,62 @@
+
+#ifndef __PL_PNGFILTER_HH__
+#define __PL_PNGFILTER_HH__
+
+// This pipeline applies or reverses the application of a PNG filter
+// as described in the PNG specification.
+
+// NOTE: In its initial implementation, it only encodes and decodes
+// filters "none" and "up".  The primary motivation of this code is to
+// encode and decode PDF 1.5+ XRef streams which are often encoded
+// with Flate predictor 12, which corresponds to the PNG up filter.
+// At present, the bytes_per_pixel parameter is ignored, and an
+// exception is thrown if any row of the file has a filter other
+// than 0 or 2.  Finishing the implementation would not be difficult.
+// See chapter 6 of the PNG specification for a description of the
+// filter algorithms.
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_PNGFilter: public Pipeline
+{
+  public:
+    class Exception: public Pipeline::Exception
+    {
+      public:
+	Exception(std::string const& message) :
+	    Pipeline::Exception(message)
+	{
+	}
+
+	virtual ~Exception() throw ()
+	{
+	}
+    };
+
+    // Encoding is not presently supported
+    enum action_e { a_encode, a_decode };
+
+    Pl_PNGFilter(char const* identifier, Pipeline* next,
+		 action_e action, unsigned int columns,
+		 unsigned int bytes_per_pixel);
+    virtual ~Pl_PNGFilter();
+
+    virtual void write(unsigned char* data, int len);
+    virtual void finish();
+
+  private:
+    void processRow();
+    void encodeRow();
+    void decodeRow();
+
+    action_e action;
+    unsigned int columns;
+    unsigned char* cur_row;
+    unsigned char* prev_row;
+    unsigned char* buf1;
+    unsigned char* buf2;
+    int pos;
+    int incoming;
+};
+
+#endif // __PL_PNGFILTER_HH__
diff --git a/libqpdf/qpdf/Pl_QPDFTokenizer.hh b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
new file mode 100644
index 00000000..448dbb18
--- /dev/null
+++ b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
@@ -0,0 +1,40 @@
+
+#ifndef __PL_QPDFTOKENIZER_HH__
+#define __PL_QPDFTOKENIZER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/QPDFTokenizer.hh>
+
+//
+// Treat incoming text as a stream consisting of valid PDF tokens, but
+// output bad tokens just the same.  The idea here is to be able to
+// use pipeline for content streams to normalize newlines without
+// interfering with meaningful newlines such as those that occur
+// inside of strings.
+//
+
+class Pl_QPDFTokenizer: public Pipeline
+{
+  public:
+    Pl_QPDFTokenizer(char const* identifier, Pipeline* next);
+    virtual ~Pl_QPDFTokenizer();
+    virtual void write(unsigned char* buf, int len);
+    virtual void finish();
+
+  private:
+    void processChar(char ch);
+    void checkUnread();
+    void writeNext(char const*, int len);
+    void writeToken(QPDFTokenizer::Token&);
+
+    QPDFTokenizer tokenizer;
+    bool newline_after_next_token;
+    bool just_wrote_nl;
+    bool last_char_was_cr;
+    bool unread_char;
+    char char_to_unread;
+    bool pass_through;
+};
+
+#endif // __PL_QPDFTOKENIZER_HH__
diff --git a/libqpdf/qpdf/Pl_RC4.hh b/libqpdf/qpdf/Pl_RC4.hh
new file mode 100644
index 00000000..6bebe5aa
--- /dev/null
+++ b/libqpdf/qpdf/Pl_RC4.hh
@@ -0,0 +1,42 @@
+
+#ifndef __PL_RC4_HH__
+#define __PL_RC4_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/RC4.hh>
+
+class Pl_RC4: public Pipeline
+{
+  public:
+    class Exception: public Pipeline::Exception
+    {
+      public:
+	Exception(std::string const& message) :
+	    Pipeline::Exception(message)
+	{
+	}
+
+	virtual ~Exception() throw()
+	{
+	}
+    };
+
+    static int const def_bufsize = 65536;
+
+    // key_len of -1 means treat key_data as a null-terminated string
+    Pl_RC4(char const* identifier, Pipeline* next,
+	   unsigned char const* key_data, int key_len = -1,
+	   int out_bufsize = def_bufsize);
+    virtual ~Pl_RC4();
+
+    virtual void write(unsigned char* data, int len);
+    virtual void finish();
+
+  private:
+    unsigned char* outbuf;
+    int out_bufsize;
+    RC4 rc4;
+};
+
+#endif // __PL_RC4_HH__
diff --git a/libqpdf/qpdf/QPDF_Array.hh b/libqpdf/qpdf/QPDF_Array.hh
new file mode 100644
index 00000000..371be50e
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Array.hh
@@ -0,0 +1,24 @@
+
+#ifndef __QPDF_ARRAY_HH__
+#define __QPDF_ARRAY_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+#include <vector>
+#include <qpdf/QPDFObjectHandle.hh>
+
+class QPDF_Array: public QPDFObject
+{
+  public:
+    QPDF_Array(std::vector<QPDFObjectHandle> const& items);
+    virtual ~QPDF_Array();
+    virtual std::string unparse();
+    int getNItems() const;
+    QPDFObjectHandle getItem(int n) const;
+    void setItem(int, QPDFObjectHandle const&);
+
+  private:
+    std::vector<QPDFObjectHandle> items;
+};
+
+#endif // __QPDF_ARRAY_HH__
diff --git a/libqpdf/qpdf/QPDF_Bool.hh b/libqpdf/qpdf/QPDF_Bool.hh
new file mode 100644
index 00000000..06aca822
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Bool.hh
@@ -0,0 +1,19 @@
+
+#ifndef __QPDF_BOOL_HH__
+#define __QPDF_BOOL_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Bool: public QPDFObject
+{
+  public:
+    QPDF_Bool(bool val);
+    virtual ~QPDF_Bool();
+    virtual std::string unparse();
+    bool getVal() const;
+
+  private:
+    bool val;
+};
+
+#endif // __QPDF_BOOL_HH__
diff --git a/libqpdf/qpdf/QPDF_Dictionary.hh b/libqpdf/qpdf/QPDF_Dictionary.hh
new file mode 100644
index 00000000..6a79fb69
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Dictionary.hh
@@ -0,0 +1,35 @@
+
+#ifndef __QPDF_DICTIONARY_HH__
+#define __QPDF_DICTIONARY_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+#include <set>
+#include <map>
+
+#include <qpdf/QPDFObjectHandle.hh>
+
+class QPDF_Dictionary: public QPDFObject
+{
+  public:
+    QPDF_Dictionary(std::map<std::string, QPDFObjectHandle> const& items);
+    virtual ~QPDF_Dictionary();
+    virtual std::string unparse();
+
+    // hasKey() and getKeys() treat keys with null values as if they
+    // aren't there.  getKey() returns null for the value of a
+    // non-existent key.  This is as per the PDF spec.
+    bool hasKey(std::string const&);
+    QPDFObjectHandle getKey(std::string const&);
+    std::set<std::string> getKeys();
+
+    // Replace value of key, adding it if it does not exist
+    void replaceKey(std::string const& key, QPDFObjectHandle const&);
+    // Remove key, doing nothing if key does not exist
+    void removeKey(std::string const& key);
+
+  private:
+    std::map<std::string, QPDFObjectHandle> items;
+};
+
+#endif // __QPDF_DICTIONARY_HH__
diff --git a/libqpdf/qpdf/QPDF_Integer.hh b/libqpdf/qpdf/QPDF_Integer.hh
new file mode 100644
index 00000000..fb6360b2
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Integer.hh
@@ -0,0 +1,19 @@
+
+#ifndef __QPDF_INTEGER_HH__
+#define __QPDF_INTEGER_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Integer: public QPDFObject
+{
+  public:
+    QPDF_Integer(int val);
+    virtual ~QPDF_Integer();
+    virtual std::string unparse();
+    int getVal() const;
+
+  private:
+    int val;
+};
+
+#endif // __QPDF_INTEGER_HH__
diff --git a/libqpdf/qpdf/QPDF_Name.hh b/libqpdf/qpdf/QPDF_Name.hh
new file mode 100644
index 00000000..a32f6f4f
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Name.hh
@@ -0,0 +1,22 @@
+
+#ifndef __QPDF_NAME_HH__
+#define __QPDF_NAME_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Name: public QPDFObject
+{
+  public:
+    QPDF_Name(std::string const& name);
+    virtual ~QPDF_Name();
+    virtual std::string unparse();
+    std::string getName() const;
+
+    // Put # into strings with characters unsuitable for name token
+    static std::string normalizeName(std::string const& name);
+
+  private:
+    std::string name;
+};
+
+#endif // __QPDF_NAME_HH__
diff --git a/libqpdf/qpdf/QPDF_Null.hh b/libqpdf/qpdf/QPDF_Null.hh
new file mode 100644
index 00000000..60c1ae35
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Null.hh
@@ -0,0 +1,14 @@
+
+#ifndef __QPDF_NULL_HH__
+#define __QPDF_NULL_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Null: public QPDFObject
+{
+  public:
+    virtual ~QPDF_Null();
+    std::string unparse();
+};
+
+#endif // __QPDF_NULL_HH__
diff --git a/libqpdf/qpdf/QPDF_Real.hh b/libqpdf/qpdf/QPDF_Real.hh
new file mode 100644
index 00000000..b950c569
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Real.hh
@@ -0,0 +1,20 @@
+
+#ifndef __QPDF_REAL_HH__
+#define __QPDF_REAL_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Real: public QPDFObject
+{
+  public:
+    QPDF_Real(std::string const& val);
+    virtual ~QPDF_Real();
+    std::string unparse();
+    std::string getVal();
+
+  private:
+    // Store reals as strings to avoid roundoff errors.
+    std::string val;
+};
+
+#endif // __QPDF_REAL_HH__
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
new file mode 100644
index 00000000..71381fd3
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -0,0 +1,42 @@
+
+#ifndef __QPDF_STREAM_HH__
+#define __QPDF_STREAM_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+#include <qpdf/QPDFObjectHandle.hh>
+
+class Pipeline;
+class QPDF;
+
+class QPDF_Stream: public QPDFObject
+{
+  public:
+    QPDF_Stream(QPDF*, int objid, int generation,
+		QPDFObjectHandle stream_dict,
+		off_t offset, int length);
+    virtual ~QPDF_Stream();
+    virtual std::string unparse();
+    QPDFObjectHandle getDict() const;
+
+    // See comments in QPDFObjectHandle.hh
+    bool pipeStreamData(Pipeline*, bool filter,
+			bool normalize, bool compress);
+
+    // See comments in QPDFObjectHandle.hh
+    PointerHolder<Buffer> getStreamData();
+
+  private:
+    bool filterable(std::vector<std::string>& filters,
+		    int& predictor, int& columns, bool& early_code_change);
+
+
+    QPDF* qpdf;
+    int objid;
+    int generation;
+    QPDFObjectHandle stream_dict;
+    off_t offset;
+    int length;
+};
+
+#endif // __QPDF_STREAM_HH__
diff --git a/libqpdf/qpdf/QPDF_String.hh b/libqpdf/qpdf/QPDF_String.hh
new file mode 100644
index 00000000..f3063c50
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_String.hh
@@ -0,0 +1,23 @@
+
+#ifndef __QPDF_STRING_HH__
+#define __QPDF_STRING_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+// QPDF_Strings may include embedded null characters.
+
+class QPDF_String: public QPDFObject
+{
+  public:
+    QPDF_String(std::string const& val);
+    virtual ~QPDF_String();
+    virtual std::string unparse();
+    std::string unparse(bool force_binary);
+    std::string getVal() const;
+    std::string getUTF8Val() const;
+
+  private:
+    std::string val;
+};
+
+#endif // __QPDF_STRING_HH__
diff --git a/libqpdf/qpdf/RC4.hh b/libqpdf/qpdf/RC4.hh
new file mode 100644
index 00000000..657bf35b
--- /dev/null
+++ b/libqpdf/qpdf/RC4.hh
@@ -0,0 +1,26 @@
+
+#ifndef __RC4_HH__
+#define __RC4_HH__
+
+class RC4
+{
+  public:
+    // key_len of -1 means treat key_data as a null-terminated string
+    RC4(unsigned char const* key_data, int key_len = -1);
+
+    // out_data = 0 means to encrypt/decrypt in place
+    void process(unsigned char* in_data, int len, unsigned char* out_data = 0);
+
+  private:
+    class RC4Key
+    {
+      public:
+        unsigned char state[256];
+        unsigned char x;
+        unsigned char y;
+    };
+
+    RC4Key key;
+};
+
+#endif // __RC4_HH__