aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2008-04-29 14:55:25 +0200
committerJay Berkenbilt <ejb@ql.org>2008-04-29 14:55:25 +0200
commit9a0b88bf7777c153dc46ace22db74ef24d51583a (patch)
treef567ac1cf2bf5071a611eb49323a935b6ac938ff /libqpdf
downloadqpdf-9a0b88bf7777c153dc46ace22db74ef24d51583a.tar.zst
update release date to actual daterelease-qpdf-2.0
git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649
Diffstat (limited to 'libqpdf')
-rw-r--r--libqpdf/BitStream.cc45
-rw-r--r--libqpdf/BitWriter.cc30
-rw-r--r--libqpdf/Buffer.cc79
-rw-r--r--libqpdf/MD5.cc441
-rw-r--r--libqpdf/Makefile1
-rw-r--r--libqpdf/PCRE.cc365
-rw-r--r--libqpdf/Pipeline.cc25
-rw-r--r--libqpdf/Pl_ASCII85Decoder.cc131
-rw-r--r--libqpdf/Pl_ASCIIHexDecoder.cc108
-rw-r--r--libqpdf/Pl_Buffer.cc67
-rw-r--r--libqpdf/Pl_Count.cc42
-rw-r--r--libqpdf/Pl_Discard.cc23
-rw-r--r--libqpdf/Pl_Flate.cc198
-rw-r--r--libqpdf/Pl_LZWDecoder.cc229
-rw-r--r--libqpdf/Pl_MD5.cc43
-rw-r--r--libqpdf/Pl_PNGFilter.cc146
-rw-r--r--libqpdf/Pl_QPDFTokenizer.cc179
-rw-r--r--libqpdf/Pl_RC4.cc57
-rw-r--r--libqpdf/Pl_StdioFile.cc48
-rw-r--r--libqpdf/QEXC.cc67
-rw-r--r--libqpdf/QPDF.cc1851
-rw-r--r--libqpdf/QPDFExc.cc20
-rw-r--r--libqpdf/QPDFObject.cc2
-rw-r--r--libqpdf/QPDFObjectHandle.cc637
-rw-r--r--libqpdf/QPDFTokenizer.cc458
-rw-r--r--libqpdf/QPDFWriter.cc2021
-rw-r--r--libqpdf/QPDFXRefEntry.cc61
-rw-r--r--libqpdf/QPDF_Array.cc51
-rw-r--r--libqpdf/QPDF_Bool.cc23
-rw-r--r--libqpdf/QPDF_Dictionary.cc84
-rw-r--r--libqpdf/QPDF_Integer.cc25
-rw-r--r--libqpdf/QPDF_Name.cc46
-rw-r--r--libqpdf/QPDF_Null.cc12
-rw-r--r--libqpdf/QPDF_Real.cc23
-rw-r--r--libqpdf/QPDF_Stream.cc309
-rw-r--r--libqpdf/QPDF_String.cc178
-rw-r--r--libqpdf/QPDF_encryption.cc441
-rw-r--r--libqpdf/QPDF_linearization.cc2103
-rw-r--r--libqpdf/QPDF_optimization.cc490
-rw-r--r--libqpdf/QTC.cc46
-rw-r--r--libqpdf/QUtil.cc198
-rw-r--r--libqpdf/RC4.cc56
-rw-r--r--libqpdf/bits.icc149
-rw-r--r--libqpdf/build.mk73
-rw-r--r--libqpdf/qpdf/BitStream.hh23
-rw-r--r--libqpdf/qpdf/BitWriter.hh24
-rw-r--r--libqpdf/qpdf/MD5.hh73
-rw-r--r--libqpdf/qpdf/PCRE.hh107
-rw-r--r--libqpdf/qpdf/Pl_ASCII85Decoder.hh23
-rw-r--r--libqpdf/qpdf/Pl_ASCIIHexDecoder.hh23
-rw-r--r--libqpdf/qpdf/Pl_LZWDecoder.hh40
-rw-r--r--libqpdf/qpdf/Pl_MD5.hh30
-rw-r--r--libqpdf/qpdf/Pl_PNGFilter.hh62
-rw-r--r--libqpdf/qpdf/Pl_QPDFTokenizer.hh40
-rw-r--r--libqpdf/qpdf/Pl_RC4.hh42
-rw-r--r--libqpdf/qpdf/QPDF_Array.hh24
-rw-r--r--libqpdf/qpdf/QPDF_Bool.hh19
-rw-r--r--libqpdf/qpdf/QPDF_Dictionary.hh35
-rw-r--r--libqpdf/qpdf/QPDF_Integer.hh19
-rw-r--r--libqpdf/qpdf/QPDF_Name.hh22
-rw-r--r--libqpdf/qpdf/QPDF_Null.hh14
-rw-r--r--libqpdf/qpdf/QPDF_Real.hh20
-rw-r--r--libqpdf/qpdf/QPDF_Stream.hh42
-rw-r--r--libqpdf/qpdf/QPDF_String.hh23
-rw-r--r--libqpdf/qpdf/RC4.hh26
65 files changed, 12412 insertions, 0 deletions
diff --git a/libqpdf/BitStream.cc b/libqpdf/BitStream.cc
new file mode 100644
index 00000000..c6fda4e6
--- /dev/null
+++ b/libqpdf/BitStream.cc
@@ -0,0 +1,45 @@
+
+
+#include <qpdf/BitStream.hh>
+
+// See comments in bits.icc
+#define BITS_READ 1
+#include "bits.icc"
+
+// Set up a bit reader over the nbytes bytes beginning at p and
+// position it at the start of the data.
+BitStream::BitStream(unsigned char const* p, int nbytes) :
+    start(p),
+    nbytes(nbytes)
+{
+    this->reset();
+}
+
+// Rewind the reader to the beginning of the underlying data: all
+// 8 * nbytes bits become available again and bit_offset returns to 7.
+void
+BitStream::reset()
+{
+    this->bits_available = 8 * this->nbytes;
+    this->bit_offset = 7;
+    this->p = this->start;
+}
+
+// Read the next nbits bits by delegating to read_bits (from
+// bits.icc), which updates the read position members it is handed.
+unsigned long
+BitStream::getBits(int nbits)
+{
+    unsigned long const value =
+        read_bits(this->p, this->bit_offset, this->bits_available, nbits);
+    return value;
+}
+
+// Discard whatever is left of the current byte so that the next read
+// starts on a byte boundary. Throws QEXC::Internal if fewer bits
+// remain than would have to be skipped.
+void
+BitStream::skipToNextByte()
+{
+    if (this->bit_offset == 7)
+    {
+        // Already at the start of a byte; nothing to skip
+        return;
+    }
+
+    unsigned int bits_to_skip = this->bit_offset + 1;
+    if (this->bits_available < bits_to_skip)
+    {
+        throw QEXC::Internal("overflow skipping to next byte in bitstream");
+    }
+    this->bits_available -= bits_to_skip;
+    ++this->p;
+    this->bit_offset = 7;
+}
diff --git a/libqpdf/BitWriter.cc b/libqpdf/BitWriter.cc
new file mode 100644
index 00000000..f682aac5
--- /dev/null
+++ b/libqpdf/BitWriter.cc
@@ -0,0 +1,30 @@
+
+
+#include <qpdf/BitWriter.hh>
+
+// See comments in bits.icc
+#define BITS_WRITE 1
+#include "bits.icc"
+
+// Create a bit writer that sends completed output to pl. The pending
+// byte starts out as zero with bit_offset at 7.
+BitWriter::BitWriter(Pipeline* pl) :
+    pl(pl),
+    ch(0),
+    bit_offset(7)
+{
+    // Nothing further to initialize
+}
+
+// Append the low-order "bits" bits of val to the output by handing
+// the writer's state to write_bits (from bits.icc).
+void
+BitWriter::writeBits(unsigned long val, int bits)
+{
+    write_bits(
+        this->ch, this->bit_offset, val, bits, this->pl);
+}
+
+// If a byte is partially written, fill its remaining bits with zeros
+// by writing bit_offset + 1 zero bits. Does nothing when bit_offset
+// is not below 7.
+void
+BitWriter::flush()
+{
+    if (! (this->bit_offset < 7))
+    {
+        return;
+    }
+    int padding_bits = this->bit_offset + 1;
+    write_bits(this->ch, this->bit_offset, 0, padding_bits, this->pl);
+}
diff --git a/libqpdf/Buffer.cc b/libqpdf/Buffer.cc
new file mode 100644
index 00000000..3dde1f90
--- /dev/null
+++ b/libqpdf/Buffer.cc
@@ -0,0 +1,79 @@
+
+#include <qpdf/Buffer.hh>
+
+#include <string.h>
+
+// Create an empty buffer that owns no storage.
+Buffer::Buffer()
+{
+    this->init(0);
+}
+
+// Create a buffer that owns size bytes of uninitialized storage.
+Buffer::Buffer(unsigned long size)
+{
+    this->init(size);
+}
+
+// Copy constructor: begin empty, then take a deep copy of rhs.
+Buffer::Buffer(Buffer const& rhs)
+{
+    this->init(0);
+    this->copy(rhs);
+}
+
+// Assignment replaces this buffer's contents with a deep copy of
+// rhs; self-assignment is handled inside copy().
+Buffer&
+Buffer::operator=(Buffer const& rhs)
+{
+    this->copy(rhs);
+    return *this;
+}
+
+// Release the owned storage.
+Buffer::~Buffer()
+{
+    this->destroy();
+}
+
+// Give the buffer size bytes of freshly allocated, uninitialized
+// storage, or no storage at all when size is 0. Any previous storage
+// is NOT released here; callers are expected to have called
+// destroy() first where that matters.
+void
+Buffer::init(unsigned long size)
+{
+    // Allocate before touching any member so that a failed allocation
+    // (std::bad_alloc) can never leave a non-zero size recorded
+    // alongside a pointer that does not own that many bytes.
+    unsigned char* storage = (size ? new unsigned char[size] : 0);
+    this->buf = storage;
+    this->size = size;
+}
+
+// Replace this buffer's contents with a deep copy of rhs. Copying a
+// buffer onto itself is a no-op.
+//
+// The replacement storage is allocated and filled before the old
+// storage is released, so if the allocation throws, this buffer is
+// left exactly as it was.
+void
+Buffer::copy(Buffer const& rhs)
+{
+    if (this == &rhs)
+    {
+        return;
+    }
+
+    unsigned char* replacement = 0;
+    if (rhs.size)
+    {
+        replacement = new unsigned char[rhs.size];
+        memcpy(replacement, rhs.buf, rhs.size);
+    }
+
+    this->destroy();
+    this->buf = replacement;
+    this->size = rhs.size;
+}
+
+// Free the owned storage and return the buffer to the empty state
+// (null pointer, size 0).
+void
+Buffer::destroy()
+{
+    delete [] buf;
+    buf = 0;
+    size = 0;
+}
+
+// Number of bytes owned by this buffer.
+unsigned long
+Buffer::getSize() const
+{
+    return size;
+}
+
+// Read-only access to the stored bytes; null when the buffer is empty.
+unsigned char const*
+Buffer::getBuffer() const
+{
+    return buf;
+}
+
+// Writable access to the stored bytes; null when the buffer is empty.
+unsigned char*
+Buffer::getBuffer()
+{
+    return buf;
+}
diff --git a/libqpdf/MD5.cc b/libqpdf/MD5.cc
new file mode 100644
index 00000000..ecdd8a33
--- /dev/null
+++ b/libqpdf/MD5.cc
@@ -0,0 +1,441 @@
+// This file implements a class for computation of MD5 checksums.
+// It is derived from the reference algorithm for MD5 as given in
+// RFC 1321. The original copyright notice is as follows:
+//
+/////////////////////////////////////////////////////////////////////////
+//
+// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+// rights reserved.
+//
+// License to copy and use this software is granted provided that it
+// is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+// Algorithm" in all material mentioning or referencing this software
+// or this function.
+//
+// License is also granted to make and use derivative works provided
+// that such works are identified as "derived from the RSA Data
+// Security, Inc. MD5 Message-Digest Algorithm" in all material
+// mentioning or referencing the derived work.
+//
+// RSA Data Security, Inc. makes no representations concerning either
+// the merchantability of this software or the suitability of this
+// software for any particular purpose. It is provided "as is"
+// without express or implied warranty of any kind.
+//
+// These notices must be retained in any copies of any part of this
+// documentation and/or software.
+//
+/////////////////////////////////////////////////////////////////////////
+
+#include <qpdf/MD5.hh>
+
+#include <stdio.h>
+#include <memory.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+// Per-step left-rotation amounts used by transform(): Sxy is the y-th shift amount of round x.
+int const S11 = 7;
+int const S12 = 12;
+int const S13 = 17;
+int const S14 = 22;
+int const S21 = 5;
+int const S22 = 9;
+int const S23 = 14;
+int const S24 = 20;
+int const S31 = 4;
+int const S32 = 11;
+int const S33 = 16;
+int const S34 = 23;
+int const S41 = 6;
+int const S42 = 10;
+int const S43 = 15;
+int const S44 = 21;
+// Padding source for final(): one 0x80 byte followed by zero bytes; final() feeds a prefix of it to update().
+static unsigned char PADDING[64] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+// F, G, H and I are the basic MD5 bitwise functions; the FF, GG, HH and II step macros below apply them in rounds 1 to 4.
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~z)))
+// NOTE: ~x and ~z are applied to the bare macro argument, so pass only simple operands (as transform() does).
+// ROTATE_LEFT rotates x left n bits; the 32-(n) term assumes x is a 32-bit unsigned value.
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+// Each step macro updates its first argument in place: a = b + ROTATE_LEFT(a + f(b, c, d) + x + ac, s).
+// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+// Rotation is separate from addition to prevent recomputation.
+#define FF(a, b, c, d, x, s, ac) { \
+ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define GG(a, b, c, d, x, s, ac) { \
+ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define HH(a, b, c, d, x, s, ac) { \
+ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define II(a, b, c, d, x, s, ac) { \
+ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+
+// MD5 initialization. Starts a fresh digest computation: clears the
+// bit counter, loads the initial chaining values, and marks the
+// object as not yet finalized with an all-zero stored digest.
+void MD5::init()
+{
+    // Initial chaining values
+    state[0] = 0x67452301;
+    state[1] = 0xefcdab89;
+    state[2] = 0x98badcfe;
+    state[3] = 0x10325476;
+
+    // No input has been processed yet
+    count[0] = 0;
+    count[1] = 0;
+
+    memset(digest_val, 0, sizeof(digest_val));
+    finalized = false;
+}
+
+// MD5 block update operation. Feeds inputLen bytes from input into
+// the digest: the bit counter (count[0] low word, count[1] high word)
+// is advanced, every complete 64-byte block is transformed, and any
+// leftover bytes are kept in buffer for the next call.
+
+void MD5::update(unsigned char *input,
+                 unsigned int inputLen)
+{
+    // Bytes already waiting in buffer (byte count mod 64)
+    unsigned int buffered = (unsigned int)((count[0] >> 3) & 0x3F);
+
+    // Advance the bit counter, carrying into the high word when the
+    // low word wraps around
+    UINT4 const added_bits = ((UINT4)inputLen << 3);
+    count[0] += added_bits;
+    if (count[0] < added_bits)
+    {
+        count[1]++;
+    }
+    count[1] += ((UINT4)inputLen >> 29);
+
+    // Bytes needed to complete the block currently in buffer
+    unsigned int const room = 64 - buffered;
+    unsigned int consumed = 0;
+
+    if (inputLen >= room)
+    {
+        // Complete and process the buffered block, then process as
+        // many whole blocks as possible straight from the input
+        memcpy(buffer + buffered, input, room);
+        transform(state, buffer);
+
+        consumed = room;
+        while (consumed + 63 < inputLen)
+        {
+            transform(state, input + consumed);
+            consumed += 64;
+        }
+
+        buffered = 0;
+    }
+
+    // Keep whatever is left for next time
+    memcpy(buffer + buffered, input + consumed, inputLen - consumed);
+}
+
+// MD5 finalization. Ends an MD5 message-digest operation: pads the
+// message, appends its length in bits, stores the message digest in
+// digest_val, and zeroizes the working context. Only the first call
+// after init() does any work; later calls return immediately.
+void MD5::final()
+{
+    if (! finalized)
+    {
+        // Save the bit count as it stands before padding is added
+        unsigned char length_bytes[8];
+        encode(length_bytes, count, 8);
+
+        // Pad out to 56 mod 64, leaving room for the 8 length bytes
+        unsigned int const used = (unsigned int)((count[0] >> 3) & 0x3f);
+        unsigned int pad_len;
+        if (used < 56)
+        {
+            pad_len = 56 - used;
+        }
+        else
+        {
+            pad_len = 120 - used;
+        }
+        update(PADDING, pad_len);
+
+        // Append the saved length and capture the resulting state
+        update(length_bytes, 8);
+        encode(digest_val, state, 16);
+
+        // Zeroize sensitive information.
+        memset(state, 0, sizeof(state));
+        memset(count, 0, sizeof(count));
+        memset(buffer, 0, sizeof(buffer));
+
+        finalized = true;
+    }
+}
+
+// MD5 basic transformation. Updates the four-word state in place from one 64-byte block.
+void MD5::transform(UINT4 state[4], unsigned char block[64])
+{
+ UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+ // Unpack the 64 block bytes into the sixteen words x[0..15] (see decode)
+ decode(x, block, 64);
+
+ // Round 1: sixteen FF steps using shift amounts S11-S14
+ FF (a, b, c, d, x[ 0], S11, 0xd76aa478); // 1
+ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); // 2
+ FF (c, d, a, b, x[ 2], S13, 0x242070db); // 3
+ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); // 4
+ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); // 5
+ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); // 6
+ FF (c, d, a, b, x[ 6], S13, 0xa8304613); // 7
+ FF (b, c, d, a, x[ 7], S14, 0xfd469501); // 8
+ FF (a, b, c, d, x[ 8], S11, 0x698098d8); // 9
+ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); // 10
+ FF (c, d, a, b, x[10], S13, 0xffff5bb1); // 11
+ FF (b, c, d, a, x[11], S14, 0x895cd7be); // 12
+ FF (a, b, c, d, x[12], S11, 0x6b901122); // 13
+ FF (d, a, b, c, x[13], S12, 0xfd987193); // 14
+ FF (c, d, a, b, x[14], S13, 0xa679438e); // 15
+ FF (b, c, d, a, x[15], S14, 0x49b40821); // 16
+
+ // Round 2: sixteen GG steps using shift amounts S21-S24
+ GG (a, b, c, d, x[ 1], S21, 0xf61e2562); // 17
+ GG (d, a, b, c, x[ 6], S22, 0xc040b340); // 18
+ GG (c, d, a, b, x[11], S23, 0x265e5a51); // 19
+ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); // 20
+ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); // 21
+ GG (d, a, b, c, x[10], S22, 0x2441453); // 22
+ GG (c, d, a, b, x[15], S23, 0xd8a1e681); // 23
+ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); // 24
+ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); // 25
+ GG (d, a, b, c, x[14], S22, 0xc33707d6); // 26
+ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); // 27
+ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); // 28
+ GG (a, b, c, d, x[13], S21, 0xa9e3e905); // 29
+ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); // 30
+ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); // 31
+ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); // 32
+
+ // Round 3: sixteen HH steps using shift amounts S31-S34
+ HH (a, b, c, d, x[ 5], S31, 0xfffa3942); // 33
+ HH (d, a, b, c, x[ 8], S32, 0x8771f681); // 34
+ HH (c, d, a, b, x[11], S33, 0x6d9d6122); // 35
+ HH (b, c, d, a, x[14], S34, 0xfde5380c); // 36
+ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); // 37
+ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); // 38
+ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); // 39
+ HH (b, c, d, a, x[10], S34, 0xbebfbc70); // 40
+ HH (a, b, c, d, x[13], S31, 0x289b7ec6); // 41
+ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); // 42
+ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); // 43
+ HH (b, c, d, a, x[ 6], S34, 0x4881d05); // 44
+ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); // 45
+ HH (d, a, b, c, x[12], S32, 0xe6db99e5); // 46
+ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); // 47
+ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); // 48
+
+ // Round 4: sixteen II steps using shift amounts S41-S44
+ II (a, b, c, d, x[ 0], S41, 0xf4292244); // 49
+ II (d, a, b, c, x[ 7], S42, 0x432aff97); // 50
+ II (c, d, a, b, x[14], S43, 0xab9423a7); // 51
+ II (b, c, d, a, x[ 5], S44, 0xfc93a039); // 52
+ II (a, b, c, d, x[12], S41, 0x655b59c3); // 53
+ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); // 54
+ II (c, d, a, b, x[10], S43, 0xffeff47d); // 55
+ II (b, c, d, a, x[ 1], S44, 0x85845dd1); // 56
+ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); // 57
+ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); // 58
+ II (c, d, a, b, x[ 6], S43, 0xa3014314); // 59
+ II (b, c, d, a, x[13], S44, 0x4e0811a1); // 60
+ II (a, b, c, d, x[ 4], S41, 0xf7537e82); // 61
+ II (d, a, b, c, x[11], S42, 0xbd3af235); // 62
+ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); // 63
+ II (b, c, d, a, x[ 9], S44, 0xeb86d391); // 64
+ // Fold this block's result into the running state
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+
+ // Zeroize sensitive information (the unpacked copy of the block).
+
+ memset ((POINTER)x, 0, sizeof (x));
+}
+
+// Serializes UINT4 words from input into len bytes of output, least
+// significant byte of each word first. Assumes len is a multiple
+// of 4.
+void MD5::encode(unsigned char *output, UINT4 *input, unsigned int len)
+{
+    unsigned char* out = output;
+    UINT4 const* word = input;
+    for (unsigned int done = 0; done < len; done += 4, ++word, out += 4)
+    {
+        UINT4 const value = *word;
+        out[0] = (unsigned char)(value & 0xff);
+        out[1] = (unsigned char)((value >> 8) & 0xff);
+        out[2] = (unsigned char)((value >> 16) & 0xff);
+        out[3] = (unsigned char)((value >> 24) & 0xff);
+    }
+}
+
+// Assembles len bytes of input into UINT4 words in output, treating
+// the first byte of each group of four as the least significant.
+// Assumes len is a multiple of 4.
+void MD5::decode(UINT4 *output, unsigned char *input, unsigned int len)
+{
+    for (unsigned int word = 0, byte = 0; byte < len; ++word, byte += 4)
+    {
+        UINT4 const b0 = input[byte];
+        UINT4 const b1 = input[byte + 1];
+        UINT4 const b2 = input[byte + 2];
+        UINT4 const b3 = input[byte + 3];
+        output[word] = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+    }
+}
+
+// Public functions
+
+// A new object is ready to accept data immediately.
+MD5::MD5()
+{
+    this->init();
+}
+
+// Discard any computation in progress and start over.
+void MD5::reset()
+{
+    this->init();
+}
+
+// Digest the NUL-terminated string str (terminator excluded) and
+// finalize the computation.
+void MD5::encodeString(char const* str)
+{
+    update((unsigned char *)str, strlen(str));
+    final();
+}
+
+// Feed the NUL-terminated string input_string (terminator excluded)
+// into the digest without finalizing.
+void MD5::appendString(char const* input_string)
+{
+    unsigned int const length = strlen(input_string);
+    update((unsigned char *)input_string, length);
+}
+
+// Feed len bytes starting at data into the digest without finalizing.
+void MD5::encodeDataIncrementally(char const* data, int len)
+{
+    unsigned char* bytes = (unsigned char *)data;
+    update(bytes, len);
+}
+
+// Digest the contents of the named file and finalize the computation.
+// When up_to_size is non-negative, at most that many bytes are read
+// from the start of the file; a negative value means the whole file.
+// Throws QEXC::System, carrying the relevant errno value, if the file
+// cannot be opened or a read error occurs.
+void MD5::encodeFile(char const *filename, int up_to_size)
+    throw (QEXC::System)
+{
+    FILE* file = fopen(filename, "rb");
+    if (file == NULL)
+    {
+        // Capture errno right away: building the message allocates,
+        // and the order in which the throw expression's arguments are
+        // evaluated is unspecified.
+        int const open_errno = errno;
+        throw QEXC::System(
+            std::string("MD5: can't open ") + filename, open_errno);
+    }
+
+    unsigned char chunk[1024];
+    int len;
+    int so_far = 0;
+    int to_try = 1024;
+    do
+    {
+        // Never ask for more than is still allowed by up_to_size
+        if ((up_to_size >= 0) && ((so_far + to_try) > up_to_size))
+        {
+            to_try = up_to_size - so_far;
+        }
+        len = fread(chunk, 1, to_try, file);
+        if (len > 0)
+        {
+            update(chunk, len);
+            so_far += len;
+            if ((up_to_size >= 0) && (so_far >= up_to_size))
+            {
+                break;
+            }
+        }
+    } while (len > 0);
+    if (ferror(file))
+    {
+        // Assume, perhaps incorrectly, that errno was set by the
+        // underlying call to read.... Save it before fclose() gets a
+        // chance to overwrite it.
+        int const read_errno = errno;
+        (void) fclose(file);
+        throw QEXC::System(
+            std::string("MD5: read error on ") + filename, read_errno);
+    }
+    (void) fclose(file);
+
+    final();
+}
+
+// Finalize if necessary, then copy the stored digest bytes into
+// result.
+void MD5::digest(Digest result)
+{
+    this->final();
+    memcpy(result, this->digest_val, sizeof(this->digest_val));
+}
+
+// Finalize if necessary, then write the digest to standard output as
+// 32 lowercase hexadecimal digits followed by a newline.
+void MD5::print()
+{
+    final();
+
+    unsigned int i = 0;
+    while (i < 16)
+    {
+        printf("%02x", digest_val[i]);
+        ++i;
+    }
+    printf("\n");
+}
+
+// Finalize if necessary, then return the digest as a string of 32
+// lowercase hexadecimal digits.
+std::string MD5::unparse()
+{
+    final();
+
+    // Two characters per digest byte plus the terminating NUL
+    char hex[33];
+    for (unsigned int i = 0; i < 16; ++i)
+    {
+        sprintf(hex + (2 * i), "%02x", digest_val[i]);
+    }
+    return std::string(hex);
+}
+
+// Return the hexadecimal MD5 checksum of the len bytes at buf.
+std::string
+MD5::getDataChecksum(char const* buf, int len)
+{
+    MD5 md5;
+    md5.encodeDataIncrementally(buf, len);
+    std::string const checksum = md5.unparse();
+    return checksum;
+}
+
+// Return the hexadecimal MD5 checksum of the named file, reading at
+// most up_to_size bytes when up_to_size is non-negative.
+std::string
+MD5::getFileChecksum(char const* filename, int up_to_size)
+{
+    MD5 md5;
+    md5.encodeFile(filename, up_to_size);
+    std::string const checksum = md5.unparse();
+    return checksum;
+}
+
+// Report whether the len bytes at buf hash to exactly the given
+// hexadecimal checksum string.
+bool
+MD5::checkDataChecksum(char const* const checksum,
+                       char const* buf, int len)
+{
+    return (getDataChecksum(buf, len) == checksum);
+}
+
+// Report whether the named file (or its first up_to_size bytes when
+// up_to_size is non-negative) hashes to exactly the given hexadecimal
+// checksum string. A file that cannot be opened or read is reported
+// as a mismatch rather than as an error.
+bool
+MD5::checkFileChecksum(char const* const checksum,
+                       char const* filename, int up_to_size)
+{
+    bool result = false;
+    try
+    {
+        std::string actual_checksum = getFileChecksum(filename, up_to_size);
+        result = (checksum == actual_checksum);
+    }
+    catch (QEXC::System const&)
+    {
+        // Caught by reference to avoid copying the exception object.
+        // Ignore -- return false
+    }
+    return result;
+}
diff --git a/libqpdf/Makefile b/libqpdf/Makefile
new file mode 100644
index 00000000..90899055
--- /dev/null
+++ b/libqpdf/Makefile
@@ -0,0 +1 @@
+include ../make/proxy.mk
diff --git a/libqpdf/PCRE.cc b/libqpdf/PCRE.cc
new file mode 100644
index 00000000..afa6e954
--- /dev/null
+++ b/libqpdf/PCRE.cc
@@ -0,0 +1,365 @@
+
+
+#include <qpdf/PCRE.hh>
+#include <qpdf/QUtil.hh>
+
+#include <iostream>
+
+PCRE::Exception::Exception(std::string const& message)
+{
+ this->setMessage("PCRE error: " + message);
+}
+
+PCRE::NoBackref::NoBackref() :
+ Exception("no match")
+{
+}
+
+// Create a result holder sized for nbackrefs backreferences. The
+// match count starts at -1, which operator bool reports as "no match".
+PCRE::Match::Match(int nbackrefs, char const* subject)
+{
+    init(-1, nbackrefs, subject);
+}
+
+// Release the offset vector.
+PCRE::Match::~Match()
+{
+    destroy();
+}
+
+// Copy constructor: deep-copies rhs, including its offset vector.
+PCRE::Match::Match(Match const& rhs)
+{
+    copy(rhs);
+}
+
+// Assignment: drop the current offset vector and deep-copy rhs.
+// Self-assignment leaves the object untouched.
+PCRE::Match&
+PCRE::Match::operator=(Match const& rhs)
+{
+    if (this == &rhs)
+    {
+        return *this;
+    }
+    destroy();
+    copy(rhs);
+    return *this;
+}
+
+// Record the match bookkeeping and allocate an offset vector with
+// three ints for the whole match plus three per backreference. Any
+// previously held vector is not freed here.
+void
+PCRE::Match::init(int nmatches, int nbackrefs, char const* subject)
+{
+    this->subject = subject;
+    this->nbackrefs = nbackrefs;
+    this->nmatches = nmatches;
+    this->ovecsize = 3 * (1 + nbackrefs);
+
+    // Null the pointer before allocating so that it never refers to
+    // stale storage if the allocation throws.
+    this->ovector = 0;
+    if (this->ovecsize != 0)
+    {
+        this->ovector = new int[this->ovecsize];
+    }
+}
+
+// Re-initialize this object from rhs and duplicate every entry of
+// its offset vector. The caller must already have released any
+// vector this object owned.
+void
+PCRE::Match::copy(Match const& rhs)
+{
+    init(rhs.nmatches, rhs.nbackrefs, rhs.subject);
+
+    int const* from = rhs.ovector;
+    int* to = this->ovector;
+    for (int remaining = this->ovecsize; remaining > 0; --remaining)
+    {
+        *to++ = *from++;
+    }
+}
+
+// Free the offset vector. The pointer itself is left unchanged.
+void
+PCRE::Match::destroy()
+{
+    delete [] ovector;
+}
+
+// True when a match was recorded, i.e. the match count is not
+// negative.
+PCRE::Match::operator bool()
+{
+    return (! (this->nmatches < 0));
+}
+
+
+// Return the text matched by backreference n (0 is the whole match).
+// If that backreference did not take part in the match, NoBackref is
+// thrown unless flags contains gm_no_substring_returns_empty, in
+// which case an empty string is returned instead.
+std::string
+PCRE::Match::getMatch(int n, int flags)
+    throw(QEXC::General, Exception)
+{
+    // This method used to be implemented in terms of
+    // pcre_get_substring, but that function gives you an empty string
+    // for an unmatched backreference that is in range.
+
+    int offset;
+    int length;
+    try
+    {
+        getOffsetLength(n, offset, length);
+    }
+    catch (NoBackref&)
+    {
+        if (flags & gm_no_substring_returns_empty)
+        {
+            return "";
+        }
+        throw;
+    }
+
+    // Defensive: never hand a negative length to std::string.
+    if (length < 0)
+    {
+        length = 0;
+    }
+
+    // Build the result straight from the matched byte range. Copying
+    // the whole subject through a NUL-terminated std::string first
+    // would be wasteful and would be wrong for a subject that was
+    // matched with an explicit size or that contains NUL bytes.
+    return std::string(this->subject + offset, length);
+}
+
+// Store the starting offset and length of backreference n (0 is the
+// whole match) in offset and length. Throws NoBackref when there was
+// no match, when n is outside the range of recorded matches
+// (including negative n), or when that backreference did not take
+// part in the match.
+void
+PCRE::Match::getOffsetLength(int n, int& offset, int& length) throw(Exception)
+{
+    if ((this->nmatches < 0) ||
+        (n < 0) ||                      // would index before ovector
+        (n > this->nmatches - 1) ||
+        (this->ovector[n * 2] == -1))   // -1 offset: group did not match
+    {
+        throw NoBackref();
+    }
+    offset = this->ovector[n * 2];
+    length = this->ovector[n * 2 + 1] - offset;
+}
+
+
+// Starting offset of backreference n; throws as getOffsetLength does.
+int
+PCRE::Match::getOffset(int n) throw(Exception)
+{
+    int start = 0;
+    int ignored_length = 0;
+    getOffsetLength(n, start, ignored_length);
+    return start;
+}
+
+
+// Length of backreference n; throws as getOffsetLength does.
+int
+PCRE::Match::getLength(int n) throw(Exception)
+{
+    int ignored_start = 0;
+    int span = 0;
+    getOffsetLength(n, ignored_start, span);
+    return span;
+}
+
+
+// The recorded match count; negative when there was no match.
+int
+PCRE::Match::nMatches() const
+{
+    return nmatches;
+}
+
+// Compile pattern with the given PCRE options and record how many
+// capturing subpatterns it contains. Throws Exception, with the
+// offending offset and PCRE's message, if compilation fails, or if
+// the capture count cannot be determined.
+PCRE::PCRE(char const* pattern, int options) throw (Exception)
+{
+    char const* errptr = 0;
+    int erroffset = 0;
+    this->code = pcre_compile(pattern, options, &errptr, &erroffset, 0);
+    if (! this->code)
+    {
+        std::string message = (std::string("compilation of ") + pattern +
+                               " failed at offset " +
+                               QUtil::int_to_string(erroffset) + ": " +
+                               errptr);
+        throw Exception(message);
+    }
+
+    // pcre_info() is obsolete; pcre_fullinfo() reports the same
+    // capture count and signals failure through its return value.
+    int capture_count = 0;
+    int const status = pcre_fullinfo(
+        this->code, 0, PCRE_INFO_CAPTURECOUNT, &capture_count);
+    if (status != 0)
+    {
+        // The destructor does not run when a constructor throws, so
+        // release the compiled pattern here.
+        pcre_free(this->code);
+        this->code = 0;
+        throw Exception("pcre_fullinfo failed with status " +
+                        QUtil::int_to_string(status));
+    }
+    this->nbackrefs = capture_count;
+}
+
+// Release the compiled pattern.
+PCRE::~PCRE()
+{
+    pcre_free(this->code);
+}
+
+// Run the compiled pattern against subject and return the outcome as
+// a Match. A size of -1 means "use strlen(subject)". A failure to
+// match is reported through the returned object rather than by an
+// exception. Bad options and memory exhaustion throw Exception; any
+// other pcre_exec failure throws QEXC::Internal.
+PCRE::Match
+PCRE::match(char const* subject, int options, int startoffset, int size)
+    throw (QEXC::General, Exception)
+{
+    if (size == -1)
+    {
+        size = strlen(subject);
+    }
+
+    Match result(this->nbackrefs, subject);
+    int const status = pcre_exec(this->code, 0, subject, size,
+                                 startoffset, options,
+                                 result.ovector, result.ovecsize);
+
+    if (status >= 0)
+    {
+        result.nmatches = status;
+        return result;
+    }
+    if (status == PCRE_ERROR_NOMATCH)
+    {
+        // Not an error: result still carries its initial "no match"
+        // state
+        return result;
+    }
+    if (status == PCRE_ERROR_BADOPTION)
+    {
+        throw Exception("bad option passed to PCRE::match()");
+    }
+    if (status == PCRE_ERROR_NOMEMORY)
+    {
+        throw Exception("insufficient memory");
+    }
+
+    // PCRE_ERROR_NULL, PCRE_ERROR_BADMAGIC, PCRE_ERROR_UNKNOWN_NODE,
+    // and anything else
+    throw QEXC::Internal(
+        "pcre_exec returned " + QUtil::int_to_string(status));
+}
+// Self-test driver: prints the results of a fixed set of matches to std::cout. n == 1 runs only the UTF-8 case; any other n runs the general cases.
+void
+PCRE::test(int n)
+{
+ try
+ {
+ if (n == 1)
+ {
+ static char const* utf8 = "abπdefq";
+ PCRE u1("^([[:alpha:]]+)");
+ PCRE u2("^([\\p{L}]+)", PCRE_UTF8);
+ PCRE::Match m1 = u1.match(utf8);
+ if (m1)
+ {
+ std::cout << "no utf8: " << m1.getMatch(1) << std::endl;
+ }
+ PCRE::Match m2 = u2.match(utf8);
+ if (m2)
+ {
+ std::cout << "utf8: " << m2.getMatch(1) << std::endl;
+ }
+ return;
+ }
+ // General cases: first, an invalid pattern must raise Exception
+ try
+ {
+ PCRE pcre1("a**");
+ }
+ catch (Exception& e)
+ {
+ std::cout << e.unparse() << std::endl;
+ }
+ // Two-group pattern: print every accessor, then probe group 3, which the pattern does not define
+ PCRE pcre2("^([^\\s:]*)\\s*:\\s*(.*?)\\s*$");
+ PCRE::Match m2 = pcre2.match("key: value one two three ");
+ if (m2)
+ {
+ std::cout << m2.nMatches() << std::endl;
+ std::cout << m2.getMatch(0) << std::endl;
+ std::cout << m2.getOffset(0) << std::endl;
+ std::cout << m2.getLength(0) << std::endl;
+ std::cout << m2.getMatch(1) << std::endl;
+ std::cout << m2.getOffset(1) << std::endl;
+ std::cout << m2.getLength(1) << std::endl;
+ std::cout << m2.getMatch(2) << std::endl;
+ std::cout << m2.getOffset(2) << std::endl;
+ std::cout << m2.getLength(2) << std::endl;
+ try
+ {
+ std::cout << m2.getMatch(3) << std::endl;
+ }
+ catch (Exception& e)
+ {
+ std::cout << e.unparse() << std::endl;
+ }
+ try
+ {
+ std::cout << m2.getOffset(3) << std::endl;
+ }
+ catch (Exception& e)
+ {
+ std::cout << e.unparse() << std::endl;
+ }
+ }
+ PCRE pcre3("^(a+)(b+)?$"); // optional second group is absent for "aaa"
+ PCRE::Match m3 = pcre3.match("aaa");
+ try
+ {
+ if (m3)
+ {
+ std::cout << m3.nMatches() << std::endl;
+ std::cout << m3.getMatch(0) << std::endl;
+ std::cout << m3.getMatch(1) << std::endl;
+ std::cout << "-"
+ << m3.getMatch(
+ 2, Match::gm_no_substring_returns_empty)
+ << "-" << std::endl;
+ std::cout << "hello" << std::endl;
+ std::cout << m3.getMatch(2) << std::endl; // expected to throw, per the message printed next
+ std::cout << "can't see this" << std::endl;
+ }
+ }
+ catch (Exception& e)
+ {
+ std::cout << e.unparse() << std::endl;
+ }
+
+ // backref: 1 2 3 4 5
+ PCRE pcre4("^((?:(a(b)?)(?:,(c))?)|(c))?$");
+ static char const* candidates[] = {
+ "qqqcqqq", // no match
+ "ab,c", // backrefs: 0, 1, 2, 3, 4
+ "ab", // backrefs: 0, 1, 2, 3
+ "a", // backrefs: 0, 1, 2
+ "a,c", // backrefs: 0, 1, 2, 4
+ "c", // backrefs: 0, 1, 5
+ "", // backrefs: 0
+ 0
+ };
+ for (char const** p = candidates; *p; ++p)
+ {
+ PCRE::Match m(pcre4.match(*p));
+ if (m)
+ {
+ int nmatches = m.nMatches();
+ for (int i = 0; i < nmatches; ++i)
+ {
+ std::cout << *p << ": " << i << ": ";
+ try
+ {
+ std::string match = m.getMatch(i);
+ std::cout << match;
+ }
+ catch (NoBackref&)
+ {
+ std::cout << "no backref (getMatch)";
+ }
+ std::cout << std::endl;
+
+ std::cout << *p << ": " << i << ": ";
+ try
+ {
+ int offset;
+ int length;
+ m.getOffsetLength(i, offset, length);
+ std::cout << offset << ", " << length;
+ }
+ catch (NoBackref&)
+ {
+ std::cout << "no backref (getOffsetLength)";
+ }
+ std:: cout << std::endl;
+ }
+ }
+ else
+ {
+ std::cout << *p << ": no match" << std::endl;
+ }
+ }
+ }
+ catch (QEXC::General& e)
+ {
+ std::cout << "unexpected exception: " << e.unparse() << std::endl;
+ }
+}
diff --git a/libqpdf/Pipeline.cc b/libqpdf/Pipeline.cc
new file mode 100644
index 00000000..17c0c8b2
--- /dev/null
+++ b/libqpdf/Pipeline.cc
@@ -0,0 +1,25 @@
+
+
+#include <qpdf/Pipeline.hh>
+
+// Record this pipeline's identifier and the pipeline (possibly null)
+// that follows it.
+Pipeline::Pipeline(char const* identifier, Pipeline* next) :
+    identifier(identifier),
+    next(next)
+{
+    // Nothing further to initialize
+}
+
+Pipeline::~Pipeline()
+{
+    // Nothing to release here
+}
+
+// Return the next pipeline. When there is none, a null pointer is
+// returned if allow_null is true; otherwise an Exception naming this
+// pipeline is thrown.
+Pipeline*
+Pipeline::getNext(bool allow_null)
+{
+    bool const missing = (this->next == 0);
+    if (missing && (! allow_null))
+    {
+        throw Exception(
+            this->identifier +
+            ": Pipeline::getNext() called on pipeline with no next");
+    }
+    return this->next;
+}
diff --git a/libqpdf/Pl_ASCII85Decoder.cc b/libqpdf/Pl_ASCII85Decoder.cc
new file mode 100644
index 00000000..4ecdaf41
--- /dev/null
+++ b/libqpdf/Pl_ASCII85Decoder.cc
@@ -0,0 +1,131 @@
+#include <qpdf/Pl_ASCII85Decoder.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+#include <string.h>
+
+// Start with an empty input group and no end-of-data marker seen.
+// The group buffer is pre-filled with 117, the largest character
+// value write() accepts as a digit.
+Pl_ASCII85Decoder::Pl_ASCII85Decoder(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next),
+    pos(0),
+    eod(0)
+{
+    memset(this->inbuf, 117, 5);
+}
+
+Pl_ASCII85Decoder::~Pl_ASCII85Decoder()
+{
+    // Nothing to release
+}
+
+// Decode base-85 input. Whitespace is skipped, 'z' outside a group
+// produces four zero bytes, and each complete group of five digits
+// is decoded by flush(). A '~' must be followed immediately by '>';
+// once that end-of-data sequence has been seen, all further input is
+// ignored. Throws QEXC::General on malformed input.
+//
+// eod is 0 while decoding, 1 after '~', and 2 after "~>".
+void
+Pl_ASCII85Decoder::write(unsigned char* buf, int len)
+{
+    for (int i = 0; (i < len) && (! (eod > 1)); ++i)
+    {
+        unsigned char const ch = buf[i];
+
+        if (eod == 1)
+        {
+            // Only '>' may follow '~'
+            if (ch != '>')
+            {
+                throw QEXC::General(
+                    "broken end-of-data sequence in base 85 data");
+            }
+            flush();
+            eod = 2;
+            continue;
+        }
+
+        if ((ch == ' ') || (ch == '\f') || (ch == '\v') ||
+            (ch == '\t') || (ch == '\r') || (ch == '\n'))
+        {
+            QTC::TC("libtests", "Pl_ASCII85Decoder ignore space");
+            // ignore whitespace
+            continue;
+        }
+
+        if (ch == '~')
+        {
+            eod = 1;
+            continue;
+        }
+
+        if (ch == 'z')
+        {
+            if (pos != 0)
+            {
+                throw QEXC::General(
+                    "unexpected z during base 85 decode");
+            }
+            QTC::TC("libtests", "Pl_ASCII85Decoder read z");
+            getNext()->write((unsigned char*)"\000\000\000\000", 4);
+            continue;
+        }
+
+        if ((ch < 33) || (ch > 117))
+        {
+            throw QEXC::General
+                ("character out of range during base 85 decode");
+        }
+
+        // An ordinary digit: add it to the current group and decode
+        // the group once it is full
+        this->inbuf[this->pos++] = ch;
+        if (pos == 5)
+        {
+            flush();
+        }
+    }
+}
+
+// Decode the buffered group and pass pos - 1 bytes downstream, then
+// reset the group buffer. Does nothing when no digits are buffered.
+void
+Pl_ASCII85Decoder::flush()
+{
+    if (this->pos == 0)
+    {
+        QTC::TC("libtests", "Pl_ASCII85Decoder no-op flush");
+        return;
+    }
+
+    // Read all five buffer slots as one base-85 number; slots beyond
+    // pos still hold the fill value 117
+    unsigned long value = 0;
+    int digit = 0;
+    while (digit < 5)
+    {
+        value = (value * 85) + (this->inbuf[digit] - 33);
+        ++digit;
+    }
+
+    // Split the low 32 bits into four bytes, most significant first
+    unsigned char decoded[4];
+    decoded[0] = (value >> 24) & 0xff;
+    decoded[1] = (value >> 16) & 0xff;
+    decoded[2] = (value >> 8) & 0xff;
+    decoded[3] = value & 0xff;
+
+    QTC::TC("libtests", "Pl_ASCII85Decoder partial flush",
+            (this->pos == 5) ? 0 : 1);
+    getNext()->write(decoded, this->pos - 1);
+
+    memset(this->inbuf, 117, 5);
+    this->pos = 0;
+}
+
+// Decode any partial group still buffered, then finish the next
+// pipeline.
+void
+Pl_ASCII85Decoder::finish()
+{
+    this->flush();
+    this->getNext()->finish();
+}
diff --git a/libqpdf/Pl_ASCIIHexDecoder.cc b/libqpdf/Pl_ASCIIHexDecoder.cc
new file mode 100644
index 00000000..d1b4ef1c
--- /dev/null
+++ b/libqpdf/Pl_ASCIIHexDecoder.cc
@@ -0,0 +1,108 @@
+#include <qpdf/Pl_ASCIIHexDecoder.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+#include <string.h>
+#include <ctype.h>
+
+// Start with no digits buffered and no end-of-data marker seen. The
+// digit buffer is pre-filled with "00".
+Pl_ASCIIHexDecoder::Pl_ASCIIHexDecoder(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next),
+    pos(0),
+    eod(false)
+{
+    strcpy(this->inbuf, "00");
+}
+
+Pl_ASCIIHexDecoder::~Pl_ASCIIHexDecoder()
+{
+    // Nothing to release
+}
+
+// Decode hexadecimal input (either letter case). Whitespace is
+// skipped, every pair of digits becomes one output byte, and '>'
+// marks end of data: pending digits are flushed and all further
+// input is ignored. Throws QEXC::General for any other character.
+void
+Pl_ASCIIHexDecoder::write(unsigned char* buf, int len)
+{
+    for (int i = 0; (i < len) && (! this->eod); ++i)
+    {
+        char ch = toupper(buf[i]);
+
+        if ((ch == ' ') || (ch == '\f') || (ch == '\v') ||
+            (ch == '\t') || (ch == '\r') || (ch == '\n'))
+        {
+            QTC::TC("libtests", "Pl_ASCIIHexDecoder ignore space");
+            // ignore whitespace
+            continue;
+        }
+
+        if (ch == '>')
+        {
+            this->eod = true;
+            flush();
+            continue;
+        }
+
+        bool const is_digit = ((ch >= '0') && (ch <= '9'));
+        bool const is_letter = ((ch >= 'A') && (ch <= 'F'));
+        if (! (is_digit || is_letter))
+        {
+            // Build the one-character suffix as a C string so that
+            // the message is formed exactly as before
+            char t[2];
+            t[0] = ch;
+            t[1] = 0;
+            throw QEXC::General(
+                std::string("character out of range during base Hex decode: ") + t);
+        }
+
+        this->inbuf[this->pos++] = ch;
+        if (this->pos == 2)
+        {
+            flush();
+        }
+    }
+}
+
+// Convert the two buffered digits into one byte and pass it
+// downstream, then reset the buffer to "00". With only one digit
+// buffered, the second slot still holds '0'. Does nothing when no
+// digits are buffered.
+void
+Pl_ASCIIHexDecoder::flush()
+{
+    if (this->pos == 0)
+    {
+        QTC::TC("libtests", "Pl_ASCIIHexDecoder no-op flush");
+        return;
+    }
+
+    int value = 0;
+    for (int i = 0; i < 2; ++i)
+    {
+        char const digit = this->inbuf[i];
+        int const nibble =
+            (digit >= 'A') ? (digit - 'A' + 10) : (digit - '0');
+        value = (value << 4) + nibble;
+    }
+    unsigned char ch = (unsigned char)(value);
+
+    QTC::TC("libtests", "Pl_ASCIIHexDecoder partial flush",
+            (this->pos == 2) ? 0 : 1);
+    getNext()->write(&ch, 1);
+
+    strcpy(this->inbuf, "00");
+    this->pos = 0;
+}
+
+// Emit any digit still buffered, then finish the next pipeline.
+void
+Pl_ASCIIHexDecoder::finish()
+{
+    this->flush();
+    this->getNext()->finish();
+}
diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc
new file mode 100644
index 00000000..185cf636
--- /dev/null
+++ b/libqpdf/Pl_Buffer.cc
@@ -0,0 +1,67 @@
+
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/QEXC.hh>
+#include <assert.h>
+
+// Start with nothing accumulated and the result not yet available.
+Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next),
+    ready(false),
+    total_size(0)
+{
+    // Nothing further to initialize
+}
+
+Pl_Buffer::~Pl_Buffer()
+{
+    // Nothing to release explicitly
+}
+
+// Keep a private copy of the len bytes at buf for later retrieval
+// through getBuffer(), and pass the same data on to the next
+// pipeline if there is one. Any write makes the buffer "not ready"
+// until finish() is called again.
+void
+Pl_Buffer::write(unsigned char* buf, int len)
+{
+    Buffer* b = new Buffer(len);
+    if (len > 0)
+    {
+        // An empty Buffer has no storage, and memcpy requires valid
+        // pointers even when asked to copy zero bytes, so only copy
+        // when there is something to copy.
+        memcpy(b->getBuffer(), buf, len);
+    }
+    this->data.push_back(b);
+    this->ready = false;
+    this->total_size += len;
+
+    if (getNext(true))
+    {
+        getNext()->write(buf, len);
+    }
+}
+
+// Mark the accumulated data as ready for getBuffer() and finish the
+// next pipeline if there is one.
+void
+Pl_Buffer::finish()
+{
+    this->ready = true;
+    Pipeline* const downstream = getNext(true);
+    if (downstream != 0)
+    {
+        getNext()->finish();
+    }
+}
+
+// Return a newly allocated Buffer holding everything written since
+// the last call, in order. The internal chunk list is emptied and
+// the object becomes "not ready" again. Throws QEXC::Internal if
+// finish() has not been called since the last write.
+Buffer*
+Pl_Buffer::getBuffer()
+{
+    if (! this->ready)
+    {
+        throw QEXC::Internal("Pl_Buffer::getBuffer() called when not ready");
+    }
+
+    Buffer* result = new Buffer(this->total_size);
+    unsigned char* dest = result->getBuffer();
+    for (; ! this->data.empty(); this->data.pop_front())
+    {
+        // Append the oldest remaining chunk to the result
+        PointerHolder<Buffer> holder = this->data.front();
+        Buffer* chunk = holder.getPointer();
+        size_t bytes = chunk->getSize();
+        memcpy(dest, chunk->getBuffer(), bytes);
+        dest += bytes;
+        this->total_size -= bytes;
+    }
+
+    assert(this->total_size == 0);
+    this->ready = false;
+
+    return result;
+}
diff --git a/libqpdf/Pl_Count.cc b/libqpdf/Pl_Count.cc
new file mode 100644
index 00000000..8a361ad5
--- /dev/null
+++ b/libqpdf/Pl_Count.cc
@@ -0,0 +1,42 @@
+
+#include <qpdf/Pl_Count.hh>
+
+// Start with a zero byte count and a NUL "last character".
+Pl_Count::Pl_Count(char const* identifier, Pipeline* next) :
+    Pipeline(identifier, next),
+    count(0),
+    last_char('\0')
+{
+    // Nothing further to initialize
+}
+
+Pl_Count::~Pl_Count()
+{
+    // Nothing to release
+}
+
+// Add len to the running count, forward the data to the next
+// pipeline, and remember the final byte written. A zero-length write
+// is ignored entirely and is not forwarded.
+void
+Pl_Count::write(unsigned char* buf, int len)
+{
+    if (! len)
+    {
+        return;
+    }
+    this->count += len;
+    getNext()->write(buf, len);
+    this->last_char = buf[len - 1];
+}
+
+// Finish the next pipeline; the count itself needs no finalization.
+void
+Pl_Count::finish()
+{
+    this->getNext()->finish();
+}
+
+// Total number of bytes written through this pipeline so far.
+int
+Pl_Count::getCount() const
+{
+    return count;
+}
+
+// The last byte of the most recent non-empty write, or NUL if
+// nothing has been written.
+unsigned char
+Pl_Count::getLastChar() const
+{
+    return last_char;
+}
diff --git a/libqpdf/Pl_Discard.cc b/libqpdf/Pl_Discard.cc
new file mode 100644
index 00000000..1632ea23
--- /dev/null
+++ b/libqpdf/Pl_Discard.cc
@@ -0,0 +1,23 @@
+
+#include <qpdf/Pl_Discard.hh>
+
+// Exercised in md5 test suite
+
+// A terminal pipeline named "discard" with no next pipeline.
+Pl_Discard::Pl_Discard() :
+    Pipeline("discard", 0)
+{
+    // Nothing further to initialize
+}
+
+Pl_Discard::~Pl_Discard()
+{
+    // Nothing to release
+}
+
+// Accept the data and do nothing with it.
+void
+Pl_Discard::write(unsigned char* buf, int len)
+{
+    // Intentionally empty
+}
+
+// Nothing is buffered and there is no next pipeline to finish.
+void
+Pl_Discard::finish()
+{
+    // Intentionally empty
+}
diff --git a/libqpdf/Pl_Flate.cc b/libqpdf/Pl_Flate.cc
new file mode 100644
index 00000000..ba60c472
--- /dev/null
+++ b/libqpdf/Pl_Flate.cc
@@ -0,0 +1,198 @@
+
+#include <qpdf/Pl_Flate.hh>
+
+#include <qpdf/QUtil.hh>
+
+// Set up a flate pipeline that will deflate or inflate (per action)
+// through an output buffer of out_bufsize bytes.  The zlib stream
+// itself is not initialized here; handleData() does that on first use.
+Pl_Flate::Pl_Flate(char const* identifier, Pipeline* next,
+                   action_e action, int out_bufsize) :
+    Pipeline(identifier, next),
+    out_bufsize(out_bufsize),
+    action(action),
+    initialized(false)
+{
+    this->outbuf = new unsigned char[out_bufsize];
+
+    // No input yet; output starts at the beginning of outbuf.
+    this->zstream.next_in = 0;
+    this->zstream.avail_in = 0;
+    this->zstream.next_out = this->outbuf;
+    this->zstream.avail_out = out_bufsize;
+
+    // Null allocation hooks and opaque pointer.
+    this->zstream.zalloc = (alloc_func)0;
+    this->zstream.zfree = (free_func)0;
+    this->zstream.opaque = (voidpf)0;
+}
+
+// Release the output buffer and, when the stream was initialized but
+// finish() never got as far as freeing the buffer, the zlib state too.
+Pl_Flate::~Pl_Flate()
+{
+    if (this->outbuf)
+    {
+        // finish() clears outbuf once it has shut the stream down, so
+        // a live outbuf means the zlib state may still be allocated.
+        // Return codes are ignored: a destructor must not throw, and
+        // ending an already-ended stream only reports an error code.
+        if (this->initialized)
+        {
+            if (this->action == a_deflate)
+            {
+                (void) deflateEnd(&this->zstream);
+            }
+            else
+            {
+                (void) inflateEnd(&this->zstream);
+            }
+            this->initialized = false;
+        }
+        delete [] this->outbuf;
+        this->outbuf = 0;
+    }
+}
+
+// Feed data to zlib without requesting a flush.  Calling this once
+// the output buffer has been released raises Exception.
+void
+Pl_Flate::write(unsigned char* data, int len)
+{
+    bool const buffer_released = (this->outbuf == 0);
+    if (buffer_released)
+    {
+        throw Exception(
+            this->identifier +
+            ": Pl_Flate: write() called after finish() called");
+    }
+    handleData(data, len, Z_NO_FLUSH);
+}
+
+void
+Pl_Flate::handleData(unsigned char* data, int len, int flush)
+{ // Run len bytes of data through deflate/inflate with the given flush mode, forwarding output downstream.
+ this->zstream.next_in = data;
+ this->zstream.avail_in = len;
+
+ if (! this->initialized)
+ { // lazy one-time zlib initialization on the first call
+ int err = Z_OK;
+ if (this->action == a_deflate)
+ {
+ err = deflateInit(&this->zstream, Z_DEFAULT_COMPRESSION);
+ }
+ else
+ {
+ err = inflateInit(&this->zstream);
+ }
+ checkError("Init", err); // throws unless err is Z_OK
+ this->initialized = true;
+ }
+
+ int err = Z_OK;
+
+ bool done = false;
+ while (! done)
+ { // each pass makes one zlib call and drains whatever landed in outbuf
+ if (action == a_deflate)
+ {
+ err = deflate(&this->zstream, flush);
+ }
+ else
+ {
+ err = inflate(&this->zstream, flush);
+ }
+ switch (err)
+ {
+ case Z_BUF_ERROR:
+ // Probably shouldn't be able to happen, but possible as a
+ // boundary condition: if the last call to inflate exactly
+ // filled the output buffer, it's possible that the next
+ // call to inflate could have nothing to do.
+ done = true;
+ break;
+
+ case Z_STREAM_END:
+ done = true;
+ // fall through
+
+ case Z_OK:
+ {
+ if ((this->zstream.avail_in == 0) &&
+ (this->zstream.avail_out > 0))
+ {
+ // There is nothing left to read, and there was
+ // sufficient buffer space to write everything we
+ // needed, so we're done for now.
+ done = true;
+ }
+ uLong ready = (this->out_bufsize - this->zstream.avail_out); // bytes zlib placed in outbuf
+ if (ready > 0)
+ {
+ this->getNext()->write(this->outbuf, ready);
+ this->zstream.next_out = this->outbuf; // reuse the whole buffer for the next pass
+ this->zstream.avail_out = this->out_bufsize;
+ }
+ }
+ break;
+
+ default:
+ this->checkError("data", err); // err is not Z_OK here, so this throws
+ break;
+ }
+ }
+}
+
+// Flush any pending data with Z_FINISH, shut down the zlib stream,
+// release the output buffer, and then finish the next pipeline.  The
+// flush and shutdown only happen while the buffer is still allocated,
+// so a repeated call just forwards finish() downstream.
+void
+Pl_Flate::finish()
+{
+    if (this->outbuf)
+    {
+        if (this->initialized)
+        {
+            // zlib is given a valid pointer even though no input
+            // bytes are supplied with the final flush.
+            unsigned char dummy[1] = { '\0' };
+            handleData(dummy, 0, Z_FINISH);
+            int const status =
+                (this->action == a_deflate
+                 ? deflateEnd(&this->zstream)
+                 : inflateEnd(&this->zstream));
+            checkError("End", status);
+        }
+
+        delete [] this->outbuf;
+        this->outbuf = 0;
+    }
+    this->getNext()->finish();
+}
+
+// Throw Exception describing a zlib failure unless error_code is Z_OK.
+// The message is "<identifier>: <deflate|inflate>: <prefix>: <detail>",
+// where detail is zlib's own message when it supplied one and a
+// generic description of the error code otherwise.
+void
+Pl_Flate::checkError(char const* prefix, int error_code)
+{
+    if (error_code == Z_OK)
+    {
+        return;
+    }
+
+    std::string msg = this->identifier + ": " +
+        (this->action == a_deflate ? "deflate" : "inflate") +
+        ": " + prefix + ": ";
+
+    if (this->zstream.msg)
+    {
+        msg += this->zstream.msg;
+        throw Exception(msg);
+    }
+
+    char const* description = 0;
+    switch (error_code)
+    {
+      case Z_ERRNO:
+        description = "zlib system error";
+        break;
+
+      case Z_STREAM_ERROR:
+        description = "zlib stream error";
+        break;
+
+      case Z_DATA_ERROR:
+        description = "zlib data error";
+        break;
+
+      case Z_MEM_ERROR:
+        description = "zlib memory error";
+        break;
+
+      case Z_BUF_ERROR:
+        description = "zlib buffer error";
+        break;
+
+      case Z_VERSION_ERROR:
+        description = "zlib version error";
+        break;
+
+      default:
+        break;
+    }
+
+    if (description)
+    {
+        msg += description;
+    }
+    else
+    {
+        msg += std::string("zlib unknown error (") +
+            QUtil::int_to_string(error_code) + ")";
+    }
+
+    throw Exception(msg);
+}
diff --git a/libqpdf/Pl_LZWDecoder.cc b/libqpdf/Pl_LZWDecoder.cc
new file mode 100644
index 00000000..e85531e9
--- /dev/null
+++ b/libqpdf/Pl_LZWDecoder.cc
@@ -0,0 +1,229 @@
+#include <qpdf/Pl_LZWDecoder.hh>
+
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+#include <string.h>
+#include <assert.h>
+
+Pl_LZWDecoder::Pl_LZWDecoder(char const* identifier, Pipeline* next,
+ bool early_code_change) :
+ Pipeline(identifier, next),
+ code_size(9), // codes start out 9 bits wide
+ next(0), // slot in buf that receives the next input byte
+ byte_pos(0),
+ bit_pos(0),
+ bits_available(0),
+ code_change_delta(early_code_change ? 1 : 0), // added to the table index when deciding to widen codes
+ eod(false),
+ last_code(256) // same value handleCode() treats as the clear-table code
+{
+ memset(buf, 0, 3); // zero the three input bytes that sendNextCode() reads
+}
+
+
+Pl_LZWDecoder::~Pl_LZWDecoder()
+{ // empty body: nothing is released here
+}
+
+// Append each incoming byte to the three-byte circular input buffer
+// and decode a code as soon as enough bits for one have accumulated.
+void
+Pl_LZWDecoder::write(unsigned char* bytes, int len)
+{
+    int i = 0;
+    while (i < len)
+    {
+        this->buf[this->next] = bytes[i];
+        ++this->next;
+        if (this->next == 3)
+        {
+            // wrap around to the first slot
+            this->next = 0;
+        }
+        this->bits_available += 8;
+        if (! (this->bits_available < this->code_size))
+        {
+            sendNextCode();
+        }
+        ++i;
+    }
+}
+
+void
+Pl_LZWDecoder::finish()
+{ // leftover undecoded bits are not processed; only forwards finish()
+ getNext()->finish();
+}
+
+void
+Pl_LZWDecoder::sendNextCode()
+{ // Assemble the next code_size-bit code from buf at (byte_pos, bit_pos) and pass it to handleCode().
+ int high = this->byte_pos; // buf is indexed modulo 3, so these are the three successive byte slots
+ int med = (this->byte_pos + 1) % 3;
+ int low = (this->byte_pos + 2) % 3;
+
+ int bits_from_high = 8 - this->bit_pos; // unread bits remaining in the current byte
+ int bits_from_med = this->code_size - bits_from_high;
+ int bits_from_low = 0;
+ if (bits_from_med > 8)
+ { // the code spills past the second byte into a third
+ bits_from_low = bits_from_med - 8;
+ bits_from_med = 8;
+ }
+ int high_mask = (1 << bits_from_high) - 1; // keeps the low-order bits of the first byte
+ int med_mask = 0xff - ((1 << (8 - bits_from_med)) - 1); // keeps the high-order bits of the second byte
+ int low_mask = 0xff - ((1 << (8 - bits_from_low)) - 1); // keeps the high-order bits of the third byte
+ int code = 0;
+ code += (this->buf[high] & high_mask) << bits_from_med;
+ code += ((this->buf[med] & med_mask) >> (8 - bits_from_med));
+ if (bits_from_low)
+ {
+ code <<= bits_from_low;
+ code += ((this->buf[low] & low_mask) >> (8 - bits_from_low));
+ this->byte_pos = low; // next code starts inside the third byte
+ this->bit_pos = bits_from_low;
+ }
+ else
+ {
+ this->byte_pos = med; // next code starts inside the second byte
+ this->bit_pos = bits_from_med;
+ }
+ if (this->bit_pos == 8)
+ { // that byte was consumed completely; advance to the start of the following one
+ this->bit_pos = 0;
+ ++this->byte_pos;
+ this->byte_pos %= 3;
+ }
+ this->bits_available -= this->code_size;
+
+ handleCode(code);
+}
+
+// Return the first byte of the sequence a code stands for: the code
+// itself for literal codes below 256, otherwise the first byte of the
+// corresponding table entry (table entries begin at code 258).
+unsigned char
+Pl_LZWDecoder::getFirstChar(int code)
+{
+    if (code < 256)
+    {
+        return (unsigned char) code;
+    }
+
+    assert(code > 257);
+    unsigned int idx = code - 258;
+    assert(idx < table.size());
+    return table[idx].getBuffer()[0];
+}
+
+// Append a new table entry consisting of the sequence for last_code
+// followed by the single byte next.
+void
+Pl_LZWDecoder::addToTable(unsigned char next)
+{
+    unsigned char literal[1];
+    unsigned char const* prefix = 0;
+    unsigned int prefix_len = 0;
+
+    if (this->last_code >= 256)
+    {
+        // Previous code referred to a table entry.
+        assert(this->last_code > 257);
+        unsigned int idx = this->last_code - 258;
+        assert(idx < table.size());
+        Buffer& previous = table[idx];
+        prefix = previous.getBuffer();
+        prefix_len = previous.getSize();
+    }
+    else
+    {
+        // Previous code was a literal byte.
+        literal[0] = this->last_code;
+        prefix = literal;
+        prefix_len = 1;
+    }
+
+    Buffer entry(prefix_len + 1);
+    unsigned char* contents = entry.getBuffer();
+    memcpy(contents, prefix, prefix_len);
+    contents[prefix_len] = next;
+    this->table.push_back(entry);
+}
+
+// Process one decoded LZW code.
+//
+// Code 256 clears the table and resets the code size to 9 bits; code
+// 257 marks end of data, after which every further code is ignored.
+// Any other code first causes a new table entry to be created from
+// the previous code (unless the previous code was a clear code) and
+// is then expanded and written to the next pipeline.
+//
+// Throws QEXC::General if the code refers to a table entry that does
+// not exist or if the table is full.
+void
+Pl_LZWDecoder::handleCode(int code)
+{
+    if (this->eod)
+    {
+        return;
+    }
+
+    if (code == 256)
+    {
+        if (! this->table.empty())
+        {
+            QTC::TC("libtests", "Pl_LZWDecoder intermediate reset");
+        }
+        this->table.clear();
+        this->code_size = 9;
+    }
+    else if (code == 257)
+    {
+        this->eod = true;
+    }
+    else
+    {
+        if (this->last_code != 256)
+        {
+            // Add to the table from last time.  New table entry would
+            // be what we read last plus the first character of what
+            // we're reading now.
+            unsigned char next = '\0';
+            unsigned int table_size = table.size();
+            if (code < 256)
+            {
+                // just read < 256; last time's next was code
+                next = code;
+            }
+            else if (code > 257)
+            {
+                unsigned int idx = code - 258;
+                if (idx > table_size)
+                {
+                    throw QEXC::General("LZWDecoder: bad code received");
+                }
+                else if (idx == table_size)
+                {
+                    // The encoder would have just created this entry,
+                    // so the first character of this entry would have
+                    // been the same as the first character of the
+                    // last entry.
+                    QTC::TC("libtests", "Pl_LZWDecoder last was table size");
+                    next = getFirstChar(this->last_code);
+                }
+                else
+                {
+                    next = getFirstChar(code);
+                }
+            }
+            unsigned int last_idx = 258 + table_size;
+            if (last_idx == 4095)
+            {
+                throw QEXC::General("LZWDecoder: table full");
+            }
+            addToTable(next);
+            unsigned int change_idx = last_idx + code_change_delta;
+            if ((change_idx == 511) ||
+                (change_idx == 1023) ||
+                (change_idx == 2047))
+            {
+                ++this->code_size;
+            }
+        }
+
+        if (code < 256)
+        {
+            unsigned char ch = (unsigned char) code;
+            getNext()->write(&ch, 1);
+        }
+        else
+        {
+            // The range check above is skipped when the previous code
+            // was a clear code, so corrupt input could otherwise name
+            // a table entry that does not exist.  Validate here before
+            // indexing.
+            unsigned int idx = code - 258;
+            if (idx >= table.size())
+            {
+                throw QEXC::General("LZWDecoder: bad code received");
+            }
+            Buffer& b = table[idx];
+            getNext()->write(b.getBuffer(), b.getSize());
+        }
+    }
+
+    this->last_code = code;
+}
diff --git a/libqpdf/Pl_MD5.cc b/libqpdf/Pl_MD5.cc
new file mode 100644
index 00000000..0a2711b8
--- /dev/null
+++ b/libqpdf/Pl_MD5.cc
@@ -0,0 +1,43 @@
+
+#include <qpdf/Pl_MD5.hh>
+
+#include <qpdf/QEXC.hh>
+
+Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) :
+ Pipeline(identifier, next),
+ in_progress(false) // becomes true on the first write(), false again in finish()
+{
+}
+
+Pl_MD5::~Pl_MD5()
+{ // empty body: nothing is released here
+}
+
+// Fold the data into the running MD5 computation and pass it through
+// unchanged to the next pipeline.  The first write after construction
+// or after finish() resets the digest state first.
+void
+Pl_MD5::write(unsigned char* buf, int len)
+{
+    bool const starting_new_digest = (! this->in_progress);
+    if (starting_new_digest)
+    {
+        this->md5.reset();
+        this->in_progress = true;
+    }
+    char* as_chars = (char*) buf;
+    this->md5.encodeDataIncrementally(as_chars, len);
+    this->getNext()->write(buf, len);
+}
+
+void
+Pl_MD5::finish()
+{ // finishes downstream first, then marks the digest as no longer in progress
+ this->getNext()->finish();
+ this->in_progress = false; // getHexDigest() is permitted from here on
+}
+
+std::string
+Pl_MD5::getHexDigest()
+{ // returns md5.unparse(); throws QEXC::General between a write() and the following finish()
+ if (this->in_progress)
+ {
+ throw QEXC::General("digest requested for in-progress MD5 Pipeline");
+ }
+ return this->md5.unparse();
+}
diff --git a/libqpdf/Pl_PNGFilter.cc b/libqpdf/Pl_PNGFilter.cc
new file mode 100644
index 00000000..28b87c5e
--- /dev/null
+++ b/libqpdf/Pl_PNGFilter.cc
@@ -0,0 +1,146 @@
+
+#include <qpdf/Pl_PNGFilter.hh>
+#include <string.h>
+
+// Create a PNG-filter pipeline that encodes or decodes rows of
+// `columns` data bytes.  Two row buffers of columns + 1 bytes are
+// allocated (one extra byte for the filter-type tag that precedes each
+// encoded row); buf1 starts out as the current row and there is no
+// previous row yet.  bytes_per_pixel is accepted but not used by this
+// implementation.
+Pl_PNGFilter::Pl_PNGFilter(char const* identifier, Pipeline* next,
+                           action_e action, unsigned int columns,
+                           unsigned int bytes_per_pixel) :
+    Pipeline(identifier, next),
+    action(action),
+    columns(columns),
+    cur_row(0),
+    prev_row(0),
+    buf1(0),
+    buf2(0),
+    pos(0)
+{
+    this->buf1 = new unsigned char[columns + 1];
+    this->buf2 = new unsigned char[columns + 1];
+    // Start from zeroed rows, matching what write() and finish() do
+    // whenever they recycle a row.  Without this, a partial first row
+    // flushed by finish() would include uninitialized bytes.
+    memset(this->buf1, 0, columns + 1);
+    memset(this->buf2, 0, columns + 1);
+    this->cur_row = buf1;
+
+    // number of bytes per incoming row
+    this->incoming = (action == a_encode ? columns : columns + 1);
+}
+
+Pl_PNGFilter::~Pl_PNGFilter()
+{ // frees the two row buffers allocated by the constructor
+ delete [] buf1;
+ delete [] buf2;
+}
+
+// Accumulate incoming bytes into the current row.  Every time a full
+// row (`incoming` bytes) is available it is processed, the current
+// and previous row buffers trade places, and the new current row is
+// zeroed.  Bytes left over after the last complete row stay buffered.
+void
+Pl_PNGFilter::write(unsigned char* data, int len)
+{
+    unsigned char* src = data;
+    int remaining = len;
+    int needed = this->incoming - this->pos;
+    while (remaining >= needed)
+    {
+        // complete the row in progress
+        memcpy(this->cur_row + this->pos, src, needed);
+        src += needed;
+        remaining -= needed;
+
+        processRow();
+
+        // The old previous row becomes the new current row; before
+        // any row has been completed that is buf2.
+        unsigned char* recycled = this->prev_row ? this->prev_row : this->buf2;
+        this->prev_row = this->cur_row;
+        this->cur_row = recycled;
+        memset(this->cur_row, 0, this->columns + 1);
+        this->pos = 0;
+        needed = this->incoming;
+    }
+    if (remaining)
+    {
+        memcpy(this->cur_row + this->pos, src, remaining);
+    }
+    this->pos += remaining;
+}
+
+// Dispatch the current row to the encoder or decoder according to
+// the action chosen at construction.
+void
+Pl_PNGFilter::processRow()
+{
+    if (this->action != a_encode)
+    {
+        decodeRow();
+        return;
+    }
+    encodeRow();
+}
+
+// Decode the current row in place and write its `columns` data bytes
+// (everything after the leading filter-type byte) to the next
+// pipeline.
+//
+// Only the "none" and "up" filters are implemented.  Sub, average and
+// Paeth raise Exception on every row, including the first: they
+// depend on neighbouring bytes within the row, so passing a first row
+// through untouched would silently produce wrong data.  Unknown
+// filter types are ignored and the row is passed through as is.
+void
+Pl_PNGFilter::decodeRow()
+{
+    int filter = (int) this->cur_row[0];
+    switch (filter)
+    {
+      case 0:                   // none
+        break;
+
+      case 1:                   // sub
+        throw Exception("sub filter not implemented");
+        break;
+
+      case 2:                   // up
+        // With no previous row there is nothing to add, so the row
+        // is already decoded.
+        if (this->prev_row)
+        {
+            for (unsigned int i = 1; i <= this->columns; ++i)
+            {
+                this->cur_row[i] += this->prev_row[i];
+            }
+        }
+        break;
+
+      case 3:                   // average
+        throw Exception("average filter not implemented");
+        break;
+
+      case 4:                   // Paeth
+        throw Exception("Paeth filter not implemented");
+        break;
+
+      default:
+        // ignore
+        break;
+    }
+
+    getNext()->write(this->cur_row + 1, this->columns);
+}
+
+// Encode the current row with the UP filter: emit the filter-type
+// byte (2) followed by each data byte minus the byte above it.  The
+// first row has nothing above it and is emitted unchanged.
+void
+Pl_PNGFilter::encodeRow()
+{
+    // For now, hard-code to using UP filter.
+    unsigned char filter_type = 2;
+    getNext()->write(&filter_type, 1);
+
+    if (! this->prev_row)
+    {
+        getNext()->write(this->cur_row, this->columns);
+        return;
+    }
+
+    unsigned int col = 0;
+    while (col < this->columns)
+    {
+        unsigned char delta = this->cur_row[col] - this->prev_row[col];
+        getNext()->write(&delta, 1);
+        ++col;
+    }
+}
+
+// Process whatever partial row is still buffered, reset the filter to
+// its initial row state (no previous row, zeroed buf1 as the current
+// row), and finish the next pipeline.
+void
+Pl_PNGFilter::finish()
+{
+    bool const have_partial_row = (this->pos != 0);
+    if (have_partial_row)
+    {
+        // write partial row
+        processRow();
+    }
+
+    this->pos = 0;
+    this->prev_row = 0;
+    this->cur_row = buf1;
+    memset(this->cur_row, 0, this->columns + 1);
+
+    getNext()->finish();
+}
diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc
new file mode 100644
index 00000000..63f0caaf
--- /dev/null
+++ b/libqpdf/Pl_QPDFTokenizer.cc
@@ -0,0 +1,179 @@
+
+#include <qpdf/Pl_QPDFTokenizer.hh>
+#include <qpdf/QPDF_String.hh>
+#include <qpdf/QPDF_Name.hh>
+
+Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
+ Pipeline(identifier, next),
+ newline_after_next_token(false), // set when a newline is seen while a token is still being read
+ just_wrote_nl(false), // whether the last byte written downstream was '\n'
+ last_char_was_cr(false),
+ unread_char(false), // true when char_to_unread must be processed again
+ char_to_unread('\0'),
+ pass_through(false) // turned on after a "BI" word token is written
+{
+}
+
+Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
+{ // empty body: nothing is released here
+}
+
+// Write len bytes from buf to the next pipeline and record whether
+// the last byte written was a newline.  The data is copied into a
+// temporary mutable buffer because Pipeline::write() takes a
+// non-const pointer while callers pass const data (including string
+// literals).  Nothing happens when len is zero.
+void
+Pl_QPDFTokenizer::writeNext(char const* buf, int len)
+{
+    if (len)
+    {
+        unsigned char* t = new unsigned char[len];
+        memcpy(t, buf, len);
+        try
+        {
+            getNext()->write(t, len);
+        }
+        catch (...)
+        {
+            // Don't leak the temporary copy if a downstream pipeline
+            // throws.
+            delete [] t;
+            throw;
+        }
+        delete [] t;
+        this->just_wrote_nl = (buf[len-1] == '\n');
+    }
+}
+
+// Write a token downstream.  String and name tokens are re-created
+// from their values via QPDF_String / QPDF_Name and unparsed; every
+// other token type is written using its raw value.
+void
+Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
+{
+    std::string text = token.getRawValue();
+
+    if (token.getType() == QPDFTokenizer::tt_string)
+    {
+        text = QPDF_String(token.getValue()).unparse();
+    }
+    else if (token.getType() == QPDFTokenizer::tt_name)
+    {
+        text = QPDF_Name(token.getValue()).unparse();
+    }
+
+    writeNext(text.c_str(), text.length());
+}
+
+void
+Pl_QPDFTokenizer::processChar(char ch)
+{ // Feed one character to the tokenizer and write out any completed token or inter-token whitespace.
+ if (this->pass_through)
+ {
+ // We're not normalizing anymore -- just write this without
+ // looking at it.
+ writeNext(&ch, 1);
+ return;
+ }
+
+ tokenizer.presentCharacter(ch);
+ QPDFTokenizer::Token token;
+ if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
+ { // a token was completed by this character
+ writeToken(token);
+ if (this->newline_after_next_token)
+ { // a newline seen while inside the token is emitted after it
+ writeNext("\n", 1);
+ this->newline_after_next_token = false;
+ }
+ if ((token.getType() == QPDFTokenizer::tt_word) &&
+ (token.getValue() == "BI"))
+ {
+ // Uh oh.... we're not sophisticated enough to handle
+ // inline images safely. We'd have to set up all the
+ // filters and pipe the image data through it until the
+ // filtered output was the right size for an image of the
+ // specified dimensions. Then we'd either have to write
+ // out raw image data or continue to write filtered data,
+ // resuming normalization when we get to the end.
+ // Instead, for now, we'll just turn off normalization for
+ // the remainder of this stream.
+ this->pass_through = true;
+ if (this->unread_char)
+ { // write the pending character directly since the tokenizer will no longer see it
+ writeNext(&this->char_to_unread, 1);
+ this->unread_char = false;
+ }
+ }
+ }
+ else
+ { // no token yet: normalize line endings (\r and \r\n both become \n)
+ bool suppress = false;
+ if ((ch == '\n') && (this->last_char_was_cr))
+ {
+ // Always ignore \n following \r
+ suppress = true;
+ }
+
+ if ((this->last_char_was_cr = (ch == '\r')))
+ { // assignment is intentional: remember the \r, then treat it as \n
+ ch = '\n';
+ }
+
+ if (this->tokenizer.betweenTokens())
+ {
+ if (! suppress)
+ {
+ writeNext(&ch, 1);
+ }
+ }
+ else
+ {
+ if (ch == '\n')
+ { // inside a token: defer the newline until the token has been written
+ this->newline_after_next_token = true;
+ }
+ }
+ }
+}
+
+
+// If the tokenizer handed back a character to be re-read, process it
+// again now.  It is an internal error for that re-processing to leave
+// a character pending yet again.
+void
+Pl_QPDFTokenizer::checkUnread()
+{
+    if (! this->unread_char)
+    {
+        return;
+    }
+
+    processChar(this->char_to_unread);
+    if (this->unread_char)
+    {
+        throw QEXC::Internal("unread_char still true after processing "
+                             "unread character");
+    }
+}
+
+// Process the buffer one character at a time, re-processing any
+// pending unread character before the first character and after each
+// one.
+void
+Pl_QPDFTokenizer::write(unsigned char* buf, int len)
+{
+    checkUnread();
+    int i = 0;
+    while (i < len)
+    {
+        processChar(buf[i]);
+        checkUnread();
+        ++i;
+    }
+}
+
+void
+Pl_QPDFTokenizer::finish()
+{ // Signal EOF to the tokenizer, write any final token, make sure output ends with a newline, then finish downstream.
+ this->tokenizer.presentEOF();
+ if (! this->pass_through)
+ {
+ QPDFTokenizer::Token token;
+ if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
+ {
+ writeToken(token);
+ if (unread_char)
+ { // the pending character is written directly, with \r mapped to \n
+ if (this->char_to_unread == '\r')
+ {
+ this->char_to_unread = '\n';
+ }
+ writeNext(&this->char_to_unread, 1);
+ }
+ }
+ }
+ if (! this->just_wrote_nl)
+ { // terminate the output with a newline if the last byte written was not one
+ writeNext("\n", 1);
+ }
+
+ getNext()->finish();
+}
diff --git a/libqpdf/Pl_RC4.cc b/libqpdf/Pl_RC4.cc
new file mode 100644
index 00000000..74e53c8b
--- /dev/null
+++ b/libqpdf/Pl_RC4.cc
@@ -0,0 +1,57 @@
+
+#include <qpdf/Pl_RC4.hh>
+
+#include <qpdf/QUtil.hh>
+
+Pl_RC4::Pl_RC4(char const* identifier, Pipeline* next,
+ unsigned char const* key_data, int key_len,
+ int out_bufsize) :
+ Pipeline(identifier, next),
+ out_bufsize(out_bufsize), // largest chunk write() processes at once
+ rc4(key_data, key_len)
+{
+ this->outbuf = new unsigned char[out_bufsize]; // released in finish() or the destructor
+}
+
+Pl_RC4::~Pl_RC4()
+{ // frees outbuf unless finish() already did
+ if (this->outbuf)
+ {
+ delete [] this->outbuf;
+ this->outbuf = 0;
+ }
+}
+
+// Run the data through RC4 in chunks of at most out_bufsize bytes,
+// writing each processed chunk to the next pipeline.  Raises
+// Exception if called after finish() has released the output buffer.
+void
+Pl_RC4::write(unsigned char* data, int len)
+{
+    if (this->outbuf == 0)
+    {
+        throw Exception(
+            this->identifier +
+            ": Pl_RC4: write() called after finish() called");
+    }
+
+    unsigned char* cursor = data;
+    for (int remaining = len; remaining > 0; )
+    {
+        int chunk = this->out_bufsize;
+        if (remaining < chunk)
+        {
+            chunk = remaining;
+        }
+        rc4.process(cursor, chunk, outbuf);
+        getNext()->write(outbuf, chunk);
+        cursor += chunk;
+        remaining -= chunk;
+    }
+}
+
+// Release the output buffer (subsequent write() calls will be
+// rejected) and finish the next pipeline.
+void
+Pl_RC4::finish()
+{
+    // Deleting a null pointer is a no-op, so no check is needed for
+    // a repeated finish().
+    delete [] this->outbuf;
+    this->outbuf = 0;
+    this->getNext()->finish();
+}
diff --git a/libqpdf/Pl_StdioFile.cc b/libqpdf/Pl_StdioFile.cc
new file mode 100644
index 00000000..c0f42afd
--- /dev/null
+++ b/libqpdf/Pl_StdioFile.cc
@@ -0,0 +1,48 @@
+
+#include <qpdf/Pl_StdioFile.hh>
+
+#include <errno.h>
+
+Pl_StdioFile::Pl_StdioFile(char const* identifier, FILE* f) :
+ Pipeline(identifier, 0), // no next pipeline: data goes to the FILE
+ file(f) // stored as given; this class never closes it in the code shown here
+{
+}
+
+Pl_StdioFile::~Pl_StdioFile()
+{ // empty body: the FILE is not closed here
+}
+
+// Write all len bytes to the file, retrying after short writes.  A
+// write that makes no progress raises QEXC::System with the current
+// errno.
+void
+Pl_StdioFile::write(unsigned char* buf, int len)
+{
+    while (len > 0)
+    {
+        size_t written = fwrite(buf, 1, len, this->file);
+        if (written == 0)
+        {
+            throw QEXC::System(this->identifier + ": Pl_StdioFile::write",
+                               errno);
+        }
+        buf += written;
+        len -= written;
+    }
+}
+
+// Flush the file.  Raises QEXC::Internal when fileno() reports that
+// the stream has no valid descriptor.
+void
+Pl_StdioFile::finish()
+{
+    if (fileno(this->file) == -1)
+    {
+        throw QEXC::Internal(this->identifier +
+                             ": Pl_StdioFile::finish: stream already closed");
+    }
+    fflush(this->file);
+}
diff --git a/libqpdf/QEXC.cc b/libqpdf/QEXC.cc
new file mode 100644
index 00000000..c65afbb6
--- /dev/null
+++ b/libqpdf/QEXC.cc
@@ -0,0 +1,67 @@
+
+#include <qpdf/QEXC.hh>
+#include <string.h>
+#include <errno.h>
+
+QEXC::Base::Base() // leaves message default-constructed; see setMessage()
+{
+ // nothing needed
+}
+
+QEXC::Base::Base(std::string const& message) :
+ message(message) // stored copy is what unparse() and what() expose
+{
+ // nothing needed
+}
+
+std::string const&
+QEXC::Base::unparse() const
+{ // returns a reference to the stored message, valid while this object lives
+ return this->message;
+}
+
+void
+QEXC::Base::setMessage(std::string const& message)
+{ // replaces the stored message
+ this->message = message;
+}
+
+const char*
+QEXC::Base::what() const throw()
+{
+ // Since unparse() returns a const string reference, its
+ // implementors must arrange to have it return a reference to a
+ // string that is not going to disappear. It is therefore safe
+ // for us to return its c_str() pointer.
+ return this->unparse().c_str();
+}
+
+QEXC::General::General() // no Base initializer given, so Base is default-constructed
+{
+ // nothing needed
+}
+
+QEXC::General::General(std::string const& message) :
+ Base(message) // message is stored unchanged
+{
+ // nothing needed
+}
+
+QEXC::System::System(std::string const& prefix, int sys_errno)
+{ // message becomes "<prefix>: <strerror text>"
+ // Note: using sys_errno in case errno is a macro.
+ this->sys_errno = sys_errno; // kept for getErrno()
+ this->setMessage(prefix + ": " + strerror(sys_errno));
+}
+
+int
+QEXC::System::getErrno() const
+{ // the errno value passed to the constructor
+ return this->sys_errno;
+}
+
+QEXC::Internal::Internal(std::string const& message) :
+ Base("INTERNAL ERROR: " + message) // every internal error carries this fixed prefix
+{
+ // nothing needed
+}
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
new file mode 100644
index 00000000..6f51fa2c
--- /dev/null
+++ b/libqpdf/QPDF.cc
@@ -0,0 +1,1851 @@
+
+#include <qpdf/QPDF.hh>
+
+#include <vector>
+#include <map>
+#include <string.h>
+#include <memory.h>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/PCRE.hh>
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QPDF_Null.hh>
+#include <qpdf/QPDF_Dictionary.hh>
+
+void
+QPDF::InputSource::setLastOffset(off_t offset)
+{ // overrides the offset later reported by getLastOffset()
+ this->last_offset = offset;
+}
+
+off_t
+QPDF::InputSource::getLastOffset() const
+{ // offset recorded by the last read(), readLine() or setLastOffset()
+ return this->last_offset;
+}
+
+// Read one line, returning its contents without the terminator.  A
+// line ends at any run of one or more \r / \n characters; the whole
+// run is consumed.  Reading also stops at end of input.  Afterwards
+// last_offset is the position where the line started.
+std::string
+QPDF::InputSource::readLine()
+{
+    off_t const line_start = this->tell();
+    std::string line;
+    bool in_terminator = false;
+    char ch;
+    while (this->read(&ch, 1) != 0)
+    {
+        bool const is_newline = ((ch == '\012') || (ch == '\015'));
+        if (is_newline)
+        {
+            // First or subsequent terminator character: consume it.
+            in_terminator = true;
+        }
+        else if (in_terminator)
+        {
+            // First character of the following line: put it back.
+            this->unreadCh(ch);
+            break;
+        }
+        else
+        {
+            line += ch;
+        }
+    }
+    // Override last offset to be where we started this line rather
+    // than before the last character read
+    this->last_offset = line_start;
+    return line;
+}
+
+QPDF::FileInputSource::FileInputSource() :
+ file(0) // no file open until setFilename() is called
+{
+}
+
+void
+QPDF::FileInputSource::setFilename(char const* filename)
+{ // closes any previously opened file, then opens filename for binary reading
+ destroy();
+ this->filename = filename;
+ this->file = QUtil::fopen_wrapper(std::string("open ") + this->filename,
+ fopen(this->filename.c_str(), "rb")); // presumably the wrapper reports a failed fopen -- confirm in QUtil
+}
+
+QPDF::FileInputSource::~FileInputSource()
+{ // closes the file if one is open
+ destroy();
+}
+
+// Close the underlying file if one is open and forget it.  Safe to
+// call repeatedly.
+void
+QPDF::FileInputSource::destroy()
+{
+    if (! this->file)
+    {
+        return;
+    }
+    fclose(this->file);
+    this->file = 0;
+}
+
+std::string const&
+QPDF::FileInputSource::getName() const
+{ // the filename given to setFilename()
+ return this->filename;
+}
+
+off_t
+QPDF::FileInputSource::tell()
+{ // current position as reported by ftell()
+ return ftell(this->file);
+}
+
+void
+QPDF::FileInputSource::seek(off_t offset, int whence)
+{ // fseek() with the given arguments; the first argument is a description passed to os_wrapper
+ QUtil::os_wrapper(std::string("seek to ") + this->filename + ", offset " +
+ QUtil::int_to_string(offset) + " (" +
+ QUtil::int_to_string(whence) + ")",
+ fseek(this->file, offset, whence)); // presumably os_wrapper reports a failing status -- confirm in QUtil
+}
+
+void
+QPDF::FileInputSource::rewind()
+{ // :: selects the C library rewind() rather than this member
+ ::rewind(this->file);
+}
+
+// Read up to length bytes into buffer and return how many were read.
+// last_offset is set to the position before the read.  A read that
+// returns nothing because of a stream error raises QPDFExc; plain
+// end of file just returns 0.
+size_t
+QPDF::FileInputSource::read(char* buffer, int length)
+{
+    this->last_offset = ftell(this->file);
+    size_t const bytes_read = fread(buffer, 1, length, this->file);
+    bool const failed = ((bytes_read == 0) && ferror(this->file));
+    if (failed)
+    {
+        throw QPDFExc(this->filename, this->last_offset,
+                      std::string("read ") +
+                      QUtil::int_to_string(length) + " bytes");
+    }
+    return bytes_read;
+}
+
+void
+QPDF::FileInputSource::unreadCh(char ch)
+{ // pushes ch back onto the stream with ungetc()
+ QUtil::os_wrapper(this->filename + ": unread character",
+ ungetc((unsigned char)ch, this->file)); // cast keeps bytes >= 0x80 from being passed as negative values
+}
+
+QPDF::BufferInputSource::BufferInputSource(std::string const& description,
+ Buffer* buf) :
+ description(description), // returned by getName()
+ buf(buf), // pointer is stored, not copied; the destructor below does not delete it
+ cur_offset(0) // reading starts at the beginning of the buffer
+{
+}
+
+QPDF::BufferInputSource::~BufferInputSource()
+{ // empty body: buf is not deleted here
+}
+
+std::string const&
+QPDF::BufferInputSource::getName() const
+{ // the description given at construction
+ return this->description;
+}
+
+off_t
+QPDF::BufferInputSource::tell()
+{ // current read position within the buffer
+ return this->cur_offset;
+}
+
+// Reposition the read offset using the same conventions as fseek():
+// SEEK_SET is relative to the start of the buffer, SEEK_CUR to the
+// current position, and SEEK_END to the end of the buffer, so a
+// negative offset with SEEK_END moves backwards from the end.  Any
+// other whence value raises QEXC::Internal.  The resulting offset is
+// not range-checked here.
+void
+QPDF::BufferInputSource::seek(off_t offset, int whence)
+{
+    switch (whence)
+    {
+      case SEEK_SET:
+        this->cur_offset = offset;
+        break;
+
+      case SEEK_END:
+        // The offset is added, not subtracted: callers written
+        // against the file-based source pass negative offsets here
+        // (for example seek(-n, SEEK_END) to read the last n bytes).
+        this->cur_offset = this->buf->getSize() + offset;
+        break;
+
+      case SEEK_CUR:
+        this->cur_offset += offset;
+        break;
+
+      default:
+        throw QEXC::Internal("invalid argument to BufferInputSource::seek");
+        break;
+    }
+}
+
+void
+QPDF::BufferInputSource::rewind()
+{ // back to the start of the buffer
+ this->cur_offset = 0;
+}
+
+// Copy up to length bytes from the current position into buffer,
+// advance the position, and return the number of bytes copied.
+// last_offset is set to the position before the read, or to the end
+// of the buffer when the position is already at or past the end (in
+// which case 0 is returned).
+size_t
+QPDF::BufferInputSource::read(char* buffer, int length)
+{
+    off_t const end_pos = this->buf->getSize();
+    if (! (this->cur_offset < end_pos))
+    {
+        this->last_offset = end_pos;
+        return 0;
+    }
+
+    this->last_offset = this->cur_offset;
+    int const available = (int)(end_pos - this->cur_offset);
+    size_t len = (length < available ? length : available);
+    memcpy(buffer, buf->getBuffer() + this->cur_offset, len);
+    this->cur_offset += len;
+    return len;
+}
+
+void
+QPDF::BufferInputSource::unreadCh(char ch)
+{ // steps back one byte; ch itself is not used because the buffer is left unmodified
+ if (this->cur_offset > 0)
+ {
+ --this->cur_offset;
+ }
+}
+
+QPDF::ObjGen::ObjGen(int o = 0, int g = 0) : // NOTE(review): default arguments on an out-of-class definition; confirm the declaration does not repeat them
+ obj(o), // object number
+ gen(g) // generation number
+{
+}
+
+// Strict ordering by object number first and generation second.
+bool
+QPDF::ObjGen::ObjGen::operator<(ObjGen const& rhs) const
+{
+    if (this->obj != rhs.obj)
+    {
+        return (this->obj < rhs.obj);
+    }
+    return (this->gen < rhs.gen);
+}
+
+QPDF::QPDF() :
+ encrypted(false),
+ encryption_initialized(false),
+ ignore_xref_streams(false), // see setIgnoreXRefStreams()
+ suppress_warnings(false), // warn() prints to std::cerr unless this is set
+ attempt_recovery(true), // parse() tries reconstruct_xref() on failure unless cleared
+ cached_key_objid(0),
+ cached_key_generation(0),
+ first_xref_item_offset(0), // set by read_xrefTable() when it reaches object 0
+ uncompressed_after_compressed(false)
+{
+}
+
+QPDF::~QPDF()
+{ // empty body: nothing is released explicitly here
+}
+
+void
+QPDF::processFile(char const* filename, char const* password)
+{ // opens filename, remembers the password, and parses the file
+ this->file.setFilename(filename);
+ this->provided_password = password; // NOTE(review): password is used as given; confirm callers never pass a null pointer
+ parse();
+}
+
+void
+QPDF::setIgnoreXRefStreams(bool val)
+{ // when true, /XRefStm entries in trailers are skipped by read_xrefTable()
+ this->ignore_xref_streams = val;
+}
+
+void
+QPDF::setSuppressWarnings(bool val)
+{ // when true, warn() still records warnings but does not print them
+ this->suppress_warnings = val;
+}
+
+void
+QPDF::setAttemptRecovery(bool val)
+{ // when false, parse() rethrows instead of calling reconstruct_xref()
+ this->attempt_recovery = val;
+}
+
+// Return every warning recorded so far and clear the stored list, so
+// each warning is reported by at most one call.
+std::vector<std::string>
+QPDF::getWarnings()
+{
+    std::vector<std::string> collected;
+    collected.swap(this->warnings);
+    return collected;
+}
+
+// Parse the overall structure of the input file: validate the %PDF
+// header, locate the last startxref/%%EOF marker near the end of the
+// file, read the cross-reference data it points to, and initialize
+// encryption.
+//
+// Throws QPDFExc if the header is missing.  If locating or reading
+// the cross-reference data fails with QPDFExc, the table is rebuilt
+// with reconstruct_xref() when attempt_recovery is set; otherwise the
+// original exception is rethrown.
+void
+QPDF::parse()
+{
+    static PCRE header_re("^%PDF-(1.\\d+)\\b");
+    static PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
+
+    std::string line = this->file.readLine();
+    PCRE::Match m1 = header_re.match(line.c_str());
+    if (m1)
+    {
+        this->pdf_version = m1.getMatch(1);
+        if (atof(this->pdf_version.c_str()) < 1.2)
+        {
+            this->tokenizer.allowPoundAnywhereInName();
+        }
+    }
+    else
+    {
+        QTC::TC("qpdf", "QPDF not a pdf file");
+        throw QPDFExc(this->file.getName(), 0, "not a PDF file");
+    }
+
+    // PDF spec says %%EOF must be found within the last 1024 bytes of
+    // the file.  We add an extra 30 characters to leave room for the
+    // startxref stuff.
+    static int const tbuf_size = 1054;
+    this->file.seek(0, SEEK_END);
+    if (this->file.tell() > tbuf_size)
+    {
+        this->file.seek(-tbuf_size, SEEK_END);
+    }
+    else
+    {
+        this->file.rewind();
+    }
+    char* buf = new char[tbuf_size + 1];
+    // Put buf in a PointerHolder to guarantee deletion of buf.  This
+    // calls delete rather than delete [], but it's okay since buf is
+    // an array of fundamental types.
+    PointerHolder<char> b(buf);
+    memset(buf, '\0', tbuf_size + 1);
+    this->file.read(buf, tbuf_size);
+
+    // Since buf may contain null characters, we can't do a regexp
+    // search on buf directly.  Find the last occurrence within buf
+    // where the regexp matches.
+    char* p = buf;
+    char const* candidate = "";
+    while ((p = (char*)memchr(p, 's', tbuf_size - (p - buf))) != 0)
+    {
+        if (eof_re.match(p))
+        {
+            candidate = p;
+        }
+        ++p;
+    }
+
+    try
+    {
+        PCRE::Match m2 = eof_re.match(candidate);
+        if (! m2)
+        {
+            QTC::TC("qpdf", "QPDF can't find startxref");
+            throw QPDFExc(this->file.getName() + ": can't find startxref");
+        }
+        off_t xref_offset = atoi(m2.getMatch(1).c_str());
+        read_xref(xref_offset);
+    }
+    catch (QPDFExc& e)
+    {
+        if (this->attempt_recovery)
+        {
+            reconstruct_xref(e);
+            QTC::TC("qpdf", "QPDF reconstructed xref table");
+        }
+        else
+        {
+            // Rethrow the exception object that was caught rather
+            // than a copy of it, so its dynamic type is preserved.
+            throw;
+        }
+    }
+
+    initializeEncryption();
+}
+
+// Record a warning and, unless warnings are suppressed, also print
+// it to standard error prefixed with "WARNING: ".
+void
+QPDF::warn(QPDFExc const& e)
+{
+    this->warnings.push_back(e.unparse());
+    if (this->suppress_warnings)
+    {
+        return;
+    }
+    std::cerr << "WARNING: " << this->warnings.back() << std::endl;
+}
+
+// Remember obj as the trailer, but only the first time: once a
+// trailer has been set, later calls leave it untouched.
+void
+QPDF::setTrailer(QPDFObjectHandle obj)
+{
+    if (! this->trailer.isInitialized())
+    {
+        this->trailer = obj;
+    }
+}
+
+void
+QPDF::reconstruct_xref(QPDFExc& e)
+{ // Rebuild the xref table by scanning the file line by line for "N G obj" markers and a trailer; e is the error that triggered recovery.
+ static PCRE obj_re("^(\\d+) (\\d+) obj\\b");
+ static PCRE endobj_re("^endobj\\b");
+ static PCRE trailer_re("^trailer\\b");
+
+ warn(QPDFExc(this->file.getName(), 0, "file is damaged"));
+ warn(e);
+ warn(QPDFExc("Attempting to reconstruct cross-reference table"));
+
+ this->file.seek(0, SEEK_END);
+ off_t eof = this->file.tell(); // file length, used as the scan limit
+ this->file.seek(0, SEEK_SET);
+ bool in_obj = false; // true between an "obj" line and the matching "endobj" line
+ while (this->file.tell() < eof)
+ {
+ std::string line = this->file.readLine();
+ if (in_obj)
+ { // inside an object only "endobj" is looked for
+ if (endobj_re.match(line.c_str()))
+ {
+ in_obj = false;
+ }
+ }
+ else
+ {
+ PCRE::Match m = obj_re.match(line.c_str());
+ if (m)
+ {
+ in_obj = true;
+ int obj = atoi(m.getMatch(1).c_str());
+ int gen = atoi(m.getMatch(2).c_str());
+ int offset = this->file.getLastOffset(); // readLine() leaves this at the start of the line
+ insertXrefEntry(obj, 1, offset, gen);
+ }
+ else if ((! this->trailer.isInitialized()) &&
+ trailer_re.match(line.c_str()))
+ { // only the first usable trailer is kept
+ // read "trailer"
+ this->file.seek(this->file.getLastOffset(), SEEK_SET);
+ readToken(&this->file);
+ QPDFObjectHandle t = readObject(&this->file, 0, 0, false);
+ if (! t.isDictionary())
+ {
+ // Oh well. It was worth a try.
+ }
+ else
+ {
+ setTrailer(t);
+ }
+ }
+ }
+ }
+
+ if (! this->trailer.isInitialized())
+ {
+ // We could check the last encountered object to see if it was
+ // an xref stream. If so, we could try to get the trailer
+ // from there. This may make it possible to recover files
+ // with bad startxref pointers even when they have object
+ // streams.
+
+ throw QPDFExc(this->file.getName() + ": unable to find trailer "
+ "dictionary while recovering damanged file");
+ }
+
+ // We could iterate through the objects looking for streams and
+ // try to find objects inside of them, but it's probably not worth
+ // the trouble. Acrobat can't recover files with any errors in an
+ // xref stream, and this would be a real long shot anyway. If we
+ // wanted to do anything that involved looking at stream contents,
+ // we'd also have to call initializeEncryption() here. It's safe
+ // to call it more than once.
+}
+
+// Read the chain of cross-reference sections starting at xref_offset.
+// Each section is either a traditional table (introduced by a line
+// reading "xref") or a cross-reference stream; each read returns the
+// offset of the previous section, and 0 ends the chain.  Afterwards
+// the trailer's /Size is compared against the highest object number
+// seen and a warning is issued if they disagree.
+void
+QPDF::read_xref(off_t xref_offset)
+{
+    while (xref_offset)
+    {
+        this->file.seek(xref_offset, SEEK_SET);
+        std::string line = this->file.readLine();
+        if (line == "xref")
+        {
+            xref_offset = read_xrefTable(this->file.tell());
+        }
+        else
+        {
+            xref_offset = read_xrefStream(xref_offset);
+        }
+    }
+
+    int size = this->trailer.getKey("/Size").getIntValue();
+    // An xref section may contain nothing but free entries, so don't
+    // assume the table has a last element to look at.
+    int max_obj = 0;
+    if (! this->xref_table.empty())
+    {
+        max_obj = (*(xref_table.rbegin())).first.obj;
+    }
+    if (! this->deleted_objects.empty())
+    {
+        max_obj = std::max(max_obj, *(this->deleted_objects.rbegin()));
+    }
+    if (size != max_obj + 1)
+    {
+        QTC::TC("qpdf", "QPDF xref size mismatch");
+        warn(QPDFExc(this->file.getName() +
+                     std::string(": reported number of objects (") +
+                     QUtil::int_to_string(size) +
+                     ") inconsistent with actual number of objects (" +
+                     QUtil::int_to_string(max_obj + 1) + ")"));
+    }
+
+    // We no longer need the deleted_objects table, so go ahead and
+    // clear it out to make sure we never depend on its being set.
+    this->deleted_objects.clear();
+}
+
+int
+QPDF::read_xrefTable(off_t xref_offset)
+{ // Read a traditional xref table starting at xref_offset plus its trailer; returns the trailer's /Prev offset, or 0 if it has none.
+ static PCRE xref_first_re("^(\\d+)\\s+(\\d+)");
+ static PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
+
+ std::vector<ObjGen> deleted_items;
+
+ this->file.seek(xref_offset, SEEK_SET);
+ bool done = false;
+ while (! done)
+ { // one iteration per subsection: a "first count" line followed by count entries
+ std::string line = this->file.readLine();
+ PCRE::Match m1 = xref_first_re.match(line.c_str());
+ if (! m1)
+ {
+ QTC::TC("qpdf", "QPDF invalid xref");
+ throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+ "xref syntax invalid");
+ }
+ int obj = atoi(m1.getMatch(1).c_str()); // first object number in this subsection
+ int num = atoi(m1.getMatch(2).c_str()); // number of entries in this subsection
+ static int const xref_entry_size = 20; // each entry is read as a fixed 20-byte record
+ char xref_entry[xref_entry_size + 1];
+ for (int i = obj; i < obj + num; ++i)
+ {
+ if (i == 0)
+ {
+ // This is needed by checkLinearization()
+ this->first_xref_item_offset = this->file.tell();
+ }
+ memset(xref_entry, 0, sizeof(xref_entry)); // guarantees a terminating null for the regexp match
+ this->file.read(xref_entry, xref_entry_size);
+ PCRE::Match m2 = xref_entry_re.match(xref_entry);
+ if (! m2)
+ {
+ QTC::TC("qpdf", "QPDF invalid xref entry");
+ throw QPDFExc(
+ this->file.getName(), this->file.getLastOffset(),
+ "invalid xref entry (obj=" +
+ QUtil::int_to_string(i) + ")");
+ }
+
+ int f1 = atoi(m2.getMatch(1).c_str()); // the 10-digit field
+ int f2 = atoi(m2.getMatch(2).c_str()); // the 5-digit field
+ char type = m2.getMatch(3)[0]; // 'f' or 'n'
+ if (type == 'f')
+ {
+ // Save deleted items until after we've checked the
+ // XRefStm, if any.
+ deleted_items.push_back(ObjGen(i, f2));
+ }
+ else
+ {
+ insertXrefEntry(i, 1, f1, f2);
+ }
+ }
+ off_t pos = this->file.tell();
+ QPDFTokenizer::Token t = readToken(&this->file);
+ if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))
+ {
+ done = true;
+ }
+ else
+ { // not the trailer: go back so the next subsection header can be read as a line
+ this->file.seek(pos, SEEK_SET);
+ }
+ }
+
+ // Set offset to previous xref table if any
+ QPDFObjectHandle cur_trailer = readObject(&this->file, 0, 0, false);
+ if (! cur_trailer.isDictionary())
+ {
+ QTC::TC("qpdf", "QPDF missing trailer");
+ throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+ "expected trailer dictionary");
+ }
+
+ if (! this->trailer.isInitialized())
+ { // only the first trailer read becomes the document trailer, and only it is validated for /Size
+ setTrailer(cur_trailer);
+
+ if (! this->trailer.hasKey("/Size"))
+ {
+ QTC::TC("qpdf", "QPDF trailer lacks size");
+ throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+ "trailer dictionary lacks /Size key");
+ }
+ if (! this->trailer.getKey("/Size").isInteger())
+ {
+ QTC::TC("qpdf", "QPDF trailer size not integer");
+ throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+ "/Size key in trailer dictionary is not "
+ "an integer");
+ }
+ }
+
+ if (cur_trailer.hasKey("/XRefStm"))
+ {
+ if (this->ignore_xref_streams)
+ {
+ QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
+ }
+ else
+ {
+ if (cur_trailer.getKey("/XRefStm").isInteger())
+ {
+ // Read the xref stream but disregard any return value
+ // -- we'll use our trailer's /Prev key instead of the
+ // xref stream's.
+ (void) read_xrefStream(
+ cur_trailer.getKey("/XRefStm").getIntValue());
+ }
+ else
+ {
+ throw QPDFExc(this->file.getName(), xref_offset,
+ "invalid /XRefStm");
+ }
+ }
+ }
+
+ // Handle any deleted items now that we've read the /XRefStm.
+ for (std::vector<ObjGen>::iterator iter = deleted_items.begin();
+ iter != deleted_items.end(); ++iter)
+ {
+ ObjGen& og = *iter;
+ insertXrefEntry(og.obj, 0, 0, og.gen);
+ }
+
+ if (cur_trailer.hasKey("/Prev"))
+ {
+ if (! cur_trailer.getKey("/Prev").isInteger())
+ {
+ QTC::TC("qpdf", "QPDF trailer prev not integer");
+ throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+ "/Prev key in trailer dictionary is not "
+ "an integer");
+ }
+ QTC::TC("qpdf", "QPDF prev key in trailer dictionary");
+ xref_offset = cur_trailer.getKey("/Prev").getIntValue();
+ }
+ else
+ {
+ xref_offset = 0; // tells read_xref() that the chain ends here
+ }
+
+ return xref_offset;
+}
+
+// Read the cross-reference stream expected at xref_offset and load
+// its entries by way of processXRefStream().
+//
+// Returns whatever processXRefStream() returns. Throws QPDFExc
+// ("xref not found") when xref streams are being ignored, when no
+// object can be read at the offset, or when the object found there is
+// not a stream whose /Type is /XRef.
+int
+QPDF::read_xrefStream(off_t xref_offset)
+{
+    if (! this->ignore_xref_streams)
+    {
+        int found_objid;
+        int found_generation;
+        QPDFObjectHandle candidate;
+        try
+        {
+            candidate = readObjectAtOffset(
+                xref_offset, 0, 0, found_objid, found_generation);
+        }
+        catch (QPDFExc&)
+        {
+            // A read failure leaves candidate uninitialized; it is
+            // reported as "xref not found" below.
+        }
+
+        bool is_xref_stream =
+            (candidate.isInitialized() && candidate.isStream());
+        if (is_xref_stream)
+        {
+            QPDFObjectHandle type = candidate.getDict().getKey("/Type");
+            is_xref_stream = (type.isName() && (type.getName() == "/XRef"));
+        }
+
+        if (is_xref_stream)
+        {
+            QTC::TC("qpdf", "QPDF found xref stream");
+            return processXRefStream(xref_offset, candidate);
+        }
+    }
+
+    QTC::TC("qpdf", "QPDF can't find xref");
+    throw QPDFExc(this->file.getName(), xref_offset, "xref not found");
+}
+
+// Load the entries of the cross-reference stream xref_obj, which was
+// read from xref_offset, into the xref table.
+//
+// The stream dictionary must supply /W (an array whose first three
+// items are integers), /Size (an integer) and optionally /Index (an
+// array holding an even number of integers). The decoded stream data
+// must be exactly (sum of the three /W widths) * (number of entries)
+// bytes long. Negative /W widths and negative /Index values are
+// rejected: a negative width would make the computed entry size
+// smaller than the number of bytes actually consumed per entry, so
+// decoding would run past the end of the stream buffer.
+//
+// If no trailer has been stored yet, the stream dictionary is stored
+// as the trailer. Returns the integer value of the dictionary's /Prev
+// key, or 0 when there is none. Throws QPDFExc on malformed input.
+int
+QPDF::processXRefStream(off_t xref_offset, QPDFObjectHandle& xref_obj)
+{
+    QPDFObjectHandle dict = xref_obj.getDict();
+    QPDFObjectHandle W_obj = dict.getKey("/W");
+    QPDFObjectHandle Index_obj = dict.getKey("/Index");
+    if (! (W_obj.isArray() &&
+           (W_obj.getArrayNItems() >= 3) &&
+           W_obj.getArrayItem(0).isInteger() &&
+           W_obj.getArrayItem(1).isInteger() &&
+           W_obj.getArrayItem(2).isInteger() &&
+           dict.getKey("/Size").isInteger() &&
+           (Index_obj.isArray() || Index_obj.isNull())))
+    {
+        throw QPDFExc(this->file.getName(), xref_offset,
+                      "Cross-reference stream does not have"
+                      " proper /W and /Index keys");
+    }
+
+    // indx holds (first object number, entry count) pairs.
+    std::vector<int> indx;
+    if (Index_obj.isArray())
+    {
+        int n_index = Index_obj.getArrayNItems();
+        if ((n_index % 2) || (n_index < 2))
+        {
+            throw QPDFExc(this->file.getName(), xref_offset,
+                          "Cross-reference stream's /Index has an"
+                          " invalid number of values");
+        }
+        for (int i = 0; i < n_index; ++i)
+        {
+            if (! Index_obj.getArrayItem(i).isInteger())
+            {
+                throw QPDFExc(this->file.getName(), xref_offset,
+                              "Cross-reference stream's /Index's item " +
+                              QUtil::int_to_string(i) +
+                              " is not an integer");
+            }
+            int value = Index_obj.getArrayItem(i).getIntValue();
+            if (value < 0)
+            {
+                // Neither an object number nor an entry count can be
+                // negative.
+                throw QPDFExc(this->file.getName(), xref_offset,
+                              "Cross-reference stream's /Index's item " +
+                              QUtil::int_to_string(i) +
+                              " is negative");
+            }
+            indx.push_back(value);
+        }
+        QTC::TC("qpdf", "QPDF xref /Index is array",
+                n_index == 2 ? 0 : 1);
+    }
+    else
+    {
+        // No /Index: a single range covering objects 0 .. /Size - 1.
+        QTC::TC("qpdf", "QPDF xref /Index is null");
+        int size = dict.getKey("/Size").getIntValue();
+        indx.push_back(0);
+        indx.push_back(size);
+    }
+
+    int num_entries = 0;
+    for (unsigned int i = 1; i < indx.size(); i += 2)
+    {
+        num_entries += indx[i];
+    }
+
+    int W[3];
+    int entry_size = 0;
+    for (int i = 0; i < 3; ++i)
+    {
+        W[i] = W_obj.getArrayItem(i).getIntValue();
+        if (W[i] < 0)
+        {
+            // See the function comment: a negative width would defeat
+            // the size check below and allow reads beyond the buffer.
+            throw QPDFExc(this->file.getName(), xref_offset,
+                          "Cross-reference stream's /W item " +
+                          QUtil::int_to_string(i) +
+                          " is negative");
+        }
+        entry_size += W[i];
+    }
+
+    int expected_size = entry_size * num_entries;
+
+    PointerHolder<Buffer> bp = xref_obj.getStreamData();
+    int actual_size = bp.getPointer()->getSize();
+
+    if (expected_size != actual_size)
+    {
+        throw QPDFExc(this->file.getName(), xref_offset,
+                      "Cross-reference stream data has the wrong size;"
+                      " expected = " + QUtil::int_to_string(expected_size) +
+                      "; actual = " + QUtil::int_to_string(actual_size));
+    }
+
+    // cur_chunk indexes the current (first, count) pair in indx;
+    // chunk_count is the number of entries consumed from that pair.
+    int cur_chunk = 0;
+    int chunk_count = 0;
+
+    bool saw_first_compressed_object = false;
+
+    unsigned char const* data = bp.getPointer()->getBuffer();
+    for (int i = 0; i < num_entries; ++i)
+    {
+        // Read this entry: three big-endian fields of W[0], W[1] and
+        // W[2] bytes.
+        unsigned char const* entry = data + (entry_size * i);
+        int fields[3];
+        unsigned char const* p = entry;
+        for (int j = 0; j < 3; ++j)
+        {
+            fields[j] = 0;
+            if ((j == 0) && (W[0] == 0))
+            {
+                // An absent type field defaults to 1.
+                QTC::TC("qpdf", "QPDF default for xref stream field 0");
+                fields[0] = 1;
+            }
+            for (int k = 0; k < W[j]; ++k)
+            {
+                fields[j] <<= 8;
+                fields[j] += (int)(*p++);
+            }
+        }
+
+        // Get the object and generation number.  The object number is
+        // based on /Index.  The generation number is 0 unless this is
+        // an uncompressed object record, in which case the generation
+        // number appears as the third field.
+        int obj = indx[cur_chunk] + chunk_count;
+        ++chunk_count;
+        if (chunk_count >= indx[cur_chunk + 1])
+        {
+            cur_chunk += 2;
+            chunk_count = 0;
+        }
+
+        if (saw_first_compressed_object)
+        {
+            if (fields[0] != 2)
+            {
+                this->uncompressed_after_compressed = true;
+            }
+        }
+        else if (fields[0] == 2)
+        {
+            saw_first_compressed_object = true;
+        }
+        if (obj == 0)
+        {
+            // This is needed by checkLinearization()
+            this->first_xref_item_offset = xref_offset;
+        }
+        insertXrefEntry(obj, fields[0], fields[1], fields[2]);
+    }
+
+    if (! this->trailer.isInitialized())
+    {
+        setTrailer(dict);
+    }
+
+    if (dict.hasKey("/Prev"))
+    {
+        if (! dict.getKey("/Prev").isInteger())
+        {
+            throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                          "/Prev key in xref stream dictionary is not "
+                          "an integer");
+        }
+        QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
+        xref_offset = dict.getKey("/Prev").getIntValue();
+    }
+    else
+    {
+        xref_offset = 0;
+    }
+
+    return xref_offset;
+}
+
+// Record one cross-reference entry for object obj. f0 is the entry
+// type (0 = deleted, 1 = uncompressed, 2 = compressed) and f1/f2 are
+// the remaining two fields of the entry.
+//
+// Tables are read newest first, so the first entry seen for an object
+// wins: if the object/generation is already in the xref table, or the
+// object number has already been recorded as deleted, the call does
+// nothing. Throws QPDFExc for any other value of f0.
+void
+QPDF::insertXrefEntry(int obj, int f0, int f1, int f2)
+{
+    // Compressed (type 2) entries always have generation 0; for the
+    // other types the generation is carried in f2.
+    int const generation = ((f0 == 2) ? 0 : f2);
+
+    if (this->xref_table.count(ObjGen(obj, generation)) != 0)
+    {
+        QTC::TC("qpdf", "QPDF xref reused object");
+        return;
+    }
+    if (this->deleted_objects.count(obj) != 0)
+    {
+        QTC::TC("qpdf", "QPDF xref deleted object");
+        return;
+    }
+
+    if (f0 == 0)
+    {
+        this->deleted_objects.insert(obj);
+    }
+    else if (f0 == 1)
+    {
+        // f2 is generation
+        QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
+        this->xref_table[ObjGen(obj, f2)] = QPDFXRefEntry(f0, f1, f2);
+    }
+    else if (f0 == 2)
+    {
+        this->xref_table[ObjGen(obj, 0)] = QPDFXRefEntry(f0, f1, f2);
+    }
+    else
+    {
+        throw QPDFExc(this->file.getName(), 0,
+                      "unknown xref stream entry type " +
+                      QUtil::int_to_string(f0));
+    }
+}
+
+// Write one line per xref table entry to standard output, in the form
+// "obj/gen: <description>". Throws QEXC::Internal if an entry has a
+// type other than 1 (uncompressed) or 2 (compressed).
+void
+QPDF::showXRefTable()
+{
+    typedef std::map<ObjGen, QPDFXRefEntry>::iterator xref_iterator;
+
+    xref_iterator cur = this->xref_table.begin();
+    while (cur != this->xref_table.end())
+    {
+        ObjGen const& key = cur->first;
+        QPDFXRefEntry const& info = cur->second;
+
+        std::cout << key.obj << "/" << key.gen << ": ";
+
+        int const kind = info.getType();
+        if (kind == 1)
+        {
+            std::cout << "uncompressed; offset = " << info.getOffset();
+        }
+        else if (kind == 2)
+        {
+            std::cout << "compressed; stream = " << info.getObjStreamNumber()
+                      << ", index = " << info.getObjStreamIndex();
+        }
+        else
+        {
+            throw QEXC::Internal("unknown cross-reference table type while"
+                                 " showing xref_table");
+        }
+
+        std::cout << std::endl;
+        ++cur;
+    }
+}
+
+// Read one complete object from input at its current position and
+// return it. objid, generation and in_object_stream are forwarded
+// unchanged to readObjectInternal().
+QPDFObjectHandle
+QPDF::readObject(InputSource* input, int objid, int generation,
+                 bool in_object_stream)
+{
+    off_t const start_of_object = input->tell();
+
+    QPDFObjectHandle result = readObjectInternal(
+        input, objid, generation, in_object_stream, false, false);
+
+    // Reading leaves the input's last offset somewhere inside the
+    // object; reset it so that it refers to where the object began.
+    input->setLastOffset(start_of_object);
+
+    return result;
+}
+
+// Recursive worker behind readObject(). Reads tokens from input and
+// builds the corresponding object.
+//
+// in_array / in_dictionary say whether this call is collecting the
+// elements of an array or dictionary whose opening token has already
+// been consumed by the caller; in that case tokens are accumulated
+// until the matching close token. Otherwise exactly one object is
+// read. objid and generation are passed to decryptString() and to the
+// stream object created for a dictionary followed by "stream".
+// When in_object_stream is true, strings are not decrypted and a
+// dictionary is never checked for a following stream.
+//
+// Throws QPDFExc on malformed input and QEXC::Internal on internal
+// inconsistencies.
+QPDFObjectHandle
+QPDF::readObjectInternal(InputSource* input,
+ int objid, int generation,
+ bool in_object_stream,
+ bool in_array, bool in_dictionary)
+{
+ if (in_dictionary && in_array)
+ {
+ // Although dictionaries and arrays arbitrarily nest, these
+ // variables indicate what is at the top of the stack right
+ // now, so they can, by definition, never both be true.
+ throw QEXC::Internal("readObjectInternal: in_dict && in_array");
+ }
+
+ QPDFObjectHandle object;
+
+ // Offset at which this object (or container body) starts; used in
+ // some of the error messages below.
+ off_t offset = input->tell();
+ // Elements collected so far when reading an array or dictionary.
+ std::vector<QPDFObjectHandle> olist;
+ bool done = false;
+ while (! done)
+ {
+ object = QPDFObjectHandle();
+
+ QPDFTokenizer::Token token = readToken(input);
+
+ switch (token.getType())
+ {
+ case QPDFTokenizer::tt_brace_open:
+ case QPDFTokenizer::tt_brace_close:
+ // Don't know what to do with these for now
+ QTC::TC("qpdf", "QPDF bad brace");
+ throw QPDFExc(input->getName(), input->getLastOffset(),
+ "unexpected brace token");
+ break;
+
+ case QPDFTokenizer::tt_array_close:
+ if (in_array)
+ {
+ done = true;
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF bad array close");
+ throw QPDFExc(input->getName(), input->getLastOffset(),
+ "unexpected array close token");
+ }
+ break;
+
+ case QPDFTokenizer::tt_dict_close:
+ if (in_dictionary)
+ {
+ done = true;
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF bad dictionary close");
+ throw QPDFExc(input->getName(), input->getLastOffset(),
+ "unexpected dictionary close token");
+ }
+ break;
+
+ // Nested containers are read by recursion; the recursive call
+ // consumes everything up to and including the close token.
+ case QPDFTokenizer::tt_array_open:
+ object = readObjectInternal(
+ input, objid, generation, in_object_stream, true, false);
+ break;
+
+ case QPDFTokenizer::tt_dict_open:
+ object = readObjectInternal(
+ input, objid, generation, in_object_stream, false, true);
+ break;
+
+ case QPDFTokenizer::tt_bool:
+ object = QPDFObjectHandle::newBool(
+ (token.getValue() == "true"));
+ break;
+
+ case QPDFTokenizer::tt_null:
+ object = QPDFObjectHandle::newNull();
+ break;
+
+ case QPDFTokenizer::tt_integer:
+ object = QPDFObjectHandle::newInteger(
+ atoi(token.getValue().c_str()));
+ break;
+
+ case QPDFTokenizer::tt_real:
+ object = QPDFObjectHandle::newReal(token.getValue());
+ break;
+
+ case QPDFTokenizer::tt_name:
+ object = QPDFObjectHandle::newName(token.getValue());
+ break;
+
+ case QPDFTokenizer::tt_word:
+ {
+ // The only word accepted here is "R", and only inside a
+ // container when the two most recently collected elements
+ // are integers: "<objid> <generation> R".
+ std::string const& value = token.getValue();
+ if ((value == "R") && (in_array || in_dictionary) &&
+ (olist.size() >= 2) &&
+ (olist[olist.size() - 1].isInteger()) &&
+ (olist[olist.size() - 2].isInteger()))
+ {
+ // Try to resolve indirect objects
+ object = QPDFObjectHandle::Factory::newIndirect(
+ this,
+ olist[olist.size() - 2].getIntValue(),
+ olist[olist.size() - 1].getIntValue());
+ // The two integers are replaced by the indirect
+ // reference, which is pushed onto olist below.
+ olist.pop_back();
+ olist.pop_back();
+ }
+ else
+ {
+ throw QPDFExc(input->getName(), input->getLastOffset(),
+ "unknown token while reading object (" +
+ value + ")");
+ }
+ }
+ break;
+
+ case QPDFTokenizer::tt_string:
+ {
+ std::string val = token.getValue();
+ if (this->encrypted && (! in_object_stream))
+ {
+ decryptString(val, objid, generation);
+ }
+ object = QPDFObjectHandle::newString(val);
+ }
+ break;
+
+ default:
+ throw QPDFExc(input->getName(), input->getLastOffset(),
+ "unknown token type while reading object");
+ break;
+ }
+
+ if (in_dictionary || in_array)
+ {
+ // Collect every element except the close token itself.
+ if (! done)
+ {
+ olist.push_back(object);
+ }
+ }
+ else if (! object.isInitialized())
+ {
+ throw QEXC::Internal(std::string("uninitialized object (token = ") +
+ QUtil::int_to_string(token.getType()) +
+ ", " + token.getValue() + ")");
+ }
+ else
+ {
+ // Not inside a container: a single object is all we read.
+ done = true;
+ }
+ }
+
+ if (in_array)
+ {
+ object = QPDFObjectHandle::newArray(olist);
+ }
+ else if (in_dictionary)
+ {
+ // Convert list to map. Alternating elements are keys.
+ std::map<std::string, QPDFObjectHandle> dict;
+ if (olist.size() % 2)
+ {
+ QTC::TC("qpdf", "QPDF dictionary odd number of elements");
+ throw QPDFExc(
+ input->getName(), input->getLastOffset(),
+ "dictionary ending here has an odd number of elements");
+ }
+ for (unsigned int i = 0; i < olist.size(); i += 2)
+ {
+ QPDFObjectHandle key_obj = olist[i];
+ QPDFObjectHandle val = olist[i + 1];
+ if (! key_obj.isName())
+ {
+ throw QPDFExc(
+ input->getName(), offset,
+ std::string("dictionary key not name (") +
+ key_obj.unparse() + ")");
+ }
+ dict[key_obj.getName()] = val;
+ }
+ object = QPDFObjectHandle::newDictionary(dict);
+
+ if (! in_object_stream)
+ {
+ // check for stream
+ off_t cur_offset = input->tell();
+ if (readToken(input) ==
+ QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
+ {
+ // Kill to next actual newline. Do not use readLine()
+ // here -- streams are a special case. The next
+ // single newline character marks the end of the
+ // stream token. It is incorrect to strip subsequent
+ // carriage returns or newlines as they may be part of
+ // the stream.
+ {
+ char ch;
+ do
+ {
+ if (input->read(&ch, 1) == 0)
+ {
+ // A premature EOF here will result in
+ // some other problem that will get
+ // reported at another time.
+ ch = '\n';
+ }
+ } while (ch != '\n');
+ }
+
+ // Must get offset before accessing any additional
+ // objects since resolving a previously unresolved
+ // indirect object will change file position.
+ off_t stream_offset = input->tell();
+ int length = 0;
+
+ // Any QPDFExc raised while validating /Length or
+ // locating "endstream" is handled below, either by
+ // attempting recovery or by rethrowing.
+ try
+ {
+ if (dict.count("/Length") == 0)
+ {
+ QTC::TC("qpdf", "QPDF stream without length");
+ throw QPDFExc(input->getName(), offset,
+ "stream dictionary lacks /Length key");
+ }
+
+ QPDFObjectHandle length_obj = dict["/Length"];
+ if (! length_obj.isInteger())
+ {
+ QTC::TC("qpdf", "QPDF stream length not integer");
+ throw QPDFExc(input->getName(), offset,
+ "/Length key in stream dictionary is not "
+ "an integer");
+ }
+
+ length = length_obj.getIntValue();
+ // Skip over the stream data and verify that
+ // "endstream" follows it.
+ input->seek(stream_offset + length, SEEK_SET);
+ if (! (readToken(input) ==
+ QPDFTokenizer::Token(
+ QPDFTokenizer::tt_word, "endstream")))
+ {
+ QTC::TC("qpdf", "QPDF missing endstream");
+ throw QPDFExc(input->getName(), input->getLastOffset(),
+ "expected endstream");
+ }
+ }
+ catch (QPDFExc& e)
+ {
+ if (this->attempt_recovery)
+ {
+ // may throw an exception
+ length = recoverStreamLength(
+ input, objid, generation, stream_offset);
+ }
+ else
+ {
+ throw e;
+ }
+ }
+ object = QPDFObjectHandle::Factory::newStream(
+ this, objid, generation, object, stream_offset, length);
+ }
+ else
+ {
+ // Not a stream: put back whatever token we peeked at.
+ input->seek(cur_offset, SEEK_SET);
+ }
+ }
+ }
+
+ return object;
+}
+
+// Try to determine the length of a stream whose /Length is missing or
+// wrong by scanning forward from stream_offset for a line that ends
+// with "endstream" and is immediately followed by a line that starts
+// with "endobj".
+//
+// A warning is issued before scanning. On success, input is left
+// positioned at the start of the "endobj" line and the number of
+// bytes between stream_offset and "endstream" is returned. A
+// separate flag records whether the terminator was found, so a
+// stream that really is empty is recovered with length 0 and is not
+// mistaken for a failed search. Throws QPDFExc when no terminator
+// is found before end of file.
+//
+// objid and generation are used only to check, for test coverage
+// purposes, whether the terminator found lies within this object
+// according to the xref table.
+int
+QPDF::recoverStreamLength(InputSource* input,
+                          int objid, int generation, off_t stream_offset)
+{
+    static PCRE endobj_re("^endobj\\b");
+
+    // Try to reconstruct stream length by looking for
+    // endstream(\r\n?|\n)endobj
+    warn(QPDFExc(input->getName(), stream_offset,
+                 "attempting to recover stream length"));
+
+    input->seek(0, SEEK_END);
+    off_t eof = input->tell();
+    input->seek(stream_offset, SEEK_SET);
+    std::string last_line;
+    off_t last_line_offset = 0;
+    int length = 0;
+    bool found = false;
+    while (input->tell() < eof)
+    {
+        std::string line = input->readLine();
+        // Can't use regexp last_line since it might contain nulls
+        if (endobj_re.match(line.c_str()) &&
+            (last_line.length() >= 9) &&
+            (last_line.substr(last_line.length() - 9, 9) == "endstream"))
+        {
+            // Stream probably ends right before "endstream", which
+            // contains 9 characters.
+            length = last_line_offset + last_line.length() - 9 - stream_offset;
+            found = true;
+            // Go back to where we would have been if we had just read
+            // the endstream.
+            input->seek(input->getLastOffset(), SEEK_SET);
+            break;
+        }
+        last_line = line;
+        last_line_offset = input->getLastOffset();
+    }
+
+    if (! found)
+    {
+        throw QPDFExc(input->getName(), stream_offset,
+                      "unable to recover stream data");
+    }
+
+    // Find the uncompressed object with the smallest offset greater
+    // than stream_offset, and see whether it is the object whose
+    // stream we are recovering.
+    int this_obj_offset = 0;
+    ObjGen this_obj(0, 0);
+
+    // Make sure this is inside this object
+    for (std::map<ObjGen, QPDFXRefEntry>::iterator iter =
+             this->xref_table.begin();
+         iter != this->xref_table.end(); ++iter)
+    {
+        ObjGen const& og = (*iter).first;
+        QPDFXRefEntry const& entry = (*iter).second;
+        if (entry.getType() == 1)
+        {
+            int obj_offset = entry.getOffset();
+            if ((obj_offset > stream_offset) &&
+                ((this_obj_offset == 0) ||
+                 (this_obj_offset > obj_offset)))
+            {
+                this_obj_offset = obj_offset;
+                this_obj = og;
+            }
+        }
+    }
+    if (this_obj_offset &&
+        (this_obj.obj == objid) &&
+        (this_obj.gen == generation))
+    {
+        // Well, we found endstream\nendobj within the space
+        // allowed for this object, so we're probably in good
+        // shape.
+    }
+    else
+    {
+        QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
+    }
+
+    QTC::TC("qpdf", "QPDF recovered stream length");
+    return length;
+}
+
+// Read the next token from input by feeding it to the tokenizer one
+// character at a time.
+//
+// On return, the input's last offset is the position of the first
+// character of the token: leading whitespace is skipped when working
+// out that position. If the tokenizer consumed one character too
+// many, that character is pushed back onto the input.
+//
+// Throws QPDFExc if end of input is reached before a token is
+// complete, or if the tokenizer reports a bad token.
+QPDFTokenizer::Token
+QPDF::readToken(InputSource* input)
+{
+    off_t offset = input->tell();
+    QPDFTokenizer::Token token;
+    bool unread_char;
+    char char_to_unread;
+    while (! this->tokenizer.getToken(token, unread_char, char_to_unread))
+    {
+        char ch;
+        if (input->read(&ch, 1) == 0)
+        {
+            throw QPDFExc(input->getName(), offset, "EOF while reading token");
+        }
+        else
+        {
+            // isspace() has undefined behavior for negative values
+            // other than EOF, and plain char may be signed, so bytes
+            // >= 0x80 must go through unsigned char first.
+            if (isspace(static_cast<unsigned char>(ch)) &&
+                (input->getLastOffset() == offset))
+            {
+                // Still in leading whitespace: the token starts later.
+                ++offset;
+            }
+            this->tokenizer.presentCharacter(ch);
+        }
+    }
+
+    if (unread_char)
+    {
+        input->unreadCh(char_to_unread);
+    }
+
+    if (token.getType() == QPDFTokenizer::tt_bad)
+    {
+        throw QPDFExc(input->getName(), offset, token.getErrorMessage());
+    }
+
+    input->setLastOffset(offset);
+
+    return token;
+}
+
+// Read the indirect object ("n n obj ... endobj") that starts at
+// offset in the main file, store it in the object cache if it is not
+// already there, and return it.
+//
+// exp_objid / exp_generation: when exp_objid is non-zero, the object
+// found must carry exactly this ID and generation.
+// objid / generation: set to the ID and generation actually read.
+//
+// If the header is missing or does not match and exp_objid is
+// non-zero and recovery is enabled, the xref table is reconstructed
+// and the object is read from its newly determined offset. If the
+// reconstructed table has no uncompressed entry for the object, the
+// original error is rethrown; carrying on would mean parsing from an
+// arbitrary file position with objid and generation possibly unset.
+//
+// A missing "endobj" only produces a warning. Throws QPDFExc for
+// other errors, including end of file while skipping the whitespace
+// after "endobj".
+QPDFObjectHandle
+QPDF::readObjectAtOffset(off_t offset, int exp_objid, int exp_generation,
+                         int& objid, int& generation)
+{
+    this->file.seek(offset, SEEK_SET);
+
+    QPDFTokenizer::Token tobjid = readToken(&this->file);
+    QPDFTokenizer::Token tgen = readToken(&this->file);
+    QPDFTokenizer::Token tobj = readToken(&this->file);
+
+    bool objidok = (tobjid.getType() == QPDFTokenizer::tt_integer);
+    bool genok = (tgen.getType() == QPDFTokenizer::tt_integer);
+    bool objok = (tobj == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"));
+
+    QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
+    QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
+    QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0);
+
+    try
+    {
+        if (! (objidok && genok && objok))
+        {
+            QTC::TC("qpdf", "QPDF expected n n obj");
+            throw QPDFExc(this->file.getName(), offset, "expected n n obj");
+        }
+        objid = atoi(tobjid.getValue().c_str());
+        generation = atoi(tgen.getValue().c_str());
+
+        if (exp_objid &&
+            (! ((objid == exp_objid) && (generation == exp_generation))))
+        {
+            QTC::TC("qpdf", "QPDF err wrong objid/generation");
+            throw QPDFExc(this->file.getName(), offset,
+                          std::string("expected ") +
+                          QUtil::int_to_string(exp_objid) + " " +
+                          QUtil::int_to_string(exp_generation) + " obj");
+        }
+    }
+    catch (QPDFExc& e)
+    {
+        if (exp_objid && this->attempt_recovery)
+        {
+            // Try again after reconstructing xref table
+            reconstruct_xref(e);
+            ObjGen og(exp_objid, exp_generation);
+            if (this->xref_table.count(og) &&
+                (this->xref_table[og].getType() == 1))
+            {
+                off_t new_offset = this->xref_table[og].getOffset();
+                // Call readObjectAtOffset with 0 for exp_objid to
+                // avoid an infinite loop.
+                QPDFObjectHandle result =
+                    readObjectAtOffset(new_offset, 0, 0, objid, generation);
+                QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
+                return result;
+            }
+            // Recovery did not locate the object.  There is nothing
+            // sensible to parse, so report the original problem.
+            throw;
+        }
+        else
+        {
+            throw;
+        }
+    }
+
+    QPDFObjectHandle oh = readObject(
+        &this->file, objid, generation, false);
+
+    if (! (readToken(&this->file) ==
+           QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj")))
+    {
+        QTC::TC("qpdf", "QPDF err expected endobj");
+        warn(QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                     "expected endobj"));
+    }
+
+    ObjGen og(objid, generation);
+    if (! this->obj_cache.count(og))
+    {
+        // Store the object in the cache here so it gets cached
+        // whether we first know the offset or whether we first know
+        // the object ID and generation (in which we case we would get
+        // here through resolve).
+
+        // Determine the end offset of this object before and after
+        // white space.  We use these numbers to validate
+        // linearization hint tables.  Offsets and lengths of objects
+        // may imply the end of an object to be anywhere between these
+        // values.
+        off_t end_before_space = this->file.tell();
+
+        // skip over spaces
+        while (true)
+        {
+            char ch;
+            if (this->file.read(&ch, 1))
+            {
+                // Convert through unsigned char: isspace() is
+                // undefined for negative arguments other than EOF.
+                if (! isspace(static_cast<unsigned char>(ch)))
+                {
+                    this->file.seek(-1, SEEK_CUR);
+                    break;
+                }
+            }
+            else
+            {
+                throw QPDFExc(this->file.getName(), offset,
+                              "EOF after endobj");
+            }
+        }
+        off_t end_after_space = this->file.tell();
+
+        this->obj_cache[og] =
+            ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh),
+                     end_before_space, end_after_space);
+    }
+
+    return oh;
+}
+
+// Return the object for the given object ID and generation, loading
+// it into the object cache first if necessary.
+//
+// The cache is consulted before the xref table so that objects which
+// do not exist in the file can be placed in the cache. An object
+// that is in neither resolves to a new null object. Throws QPDFExc
+// when the xref entry has a type other than 1 or 2.
+PointerHolder<QPDFObject>
+QPDF::resolve(int objid, int generation)
+{
+    ObjGen og(objid, generation);
+
+    if (this->obj_cache.count(og))
+    {
+        return this->obj_cache[og].object;
+    }
+
+    if (! this->xref_table.count(og))
+    {
+        // PDF spec says unknown objects resolve to the null object.
+        return new QPDF_Null;
+    }
+
+    QPDFXRefEntry const& entry = this->xref_table[og];
+    int const entry_type = entry.getType();
+    if (entry_type == 1)
+    {
+        // Uncompressed object: readObjectAtOffset stores what it
+        // reads in the cache, so its return value is not needed.
+        off_t offset = entry.getOffset();
+        int actual_objid;
+        int actual_generation;
+        QPDFObjectHandle unused =
+            readObjectAtOffset(offset, objid, generation,
+                               actual_objid, actual_generation);
+    }
+    else if (entry_type == 2)
+    {
+        // Compressed object: load everything in its object stream.
+        resolveObjectsInStream(entry.getObjStreamNumber());
+    }
+    else
+    {
+        throw QPDFExc(this->file.getName(), 0,
+                      "object " +
+                      QUtil::int_to_string(objid) + "/" +
+                      QUtil::int_to_string(generation) +
+                      " has unexpected xref entry type");
+    }
+
+    return this->obj_cache[og].object;
+}
+
+// Read every object contained in object stream number
+// obj_stream_number (generation 0) and store each one in the object
+// cache under generation 0.
+//
+// The stream must be of /Type /ObjStm with integer /N and /First
+// keys. Its data begins with /N pairs of integers (object number,
+// offset); each offset is relative to /First. Throws QPDFExc when
+// the object is not a stream, has the wrong type or keys, or has a
+// malformed header.
+void
+QPDF::resolveObjectsInStream(int obj_stream_number)
+{
+    std::string const stream_number_str =
+        QUtil::int_to_string(obj_stream_number);
+
+    // Force resolution of object stream
+    QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0);
+    if (! obj_stream.isStream())
+    {
+        throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                      "supposed object stream " +
+                      stream_number_str +
+                      " is not a stream");
+    }
+
+    // For linearization data in the object, use the data from the
+    // object stream for the objects in the stream.
+    ObjGen stream_og(obj_stream_number, 0);
+    off_t end_before_space = this->obj_cache[stream_og].end_before_space;
+    off_t end_after_space = this->obj_cache[stream_og].end_after_space;
+
+    QPDFObjectHandle dict = obj_stream.getDict();
+    QPDFObjectHandle type = dict.getKey("/Type");
+    bool const is_objstm = (type.isName() && (type.getName() == "/ObjStm"));
+    if (! is_objstm)
+    {
+        throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                      "supposed object stream " +
+                      stream_number_str +
+                      " has wrong type");
+    }
+
+    if (! (dict.getKey("/N").isInteger() &&
+           dict.getKey("/First").isInteger()))
+    {
+        throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                      "object stream " +
+                      stream_number_str +
+                      " has incorrect keys");
+    }
+
+    int const n = dict.getKey("/N").getIntValue();
+    int const first = dict.getKey("/First").getIntValue();
+
+    PointerHolder<Buffer> bp = obj_stream.getStreamData();
+    BufferInputSource input(
+        "object stream " + stream_number_str,
+        bp.getPointer());
+
+    // Pass 1: read the header, mapping object number to its absolute
+    // offset within the stream data.
+    typedef std::map<int, int> offset_map;
+    offset_map offsets;
+    for (int remaining = n; remaining > 0; --remaining)
+    {
+        QPDFTokenizer::Token tnum = readToken(&input);
+        QPDFTokenizer::Token toffset = readToken(&input);
+        bool const both_integers =
+            ((tnum.getType() == QPDFTokenizer::tt_integer) &&
+             (toffset.getType() == QPDFTokenizer::tt_integer));
+        if (! both_integers)
+        {
+            throw QPDFExc(input.getName(), input.getLastOffset(),
+                          "expected integer in object stream header");
+        }
+
+        int const num = atoi(tnum.getValue().c_str());
+        int const relative_offset = atoi(toffset.getValue().c_str());
+        offsets[num] = relative_offset + first;
+    }
+
+    // Pass 2: read each object and cache it.
+    for (offset_map::iterator cur = offsets.begin();
+         cur != offsets.end(); ++cur)
+    {
+        int const obj = cur->first;
+        input.seek(cur->second, SEEK_SET);
+        QPDFObjectHandle oh = readObject(&input, obj, 0, true);
+
+        // Store in cache
+        this->obj_cache[ObjGen(obj, 0)] =
+            ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh),
+                     end_before_space, end_after_space);
+    }
+}
+
+// Turn oh into a new indirect object and return an indirect handle
+// to it.
+//
+// The new object gets generation 0 and an object ID one greater than
+// the largest ID currently present in either the object cache or the
+// xref table. An empty container contributes 0, so the call is also
+// safe before anything has been cached or when there is no xref
+// table; dereferencing rbegin() of an empty map would be undefined.
+// The object is stored in the object cache with -1 for both of its
+// end offsets.
+QPDFObjectHandle
+QPDF::makeIndirectObject(QPDFObjectHandle oh)
+{
+    int cache_max_objid = 0;
+    if (! this->obj_cache.empty())
+    {
+        cache_max_objid = (*(this->obj_cache.rbegin())).first.obj;
+    }
+    int xref_max_objid = 0;
+    if (! this->xref_table.empty())
+    {
+        xref_max_objid = (*(this->xref_table.rbegin())).first.obj;
+    }
+    QTC::TC("qpdf", "QPDF indirect last obj from xref",
+            (xref_max_objid > cache_max_objid) ? 1 : 0);
+    int max_objid = std::max(cache_max_objid, xref_max_objid);
+    ObjGen next(max_objid + 1, 0);
+    this->obj_cache[next] =
+        ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1);
+    return QPDFObjectHandle::Factory::newIndirect(this, next.obj, next.gen);
+}
+
+// Return an indirect object handle referring to the given object ID
+// and generation in this QPDF. Nothing is read or validated here;
+// the handle is simply created through the newIndirect factory.
+QPDFObjectHandle
+QPDF::getObjectByID(int objid, int generation)
+{
+ return QPDFObjectHandle::Factory::newIndirect(this, objid, generation);
+}
+
+// Remove from the trailer dictionary the keys that must not be
+// carried over when the file is written out again.
+void
+QPDF::trimTrailerForWrite()
+{
+    // Note that removing the encryption dictionary does not interfere
+    // with reading encrypted files.  QPDF loads all the information
+    // it needs from the encryption dictionary at the beginning and
+    // never looks at it again.
+    //
+    // The keys from "/Index" onward are those that potentially come
+    // from a cross-reference stream.
+    static char const* const keys_to_remove[] = {
+        "/ID",
+        "/Encrypt",
+        "/Prev",
+        "/Index",
+        "/W",
+        "/Length",
+        "/Filter",
+        "/DecodeParms",
+        "/Type",
+        "/XRefStm"
+    };
+    static unsigned int const n_keys =
+        sizeof(keys_to_remove) / sizeof(keys_to_remove[0]);
+
+    for (unsigned int i = 0; i < n_keys; ++i)
+    {
+        this->trailer.removeKey(keys_to_remove[i]);
+    }
+}
+
+// Return the name reported by the underlying input file object.
+std::string
+QPDF::getFilename() const
+{
+ return this->file.getName();
+}
+
+// Return the stored PDF version string (the pdf_version member).
+std::string
+QPDF::getPDFVersion() const
+{
+ return this->pdf_version;
+}
+
+// Return a handle to the stored trailer dictionary.
+QPDFObjectHandle
+QPDF::getTrailer()
+{
+ return this->trailer;
+}
+
+// Return the value of the trailer's /Root key. No check is made
+// here that the key exists or refers to a dictionary.
+QPDFObjectHandle
+QPDF::getRoot()
+{
+ return this->trailer.getKey("/Root");
+}
+
+// For every compressed (type 2) entry in the xref table, record in
+// omap which object stream holds the object: omap[object number] =
+// object stream number. Existing entries in omap for other objects
+// are left alone.
+void
+QPDF::getObjectStreamData(std::map<int, int>& omap)
+{
+    typedef std::map<ObjGen, QPDFXRefEntry>::iterator xref_iterator;
+
+    xref_iterator cur = this->xref_table.begin();
+    xref_iterator const last = this->xref_table.end();
+    for (; cur != last; ++cur)
+    {
+        QPDFXRefEntry const& info = cur->second;
+        if (info.getType() != 2)
+        {
+            continue;
+        }
+        omap[cur->first.obj] = info.getObjStreamNumber();
+    }
+}
+
+// Collect, in traversal order, the object numbers of the indirect
+// objects reachable from the trailer that may be placed in object
+// streams: every indirect object visited except streams and the
+// encryption dictionary. Values of /Length keys in stream
+// dictionaries are not followed.
+std::vector<int>
+QPDF::getCompressibleObjects()
+{
+ // Return a list of object numbers of objects that are allowed to
+ // be in object streams. We disregard generation numbers here
+ // since this is a helper function for QPDFWriter which is going
+ // to renumber objects anyway. This code will do weird things if
+ // we have two objects with the same object number and different
+ // generations, but so do virtually all PDF consumers,
+ // particularly since this is not a permitted condition.
+
+ // We walk through the objects by traversing the document from the
+ // root, including a traversal of the pages tree. This makes it
+ // more likely that objects that are on the same page end up in
+ // the same object stream, which is slightly more efficient,
+ // particularly with linearized files. This is better than
+ // iterating through the xref table since it avoids preserving
+ // orphaned items.
+
+ // Exclude encryption dictionary, if any
+ int encryption_dict_id = 0;
+ QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
+ if (encryption_dict.isIndirect())
+ {
+ encryption_dict_id = encryption_dict.getObjectID();
+ }
+
+ // Depth-first traversal: pending objects are kept on the front of
+ // the list, and children are pushed in reverse so that they are
+ // popped in their natural order.
+ std::set<int> visited;
+ std::list<QPDFObjectHandle> queue;
+ queue.push_front(this->trailer);
+ std::vector<int> result;
+ while (! queue.empty())
+ {
+ QPDFObjectHandle obj = queue.front();
+ queue.pop_front();
+ if (obj.isIndirect())
+ {
+ int objid = obj.getObjectID();
+ // Each indirect object is processed only once; this also
+ // terminates reference cycles.
+ if (visited.count(objid))
+ {
+ QTC::TC("qpdf", "QPDF loop detected traversing objects");
+ continue;
+ }
+ if (objid == encryption_dict_id)
+ {
+ QTC::TC("qpdf", "QPDF exclude encryption dictionary");
+ }
+ else if (! obj.isStream())
+ {
+ result.push_back(objid);
+ }
+ visited.insert(objid);
+ }
+ if (obj.isStream())
+ {
+ // Streams themselves are never reported, but the values in
+ // their dictionaries (other than /Length) are traversed.
+ QPDFObjectHandle dict = obj.getDict();
+ std::set<std::string> keys = dict.getKeys();
+ for (std::set<std::string>::reverse_iterator iter = keys.rbegin();
+ iter != keys.rend(); ++iter)
+ {
+ std::string const& key = *iter;
+ QPDFObjectHandle value = dict.getKey(key);
+ if (key == "/Length")
+ {
+ // omit stream lengths
+ if (value.isIndirect())
+ {
+ QTC::TC("qpdf", "QPDF exclude indirect length");
+ }
+ }
+ else
+ {
+ queue.push_front(value);
+ }
+ }
+ }
+ else if (obj.isDictionary())
+ {
+ std::set<std::string> keys = obj.getKeys();
+ for (std::set<std::string>::reverse_iterator iter = keys.rbegin();
+ iter != keys.rend(); ++iter)
+ {
+ queue.push_front(obj.getKey(*iter));
+ }
+ }
+ else if (obj.isArray())
+ {
+ // Push items last to first so the first item is popped first.
+ int n = obj.getArrayNItems();
+ for (int i = 1; i <= n; ++i)
+ {
+ queue.push_front(obj.getArrayItem(n - i));
+ }
+ }
+ }
+
+ return result;
+}
+
+// Copy length bytes of raw stream data, starting at offset in the
+// file, into pipeline, then finish the pipeline.
+//
+// For encrypted files, decryptStream() is called with the pipeline,
+// objid and generation before any data is written, unless
+// stream_dict has /Type /XRef, in which case it is skipped. Throws
+// QPDFExc if the file ends before length bytes have been read.
+void
+QPDF::pipeStreamData(int objid, int generation,
+                     off_t offset, size_t length,
+                     QPDFObjectHandle stream_dict,
+                     Pipeline* pipeline)
+{
+    // Keeps alive, for the duration of this call, any pipelines that
+    // decryptStream() hands back.
+    std::vector<PointerHolder<Pipeline> > to_delete;
+    if (this->encrypted)
+    {
+        QPDFObjectHandle type = stream_dict.getKey("/Type");
+        if (type.isName() && (type.getName() == "/XRef"))
+        {
+            QTC::TC("qpdf", "QPDF piping xref stream from encrypted file");
+        }
+        else
+        {
+            decryptStream(pipeline, objid, generation, to_delete);
+        }
+    }
+
+    this->file.seek(offset, SEEK_SET);
+
+    char buf[10240];
+    size_t remaining = length;
+    while (remaining > 0)
+    {
+        size_t wanted = remaining;
+        if (sizeof(buf) < wanted)
+        {
+            wanted = sizeof(buf);
+        }
+        size_t const got = this->file.read(buf, wanted);
+        if (got == 0)
+        {
+            throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                          "unexpected EOF reading stream data");
+        }
+        remaining -= got;
+        pipeline->write((unsigned char*)buf, got);
+    }
+
+    pipeline->finish();
+}
+
+// Return the list of page objects. The list is built on first use
+// by walking the page tree from /Root's /Pages key; while the cached
+// list is non-empty it is returned as is.
+std::vector<QPDFObjectHandle> const&
+QPDF::getAllPages()
+{
+    if (! this->all_pages.empty())
+    {
+        return this->all_pages;
+    }
+
+    QPDFObjectHandle root = this->trailer.getKey("/Root");
+    getAllPagesInternal(root.getKey("/Pages"), this->all_pages);
+    return this->all_pages;
+}
+
+// Recursive helper for getAllPages(). cur_pages is a node of the
+// page tree: a /Page node is appended to result, a /Pages node has
+// each entry of its /Kids array visited in order. Throws QPDFExc
+// for any other /Type.
+void
+QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
+                          std::vector<QPDFObjectHandle>& result)
+{
+    std::string const node_type = cur_pages.getKey("/Type").getName();
+
+    if (node_type == "/Page")
+    {
+        result.push_back(cur_pages);
+        return;
+    }
+
+    if (node_type != "/Pages")
+    {
+        throw QPDFExc(this->file.getName() + ": invalid Type in page tree");
+    }
+
+    QPDFObjectHandle kids = cur_pages.getKey("/Kids");
+    int const n_kids = kids.getArrayNItems();
+    for (int kid = 0; kid < n_kids; ++kid)
+    {
+        getAllPagesInternal(kids.getArrayItem(kid), result);
+    }
+}
diff --git a/libqpdf/QPDFExc.cc b/libqpdf/QPDFExc.cc
new file mode 100644
index 00000000..c7270677
--- /dev/null
+++ b/libqpdf/QPDFExc.cc
@@ -0,0 +1,20 @@
+
+#include <qpdf/QPDFExc.hh>
+
+#include <qpdf/QUtil.hh>
+
+// Construct an exception whose text is exactly message.
+QPDFExc::QPDFExc(std::string const& message) :
+ QEXC::General(message)
+{
+}
+
+// Construct an exception whose text has the form
+// "<filename>: offset <offset>: <message>".
+QPDFExc::QPDFExc(std::string const& filename, int offset,
+ std::string const& message) :
+ QEXC::General(filename + ": offset " + QUtil::int_to_string(offset) +
+ ": " + message)
+{
+}
+
+// Destructor; declared non-throwing and has nothing to release.
+QPDFExc::~QPDFExc() throw ()
+{
+}
diff --git a/libqpdf/QPDFObject.cc b/libqpdf/QPDFObject.cc
new file mode 100644
index 00000000..6c4963e2
--- /dev/null
+++ b/libqpdf/QPDFObject.cc
@@ -0,0 +1,2 @@
+
+#include <qpdf/QPDFObject.hh>
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
new file mode 100644
index 00000000..9fba7b43
--- /dev/null
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -0,0 +1,637 @@
+
+#include <qpdf/QPDFObjectHandle.hh>
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDF_Bool.hh>
+#include <qpdf/QPDF_Null.hh>
+#include <qpdf/QPDF_Integer.hh>
+#include <qpdf/QPDF_Real.hh>
+#include <qpdf/QPDF_Name.hh>
+#include <qpdf/QPDF_String.hh>
+#include <qpdf/QPDF_Array.hh>
+#include <qpdf/QPDF_Dictionary.hh>
+#include <qpdf/QPDF_Stream.hh>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QUtil.hh>
+
+// Construct an uninitialized handle.  The owning-QPDF pointer is set
+// to null explicitly: the type checks (isBool() etc.) call
+// dereference() without first checking `initialized`, and would
+// otherwise pass an indeterminate pointer to the resolver.
+QPDFObjectHandle::QPDFObjectHandle() :
+    initialized(false),
+    qpdf(0),
+    objid(0),
+    generation(0)
+{
+}
+
+// Construct a handle that refers to object (objid, generation) of
+// qpdf.  No object pointer is stored here; dereference() fetches it
+// on first use.
+QPDFObjectHandle::QPDFObjectHandle(QPDF* qpdf, int objid, int generation)
+    : initialized(true),
+      qpdf(qpdf),
+      objid(objid),
+      generation(generation)
+{
+}
+
+// Construct a direct handle wrapping data.  Object id and generation
+// are zero and there is no owning QPDF.
+QPDFObjectHandle::QPDFObjectHandle(QPDFObject* data)
+    : initialized(true),
+      qpdf(0),
+      objid(0),
+      generation(0),
+      obj(data)
+{
+}
+
+// True unless this handle came from the default constructor.
+bool
+QPDFObjectHandle::isInitialized() const
+{
+    return initialized;
+}
+
+// Helper for run-time type tests: check(o) is true when o is non-null
+// and its dynamic type is T (or derives from T).
+template <class T>
+class QPDFObjectTypeAccessor
+{
+  public:
+    static bool check(QPDFObject* o)
+    {
+        if (o == 0)
+        {
+            return false;
+        }
+        return (dynamic_cast<T*>(o) != 0);
+    }
+};
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Bool.
+bool
+QPDFObjectHandle::isBool()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Bool>::check(target);
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Null.
+bool
+QPDFObjectHandle::isNull()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Null>::check(target);
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Integer.
+bool
+QPDFObjectHandle::isInteger()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Integer>::check(target);
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Real.
+bool
+QPDFObjectHandle::isReal()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Real>::check(target);
+}
+
+// True for either numeric kind: integer or real.
+bool
+QPDFObjectHandle::isNumber()
+{
+    if (isInteger())
+    {
+        return true;
+    }
+    return isReal();
+}
+
+// Return the value of an integer or real object as a double.  Reals
+// are stored as strings and converted with atof().  Throws
+// QEXC::Internal for any other object type.
+double
+QPDFObjectHandle::getNumericValue()
+{
+    if (isInteger())
+    {
+        return getIntValue();
+    }
+    if (isReal())
+    {
+        return atof(getRealValue().c_str());
+    }
+    throw QEXC::Internal("getNumericValue called for non-numeric object");
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Name.
+bool
+QPDFObjectHandle::isName()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Name>::check(target);
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_String.
+bool
+QPDFObjectHandle::isString()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_String>::check(target);
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Array.
+bool
+QPDFObjectHandle::isArray()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Array>::check(target);
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Dictionary.
+bool
+QPDFObjectHandle::isDictionary()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Dictionary>::check(target);
+}
+
+// Resolve the handle if necessary, then report whether the object is
+// a QPDF_Stream.
+bool
+QPDFObjectHandle::isStream()
+{
+    dereference();
+    QPDFObject* target = obj.getPointer();
+    return QPDFObjectTypeAccessor<QPDF_Stream>::check(target);
+}
+
+// True when the handle has a non-zero object id.  Throws
+// QEXC::Internal on an uninitialized handle.
+bool
+QPDFObjectHandle::isIndirect()
+{
+    assertInitialized();
+    bool const has_object_id = (this->objid != 0);
+    return has_object_id;
+}
+
+// A scalar is anything that is not an array, dictionary, or stream.
+bool
+QPDFObjectHandle::isScalar()
+{
+    return ((! isArray()) && (! isDictionary()) && (! isStream()));
+}
+
+// Bool accessors
+
+// Value of a boolean object; QEXC::Internal if the object is not a
+// boolean.
+bool
+QPDFObjectHandle::getBoolValue()
+{
+    bool const type_ok = isBool();
+    assertType("Boolean", type_ok);
+    QPDF_Bool* b = dynamic_cast<QPDF_Bool*>(obj.getPointer());
+    return b->getVal();
+}
+
+// Integer accessors
+
+// Value of an integer object; QEXC::Internal if the object is not an
+// integer.
+int
+QPDFObjectHandle::getIntValue()
+{
+    bool const type_ok = isInteger();
+    assertType("Integer", type_ok);
+    QPDF_Integer* i = dynamic_cast<QPDF_Integer*>(obj.getPointer());
+    return i->getVal();
+}
+
+// Real accessors
+
+// String form of a real object; QEXC::Internal if the object is not a
+// real.
+std::string
+QPDFObjectHandle::getRealValue()
+{
+    bool const type_ok = isReal();
+    assertType("Real", type_ok);
+    QPDF_Real* r = dynamic_cast<QPDF_Real*>(obj.getPointer());
+    return r->getVal();
+}
+
+// Name accessors
+
+// Name held by a name object; QEXC::Internal if the object is not a
+// name.
+std::string
+QPDFObjectHandle::getName()
+{
+    bool const type_ok = isName();
+    assertType("Name", type_ok);
+    QPDF_Name* n = dynamic_cast<QPDF_Name*>(obj.getPointer());
+    return n->getName();
+}
+
+// String accessors
+
+// Value of a string object as returned by QPDF_String::getVal();
+// QEXC::Internal if the object is not a string.
+std::string
+QPDFObjectHandle::getStringValue()
+{
+    bool const type_ok = isString();
+    assertType("String", type_ok);
+    QPDF_String* s = dynamic_cast<QPDF_String*>(obj.getPointer());
+    return s->getVal();
+}
+
+// Value of a string object as returned by QPDF_String::getUTF8Val();
+// QEXC::Internal if the object is not a string.
+std::string
+QPDFObjectHandle::getUTF8Value()
+{
+    bool const type_ok = isString();
+    assertType("String", type_ok);
+    QPDF_String* s = dynamic_cast<QPDF_String*>(obj.getPointer());
+    return s->getUTF8Val();
+}
+
+// Array accessors
+
+// Number of items in an array object; QEXC::Internal if the object is
+// not an array.
+int
+QPDFObjectHandle::getArrayNItems()
+{
+    bool const type_ok = isArray();
+    assertType("Array", type_ok);
+    QPDF_Array* a = dynamic_cast<QPDF_Array*>(obj.getPointer());
+    return a->getNItems();
+}
+
+// Item n of an array object; QEXC::Internal if the object is not an
+// array.
+QPDFObjectHandle
+QPDFObjectHandle::getArrayItem(int n)
+{
+    bool const type_ok = isArray();
+    assertType("Array", type_ok);
+    QPDF_Array* a = dynamic_cast<QPDF_Array*>(obj.getPointer());
+    return a->getItem(n);
+}
+
+// Array mutators
+
+// Replace item n of an array object with item; QEXC::Internal if the
+// object is not an array.
+void
+QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item)
+{
+    bool const type_ok = isArray();
+    assertType("Array", type_ok);
+    QPDF_Array* a = dynamic_cast<QPDF_Array*>(obj.getPointer());
+    a->setItem(n, item);
+}
+
+// Dictionary accessors
+
+// Whether a dictionary object contains key; QEXC::Internal if the
+// object is not a dictionary.
+bool
+QPDFObjectHandle::hasKey(std::string const& key)
+{
+    bool const type_ok = isDictionary();
+    assertType("Dictionary", type_ok);
+    QPDF_Dictionary* d = dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    return d->hasKey(key);
+}
+
+// Value stored under key in a dictionary object; QEXC::Internal if
+// the object is not a dictionary.
+QPDFObjectHandle
+QPDFObjectHandle::getKey(std::string const& key)
+{
+    bool const type_ok = isDictionary();
+    assertType("Dictionary", type_ok);
+    QPDF_Dictionary* d = dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    return d->getKey(key);
+}
+
+// Set of keys of a dictionary object; QEXC::Internal if the object is
+// not a dictionary.
+std::set<std::string>
+QPDFObjectHandle::getKeys()
+{
+    bool const type_ok = isDictionary();
+    assertType("Dictionary", type_ok);
+    QPDF_Dictionary* d = dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    return d->getKeys();
+}
+
+// Dictionary mutators
+
+// Forward to QPDF_Dictionary::replaceKey(key, value); QEXC::Internal
+// if the object is not a dictionary.
+void
+QPDFObjectHandle::replaceKey(std::string const& key,
+                             QPDFObjectHandle const& value)
+{
+    bool const type_ok = isDictionary();
+    assertType("Dictionary", type_ok);
+    QPDF_Dictionary* d = dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    d->replaceKey(key, value);
+}
+
+// Forward to QPDF_Dictionary::removeKey(key); QEXC::Internal if the
+// object is not a dictionary.
+void
+QPDFObjectHandle::removeKey(std::string const& key)
+{
+    bool const type_ok = isDictionary();
+    assertType("Dictionary", type_ok);
+    QPDF_Dictionary* d = dynamic_cast<QPDF_Dictionary*>(obj.getPointer());
+    d->removeKey(key);
+}
+
+// Stream accessors
+// Dictionary of a stream object; QEXC::Internal if the object is not
+// a stream.
+QPDFObjectHandle
+QPDFObjectHandle::getDict()
+{
+    bool const type_ok = isStream();
+    assertType("Stream", type_ok);
+    QPDF_Stream* s = dynamic_cast<QPDF_Stream*>(obj.getPointer());
+    return s->getDict();
+}
+
+// Buffer returned by QPDF_Stream::getStreamData(); QEXC::Internal if
+// the object is not a stream.
+PointerHolder<Buffer>
+QPDFObjectHandle::getStreamData()
+{
+    bool const type_ok = isStream();
+    assertType("Stream", type_ok);
+    QPDF_Stream* s = dynamic_cast<QPDF_Stream*>(obj.getPointer());
+    return s->getStreamData();
+}
+
+// Forward to QPDF_Stream::pipeStreamData() with the same arguments
+// and return its result; QEXC::Internal if the object is not a
+// stream.
+bool
+QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
+                                 bool normalize, bool compress)
+{
+    bool const type_ok = isStream();
+    assertType("Stream", type_ok);
+    QPDF_Stream* s = dynamic_cast<QPDF_Stream*>(obj.getPointer());
+    return s->pipeStreamData(p, filter, normalize, compress);
+}
+
+// Object id recorded in this handle (0 for direct objects).
+int
+QPDFObjectHandle::getObjectID() const
+{
+    return objid;
+}
+
+// Generation number recorded in this handle.
+int
+QPDFObjectHandle::getGeneration() const
+{
+    return generation;
+}
+
+// For a page object, return a map from XObject name to stream for
+// every entry of /Resources -> /XObject that is a stream whose
+// dictionary has /Subtype /Image and no /ImageMask key.  Throws
+// QEXC::Internal when called on a non-page object.
+std::map<std::string, QPDFObjectHandle>
+QPDFObjectHandle::getPageImages()
+{
+    assertPageObject();
+
+    // Note: inherited resources are not handled.  When a page
+    // dictionary has no /Resources key, or one whose value is null or
+    // an empty dictionary, you are supposed to walk up the page tree
+    // until a /Resources dictionary is found.  As of this writing
+    // there are no test files that use inherited resources, and a
+    // hand-generated one would not be a good test because any
+    // misunderstanding would be present in both the code and the
+    // test file.
+
+    // NOTE: If support of inherited resources (see above comment) is
+    // implemented, edit comment in QPDFObjectHandle.hh for this
+    // function.
+
+    std::map<std::string, QPDFObjectHandle> images;
+    if (! this->hasKey("/Resources"))
+    {
+        return images;
+    }
+    QPDFObjectHandle resources = this->getKey("/Resources");
+    if (! resources.hasKey("/XObject"))
+    {
+        return images;
+    }
+
+    QPDFObjectHandle xobjects = resources.getKey("/XObject");
+    std::set<std::string> names = xobjects.getKeys();
+    for (std::set<std::string>::iterator it = names.begin();
+         it != names.end(); ++it)
+    {
+        std::string name = (*it);
+        QPDFObjectHandle candidate = xobjects.getKey(name);
+        if (! candidate.isStream())
+        {
+            continue;
+        }
+        QPDFObjectHandle dict = candidate.getDict();
+        if (! dict.hasKey("/Subtype"))
+        {
+            continue;
+        }
+        if (! (dict.getKey("/Subtype").getName() == "/Image"))
+        {
+            continue;
+        }
+        if (dict.hasKey("/ImageMask"))
+        {
+            continue;
+        }
+        images[name] = candidate;
+    }
+
+    return images;
+}
+
+// For a page object, return its content streams in order.  /Contents
+// may be a single stream or an array of streams; anything else, or a
+// non-stream array element, raises QEXC::General.  Throws
+// QEXC::Internal when called on a non-page object.
+std::vector<QPDFObjectHandle>
+QPDFObjectHandle::getPageContents()
+{
+    assertPageObject();
+
+    std::vector<QPDFObjectHandle> streams;
+    QPDFObjectHandle contents = this->getKey("/Contents");
+
+    if (contents.isArray())
+    {
+        int const count = contents.getArrayNItems();
+        for (int idx = 0; idx < count; ++idx)
+        {
+            QPDFObjectHandle element = contents.getArrayItem(idx);
+            if (! element.isStream())
+            {
+                throw QEXC::General("unknown item type while inspecting "
+                                    "element of /Contents array in page "
+                                    "dictionary");
+            }
+            streams.push_back(element);
+        }
+        return streams;
+    }
+
+    if (! contents.isStream())
+    {
+        throw QEXC::General("unknown object type inspecting /Contents "
+                            "key in page dictionary");
+    }
+    streams.push_back(contents);
+    return streams;
+}
+
+// Text form of this handle: "<objid> <generation> R" for an indirect
+// handle, otherwise the unparsed form of the object itself.
+std::string
+QPDFObjectHandle::unparse()
+{
+    if (! this->isIndirect())
+    {
+        return unparseResolved();
+    }
+    std::string reference = QUtil::int_to_string(this->objid);
+    reference += " ";
+    reference += QUtil::int_to_string(this->generation);
+    reference += " R";
+    return reference;
+}
+
+// Text form of the underlying object, resolving the handle first if
+// needed.
+std::string
+QPDFObjectHandle::unparseResolved()
+{
+    dereference();
+    QPDFObject* target = this->obj.getPointer();
+    return target->unparse();
+}
+
+// Factory: handle referring to object (objid, generation) in qpdf.
+QPDFObjectHandle
+QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
+{
+    QPDFObjectHandle handle(qpdf, objid, generation);
+    return handle;
+}
+
+// Factory: direct boolean object.
+QPDFObjectHandle
+QPDFObjectHandle::newBool(bool value)
+{
+    QPDFObject* payload = new QPDF_Bool(value);
+    return QPDFObjectHandle(payload);
+}
+
+// Factory: direct null object.
+QPDFObjectHandle
+QPDFObjectHandle::newNull()
+{
+    QPDFObject* payload = new QPDF_Null();
+    return QPDFObjectHandle(payload);
+}
+
+// Factory: direct integer object.
+QPDFObjectHandle
+QPDFObjectHandle::newInteger(int value)
+{
+    QPDFObject* payload = new QPDF_Integer(value);
+    return QPDFObjectHandle(payload);
+}
+
+// Factory: direct real object built from its string representation.
+QPDFObjectHandle
+QPDFObjectHandle::newReal(std::string const& value)
+{
+    QPDFObject* payload = new QPDF_Real(value);
+    return QPDFObjectHandle(payload);
+}
+
+// Factory: direct name object.
+QPDFObjectHandle
+QPDFObjectHandle::newName(std::string const& name)
+{
+    QPDFObject* payload = new QPDF_Name(name);
+    return QPDFObjectHandle(payload);
+}
+
+// Factory: direct string object.
+QPDFObjectHandle
+QPDFObjectHandle::newString(std::string const& str)
+{
+    QPDFObject* payload = new QPDF_String(str);
+    return QPDFObjectHandle(payload);
+}
+
+// Factory: direct array object holding items.
+QPDFObjectHandle
+QPDFObjectHandle::newArray(std::vector<QPDFObjectHandle> const& items)
+{
+    QPDFObject* payload = new QPDF_Array(items);
+    return QPDFObjectHandle(payload);
+}
+
+// Factory: direct dictionary object holding items.
+QPDFObjectHandle
+QPDFObjectHandle::newDictionary(
+    std::map<std::string, QPDFObjectHandle> const& items)
+{
+    QPDFObject* payload = new QPDF_Dictionary(items);
+    return QPDFObjectHandle(payload);
+}
+
+
+// Factory: stream object.  All arguments are passed straight through
+// to the QPDF_Stream constructor.
+QPDFObjectHandle
+QPDFObjectHandle::newStream(QPDF* qpdf, int objid, int generation,
+                            QPDFObjectHandle stream_dict,
+                            off_t offset, int length)
+{
+    QPDFObject* payload = new QPDF_Stream(
+        qpdf, objid, generation, stream_dict, offset, length);
+    return QPDFObjectHandle(payload);
+}
+
+// Turn this handle into a direct object by replacing the held object
+// with a deep copy in which every nested indirect reference has been
+// made direct as well.
+//
+// visited holds the ids of the indirect objects currently being
+// expanded on the recursion path; meeting one of them again means
+// the object graph has a cycle and QEXC::General is thrown.  Streams
+// cannot be direct objects, so they also raise QEXC::General.  An
+// uninitialized handle raises QEXC::Internal.
+void
+QPDFObjectHandle::makeDirectInternal(std::set<int>& visited)
+{
+    assertInitialized();
+
+    if (isStream())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle ERR clone stream");
+        throw QEXC::General("attempt to make a stream into a direct object");
+    }
+
+    int cur_objid = this->objid;
+    if (cur_objid != 0)
+    {
+        if (visited.count(cur_objid))
+        {
+            QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
+            throw QEXC::General("loop detected while converting object from "
+                                "indirect to direct");
+        }
+        visited.insert(cur_objid);
+    }
+
+    // Resolve while the id is still known, then drop the id so the
+    // handle reports itself as direct from here on.
+    dereference();
+    this->objid = 0;
+    this->generation = 0;
+
+    QPDFObject* new_obj = 0;
+
+    if (isBool())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone bool");
+        new_obj = new QPDF_Bool(getBoolValue());
+    }
+    else if (isNull())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone null");
+        new_obj = new QPDF_Null();
+    }
+    else if (isInteger())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone integer");
+        new_obj = new QPDF_Integer(getIntValue());
+    }
+    else if (isReal())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone real");
+        new_obj = new QPDF_Real(getRealValue());
+    }
+    else if (isName())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone name");
+        new_obj = new QPDF_Name(getName());
+    }
+    else if (isString())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone string");
+        new_obj = new QPDF_String(getStringValue());
+    }
+    else if (isArray())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone array");
+        std::vector<QPDFObjectHandle> items;
+        int n = getArrayNItems();
+        for (int i = 0; i < n; ++i)
+        {
+            items.push_back(getArrayItem(i));
+            items.back().makeDirectInternal(visited);
+        }
+        new_obj = new QPDF_Array(items);
+    }
+    else if (isDictionary())
+    {
+        QTC::TC("qpdf", "QPDFObjectHandle clone dictionary");
+        std::set<std::string> keys = getKeys();
+        std::map<std::string, QPDFObjectHandle> items;
+        for (std::set<std::string>::iterator iter = keys.begin();
+             iter != keys.end(); ++iter)
+        {
+            // Look the slot up once and convert the value in place.
+            QPDFObjectHandle& slot = items[*iter];
+            slot = getKey(*iter);
+            slot.makeDirectInternal(visited);
+        }
+        new_obj = new QPDF_Dictionary(items);
+    }
+    else
+    {
+        // The message used to name a non-existent "makeIndirect".
+        throw QEXC::Internal("QPDFObjectHandle::makeDirectInternal: "
+                             "unknown object type");
+    }
+
+    this->obj = new_obj;
+
+    // This object is no longer on the recursion path.
+    if (cur_objid)
+    {
+        visited.erase(cur_objid);
+    }
+}
+
+// Public entry point: make this object direct, starting with an empty
+// set of in-progress object ids for loop detection.
+void
+QPDFObjectHandle::makeDirect()
+{
+    std::set<int> in_progress;
+    makeDirectInternal(in_progress);
+}
+
+// Throw QEXC::Internal if this handle was default-constructed.
+void
+QPDFObjectHandle::assertInitialized() const
+{
+    if (this->initialized)
+    {
+        return;
+    }
+    throw QEXC::Internal("operation attempted on uninitialized "
+                         "QPDFObjectHandle");
+}
+
+// Throw QEXC::Internal naming type_name unless istype is true.
+void
+QPDFObjectHandle::assertType(char const* type_name, bool istype)
+{
+    if (istype)
+    {
+        return;
+    }
+    std::string msg("operation for ");
+    msg += type_name;
+    msg += " object attempted on object of wrong type";
+    throw QEXC::Internal(msg);
+}
+
+// Throw QEXC::Internal unless this object is a dictionary whose
+// /Type key has the name value /Page.
+void
+QPDFObjectHandle::assertPageObject()
+{
+    bool const is_page =
+        (this->isDictionary() &&
+         this->hasKey("/Type") &&
+         (this->getKey("/Type").getName() == "/Page"));
+    if (! is_page)
+    {
+        throw QEXC::Internal("page operation called on non-Page object");
+    }
+}
+
+// Make sure this->obj points at an object.  When it is empty the
+// object is fetched through QPDF::Resolver using the stored qpdf,
+// object id and generation; if that yields nothing, a null object is
+// substituted.
+void
+QPDFObjectHandle::dereference()
+{
+    if (this->obj.getPointer() != 0)
+    {
+        return;
+    }
+    this->obj = QPDF::Resolver::resolve(
+        this->qpdf, this->objid, this->generation);
+    if (this->obj.getPointer() != 0)
+    {
+        return;
+    }
+    QTC::TC("qpdf", "QPDFObjectHandle indirect to unknown");
+    this->obj = new QPDF_Null();
+}
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
new file mode 100644
index 00000000..4eed6f16
--- /dev/null
+++ b/libqpdf/QPDFTokenizer.cc
@@ -0,0 +1,458 @@
+
+#include <qpdf/QPDFTokenizer.hh>
+
+// DO NOT USE ctype -- it is locale dependent for some things, and
+// it's not worth the risk of including it in case it may accidentally
+// be used.
+
+#include <qpdf/PCRE.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/QTC.hh>
+
+// Locale-independent test for an ASCII hexadecimal digit (ctype is
+// deliberately avoided in this file).  The NUL character is rejected
+// explicitly: strchr() treats the terminating null as part of the
+// string, so without the check '\0' would be reported as a hex digit.
+static bool is_hex_digit(char ch)
+{
+    return ((ch != '\0') &&
+            (strchr("0123456789abcdefABCDEF", ch) != 0));
+}
+
+// Start with '#' treated specially inside names and with all
+// per-token state cleared.
+QPDFTokenizer::QPDFTokenizer()
+    : pound_special_in_name(true)
+{
+    this->reset();
+}
+
+// Turn off '#' processing in name tokens: after this call a '#' in a
+// name is copied through literally.
+void
+QPDFTokenizer::allowPoundAnywhereInName()
+{
+    QTC::TC("qpdf", "QPDFTokenizer allow pound anywhere in name");
+    pound_special_in_name = false;
+}
+
+// Return the tokenizer to its between-tokens starting state and
+// discard everything accumulated for the previous token.
+void
+QPDFTokenizer::reset()
+{
+    // token under construction
+    this->state = st_top;
+    this->type = tt_bad;
+    this->val = "";
+    this->raw_val = "";
+    this->error_message = "";
+
+    // pending pushed-back character
+    this->unread_char = false;
+    this->char_to_unread = '\0';
+
+    // literal-string bookkeeping
+    this->string_depth = 0;
+    this->string_ignoring_newline = false;
+    this->last_char_was_bs = false;
+}
+
+void
+QPDFTokenizer::presentCharacter(char ch) // feed one input character to the tokenizer state machine
+{
+ static PCRE num_re("^[\\+\\-]?(?:\\.\\d+|\\d+(?:\\.\\d+)?)$"); // optional sign, then ".d+" or "d+" with optional ".d+"
+
+ if (state == st_token_ready) // caller must fetch the finished token before presenting more input
+ {
+ throw QEXC::Internal("QPDF tokenizer presented character "
+ "while token is waiting");
+ }
+
+ char orig_ch = ch; // kept for appending to raw_val at the end
+
+ // State machine is implemented such that some characters may be
+ // handled more than once. This happens whenever you have to use
+ // the character that caused a state change in the new state.
+
+ bool handled = true; // cleared when ch still has to go through the second dispatch below
+ if (state == st_top)
+ {
+ // Note: we specifically do not use ctype here. It is
+ // locale-dependent.
+ if (strchr(" \t\n\v\f\r", ch)) // NOTE: strchr also matches the terminator, so '\0' lands here too
+ {
+ // ignore
+ }
+ else if (ch == '%')
+ {
+ // Discard comments
+ state = st_in_comment;
+ }
+ else if (ch == '(') // start of a literal string
+ {
+ string_depth = 1;
+ string_ignoring_newline = false;
+ memset(bs_num_register, '\0', sizeof(bs_num_register));
+ last_char_was_bs = false;
+ state = st_in_string;
+ }
+ else if (ch == '<') // "<<" or the start of a hexstring; decided by the next character
+ {
+ state = st_lt;
+ }
+ else if (ch == '>') // ">>" or a stray '>'; decided by the next character
+ {
+ state = st_gt;
+ }
+ else
+ {
+ val += ch;
+ if (ch == ')')
+ {
+ type = tt_bad;
+ QTC::TC("qpdf", "QPDF_Tokenizer bad )");
+ error_message = "unexpected )";
+ state = st_token_ready;
+ }
+ else if (ch == '[')
+ {
+ type = tt_array_open;
+ state = st_token_ready;
+ }
+ else if (ch == ']')
+ {
+ type = tt_array_close;
+ state = st_token_ready;
+ }
+ else if (ch == '{')
+ {
+ type = tt_brace_open;
+ state = st_token_ready;
+ }
+ else if (ch == '}')
+ {
+ type = tt_brace_close;
+ state = st_token_ready;
+ }
+ else
+ {
+ state = st_literal; // anything else starts a word, name, or number
+ }
+ }
+ }
+ else if (state == st_in_comment)
+ {
+ if ((ch == '\r') || (ch == '\n')) // a comment runs to the end of the line
+ {
+ state = st_top;
+ }
+ }
+ else if (state == st_lt)
+ {
+ if (ch == '<')
+ {
+ val = "<<";
+ type = tt_dict_open;
+ state = st_token_ready;
+ }
+ else
+ {
+ handled = false; // ch is the first hexstring character; processed in the st_in_hexstring branch below
+ state = st_in_hexstring;
+ }
+ }
+ else if (state == st_gt)
+ {
+ if (ch == '>')
+ {
+ val = ">>";
+ type = tt_dict_close;
+ state = st_token_ready;
+ }
+ else
+ {
+ val = ">";
+ type = tt_bad;
+ QTC::TC("qpdf", "QPDF_Tokenizer bad >");
+ error_message = "unexpected >";
+ unread_char = true; // ch belongs to the next token
+ char_to_unread = ch;
+ state = st_token_ready;
+ }
+ }
+ else if (state == st_in_string)
+ {
+ if (string_ignoring_newline && (! ((ch == '\r') || (ch == '\n')))) // a line continuation ends at the first non-newline character
+ {
+ string_ignoring_newline = false;
+ }
+
+ unsigned int bs_num_count = strlen(bs_num_register); // octal digits collected so far for a \ddd escape
+ bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
+ if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal))) // escape complete: three digits seen, or a non-octal character follows
+ {
+ // We've accumulated \ddd. PDF Spec says to ignore
+ // high-order overflow.
+ val += (char) strtol(bs_num_register, 0, 8);
+ memset(bs_num_register, '\0', sizeof(bs_num_register));
+ bs_num_count = 0;
+ }
+
+ if (string_ignoring_newline && ((ch == '\r') || (ch == '\n')))
+ {
+ // ignore
+ }
+ else if (ch_is_octal && (last_char_was_bs || (bs_num_count > 0))) // first or subsequent digit of an octal escape
+ {
+ bs_num_register[bs_num_count++] = ch;
+ }
+ else if (last_char_was_bs) // single-character escape sequence
+ {
+ switch (ch)
+ {
+ case 'n':
+ val += '\n';
+ break;
+
+ case 'r':
+ val += '\r';
+ break;
+
+ case 't':
+ val += '\t';
+ break;
+
+ case 'b':
+ val += '\b';
+ break;
+
+ case 'f':
+ val += '\f';
+ break;
+
+ case '\r':
+ case '\n':
+ string_ignoring_newline = true; // backslash-newline: skip the newline character(s) that follow
+ break;
+
+ default:
+ // PDF spec says backslash is ignored before anything else
+ val += ch;
+ break;
+ }
+ }
+ else if (ch == '\\')
+ {
+ // last_char_was_bs is set/cleared below as appropriate
+ if (bs_num_count)
+ {
+ throw QEXC::Internal("QPDFTokenizer: bs_num_count != 0 "
+ "when ch == '\\'");
+ }
+ }
+ else if (ch == '(') // unescaped nested parenthesis stays part of the string
+ {
+ val += ch;
+ ++string_depth;
+ }
+ else if ((ch == ')') && (--string_depth == 0)) // the outermost ')' ends the string
+ {
+ type = tt_string;
+ state = st_token_ready;
+ }
+ else
+ {
+ val += ch;
+ }
+
+ last_char_was_bs = ((! last_char_was_bs) && (ch == '\\')); // a backslash that was itself escaped does not start a new escape
+ }
+ else if (state == st_literal)
+ {
+ if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0)
+ {
+ // A C-locale whitespace character or delimiter terminates
+ // token. It is important to unread the whitespace
+ // character even though it is ignored since it may be the
+ // newline after a stream keyword. Removing it here could
+ // make the stream-reading code break on some files,
+ // though not on any files in the test suite as of this
+ // writing.
+
+ type = tt_word; // provisional; refined after the state dispatch
+ unread_char = true;
+ char_to_unread = ch;
+ state = st_token_ready;
+ }
+ else
+ {
+ val += ch;
+ }
+ }
+ else
+ {
+ handled = false; // remaining states are dealt with in the second dispatch
+ }
+
+
+ if (handled)
+ {
+ // okay
+ }
+ else if (state == st_in_hexstring)
+ {
+ if (ch == '>') // end of hexstring: convert the collected digits to bytes
+ {
+ type = tt_string;
+ state = st_token_ready;
+ if (val.length() % 2)
+ {
+ // PDF spec says odd hexstrings have implicit
+ // trailing 0.
+ val += '0';
+ }
+ char num[3];
+ num[2] = '\0';
+ std::string nval;
+ for (unsigned int i = 0; i < val.length(); i += 2) // two hex digits per output byte
+ {
+ num[0] = val[i];
+ num[1] = val[i+1];
+ char nch = (char)(strtol(num, 0, 16));
+ nval += nch;
+ }
+ val = nval;
+ }
+ else if (is_hex_digit(ch))
+ {
+ val += ch;
+ }
+ else if (strchr(" \t\n\v\f\r", ch)) // whitespace inside a hexstring is skipped
+ {
+ // ignore
+ }
+ else
+ {
+ type = tt_bad;
+ QTC::TC("qpdf", "QPDF_Tokenizer bad (");
+ error_message = std::string("invalid character (") +
+ ch + ") in hexstring";
+ state = st_token_ready;
+ }
+ }
+ else
+ {
+ throw QEXC::Internal("invalid state while reading token");
+ }
+
+ if ((state == st_token_ready) && (type == tt_word)) // classify a just-finished literal
+ {
+ if ((val.length() > 0) && (val[0] == '/'))
+ {
+ type = tt_name;
+ // Deal with # in name token. Note: '/' by itself is a
+ // valid name, so don't strip leading /. That way we
+ // don't have to deal with the empty string as a name.
+ std::string nval = "/";
+ char const* valstr = val.c_str() + 1;
+ for (char const* p = valstr; *p; ++p)
+ {
+ if ((*p == '#') && this->pound_special_in_name)
+ {
+ if (p[1] && p[2] &&
+ is_hex_digit(p[1]) && is_hex_digit(p[2])) // "#xx" with two hex digits encodes one character
+ {
+ char num[3];
+ num[0] = p[1];
+ num[1] = p[2];
+ num[2] = '\0';
+ char ch = (char)(strtol(num, 0, 16));
+ if (ch == '\0')
+ {
+ type = tt_bad;
+ QTC::TC("qpdf", "QPDF_Tokenizer null in name");
+ error_message =
+ "null character not allowed in name token";
+ nval += "#00";
+ }
+ else
+ {
+ nval += ch;
+ }
+ p += 2; // skip the two digits just consumed
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF_Tokenizer bad name");
+ type = tt_bad;
+ error_message = "invalid name token";
+ nval += *p;
+ }
+ }
+ else
+ {
+ nval += *p;
+ }
+ }
+ val = nval;
+ }
+ else if (num_re.match(val.c_str()))
+ {
+ if (val.find('.') != std::string::npos) // a decimal point makes it a real
+ {
+ type = tt_real;
+ }
+ else
+ {
+ type = tt_integer;
+ }
+ }
+ else if ((val == "true") || (val == "false"))
+ {
+ type = tt_bool;
+ }
+ else if (val == "null")
+ {
+ type = tt_null;
+ }
+ else
+ {
+ // I don't really know what it is, so leave it as tt_word.
+ // Lots of cases ($, #, etc.) other than actual words fall
+ // into this category, but that's okay at least for now.
+ type = tt_word;
+ }
+ }
+
+ if (! (betweenTokens() || ((state == st_token_ready) && unread_char))) // raw_val excludes between-token input and a character being pushed back
+ {
+ this->raw_val += orig_ch;
+ }
+}
+
+// Tell the tokenizer that input has ended.  Nothing changes when a
+// token is already waiting or no token has been started; an open
+// comment is simply closed; any other state means the input stopped
+// in the middle of a token, which is reported as a bad token.
+void
+QPDFTokenizer::presentEOF()
+{
+    if ((state == st_token_ready) || (state == st_top))
+    {
+        // okay
+        return;
+    }
+    if (state == st_in_comment)
+    {
+        state = st_top;
+        return;
+    }
+    type = tt_bad;
+    error_message = "EOF while reading token";
+    state = st_token_ready;
+}
+
+// Retrieve the finished token, if any.  unread_char and ch always
+// receive the tokenizer's current push-back flag and character.  When
+// a token is ready it is copied into token, the tokenizer is reset,
+// and true is returned; otherwise token is left alone and false is
+// returned.
+bool
+QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
+{
+    unread_char = this->unread_char;
+    ch = this->char_to_unread;
+    if (this->state != st_token_ready)
+    {
+        return false;
+    }
+    token = Token(type, val, raw_val, error_message);
+    reset();
+    return true;
+}
+
+// True while no token is in progress: at top level or inside a
+// comment.
+bool
+QPDFTokenizer::betweenTokens()
+{
+    if (state == st_top)
+    {
+        return true;
+    }
+    return (state == st_in_comment);
+}
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
new file mode 100644
index 00000000..0a611eb9
--- /dev/null
+++ b/libqpdf/QPDFWriter.cc
@@ -0,0 +1,2021 @@
+
+#include <qpdf/QPDFWriter.hh>
+
+#include <assert.h>
+#include <qpdf/Pl_StdioFile.hh>
+#include <qpdf/Pl_Count.hh>
+#include <qpdf/Pl_Discard.hh>
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/Pl_RC4.hh>
+#include <qpdf/Pl_Flate.hh>
+#include <qpdf/Pl_PNGFilter.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/MD5.hh>
+#include <qpdf/RC4.hh>
+#include <qpdf/QTC.hh>
+
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/QPDF_Name.hh>
+#include <qpdf/QPDF_String.hh>
+
+// Prepare to write pdf to filename, or to standard output when
+// filename is null.  A named file is opened with mode "wb+" and
+// marked as ours to close.  Output goes through a Pl_StdioFile
+// wrapped in a Pl_Count, which becomes the bottom of the pipeline
+// stack.
+QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
+    pdf(pdf),
+    filename(filename),
+    file(0),
+    close_file(false),
+    normalize_content_set(false),
+    normalize_content(false),
+    stream_data_mode_set(false),
+    stream_data_mode(s_compress),
+    qdf_mode(false),
+    static_id(false),
+    direct_stream_lengths(true),
+    encrypted(false),
+    preserve_encryption(true),
+    linearized(false),
+    object_stream_mode(o_preserve),
+    encryption_dict_objid(0),
+    next_objid(1),
+    cur_stream_length_id(0),
+    cur_stream_length(0),
+    added_newline(false),
+    max_ostream_index(0)
+{
+    if (filename != 0)
+    {
+        QTC::TC("qpdf", "QPDFWriter write to file");
+        file = QUtil::fopen_wrapper(std::string("open ") + filename,
+                                    fopen(filename, "wb+"));
+        close_file = true;
+    }
+    else
+    {
+        this->filename = "standard output";
+        QTC::TC("qpdf", "QPDFWriter write to stdout");
+        file = stdout;
+    }
+
+    Pipeline* file_sink = new Pl_StdioFile("qdf output", file);
+    to_delete.push_back(file_sink);
+    pipeline = new Pl_Count("qdf count", file_sink);
+    to_delete.push_back(pipeline);
+    pipeline_stack.push_back(pipeline);
+}
+
+// Close the output file, but only when the constructor opened it.
+// When writing to standard output, `file` is stdout and close_file is
+// false; closing it here would cut off any later output from the
+// calling program.
+QPDFWriter::~QPDFWriter()
+{
+    if (file && close_file)
+    {
+        fclose(file);
+    }
+}
+
+// Record the requested object stream mode.
+void
+QPDFWriter::setObjectStreamMode(object_stream_e mode)
+{
+    object_stream_mode = mode;
+}
+
+// Record the requested stream data mode and note that the caller
+// chose it explicitly.
+void
+QPDFWriter::setStreamDataMode(stream_data_e mode)
+{
+    stream_data_mode = mode;
+    stream_data_mode_set = true;
+}
+
+// Record the content normalization setting and note that the caller
+// chose it explicitly.
+void
+QPDFWriter::setContentNormalization(bool val)
+{
+    normalize_content = val;
+    normalize_content_set = true;
+}
+
+// Turn QDF mode on or off.
+void
+QPDFWriter::setQDFMode(bool val)
+{
+    qdf_mode = val;
+}
+
+// Turn the static-ID option on or off.
+void
+QPDFWriter::setStaticID(bool val)
+{
+    static_id = val;
+}
+
+// Record whether encryption should be preserved.
+void
+QPDFWriter::setPreserveEncryption(bool val)
+{
+    preserve_encryption = val;
+}
+
+// Record whether linearized output is requested.
+void
+QPDFWriter::setLinearization(bool val)
+{
+    linearized = val;
+}
+
+// Configure V=1/R=2 encryption with a 5-byte key.  Each disallowed
+// operation clears one permission bit (numbered as in the PDF
+// specification): print 3, modify 4, extract 5, annotate 6.  The
+// minimum PDF version is set to 1.3.
+void
+QPDFWriter::setR2EncryptionParameters(
+    char const* user_password, char const* owner_password,
+    bool allow_print, bool allow_modify,
+    bool allow_extract, bool allow_annotate)
+{
+    bool const allowed[] = {
+        allow_print, allow_modify, allow_extract, allow_annotate
+    };
+    int const permission_bit[] = { 3, 4, 5, 6 };
+
+    std::set<int> clear;
+    for (int i = 0; i < 4; ++i)
+    {
+        if (! allowed[i])
+        {
+            clear.insert(permission_bit[i]);
+        }
+    }
+
+    this->min_pdf_version = "1.3";
+    setEncryptionParameters(user_password, owner_password, 1, 2, 5, clear);
+}
+
+// Configure V=2/R=3 encryption with a 16-byte key.  The arguments
+// mirror the Acrobat 5 security dialog and are translated into the
+// set of permission bits to clear.  The minimum PDF version is set
+// to 1.4.
+void
+QPDFWriter::setR3EncryptionParameters(
+    char const* user_password, char const* owner_password,
+    bool allow_accessibility, bool allow_extract,
+    r3_print_e print, r3_modify_e modify)
+{
+    // Acrobat 5 security options:
+
+    // Checkboxes:
+    //   Enable Content Access for the Visually Impaired
+    //   Allow Content Copying and Extraction
+
+    // Allowed changes menu:
+    //   None
+    //   Only Document Assembly
+    //   Only Form Field Fill-in or Signing
+    //   Comment Authoring, Form Field Fill-in or Signing
+    //   General Editing, Comment and Form Field Authoring
+
+    // Allowed printing menu:
+    //   None
+    //   Low Resolution
+    //   Full printing
+
+    std::set<int> clear;
+    if (! allow_extract)
+    {
+        clear.insert(5);
+    }
+    if (! allow_accessibility)
+    {
+        clear.insert(10);
+    }
+
+    // Both switch statements rely on deliberate fall-through: each
+    // more restrictive choice also clears every bit cleared by the
+    // less restrictive choices after it.
+    switch (modify)
+    {
+      case r3m_none:
+        clear.insert(11);       // document assembly
+        // fall through
+
+      case r3m_assembly:
+        clear.insert(9);        // filling in form fields
+        // fall through
+
+      case r3m_form:
+        clear.insert(6);        // modify annotations, fill in form fields
+        // fall through
+
+      case r3m_annotate:
+        clear.insert(4);        // other modifications
+        // fall through
+
+      case r3m_all:
+        break;
+
+        // no default so gcc warns for missing cases
+    }
+
+    switch (print)
+    {
+      case r3p_none:
+        clear.insert(3);        // any printing
+        // fall through
+
+      case r3p_low:
+        clear.insert(12);       // high resolution printing
+        // fall through
+
+      case r3p_full:
+        break;
+
+        // no default so gcc warns for missing cases
+    }
+
+    this->min_pdf_version = "1.4";
+    setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear);
+}
+
+// Build the /P permission value from bits_to_clear, generate the
+// file ID, compute the /O and /U strings, and store the resulting
+// encryption parameters.  bits_to_clear is modified: bits 1 and 2
+// are always added to it.
+void
+QPDFWriter::setEncryptionParameters(
+    char const* user_password, char const* owner_password,
+    int V, int R, int key_len, std::set<int>& bits_to_clear)
+{
+    // The PDF specification numbers bits starting with 1 for the low
+    // bit, so bit N corresponds to (1 << (N - 1)).
+
+    // Specification always requires bits 1 and 2 to be cleared.
+    bits_to_clear.insert(1);
+    bits_to_clear.insert(2);
+
+    // Accumulate the bits to clear, then invert to obtain P.
+    unsigned long cleared = 0;
+    std::set<int>::iterator bit = bits_to_clear.begin();
+    while (bit != bits_to_clear.end())
+    {
+        cleared |= (1 << ((*bit) - 1));
+        ++bit;
+    }
+    unsigned long P = ~cleared;
+
+    generateID();
+    std::string O;
+    std::string U;
+    QPDF::compute_encryption_O_U(
+        user_password, owner_password, V, R, key_len, P, this->id1, O, U);
+    setEncryptionParametersInternal(
+        V, R, key_len, P, O, U, this->id1, user_password);
+}
+
+// Generate the file ID and, when the input file's trailer has an
+// /Encrypt dictionary, reuse its V, R, key length, P, O and U values
+// together with the input file's user password.  The key length is 5
+// bytes for V <= 1 and /Length / 8 otherwise.
+void
+QPDFWriter::copyEncryptionParameters()
+{
+    generateID();
+    QPDFObjectHandle trailer = this->pdf.getTrailer();
+    if (! trailer.hasKey("/Encrypt"))
+    {
+        return;
+    }
+
+    QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
+    int const V = encrypt.getKey("/V").getIntValue();
+    int const key_len =
+        ((V > 1) ? (encrypt.getKey("/Length").getIntValue() / 8) : 5);
+    int const R = encrypt.getKey("/R").getIntValue();
+    int const P = encrypt.getKey("/P").getIntValue();
+    std::string const O = encrypt.getKey("/O").getStringValue();
+    std::string const U = encrypt.getKey("/U").getStringValue();
+
+    setEncryptionParametersInternal(
+        V, R, key_len, P, O, U,
+        this->id1,      // this->id1 == the other file's id1
+        pdf.getUserPassword());
+}
+
+// Fill in the encryption dictionary entries, mark the output as
+// encrypted, and derive the encryption key from user_password.
+// Note that the id1 argument is not consulted: the key is computed
+// from this->id1.
+void
+QPDFWriter::setEncryptionParametersInternal(
+    int V, int R, int key_len, long P,
+    std::string const& O, std::string const& U,
+    std::string const& id1, std::string const& user_password)
+{
+    std::string const key_bits = QUtil::int_to_string(key_len * 8);
+
+    encryption_dictionary["/Filter"] = "/Standard";
+    encryption_dictionary["/V"] = QUtil::int_to_string(V);
+    encryption_dictionary["/Length"] = key_bits;
+    encryption_dictionary["/R"] = QUtil::int_to_string(R);
+    encryption_dictionary["/P"] = QUtil::int_to_string(P);
+    encryption_dictionary["/O"] = QPDF_String(O).unparse(true);
+    encryption_dictionary["/U"] = QPDF_String(U).unparse(true);
+    this->encrypted = true;
+
+    QPDF::EncryptionData encryption_data(V, R, key_len, P, O, U, this->id1);
+    this->encryption_key =
+        QPDF::compute_encryption_key(user_password, encryption_data);
+}
+
+// Compute the per-object key for objid (generation 0) from the
+// encryption key and make it the current data key.
+void
+QPDFWriter::setDataKey(int objid)
+{
+    int const generation = 0;
+    this->cur_data_key =
+        QPDF::compute_data_key(this->encryption_key, objid, generation);
+}
+
+// Number of bytes needed to hold n, i.e. how many times n can be
+// shifted right by 8 bits before it becomes zero.  Returns 0 for
+// n == 0.
+int
+QPDFWriter::bytesNeeded(unsigned long n)
+{
+    int count = 0;
+    for (; n != 0; n >>= 8)
+    {
+        ++count;
+    }
+    return count;
+}
+
+// Write the low `bytes` bytes of val to the current pipeline, most
+// significant byte first.  bytes must not exceed
+// sizeof(unsigned long) (checked with assert).
+void
+QPDFWriter::writeBinary(unsigned long val, unsigned int bytes)
+{
+    assert(bytes <= sizeof(unsigned long));
+    unsigned char data[sizeof(unsigned long)];
+    // Fill the buffer from its last used position back to the first.
+    unsigned int pos = bytes;
+    while (pos > 0)
+    {
+        data[--pos] = (unsigned char)(val & 0xff);
+        val >>= 8;
+    }
+    this->pipeline->write(data, bytes);
+}
+
+// Write the bytes of str to the current pipeline.
+void
+QPDFWriter::writeString(std::string const& str)
+{
+    unsigned char* bytes = (unsigned char*)str.c_str();
+    this->pipeline->write(bytes, str.length());
+}
+
+// Write the whole contents of the buffer held by b to the current
+// pipeline.
+void
+QPDFWriter::writeBuffer(PointerHolder<Buffer>& b)
+{
+    Buffer* contents = b.getPointer();
+    this->pipeline->write(contents->getBuffer(), contents->getSize());
+}
+
+// Write str only when QDF mode is on.
+void
+QPDFWriter::writeStringQDF(std::string const& str)
+{
+    if (! this->qdf_mode)
+    {
+        return;
+    }
+    writeString(str);
+}
+
+// Write str only when QDF mode is off.
+void
+QPDFWriter::writeStringNoQDF(std::string const& str)
+{
+    if (this->qdf_mode)
+    {
+        return;
+    }
+    writeString(str);
+}
+
+// Push p onto the pipeline stack and return it.  p must not be a
+// Pl_Count (checked with assert): popPipelineStack() relies on
+// Pl_Count entries to find stack boundaries.
+Pipeline*
+QPDFWriter::pushPipeline(Pipeline* p)
+{
+    assert(dynamic_cast<Pl_Count*>(p) == 0);
+    pipeline_stack.push_back(p);
+    return p;
+}
+
+// Cap the pipeline stack with a new Pl_Count that feeds the current
+// top entry, and make that counter the active output pipeline.
+void
+QPDFWriter::activatePipelineStack()
+{
+    Pipeline* next = this->pipeline_stack.back();
+    Pl_Count* counter = new Pl_Count("count", next);
+    this->pipeline_stack.push_back(counter);
+    this->pipeline = counter;
+}
+
+// Undo the most recent activatePipelineStack(): finish the active
+// pipeline, then delete it and every entry below it down to (but not
+// including) the previous Pl_Count, which becomes active again.  If
+// bp is non-null and a deleted entry is a Pl_Buffer, its buffer is
+// stored in *bp before the entry is deleted.
+void
+QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp)
+{
+    assert(this->pipeline_stack.size() >= 2);
+    this->pipeline->finish();
+    assert(dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) ==
+           this->pipeline);
+    delete this->pipeline_stack.back();
+    this->pipeline_stack.pop_back();
+
+    for (;;)
+    {
+        Pipeline* top = this->pipeline_stack.back();
+        if (dynamic_cast<Pl_Count*>(top) != 0)
+        {
+            // Reached the counter of the enclosing level.
+            break;
+        }
+        this->pipeline_stack.pop_back();
+        Pl_Buffer* as_buffer = dynamic_cast<Pl_Buffer*>(top);
+        if (bp && as_buffer)
+        {
+            *bp = as_buffer->getBuffer();
+        }
+        delete top;
+    }
+
+    this->pipeline = dynamic_cast<Pl_Count*>(this->pipeline_stack.back());
+}
+
+// When output is encrypted and a data key is set, push an RC4
+// pipeline keyed with the current data key in front of the active
+// pipeline.  A new stack level is activated in every case.
+void
+QPDFWriter::pushEncryptionFilter()
+{
+    bool const need_rc4 =
+        (this->encrypted && (! this->cur_data_key.empty()));
+    if (need_rc4)
+    {
+        unsigned char* key = (unsigned char*) this->cur_data_key.c_str();
+        pushPipeline(
+            new Pl_RC4("stream encryption", this->pipeline,
+                       key, this->cur_data_key.length()));
+    }
+    // Activation is unconditional so that every call to
+    // pushEncryptionFilter() can be balanced by popPipelineStack().
+    activatePipelineStack();
+}
+
+// Push a Pl_Discard pipeline and activate a new stack level on top
+// of it.
+void
+QPDFWriter::pushDiscardFilter()
+{
+    Pipeline* sink = new Pl_Discard();
+    pushPipeline(sink);
+    activatePipelineStack();
+}
+
+// Begin an object in the output.  An objid of 0 requests the next
+// free object number.  The current pipeline byte count is recorded
+// as the object's xref offset before "<objid> 0 obj\n" is written.
+// Returns the object number used.
+int
+QPDFWriter::openObject(int objid)
+{
+    if (objid == 0)
+    {
+        objid = this->next_objid++;
+    }
+    // The offset must be captured before the header is written.
+    this->xref[objid] = QPDFXRefEntry(1, pipeline->getCount(), 0);
+    std::string const number = QUtil::int_to_string(objid);
+    writeString(number);
+    writeString(" 0 obj\n");
+    return objid;
+}
+
+// Finish the indirect object objid and record in lengths[objid]
+// the number of bytes written since the offset stored for it in
+// the xref table.
+void
+QPDFWriter::closeObject(int objid)
+{
+    // The newline in front of endobj makes the file easier to
+    // repair.
+    writeString("\nendobj\n");
+    writeStringQDF("\n");
+    int const end_offset = this->pipeline->getCount();
+    this->lengths[objid] = end_offset - this->xref[objid].getOffset();
+}
+
+// If objid is an object stream, reserve a new object number for
+// each object that belongs to it. For any other object this does
+// nothing.
+void
+QPDFWriter::assignCompressedObjectNumbers(int objid)
+{
+    if (this->object_stream_to_objects.count(objid) != 0)
+    {
+        std::set<int>& members = this->object_stream_to_objects[objid];
+        for (std::set<int>::iterator member = members.begin();
+             member != members.end(); ++member)
+        {
+            obj_renumber[*member] = next_objid++;
+        }
+    }
+}
+
+// Assign an output object number to object and queue it for
+// writing.
+//
+// Indirect objects that have not been numbered yet are handled as
+// follows: an object that lives in an object stream causes its
+// object stream to be enqueued instead; anything else is appended
+// to the queue and given the next object number. Direct arrays and
+// dictionaries are traversed recursively, but only for
+// non-linearized output. All other direct objects are ignored.
+void
+QPDFWriter::enqueueObject(QPDFObjectHandle object)
+{
+    if (object.isIndirect())
+    {
+        // An indirect null is the place-holder for an object
+        // stream; any other indirect scalar is an internal error.
+        if ((! object.isNull()) && object.isScalar())
+        {
+            throw QEXC::Internal(
+                "QPDFWriter::enqueueObject: indirect scalar: " +
+                std::string(this->filename) + " " +
+                QUtil::int_to_string(object.getObjectID()) + " " +
+                QUtil::int_to_string(object.getGeneration()));
+        }
+
+        int const objid = object.getObjectID();
+        if (obj_renumber.count(objid) != 0)
+        {
+            // Already numbered; nothing more to do.
+            return;
+        }
+
+        if (this->object_to_object_stream.count(objid))
+        {
+            // The object is stored in an object stream, so enqueue
+            // the containing stream rather than the object itself.
+            int const stream_id = this->object_to_object_stream[objid];
+            enqueueObject(this->pdf.getObjectByID(stream_id, 0));
+            return;
+        }
+
+        object_queue.push_back(object);
+        obj_renumber[objid] = next_objid++;
+
+        if (this->object_stream_to_objects.count(objid))
+        {
+            // For linearized files the members of object streams
+            // are numbered elsewhere.
+            if (! this->linearized)
+            {
+                assignCompressedObjectNumbers(objid);
+            }
+        }
+        else if ((! this->direct_stream_lengths) && object.isStream())
+        {
+            // Keep the following object number free for the
+            // stream's length object.
+            ++next_objid;
+        }
+    }
+    else if (object.isArray())
+    {
+        int const n_items = object.getArrayNItems();
+        if (! this->linearized)
+        {
+            for (int idx = 0; idx < n_items; ++idx)
+            {
+                enqueueObject(object.getArrayItem(idx));
+            }
+        }
+    }
+    else if (object.isDictionary())
+    {
+        std::set<std::string> keys = object.getKeys();
+        if (! this->linearized)
+        {
+            for (std::set<std::string>::iterator key = keys.begin();
+                 key != keys.end(); ++key)
+            {
+                enqueueObject(object.getKey(*key));
+            }
+        }
+    }
+    // Any other direct object needs no processing.
+}
+
+// Write a child of an array or dictionary. For non-linearized
+// output the child is enqueued first. An indirect child is written
+// as a reference ("N 0 R") using its renumbered object ID; a direct
+// child is written inline through unparseObject().
+void
+QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
+{
+    if (! this->linearized)
+    {
+        enqueueObject(child);
+    }
+
+    if (! child.isIndirect())
+    {
+        unparseObject(child, level, flags);
+        return;
+    }
+
+    if (child.isScalar())
+    {
+        throw QEXC::Internal(
+            "QPDFWriter::unparseChild: indirect scalar: " +
+            QUtil::int_to_string(child.getObjectID()) + " " +
+            QUtil::int_to_string(child.getGeneration()));
+    }
+
+    int const new_id = obj_renumber[child.getObjectID()];
+    writeString(QUtil::int_to_string(new_id));
+    writeString(" 0 R");
+}
+
+// Write the contents of a trailer dictionary.
+//
+// which: t_lin_second writes only /Size; any other value copies
+// every key of the original trailer, substituting size for /Size.
+// For t_lin_first a /Prev entry is written after /Size and padded
+// with spaces so that the number plus padding occupy 11
+// characters.
+// size: value written for /Size.
+// xref_stream: when true the opening "trailer <<" is omitted; the
+// closing ">>" is written in either case.
+// prev: value written for /Prev (used only for t_lin_first).
+//
+// /ID is always written; /Encrypt is written for encrypted files
+// except when which is t_lin_second.
+void
+QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, int prev)
+{
+    QPDFObjectHandle trailer = pdf.getTrailer();
+    if (! xref_stream)
+    {
+        writeString("trailer <<");
+    }
+    writeStringQDF("\n");
+
+    if (which != t_lin_second)
+    {
+        std::set<std::string> keys = trailer.getKeys();
+        for (std::set<std::string>::iterator key_iter = keys.begin();
+             key_iter != keys.end(); ++key_iter)
+        {
+            std::string const& key = *key_iter;
+            writeStringQDF(" ");
+            writeStringNoQDF(" ");
+            writeString(QPDF_Name::normalizeName(key));
+            writeString(" ");
+            if (key != "/Size")
+            {
+                unparseChild(trailer.getKey(key), 1, 0);
+            }
+            else
+            {
+                writeString(QUtil::int_to_string(size));
+                if (which == t_lin_first)
+                {
+                    writeString(" /Prev ");
+                    int const start = this->pipeline->getCount();
+                    writeString(QUtil::int_to_string(prev));
+                    int const nspaces =
+                        start + 11 - this->pipeline->getCount();
+                    assert(nspaces >= 0);
+                    for (int remaining = nspaces; remaining > 0;
+                         --remaining)
+                    {
+                        writeString(" ");
+                    }
+                }
+            }
+            writeStringQDF("\n");
+        }
+    }
+    else
+    {
+        writeString(" /Size ");
+        writeString(QUtil::int_to_string(size));
+    }
+
+    // File identifier
+    writeStringQDF(" ");
+    writeString(" /ID [");
+    writeString(QPDF_String(this->id1).unparse(true));
+    writeString(QPDF_String(this->id2).unparse(true));
+    writeString("]");
+
+    // Reference to the encryption dictionary
+    if ((which != t_lin_second) && this->encrypted)
+    {
+        writeString(" /Encrypt ");
+        writeString(QUtil::int_to_string(this->encryption_dict_objid));
+        writeString(" 0 R");
+    }
+
+    writeStringQDF("\n");
+    writeStringNoQDF(" ");
+    writeString(">>");
+}
+
+// Convenience overload for objects that are not stream
+// dictionaries: forwards to the five-argument form with a stream
+// length of 0 and compression off.
+void
+QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
+                          unsigned int flags)
+{
+    int const no_stream_length = 0;
+    bool const no_compress = false;
+    unparseObject(object, level, flags, no_stream_length, no_compress);
+}
+
+// Write the serialized form of object to the active pipeline.
+//
+// level: nesting depth, used to build the QDF-mode indentation.
+// flags: combination of f_stream, f_filtered and f_in_ostream.
+// f_stream is stripped before the flags are passed to children.
+// stream_length, compress: consulted only when flags contains
+// f_stream, i.e. when object is the dictionary of a stream that
+// is being written by the stream branch below.
+//
+// Arrays and dictionaries are written element by element through
+// unparseChild(). For a stream, the data is first piped into a
+// buffer so that its length is known, then the dictionary is
+// written, then the (possibly encrypted) data. Strings are RC4
+// encrypted when the file is encrypted, a data key is set, and the
+// string is not inside an object stream. Everything else is
+// written via unparseResolved().
+void
+QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
+                          unsigned int flags, int stream_length, bool compress)
+{
+    unsigned int child_flags = flags & ~f_stream;
+
+    std::string indent;
+    for (int i = 0; i < level; ++i)
+    {
+        indent += " ";
+    }
+
+    if (object.isArray())
+    {
+        // Note: PDF spec 1.4 implementation note 121 states that
+        // Acrobat requires a space after the [ in the /H key of the
+        // linearization parameter dictionary. We'll do this
+        // unconditionally for all arrays because it looks nicer and
+        // doesn't make the files that much bigger.
+        writeString("[");
+        writeStringQDF("\n");
+        int n = object.getArrayNItems();
+        for (int i = 0; i < n; ++i)
+        {
+            writeStringQDF(indent);
+            writeStringQDF(" ");
+            writeStringNoQDF(" ");
+            unparseChild(object.getArrayItem(i), level + 1, child_flags);
+            writeStringQDF("\n");
+        }
+        writeStringQDF(indent);
+        writeStringNoQDF(" ");
+        writeString("]");
+    }
+    else if (object.isDictionary())
+    {
+        writeString("<<");
+        writeStringQDF("\n");
+        std::set<std::string> keys = object.getKeys();
+        for (std::set<std::string>::iterator iter = keys.begin();
+             iter != keys.end(); ++iter)
+        {
+            std::string const& key = *iter;
+            // The stream data was decoded, so the original filter
+            // description no longer applies.
+            if ((flags & f_filtered) &&
+                ((key == "/Filter") ||
+                 (key == "/DecodeParms")))
+            {
+                continue;
+            }
+            // /Length of a stream is always regenerated below.
+            if ((flags & f_stream) && (key == "/Length"))
+            {
+                continue;
+            }
+            writeStringQDF(indent);
+            writeStringQDF(" ");
+            writeStringNoQDF(" ");
+            writeString(QPDF_Name::normalizeName(key));
+            writeString(" ");
+            unparseChild(object.getKey(key), level + 1, child_flags);
+            writeStringQDF("\n");
+        }
+
+        if (flags & f_stream)
+        {
+            writeStringQDF(indent);
+            writeStringQDF(" ");
+            writeString(" /Length ");
+
+            if (this->direct_stream_lengths)
+            {
+                writeString(QUtil::int_to_string(stream_length));
+            }
+            else
+            {
+                // Refer to the separate length object instead.
+                writeString(
+                    QUtil::int_to_string(this->cur_stream_length_id));
+                writeString(" 0 R");
+            }
+            writeStringQDF("\n");
+            if (compress && (flags & f_filtered))
+            {
+                writeStringQDF(indent);
+                writeStringQDF(" ");
+                writeString(" /Filter /FlateDecode");
+                writeStringQDF("\n");
+            }
+        }
+
+        writeStringQDF(indent);
+        writeStringNoQDF(" ");
+        writeString(">>");
+    }
+    else if (object.isStream())
+    {
+        // Write stream data to a buffer.
+        int old_id = object.getObjectID();
+        int new_id = obj_renumber[old_id];
+        if (! this->direct_stream_lengths)
+        {
+            // The length object uses the ID right after the stream.
+            this->cur_stream_length_id = new_id + 1;
+        }
+        QPDFObjectHandle stream_dict = object.getDict();
+
+        bool filter = (this->stream_data_mode != s_preserve);
+        if (this->stream_data_mode == s_compress)
+        {
+            // Don't filter if the stream is already compressed with
+            // FlateDecode. We don't want to make it worse by getting
+            // rid of a predictor or otherwise messing with it. We
+            // should also avoid messing with anything that's
+            // compressed with a lossy compression scheme, but we
+            // don't support any of those right now.
+            QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
+            if (filter_obj.isName() && (filter_obj.getName() == "/FlateDecode"))
+            {
+                QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
+                filter = false;
+            }
+        }
+        bool normalize = false;
+        // Named differently from the compress parameter, which is
+        // not used in this branch.
+        bool compress_stream = false;
+        if (this->normalize_content && normalized_streams.count(old_id))
+        {
+            normalize = true;
+            filter = true;
+        }
+        else if (filter && (this->stream_data_mode == s_compress))
+        {
+            compress_stream = true;
+            QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
+        }
+
+        flags |= f_stream;
+
+        pushPipeline(new Pl_Buffer("stream data"));
+        activatePipelineStack();
+        bool filtered =
+            object.pipeStreamData(
+                this->pipeline, filter, normalize, compress_stream);
+        PointerHolder<Buffer> stream_data;
+        popPipelineStack(&stream_data);
+        if (filtered)
+        {
+            flags |= f_filtered;
+        }
+        else
+        {
+            // The data was passed through unchanged, so it was not
+            // compressed either.
+            compress_stream = false;
+        }
+
+        this->cur_stream_length = stream_data.getPointer()->getSize();
+        unparseObject(stream_dict, 0, flags,
+                      this->cur_stream_length, compress_stream);
+        writeString("\nstream\n");
+        pushEncryptionFilter();
+        writeBuffer(stream_data);
+        popPipelineStack();
+
+        if (this->qdf_mode)
+        {
+            // Remember whether a newline was added so that
+            // writeObject() can emit the ignore_newline marker.
+            if (this->pipeline->getLastChar() != '\n')
+            {
+                writeString("\n");
+                this->added_newline = true;
+            }
+            else
+            {
+                this->added_newline = false;
+            }
+        }
+        writeString("endstream");
+    }
+    else if (object.isString())
+    {
+        std::string val;
+        if (this->encrypted &&
+            (! (flags & f_in_ostream)) &&
+            (! this->cur_data_key.empty()))
+        {
+            // Encrypt a private copy of the string value in place.
+            // The copy is held in a vector so that it is released
+            // even if one of the calls below throws.
+            std::string const plain = object.getStringValue();
+            std::vector<unsigned char> buf(plain.begin(), plain.end());
+            unsigned int vlen = buf.size();
+            RC4 rc4((unsigned char const*)this->cur_data_key.c_str(),
+                    this->cur_data_key.length());
+            if (! buf.empty())
+            {
+                rc4.process(&buf[0], vlen);
+            }
+            val = QPDF_String(std::string(buf.begin(), buf.end())).unparse();
+        }
+        else
+        {
+            val = object.unparseResolved();
+        }
+        writeString(val);
+    }
+    else
+    {
+        writeString(object.unparseResolved());
+    }
+}
+
+// Write the table that starts an object stream: one "object-number
+// offset" pair per entry of offsets, with object numbers counting
+// up from first_obj. Pairs are separated by a newline in QDF mode
+// and by a space otherwise; a newline follows the last pair.
+void
+QPDFWriter::writeObjectStreamOffsets(std::vector<int>& offsets,
+                                     int first_obj)
+{
+    int obj_number = first_obj;
+    for (std::vector<int>::iterator offset = offsets.begin();
+         offset != offsets.end(); ++offset, ++obj_number)
+    {
+        if (offset != offsets.begin())
+        {
+            writeStringQDF("\n");
+            writeStringNoQDF(" ");
+        }
+        writeString(QUtil::int_to_string(obj_number));
+        writeString(" ");
+        writeString(QUtil::int_to_string(*offset));
+    }
+    writeString("\n");
+}
+
+// Write an object stream and record type 2 xref entries for the
+// objects it contains.
+//
+// The stream contents are generated twice. Pass 1 writes into a
+// discard pipeline purely to learn the offset of each member. Pass
+// 2 writes the offset table followed by the members into a buffer
+// (deflated unless stream data is being uncompressed or QDF mode
+// is on). The buffered data is then emitted as an /ObjStm stream.
+void
+QPDFWriter::writeObjectStream(QPDFObjectHandle object)
+{
+    // object may be a null place-holder when the object stream is
+    // being generated from scratch.
+
+    int const old_id = object.getObjectID();
+    int const new_id = obj_renumber[old_id];
+    std::set<int>& members = this->object_stream_to_objects[old_id];
+
+    std::vector<int> offsets;
+    int first = 0;
+    PointerHolder<Buffer> stream_buffer;
+    int first_obj = -1;
+    bool compressed = false;
+
+    for (int pass = 1; pass <= 2; ++pass)
+    {
+        if (pass == 2)
+        {
+            // Make the offsets relative to the first object so that
+            // the comment before the first object is skipped.
+            first = offsets[0];
+            for (std::vector<int>::size_type k = 0;
+                 k < offsets.size(); ++k)
+            {
+                offsets[k] -= first;
+            }
+
+            // Write the offset table once into a discard pipeline
+            // to find out how many bytes it occupies.
+            pushDiscardFilter();
+            writeObjectStreamOffsets(offsets, first_obj);
+            first += this->pipeline->getCount();
+            popPipelineStack();
+
+            // Set up the buffer that receives the real data.
+            Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
+            bool const leave_uncompressed =
+                ((this->stream_data_mode == s_uncompress) ||
+                 this->qdf_mode);
+            if (! leave_uncompressed)
+            {
+                compressed = true;
+                pushPipeline(
+                    new Pl_Flate("compress object stream", next,
+                                 Pl_Flate::a_deflate));
+            }
+            activatePipelineStack();
+            writeObjectStreamOffsets(offsets, first_obj);
+        }
+        else
+        {
+            pushDiscardFilter();
+        }
+
+        int index = 0;
+        for (std::set<int>::iterator member = members.begin();
+             member != members.end(); ++member)
+        {
+            int const obj = *member;
+            int const new_obj = this->obj_renumber[obj];
+            if (first_obj == -1)
+            {
+                first_obj = new_obj;
+            }
+            if (this->qdf_mode)
+            {
+                writeString("%% Object stream: object " +
+                            QUtil::int_to_string(new_obj) + ", index " +
+                            QUtil::int_to_string(index) + "\n");
+            }
+            if (pass == 1)
+            {
+                offsets.push_back(this->pipeline->getCount());
+            }
+            writeObject(this->pdf.getObjectByID(obj, 0), index);
+
+            this->xref[new_obj] = QPDFXRefEntry(2, new_id, index);
+            ++index;
+        }
+
+        // Only pass 2 has a Pl_Buffer on the stack, so only then
+        // is stream_buffer filled in.
+        popPipelineStack(&stream_buffer);
+    }
+
+    // Write the object stream itself.
+    openObject(new_id);
+    setDataKey(new_id);
+    writeString("<<");
+    writeStringQDF("\n ");
+    writeString(" /Type /ObjStm");
+    writeStringQDF("\n ");
+    writeString(" /Length " +
+                QUtil::int_to_string(stream_buffer.getPointer()->getSize()));
+    writeStringQDF("\n ");
+    if (compressed)
+    {
+        writeString(" /Filter /FlateDecode");
+    }
+    writeString(" /N " + QUtil::int_to_string(offsets.size()));
+    writeStringQDF("\n ");
+    writeString(" /First " + QUtil::int_to_string(first));
+    if (! object.isNull())
+    {
+        // Carry over /Extends from the original object stream when
+        // it is an indirect reference.
+        QPDFObjectHandle extends = object.getDict().getKey("/Extends");
+        if (extends.isIndirect())
+        {
+            QTC::TC("qpdf", "QPDFWriter copy Extends");
+            writeStringQDF("\n ");
+            writeString(" /Extends ");
+            unparseChild(extends, 1, f_in_ostream);
+        }
+    }
+    writeStringQDF("\n");
+    writeStringNoQDF(" ");
+    writeString(">>\nstream\n");
+    if (this->encrypted)
+    {
+        QTC::TC("qpdf", "QPDFWriter encrypt object stream");
+    }
+    pushEncryptionFilter();
+    writeBuffer(stream_buffer);
+    popPipelineStack();
+    writeString("endstream");
+    this->cur_data_key.clear();
+    closeObject(new_id);
+}
+
+// Write one object from the queue.
+//
+// object_stream_index is -1 for a top-level object; otherwise the
+// object is being written as a member of an object stream. A
+// top-level object that is itself an object stream is delegated to
+// writeObjectStream(). In QDF mode, page objects and page content
+// streams are preceded by an identifying comment. When stream
+// lengths are indirect, a stream is followed by the object that
+// holds its length.
+void
+QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
+{
+    int const old_id = object.getObjectID();
+    bool const in_object_stream = (object_stream_index != -1);
+
+    if ((! in_object_stream) &&
+        (this->object_stream_to_objects.count(old_id)))
+    {
+        writeObjectStream(object);
+        return;
+    }
+
+    int const new_id = obj_renumber[old_id];
+    if (this->qdf_mode)
+    {
+        if (this->page_object_to_seq.count(old_id))
+        {
+            int const page_seq = this->page_object_to_seq[old_id];
+            writeString("%% Page ");
+            writeString(QUtil::int_to_string(page_seq));
+            writeString("\n");
+        }
+        if (this->contents_to_page_seq.count(old_id))
+        {
+            int const page_seq = this->contents_to_page_seq[old_id];
+            writeString("%% Contents for page ");
+            writeString(QUtil::int_to_string(page_seq));
+            writeString("\n");
+        }
+    }
+
+    if (in_object_stream)
+    {
+        unparseObject(object, 0, f_in_ostream);
+        writeString("\n");
+    }
+    else
+    {
+        openObject(new_id);
+        setDataKey(new_id);
+        unparseObject(object, 0, 0);
+        this->cur_data_key.clear();
+        closeObject(new_id);
+    }
+
+    if ((! this->direct_stream_lengths) && object.isStream())
+    {
+        if (this->qdf_mode && this->added_newline)
+        {
+            writeString("%QDF: ignore_newline\n");
+        }
+        // The length object uses the ID right after the stream.
+        int const length_id = new_id + 1;
+        openObject(length_id);
+        writeString(QUtil::int_to_string(this->cur_stream_length));
+        closeObject(length_id);
+    }
+}
+
+// Compute the two halves of the file identifier (id1 and id2).
+//
+// id2 is either a fixed value (static_id, for the test suite) or
+// the MD5 digest of a seed built from the current time, the file
+// name and the string values in the /Info dictionary. id1 is taken
+// from the original trailer's /ID when present and is otherwise
+// the same as id2. Calling this again after the ID has been
+// generated does nothing.
+void
+QPDFWriter::generateID()
+{
+    // Note: we can't call generateID() at the time of construction
+    // since the caller hasn't yet had a chance to call setStaticID(),
+    // but we need to generate it before computing encryption
+    // dictionary parameters. This is why we call this function both
+    // from setEncryptionParameters() and from write() and return
+    // immediately if the ID has already been generated.
+
+    if (! this->id2.empty())
+    {
+        return;
+    }
+
+    QPDFObjectHandle trailer = pdf.getTrailer();
+
+    std::string result;
+
+    if (this->static_id)
+    {
+        // For test suite use only... Stored as unsigned char
+        // because several values do not fit in a signed char.
+        static unsigned char const tmp[] = {0x31, 0x41, 0x59, 0x26,
+                                            0x53, 0x58, 0x97, 0x93,
+                                            0x23, 0x84, 0x62, 0x64,
+                                            0x33, 0x83, 0x27, 0x95};
+        result = std::string((char const*)tmp, sizeof(tmp));
+    }
+    else
+    {
+        // The PDF specification has guidelines for creating IDs, but it
+        // states clearly that the only thing that's really important is
+        // that it is very likely to be unique. We can't really follow
+        // the guidelines in the spec exactly because we haven't written
+        // the file yet. This scheme should be fine though.
+
+        std::string seed;
+        seed += QUtil::int_to_string((int)time(0));
+        seed += " QPDF ";
+        seed += filename;
+        seed += " ";
+        if (trailer.hasKey("/Info"))
+        {
+            // Use the string values of the /Info dictionary itself,
+            // not those of the trailer.
+            QPDFObjectHandle info = trailer.getKey("/Info");
+            if (info.isDictionary())
+            {
+                std::set<std::string> keys = info.getKeys();
+                for (std::set<std::string>::iterator iter = keys.begin();
+                     iter != keys.end(); ++iter)
+                {
+                    QPDFObjectHandle obj = info.getKey(*iter);
+                    if (obj.isString())
+                    {
+                        seed += " ";
+                        seed += obj.getStringValue();
+                    }
+                }
+            }
+        }
+
+        MD5 m;
+        m.encodeString(seed.c_str());
+        MD5::Digest digest;
+        m.digest(digest);
+        result = std::string((char*)digest, sizeof(MD5::Digest));
+    }
+
+    // If /ID already exists, follow the spec: use the original first
+    // word and generate a new second word. Otherwise, we'll use the
+    // generated ID for both.
+
+    this->id2 = result;
+    if (trailer.hasKey("/ID"))
+    {
+        // Note: keep /ID from old file even if --static-id was given.
+        this->id1 = trailer.getKey("/ID").getArrayItem(0).getStringValue();
+    }
+    else
+    {
+        this->id1 = this->id2;
+    }
+}
+
+// Record, for every page, its 1-based sequence number and the
+// object IDs of its content streams. The content streams are also
+// added to normalized_streams so they can be recognized when
+// filtering or normalizing.
+void
+QPDFWriter::initializeSpecialStreams()
+{
+    std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+    int page_seq = 0;
+    for (std::vector<QPDFObjectHandle>::iterator page_iter = pages.begin();
+         page_iter != pages.end(); ++page_iter)
+    {
+        QPDFObjectHandle& page = *page_iter;
+        ++page_seq;
+        this->page_object_to_seq[page.getObjectID()] = page_seq;
+
+        // /Contents may be a single stream or an array of streams.
+        QPDFObjectHandle contents = page.getKey("/Contents");
+        std::vector<int> content_ids;
+        if (contents.isArray())
+        {
+            int const n_items = contents.getArrayNItems();
+            for (int idx = 0; idx < n_items; ++idx)
+            {
+                content_ids.push_back(
+                    contents.getArrayItem(idx).getObjectID());
+            }
+        }
+        else if (contents.isStream())
+        {
+            content_ids.push_back(contents.getObjectID());
+        }
+
+        for (std::vector<int>::size_type k = 0;
+             k < content_ids.size(); ++k)
+        {
+            int const content_id = content_ids[k];
+            this->contents_to_page_seq[content_id] = page_seq;
+            this->normalized_streams.insert(content_id);
+        }
+    }
+}
+
+// Fill object_to_object_stream from the input file by asking the
+// QPDF object for its object stream data.
+void
+QPDFWriter::preserveObjectStreams()
+{
+    std::map<int, int>& mapping = this->object_to_object_stream;
+    this->pdf.getObjectStreamData(mapping);
+}
+
+// Distribute all compressible objects over newly created object
+// streams by filling in object_to_object_stream. If there are no
+// compressible objects, no object streams are created.
+void
+QPDFWriter::generateObjectStreams()
+{
+    // Basic strategy: make a list of objects that can go into an
+    // object stream. Then figure out how many object streams are
+    // needed so that we can distribute objects approximately evenly
+    // without having any object stream exceed 100 members. We don't
+    // have to worry about linearized files here -- if the file is
+    // linearized, we take care of excluding things that aren't
+    // allowed here later.
+
+    // This code doesn't do anything with /Extends.
+
+    std::vector<int> const& eligible = this->pdf.getCompressibleObjects();
+    if (eligible.empty())
+    {
+        // Nothing to compress. Returning here also keeps the
+        // division below from dividing by zero.
+        return;
+    }
+    unsigned int n_object_streams = (eligible.size() + 99) / 100;
+    // Objects per stream, rounded up.
+    unsigned int n_per = eligible.size() / n_object_streams;
+    if (n_per * n_object_streams < eligible.size())
+    {
+        ++n_per;
+    }
+    unsigned int n = 0;
+    int cur_ostream = 0;
+    for (std::vector<int>::const_iterator iter = eligible.begin();
+         iter != eligible.end(); ++iter)
+    {
+        if ((n % n_per) == 0)
+        {
+            if (n > 0)
+            {
+                QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
+            }
+            n = 0;
+        }
+        if (n == 0)
+        {
+            // Construct a new null object as the "original" object
+            // stream. The rest of the code knows that this means
+            // we're creating the object stream from scratch.
+            cur_ostream = this->pdf.makeIndirectObject(
+                QPDFObjectHandle::newNull()).getObjectID();
+        }
+        this->object_to_object_stream[*iter] = cur_ostream;
+        ++n;
+    }
+}
+
+// Top-level entry point: reconcile the requested options, work out
+// which objects go into object streams, generate the file ID, and
+// write the file in either linearized or standard form. Afterwards
+// the pipeline is finished and the output file is closed if
+// close_file is set.
+void
+QPDFWriter::write()
+{
+    // Linearized output is never written in QDF mode.
+    if (this->linearized)
+    {
+        this->qdf_mode = false;
+    }
+
+    // QDF mode supplies defaults for options that were not set
+    // explicitly.
+    if (this->qdf_mode)
+    {
+        if (! this->normalize_content_set)
+        {
+            this->normalize_content = true;
+        }
+        if (! this->stream_data_mode_set)
+        {
+            this->stream_data_mode = s_uncompress;
+        }
+    }
+
+    // Don't preserve the input's encryption when encryption was set
+    // explicitly, or when the options make the contents readable
+    // (encryption makes looking at contents pretty useless).
+    if (this->encrypted ||
+        this->normalize_content ||
+        (this->stream_data_mode == s_uncompress) ||
+        this->qdf_mode)
+    {
+        this->preserve_encryption = false;
+    }
+
+    if (preserve_encryption)
+    {
+        copyEncryptionParameters();
+    }
+
+    if (this->qdf_mode || this->normalize_content ||
+        (this->stream_data_mode == s_uncompress))
+    {
+        initializeSpecialStreams();
+    }
+
+    if (this->qdf_mode)
+    {
+        // fix-qdf stores recomputed stream lengths in indirect
+        // length objects, so QDF mode always uses them. Object
+        // streams, xref streams and hint streams still get direct
+        // lengths.
+        this->direct_stream_lengths = false;
+    }
+
+    switch (this->object_stream_mode)
+    {
+      case o_disable:
+        // nothing to do
+        break;
+
+      case o_preserve:
+        preserveObjectStreams();
+        break;
+
+      case o_generate:
+        generateObjectStreams();
+        break;
+
+        // no default so gcc will warn for missing case tag
+    }
+
+    if (this->linearized)
+    {
+        // Page dictionaries may not be compressed objects.
+        std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+        for (std::vector<QPDFObjectHandle>::iterator page = pages.begin();
+             page != pages.end(); ++page)
+        {
+            int const page_id = (*page).getObjectID();
+            if (this->object_to_object_stream.count(page_id))
+            {
+                QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
+                this->object_to_object_stream.erase(page_id);
+            }
+        }
+    }
+
+    if (this->linearized || this->encrypted)
+    {
+        // The document catalog may not be compressed in linearized
+        // files. Adobe Reader 8.0.0 also appears unable to handle
+        // encrypted files with a compressed catalog, so it is kept
+        // uncompressed in that case as well.
+        int const root_id = pdf.getRoot().getObjectID();
+        if (this->object_to_object_stream.count(root_id))
+        {
+            QTC::TC("qpdf", "QPDFWriter uncompressing root");
+            this->object_to_object_stream.erase(root_id);
+        }
+    }
+
+    // Build the reverse mapping from object stream to its members
+    // and track the largest index within any object stream.
+    for (std::map<int, int>::iterator entry =
+             this->object_to_object_stream.begin();
+         entry != this->object_to_object_stream.end(); ++entry)
+    {
+        int const obj = entry->first;
+        int const stream = entry->second;
+        std::set<int>& members = this->object_stream_to_objects[stream];
+        members.insert(obj);
+        int const last_index = (int)members.size() - 1;
+        if (last_index > this->max_ostream_index)
+        {
+            this->max_ostream_index = last_index;
+        }
+    }
+
+    if (! this->object_stream_to_objects.empty())
+    {
+        this->min_pdf_version = "1.5";
+    }
+
+    generateID();
+
+    pdf.trimTrailerForWrite();
+    pdf.flattenScalarReferences();
+
+    if (this->linearized)
+    {
+        writeLinearized();
+    }
+    else
+    {
+        writeStandard();
+    }
+
+    this->pipeline->finish();
+    if (this->close_file)
+    {
+        fclose(this->file);
+    }
+    this->file = 0;
+}
+
+// Enqueue every object of part, in order.
+void
+QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
+{
+    for (std::vector<QPDFObjectHandle>::size_type k = 0;
+         k < part.size(); ++k)
+    {
+        enqueueObject(part[k]);
+    }
+}
+
+// Write the encryption dictionary as an indirect object and store
+// the object number it was written under in encryption_dict_objid
+// (openObject() allocates a number if none was assigned yet).
+// Keys and values from encryption_dictionary are written as-is.
+void
+QPDFWriter::writeEncryptionDictionary()
+{
+    int const objid = openObject(this->encryption_dict_objid);
+    this->encryption_dict_objid = objid;
+
+    writeString("<<");
+    std::map<std::string, std::string>::iterator entry =
+        this->encryption_dictionary.begin();
+    while (entry != this->encryption_dictionary.end())
+    {
+        writeString(" ");
+        writeString(entry->first);
+        writeString(" ");
+        writeString(entry->second);
+        ++entry;
+    }
+    writeString(" >>");
+    closeObject(this->encryption_dict_objid);
+}
+
+// Write the PDF header: the "%PDF-" version line, a comment line
+// of four bytes with the high bit set, and, in QDF mode, the QDF
+// marker. The version written is the input file's version unless
+// min_pdf_version is set and numerically greater.
+void
+QPDFWriter::writeHeader()
+{
+    std::string version = pdf.getPDFVersion();
+    if (! this->min_pdf_version.empty())
+    {
+        float ov = atof(version.c_str());
+        float mv = atof(this->min_pdf_version.c_str());
+        if (mv > ov)
+        {
+            version = this->min_pdf_version;
+        }
+    }
+
+    writeString("%PDF-");
+    writeString(version);
+    // This string of binary characters would not be valid UTF-8, so
+    // it really should be treated as binary. The bytes are spelled
+    // with escapes so that exactly these four bytes are written no
+    // matter how this source file is encoded.
+    writeString("\n%\xbf\xf7\xa2\xfe\n");
+    writeStringQDF("%QDF-1.0\n\n");
+}
+
+// Generate the linearization hint stream and write it as object
+// hint_id. The data produced by generateHintStream() is written
+// with a /FlateDecode filter entry, /S, an optional /O (only when
+// non-zero) and /Length, and goes through the encryption filter.
+void
+QPDFWriter::writeHintStream(int hint_id)
+{
+    PointerHolder<Buffer> hint_buffer;
+    int S = 0;
+    int O = 0;
+    pdf.generateHintStream(
+        this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O);
+
+    openObject(hint_id);
+    setDataKey(hint_id);
+
+    unsigned char* hs = hint_buffer.getPointer()->getBuffer();
+    unsigned long hlen = hint_buffer.getPointer()->getSize();
+
+    writeString("<< /Filter /FlateDecode /S ");
+    writeString(QUtil::int_to_string(S));
+    if (O)
+    {
+        writeString(" /O ");
+        writeString(QUtil::int_to_string(O));
+    }
+    writeString(" /Length ");
+    writeString(QUtil::int_to_string(hlen));
+    writeString(" >>\nstream\n");
+
+    if (this->encrypted)
+    {
+        QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
+    }
+    pushEncryptionFilter();
+    writeBuffer(hint_buffer);
+    popPipelineStack();
+
+    // Add a newline before endstream unless the data already ends
+    // with one. An empty buffer is checked for explicitly so that
+    // hs[hlen - 1] is never read out of bounds; in that case the
+    // output already ends with the newline after "stream".
+    if ((hlen > 0) && (hs[hlen - 1] != '\n'))
+    {
+        writeString("\n");
+    }
+    writeString("endstream");
+    closeObject(hint_id);
+}
+
+// Convenience overload: write a cross-reference table with no
+// /Prev value, real offsets, and no hint stream adjustment.
+int
+QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
+{
+    int const prev = 0;
+    bool const suppress_offsets = false;
+    int const hint_id = 0;
+    int const hint_offset = 0;
+    int const hint_length = 0;
+    return writeXRefTable(which, first, last, size, prev,
+                          suppress_offsets,
+                          hint_id, hint_offset, hint_length);
+}
+
+// Write a classic cross-reference table for objects first..last,
+// followed by the trailer dictionary.
+//
+// suppress_offsets: write 0 for every offset instead of the value
+// stored in the xref table.
+// hint_id, hint_offset, hint_length: when hint_id is non-zero,
+// every object other than hint_id whose offset is at or beyond
+// hint_offset has hint_length added to its offset.
+//
+// Returns the output position immediately after the subsection
+// header's entry count, i.e. before the newline that precedes the
+// first entry.
+int
+QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
+                           int prev, bool suppress_offsets,
+                           int hint_id, int hint_offset, int hint_length)
+{
+    writeString("xref\n");
+    writeString(QUtil::int_to_string(first));
+    writeString(" ");
+    writeString(QUtil::int_to_string(last - first + 1));
+    int const space_before_zero = this->pipeline->getCount();
+    writeString("\n");
+
+    for (int objid = first; objid <= last; ++objid)
+    {
+        if (objid == 0)
+        {
+            // Object 0 is always the head of the free list.
+            writeString("0000000000 65535 f \n");
+            continue;
+        }
+
+        int offset = 0;
+        if (! suppress_offsets)
+        {
+            offset = this->xref[objid].getOffset();
+            bool const shifted_by_hint =
+                ((hint_id != 0) &&
+                 (objid != hint_id) &&
+                 (offset >= hint_offset));
+            if (shifted_by_hint)
+            {
+                offset += hint_length;
+            }
+        }
+        writeString(QUtil::int_to_string(offset, 10));
+        writeString(" 00000 n \n");
+    }
+
+    writeTrailer(which, size, false, prev);
+    writeString("\n");
+    return space_before_zero;
+}
+
+// Convenience overload: write a cross-reference stream with no
+// /Prev value and no hint stream adjustment.
+int
+QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset,
+                            trailer_e which, int first, int last, int size)
+{
+    int const prev = 0;
+    int const hint_id = 0;
+    int const hint_offset = 0;
+    int const hint_length = 0;
+    return writeXRefStream(objid, max_id, max_offset,
+                           which, first, last, size,
+                           prev, hint_id, hint_offset, hint_length);
+}
+
+// Write a cross-reference stream as object xref_id covering
+// objects first..last.
+//
+// Each entry consists of a one-byte type, a field wide enough for
+// the larger of max_offset and max_id, and a field wide enough for
+// max_ostream_index. The entries are buffered first (deflated with
+// a PNG predictor unless stream data is being uncompressed or QDF
+// mode is on) so the stream length is known before the dictionary
+// is written. hint_id, hint_offset and hint_length adjust offsets
+// the same way as in writeXRefTable().
+//
+// Returns the output position one byte before the start of the
+// xref stream object.
+int
+QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
+                            trailer_e which, int first, int last, int size,
+                            int prev, int hint_id,
+                            int hint_offset, int hint_length)
+{
+    int const xref_offset = this->pipeline->getCount();
+    int const space_before_zero = xref_offset - 1;
+
+    // Field 1 holds offsets and object stream numbers; field 2
+    // holds indices within object streams.
+    int const f1_size = std::max(bytesNeeded(max_offset),
+                                 bytesNeeded(max_id));
+    int const f2_size = bytesNeeded(this->max_ostream_index);
+    unsigned int const esize = 1 + f1_size + f2_size;
+
+    // The xref stream's own entry has to be in the table before
+    // the entries are generated, so it can't wait for openObject().
+    this->xref[xref_id] = QPDFXRefEntry(1, pipeline->getCount(), 0);
+
+    Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
+    bool const compressed =
+        (! ((this->stream_data_mode == s_uncompress) || this->qdf_mode));
+    if (compressed)
+    {
+        p = pushPipeline(
+            new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
+        pushPipeline(
+            new Pl_PNGFilter(
+                "pngify xref", p, Pl_PNGFilter::a_encode, esize, 0));
+    }
+    activatePipelineStack();
+
+    for (int objid = first; objid <= last; ++objid)
+    {
+        QPDFXRefEntry& entry = this->xref[objid];
+        switch (entry.getType())
+        {
+          case 0:
+            writeBinary(0, 1);
+            writeBinary(0, f1_size);
+            writeBinary(0, f2_size);
+            break;
+
+          case 1:
+            {
+                int offset = entry.getOffset();
+                bool const shifted_by_hint =
+                    ((hint_id != 0) &&
+                     (objid != hint_id) &&
+                     (offset >= hint_offset));
+                if (shifted_by_hint)
+                {
+                    offset += hint_length;
+                }
+                writeBinary(1, 1);
+                writeBinary(offset, f1_size);
+                writeBinary(0, f2_size);
+            }
+            break;
+
+          case 2:
+            writeBinary(2, 1);
+            writeBinary(entry.getObjStreamNumber(), f1_size);
+            writeBinary(entry.getObjStreamIndex(), f2_size);
+            break;
+
+          default:
+            throw QEXC::Internal("invalid type writing xref stream");
+            break;
+        }
+    }
+    PointerHolder<Buffer> xref_data;
+    popPipelineStack(&xref_data);
+
+    openObject(xref_id);
+    writeString("<<");
+    writeStringQDF("\n ");
+    writeString(" /Type /XRef");
+    writeStringQDF("\n ");
+    writeString(" /Length " +
+                QUtil::int_to_string(xref_data.getPointer()->getSize()));
+    if (compressed)
+    {
+        writeStringQDF("\n ");
+        writeString(" /Filter /FlateDecode");
+        writeStringQDF("\n ");
+        writeString(" /DecodeParms << /Columns " +
+                    QUtil::int_to_string(esize) + " /Predictor 12 >>");
+    }
+    writeStringQDF("\n ");
+    writeString(" /W [ 1 " +
+                QUtil::int_to_string(f1_size) + " " +
+                QUtil::int_to_string(f2_size) + " ]");
+    bool const covers_all = ((first == 0) && (last == size - 1));
+    if (! covers_all)
+    {
+        // /Index is only written when the stream does not cover
+        // objects 0 through size - 1.
+        writeString(" /Index [ " +
+                    QUtil::int_to_string(first) + " " +
+                    QUtil::int_to_string(last - first + 1) + " ]");
+    }
+    writeTrailer(which, size, true, prev);
+    writeString("\nstream\n");
+    writeBuffer(xref_data);
+    writeString("\nendstream");
+    closeObject(xref_id);
+    return space_before_zero;
+}
+
+void
+QPDFWriter::writeLinearized()
+{
+ // Optimize file and enqueue objects in order
+
+ bool need_xref_stream = (! this->object_to_object_stream.empty());
+ pdf.optimize(this->object_to_object_stream);
+
+ std::vector<QPDFObjectHandle> part4;
+ std::vector<QPDFObjectHandle> part6;
+ std::vector<QPDFObjectHandle> part7;
+ std::vector<QPDFObjectHandle> part8;
+ std::vector<QPDFObjectHandle> part9;
+ pdf.getLinearizedParts(this->object_to_object_stream,
+ part4, part6, part7, part8, part9);
+
+ // Object number sequence:
+ //
+ // second half
+ // second half uncompressed objects
+ // second half xref stream, if any
+ // second half compressed objects
+ // first half
+ // linearization dictionary
+ // first half xref stream, if any
+ // part 4 uncompressed objects
+ // encryption dictionary, if any
+ // hint stream
+ // part 6 uncompressed objects
+ // first half compressed objects
+ //
+
+ // Second half objects
+ int second_half_uncompressed = part7.size() + part8.size() + part9.size();
+ int second_half_first_obj = 1;
+ int after_second_half = 1 + second_half_uncompressed;
+ this->next_objid = after_second_half;
+ int second_half_xref = 0;
+ if (need_xref_stream)
+ {
+ second_half_xref = this->next_objid++;
+ }
+ // Assign numbers to all compressed objects in the second half.
+ std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
+ for (int i = 0; i < 3; ++i)
+ {
+ for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
+ iter != (*vecs2[i]).end(); ++iter)
+ {
+ assignCompressedObjectNumbers((*iter).getObjectID());
+ }
+ }
+ int second_half_end = this->next_objid - 1;
+ int second_trailer_size = this->next_objid;
+
+ // First half objects
+ int first_half_start = this->next_objid;
+ int lindict_id = this->next_objid++;
+ int first_half_xref = 0;
+ if (need_xref_stream)
+ {
+ first_half_xref = this->next_objid++;
+ }
+ int part4_first_obj = this->next_objid;
+ this->next_objid += part4.size();
+ int after_part4 = this->next_objid;
+ if (this->encrypted)
+ {
+ this->encryption_dict_objid = this->next_objid++;
+ }
+ int hint_id = this->next_objid++;
+ int part6_first_obj = this->next_objid;
+ this->next_objid += part6.size();
+ int after_part6 = this->next_objid;
+ // Assign numbers to all compressed objects in the first half
+ std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
+ for (int i = 0; i < 2; ++i)
+ {
+ for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
+ iter != (*vecs1[i]).end(); ++iter)
+ {
+ assignCompressedObjectNumbers((*iter).getObjectID());
+ }
+ }
+ int first_half_end = this->next_objid - 1;
+ int first_trailer_size = this->next_objid;
+
+ int part4_end_marker = part4.back().getObjectID();
+ int part6_end_marker = part6.back().getObjectID();
+ int space_before_zero = 0;
+ int file_size = 0;
+ int part6_end_offset = 0;
+ int first_half_max_obj_offset = 0;
+ int second_xref_offset = 0;
+ int first_xref_end = 0;
+ int second_xref_end = 0;
+
+ this->next_objid = part4_first_obj;
+ enqueuePart(part4);
+ assert(this->next_objid = after_part4);
+ this->next_objid = part6_first_obj;
+ enqueuePart(part6);
+ assert(this->next_objid == after_part6);
+ this->next_objid = second_half_first_obj;
+ enqueuePart(part7);
+ enqueuePart(part8);
+ enqueuePart(part9);
+ assert(this->next_objid == after_second_half);
+
+ int hint_length = 0;
+ PointerHolder<Buffer> hint_buffer;
+
+ // Write file in two passes. Part numbers refer to PDF spec 1.4.
+
+ for (int pass = 1; pass <= 2; ++pass)
+ {
+ if (pass == 1)
+ {
+ pushDiscardFilter();
+ }
+
+ // Part 1: header
+
+ writeHeader();
+
+ // Part 2: linearization parameter dictionary. Save enough
+ // space to write real dictionary. 150 characters is enough
+ // space if all numerical values in the parameter dictionary
+ // are 10 digits long plus a few extra characters for safety.
+
+ int pos = this->pipeline->getCount();
+ openObject(lindict_id);
+ writeString("<<");
+ if (pass == 2)
+ {
+ std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
+ int first_page_object = obj_renumber[pages[0].getObjectID()];
+ int npages = pages.size();
+
+ writeString(" /Linearized 1 /L ");
+ writeString(QUtil::int_to_string(file_size + hint_length));
+ // Implementation note 121 states that a space is
+ // mandatory after this open bracket.
+ writeString(" /H [ ");
+ writeString(QUtil::int_to_string(this->xref[hint_id].getOffset()));
+ writeString(" ");
+ writeString(QUtil::int_to_string(hint_length));
+ writeString(" ] /O ");
+ writeString(QUtil::int_to_string(first_page_object));
+ writeString(" /E ");
+ writeString(QUtil::int_to_string(part6_end_offset + hint_length));
+ writeString(" /N ");
+ writeString(QUtil::int_to_string(npages));
+ writeString(" /T ");
+ writeString(QUtil::int_to_string(space_before_zero + hint_length));
+ }
+ writeString(" >>");
+ closeObject(lindict_id);
+ static int const pad = 150;
+ int spaces = (pos + pad - this->pipeline->getCount());
+ assert(spaces >= 0);
+ for (int i = 0; i < spaces; ++i)
+ {
+ writeString(" ");
+ }
+ writeString("\n");
+
+ // Part 3: first page cross reference table and trailer.
+
+ int first_xref_offset = this->pipeline->getCount();
+ int hint_offset = 0;
+ if (pass == 2)
+ {
+ hint_offset = this->xref[hint_id].getOffset();
+ }
+ if (need_xref_stream)
+ {
+ // Must pad here too.
+ if (pass == 1)
+ {
+ // first_half_max_obj_offset is very likely to fall
+ // within the first 64K of the document (thus
+ // requiring two bytes for offsets) since it is the
+ // offset of the last uncompressed object in page 1.
+ // We allow for it to do otherwise though.
+ first_half_max_obj_offset = 65535;
+ }
+ pos = this->pipeline->getCount();
+ writeXRefStream(first_half_xref, first_half_end,
+ first_half_max_obj_offset,
+ t_lin_first, first_half_start, first_half_end,
+ first_trailer_size,
+ hint_length + second_xref_offset,
+ hint_id, hint_offset, hint_length);
+ int endpos = this->pipeline->getCount();
+ if (pass == 1)
+ {
+ // Pad so we have enough room for the real xref
+ // stream. In an extremely unlikely worst case,
+ // first_half_max_obj_offset could be enough larger to
+ // require two extra bytes beyond what we calculated
+ // in pass 1. This means we need to save two extra
+ // bytes for each xref entry. To that, we'll add 10
+ // extra bytes for number length increases.
+ int possible_extra =
+ 10 + (2 * (first_half_end - first_half_start + 1));
+ for (int i = 0; i < possible_extra; ++i)
+ {
+ writeString(" ");
+ }
+ first_xref_end = this->pipeline->getCount();
+ }
+ else
+ {
+ // Pad so that the next object starts at the same
+ // place as in pass 1.
+ for (int i = 0; i < first_xref_end - endpos; ++i)
+ {
+ writeString(" ");
+ }
+ assert(this->pipeline->getCount() == first_xref_end);
+ }
+ writeString("\n");
+ }
+ else
+ {
+ writeXRefTable(t_lin_first, first_half_start, first_half_end,
+ first_trailer_size, hint_length + second_xref_offset,
+ (pass == 1), hint_id, hint_offset, hint_length);
+ writeString("startxref\n0\n%%EOF\n");
+ }
+
+ // Parts 4 through 9
+
+ for (std::list<QPDFObjectHandle>::iterator iter =
+ this->object_queue.begin();
+ iter != this->object_queue.end(); ++iter)
+ {
+ QPDFObjectHandle cur_object = (*iter);
+ if (cur_object.getObjectID() == part6_end_marker)
+ {
+ first_half_max_obj_offset = this->pipeline->getCount();
+ }
+ writeObject(cur_object);
+ if (cur_object.getObjectID() == part4_end_marker)
+ {
+ if (this->encrypted)
+ {
+ writeEncryptionDictionary();
+ }
+ if (pass == 1)
+ {
+ this->xref[hint_id] =
+ QPDFXRefEntry(1, this->pipeline->getCount(), 0);
+ }
+ else
+ {
+ // Part 5: hint stream
+ writeBuffer(hint_buffer);
+ }
+ }
+ if (cur_object.getObjectID() == part6_end_marker)
+ {
+ part6_end_offset = this->pipeline->getCount();
+ }
+ }
+
+ // Part 10: overflow hint stream -- not used
+
+ // Part 11: main cross reference table and trailer
+
+ second_xref_offset = this->pipeline->getCount();
+ if (need_xref_stream)
+ {
+ space_before_zero =
+ writeXRefStream(second_half_xref,
+ second_half_end, second_xref_offset,
+ t_lin_second, 0, second_half_end,
+ second_trailer_size);
+ if (pass == 1)
+ {
+ // Add some padding -- we need an accurate file_size
+ // number, and this could change if the pass 2 xref
+ // stream compresses differently. There shouldn't be
+ // much difference, so we'll just pad 100 characters.
+ // This is unscientific though, and may not always
+ // work. The only way we could really get around this
+ // would be to seek back to the beginning of the file
+ // and update /L in the linearization dictionary, but
+ // that would be the only thing in the design that
+ // would require the output file to be seekable.
+ for (int i = 0; i < 99; ++i)
+ {
+ writeString(" ");
+ }
+ writeString("\n");
+ second_xref_end = this->pipeline->getCount();
+ }
+ else
+ {
+ // Make the file size the same.
+ int pos = this->pipeline->getCount();
+ while (pos < second_xref_end + hint_length - 1)
+ {
+ ++pos;
+ writeString(" ");
+ }
+ writeString("\n");
+ // If this assertion fails, maybe we didn't have
+ // enough padding above.
+ assert(this->pipeline->getCount() ==
+ second_xref_end + hint_length);
+ }
+ }
+ else
+ {
+ space_before_zero =
+ writeXRefTable(t_lin_second, 0, second_half_end,
+ second_trailer_size);
+ }
+ writeString("startxref\n");
+ writeString(QUtil::int_to_string(first_xref_offset));
+ writeString("\n%%EOF\n");
+
+ if (pass == 1)
+ {
+ // Close first pass pipeline
+ file_size = this->pipeline->getCount();
+ popPipelineStack();
+
+ // Save hint offset since it will be set to zero by
+ // calling openObject.
+ int hint_offset = this->xref[hint_id].getOffset();
+
+ // Write hint stream to a buffer
+ pushPipeline(new Pl_Buffer("hint buffer"));
+ activatePipelineStack();
+ writeHintStream(hint_id);
+ popPipelineStack(&hint_buffer);
+ hint_length = hint_buffer.getPointer()->getSize();
+
+ // Restore hint offset
+ this->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);
+ }
+ }
+}
+
+ void
+ QPDFWriter::writeStandard()
+ {
+     // Non-linearized output: header, every queued object, the
+     // optional encryption dictionary, then the cross-reference data
+     // and the startxref/%%EOF footer.
+     writeHeader();
+
+     // The trailer's /Root entry is queued before anything else.
+     QPDFObjectHandle trailer = pdf.getTrailer();
+     enqueueObject(trailer.getKey("/Root"));
+
+     // Queue every value stored in the trailer dictionary.  /Root
+     // comes up again here; it is already queued, so enqueuing it a
+     // second time is a no-op.
+     std::set<std::string> trailer_keys = trailer.getKeys();
+     std::set<std::string>::iterator key = trailer_keys.begin();
+     while (key != trailer_keys.end())
+     {
+         enqueueObject(trailer.getKey(*key));
+         ++key;
+     }
+
+     // Write objects in queue order until the queue is empty.
+     while (! this->object_queue.empty())
+     {
+         QPDFObjectHandle next_object = this->object_queue.front();
+         this->object_queue.pop_front();
+         writeObject(next_object);
+     }
+
+     if (this->encrypted)
+     {
+         writeEncryptionDictionary();
+     }
+
+     // Remember where the cross-reference data starts; the footer
+     // repeats this offset after "startxref".
+     off_t xref_offset = this->pipeline->getCount();
+     if (! this->object_stream_to_objects.empty())
+     {
+         // Cross-reference stream; it takes the next free object id.
+         int xref_id = this->next_objid++;
+         writeXRefStream(xref_id, xref_id, xref_offset, t_normal,
+                         0, this->next_objid - 1, this->next_objid);
+     }
+     else
+     {
+         // Regular cross-reference table
+         writeXRefTable(t_normal, 0, this->next_objid - 1, this->next_objid);
+     }
+     writeString("startxref\n");
+     writeString(QUtil::int_to_string(xref_offset));
+     writeString("\n%%EOF\n");
+ }
diff --git a/libqpdf/QPDFXRefEntry.cc b/libqpdf/QPDFXRefEntry.cc
new file mode 100644
index 00000000..669a2f13
--- /dev/null
+++ b/libqpdf/QPDFXRefEntry.cc
@@ -0,0 +1,61 @@
+
+#include <qpdf/QPDFXRefEntry.hh>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QUtil.hh>
+
+ QPDFXRefEntry::QPDFXRefEntry() : // default entry: type and both fields start at zero
+ type(0),
+ field1(0),
+ field2(0)
+ {
+ }
+
+ // Builds an entry of the given type.  Only types 1 and 2 are
+ // accepted; any other value raises QPDFExc.
+ QPDFXRefEntry::QPDFXRefEntry(int type, int field1, int field2) :
+     type(type),
+     field1(field1),
+     field2(field2)
+ {
+     bool const valid_type = ((type == 1) || (type == 2));
+     if (! valid_type)
+     {
+         throw QPDFExc("invalid xref type " + QUtil::int_to_string(type));
+     }
+ }
+
+ // Returns the stored entry type.
+ int
+ QPDFXRefEntry::getType() const
+ {
+     int const entry_type = this->type;
+     return entry_type;
+ }
+
+ // Returns field1 as an offset.  Only valid for type 1 entries;
+ // any other type raises QPDFExc.
+ int
+ QPDFXRefEntry::getOffset() const
+ {
+     if (this->type == 1)
+     {
+         return this->field1;
+     }
+     throw QPDFExc(
+         "getOffset called for xref entry of type != 1");
+ }
+
+ // Returns field1 as an object stream number.  Only valid for type 2
+ // entries; any other type raises QPDFExc.
+ int
+ QPDFXRefEntry::getObjStreamNumber() const
+ {
+     if (this->type == 2)
+     {
+         return this->field1;
+     }
+     throw QPDFExc(
+         "getObjStreamNumber called for xref entry of type != 2");
+ }
+
+ // Returns field2 as an index within an object stream.  Only valid
+ // for type 2 entries; any other type raises QPDFExc.
+ int
+ QPDFXRefEntry::getObjStreamIndex() const
+ {
+     if (this->type == 2)
+     {
+         return this->field2;
+     }
+     throw QPDFExc(
+         "getObjStreamIndex called for xref entry of type != 2");
+ }
diff --git a/libqpdf/QPDF_Array.cc b/libqpdf/QPDF_Array.cc
new file mode 100644
index 00000000..d1edbfdd
--- /dev/null
+++ b/libqpdf/QPDF_Array.cc
@@ -0,0 +1,51 @@
+
+#include <qpdf/QPDF_Array.hh>
+
+#include <qpdf/QEXC.hh>
+
+ QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& items) : // stores a copy of the given item list
+ items(items)
+ {
+ }
+
+ QPDF_Array::~QPDF_Array() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Returns the array as "[ item item ... ]": each item's own
+ // unparse() output followed by one space, inside square brackets.
+ std::string
+ QPDF_Array::unparse()
+ {
+     std::string text("[ ");
+     std::vector<QPDFObjectHandle>::size_type const count =
+         this->items.size();
+     for (std::vector<QPDFObjectHandle>::size_type i = 0; i < count; ++i)
+     {
+         text += this->items[i].unparse();
+         text += " ";
+     }
+     text += "]";
+     return text;
+ }
+
+ // Returns the number of items held by the array.
+ int
+ QPDF_Array::getNItems() const
+ {
+     int const count = this->items.size();
+     return count;
+ }
+
+ // Returns the item at index n.  Throws QEXC::Internal when n is
+ // negative or not smaller than the number of items.
+ QPDFObjectHandle
+ QPDF_Array::getItem(int n) const
+ {
+     if ((n < 0) || (n >= (int)this->items.size()))
+     {
+         // The message used to read "bounds array accessing ...".
+         throw QEXC::Internal("bounds error accessing QPDF_Array element");
+     }
+     return this->items[n];
+ }
+
+ // Replaces the item at index n with oh.  The index is validated by
+ // getItem, which throws for an out-of-range n.
+ void
+ QPDF_Array::setItem(int n, QPDFObjectHandle const& oh)
+ {
+     // Result discarded; the call is made only for its bounds check.
+     getItem(n);
+     QPDFObjectHandle& slot = this->items[n];
+     slot = oh;
+ }
diff --git a/libqpdf/QPDF_Bool.cc b/libqpdf/QPDF_Bool.cc
new file mode 100644
index 00000000..2b50c4c2
--- /dev/null
+++ b/libqpdf/QPDF_Bool.cc
@@ -0,0 +1,23 @@
+
+#include <qpdf/QPDF_Bool.hh>
+
+ QPDF_Bool::QPDF_Bool(bool val) : // stores the given boolean value
+ val(val)
+ {
+ }
+
+ QPDF_Bool::~QPDF_Bool() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Returns "true" or "false" according to the stored value.
+ std::string
+ QPDF_Bool::unparse()
+ {
+     if (this->val)
+     {
+         return "true";
+     }
+     return "false";
+ }
+
+ // Returns the stored boolean value.
+ bool
+ QPDF_Bool::getVal() const
+ {
+     bool const stored = this->val;
+     return stored;
+ }
diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc
new file mode 100644
index 00000000..654df688
--- /dev/null
+++ b/libqpdf/QPDF_Dictionary.cc
@@ -0,0 +1,84 @@
+
+#include <qpdf/QPDF_Dictionary.hh>
+
+#include <qpdf/QPDF_Null.hh>
+#include <qpdf/QPDF_Name.hh>
+
+ QPDF_Dictionary::QPDF_Dictionary( // stores a copy of the given key/value map
+ std::map<std::string, QPDFObjectHandle> const& items) :
+ items(items)
+ {
+ }
+
+ QPDF_Dictionary::~QPDF_Dictionary() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Returns the dictionary as "<< key value key value ... >>".  Keys
+ // go through QPDF_Name::normalizeName; values use their own
+ // unparse().  Entries appear in the map's iteration order.
+ std::string
+ QPDF_Dictionary::unparse()
+ {
+     typedef std::map<std::string, QPDFObjectHandle>::iterator entry_iter;
+
+     std::string text("<< ");
+     entry_iter const last = this->items.end();
+     for (entry_iter entry = this->items.begin(); entry != last; ++entry)
+     {
+         text += QPDF_Name::normalizeName(entry->first);
+         text += " ";
+         text += entry->second.unparse();
+         text += " ";
+     }
+     text += ">>";
+     return text;
+ }
+
+ // Returns true only when the key is present and its value is not
+ // the null object.
+ bool
+ QPDF_Dictionary::hasKey(std::string const& key)
+ {
+     std::map<std::string, QPDFObjectHandle>::iterator entry =
+         this->items.find(key);
+     if (entry == this->items.end())
+     {
+         return false;
+     }
+     return (! entry->second.isNull());
+ }
+
+ // Returns the value stored under key.  A missing key yields a
+ // newly created null object, since the PDF spec says fetching a
+ // non-existent key from a dictionary returns the null object.
+ QPDFObjectHandle
+ QPDF_Dictionary::getKey(std::string const& key)
+ {
+     std::map<std::string, QPDFObjectHandle>::iterator entry =
+         this->items.find(key);
+     if (entry == this->items.end())
+     {
+         return QPDFObjectHandle::newNull();
+     }
+     // The stored value may itself be a null object.
+     return entry->second;
+ }
+
+ // Returns the set of keys whose values are not the null object,
+ // i.e. exactly the keys for which hasKey() is true.
+ std::set<std::string>
+ QPDF_Dictionary::getKeys()
+ {
+     typedef std::map<std::string, QPDFObjectHandle>::iterator entry_iter;
+
+     std::set<std::string> keys;
+     for (entry_iter entry = this->items.begin();
+          entry != this->items.end(); ++entry)
+     {
+         if (entry->second.isNull())
+         {
+             // Entries holding null count as absent.
+             continue;
+         }
+         keys.insert(entry->first);
+     }
+     return keys;
+ }
+
+ // Stores value under key, adding the entry when it does not exist
+ // and overwriting it when it does.
+ void
+ QPDF_Dictionary::replaceKey(std::string const& key,
+                             QPDFObjectHandle const& value)
+ {
+     QPDFObjectHandle& slot = this->items[key];
+     slot = value;
+ }
+
+ // Removes the entry stored under key; does nothing when the key is
+ // not present.
+ void
+ QPDF_Dictionary::removeKey(std::string const& key)
+ {
+     std::map<std::string, QPDFObjectHandle>::iterator entry =
+         this->items.find(key);
+     if (entry != this->items.end())
+     {
+         this->items.erase(entry);
+     }
+ }
diff --git a/libqpdf/QPDF_Integer.cc b/libqpdf/QPDF_Integer.cc
new file mode 100644
index 00000000..988519d0
--- /dev/null
+++ b/libqpdf/QPDF_Integer.cc
@@ -0,0 +1,25 @@
+
+#include <qpdf/QPDF_Integer.hh>
+
+#include <qpdf/QUtil.hh>
+
+ QPDF_Integer::QPDF_Integer(int val) : // stores the given integer value
+ val(val)
+ {
+ }
+
+ QPDF_Integer::~QPDF_Integer() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Returns the stored integer formatted by QUtil::int_to_string.
+ std::string
+ QPDF_Integer::unparse()
+ {
+     std::string const text = QUtil::int_to_string(this->val);
+     return text;
+ }
+
+ // Returns the stored integer value.
+ int
+ QPDF_Integer::getVal() const
+ {
+     int const stored = this->val;
+     return stored;
+ }
diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc
new file mode 100644
index 00000000..f57ced04
--- /dev/null
+++ b/libqpdf/QPDF_Name.cc
@@ -0,0 +1,46 @@
+
+#include <qpdf/QPDF_Name.hh>
+
+ QPDF_Name::QPDF_Name(std::string const& name) : // stores a copy of the given name string
+ name(name)
+ {
+ }
+
+ QPDF_Name::~QPDF_Name() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Returns name with special characters escaped.  The first
+ // character is copied unchanged.  Every later character that is one
+ // of "#()<>[]{}/%" or lies outside the range 33..126 is replaced by
+ // '#' followed by two lowercase hex digits.  An empty name yields
+ // an empty string.
+ std::string
+ QPDF_Name::normalizeName(std::string const& name)
+ {
+     if (name.empty())
+     {
+         // Without this guard, name[0] would append a NUL byte to the
+         // result for an empty name.
+         return name;
+     }
+
+     static char const hex_digits[] = "0123456789abcdef";
+
+     std::string result;
+     result += name[0];
+     for (std::string::size_type i = 1; i < name.length(); ++i)
+     {
+         char ch = name[i];
+         // Don't use locale/ctype here; follow PDF spec guidelines.
+         if (strchr("#()<>[]{}/%", ch) || (ch < 33) || (ch > 126))
+         {
+             // Hex table lookup, so no fixed-size sprintf buffer.
+             unsigned char const byte = (unsigned char) ch;
+             result += '#';
+             result += hex_digits[byte >> 4];
+             result += hex_digits[byte & 0x0f];
+         }
+         else
+         {
+             result += ch;
+         }
+     }
+     return result;
+ }
+
+ // Returns the stored name after passing it through normalizeName.
+ std::string
+ QPDF_Name::unparse()
+ {
+     std::string const text = normalizeName(this->name);
+     return text;
+ }
+
+ // Returns a copy of the stored name, without normalization.
+ std::string
+ QPDF_Name::getName() const
+ {
+     std::string const copy(this->name);
+     return copy;
+ }
diff --git a/libqpdf/QPDF_Null.cc b/libqpdf/QPDF_Null.cc
new file mode 100644
index 00000000..57a78b7e
--- /dev/null
+++ b/libqpdf/QPDF_Null.cc
@@ -0,0 +1,12 @@
+
+#include <qpdf/QPDF_Null.hh>
+
+ QPDF_Null::~QPDF_Null() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Returns the literal text "null".
+ std::string
+ QPDF_Null::unparse()
+ {
+     std::string const text("null");
+     return text;
+ }
diff --git a/libqpdf/QPDF_Real.cc b/libqpdf/QPDF_Real.cc
new file mode 100644
index 00000000..87a19cb2
--- /dev/null
+++ b/libqpdf/QPDF_Real.cc
@@ -0,0 +1,23 @@
+
+#include <qpdf/QPDF_Real.hh>
+
+ QPDF_Real::QPDF_Real(std::string const& val) : // keeps the value in the string form it was given
+ val(val)
+ {
+ }
+
+ QPDF_Real::~QPDF_Real() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Returns the stored string unchanged.
+ std::string
+ QPDF_Real::unparse()
+ {
+     std::string const text(this->val);
+     return text;
+ }
+
+ // Returns a copy of the stored string form of the value.
+ std::string
+ QPDF_Real::getVal()
+ {
+     std::string const copy(this->val);
+     return copy;
+ }
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
new file mode 100644
index 00000000..9694f837
--- /dev/null
+++ b/libqpdf/QPDF_Stream.cc
@@ -0,0 +1,309 @@
+
+#include <qpdf/QPDF_Stream.hh>
+
+#include <qpdf/QEXC.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/Pipeline.hh>
+#include <qpdf/Pl_Flate.hh>
+#include <qpdf/Pl_PNGFilter.hh>
+#include <qpdf/Pl_RC4.hh>
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/Pl_ASCII85Decoder.hh>
+#include <qpdf/Pl_ASCIIHexDecoder.hh>
+#include <qpdf/Pl_LZWDecoder.hh>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/Pl_QPDFTokenizer.hh>
+
+ // Records the owning QPDF, the object id and generation, the
+ // stream dictionary, and the offset and length of the stream data.
+ // Throws QEXC::Internal when stream_dict is not a dictionary.
+ QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
+                          QPDFObjectHandle stream_dict,
+                          off_t offset, int length) :
+     qpdf(qpdf),
+     objid(objid),
+     generation(generation),
+     stream_dict(stream_dict),
+     offset(offset),
+     length(length)
+ {
+     bool const dict_ok = stream_dict.isDictionary();
+     if (! dict_ok)
+     {
+         throw QEXC::Internal("stream object instantiated with non-dictionary "
+                              "object for dictionary");
+     }
+ }
+
+ QPDF_Stream::~QPDF_Stream() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Streams are unparsed as indirect references: "<objid> <gen> R".
+ std::string
+ QPDF_Stream::unparse()
+ {
+     std::string reference = QUtil::int_to_string(this->objid);
+     reference += " ";
+     reference += QUtil::int_to_string(this->generation);
+     reference += " R";
+     return reference;
+ }
+
+ // Returns the handle of the stream dictionary.
+ QPDFObjectHandle
+ QPDF_Stream::getDict() const
+ {
+     QPDFObjectHandle dict = this->stream_dict;
+     return dict;
+ }
+
+ // Pipes the stream data, with filtering requested, into a memory
+ // buffer and returns that buffer.  Throws QPDFExc when
+ // pipeStreamData reports that the stream could not be filtered.
+ PointerHolder<Buffer>
+ QPDF_Stream::getStreamData()
+ {
+     Pl_Buffer collector("stream data buffer");
+     bool const filtered = pipeStreamData(&collector, true, false, false);
+     if (! filtered)
+     {
+         throw QPDFExc("getStreamData called on unfilterable stream");
+     }
+     return collector.getBuffer();
+ }
+
+ // Decides whether this stream's /DecodeParms and /Filter entries
+ // are all supported.  The filter names found in /Filter are
+ // appended to `filters'; predictor, columns and early_code_change
+ // receive the decode parameters (or their defaults).  Returns true
+ // when everything is supported.  Throws QPDFExc when /DecodeParms
+ // is neither null nor a dictionary, or when /Filter is not null, a
+ // name, or an array of names.
+ bool
+ QPDF_Stream::filterable(std::vector<std::string>& filters,
+                         int& predictor, int& columns,
+                         bool& early_code_change)
+ {
+     // Defaults as per the PDF spec
+     predictor = 1;
+     columns = 0;
+     early_code_change = true;
+
+     bool supported = true;
+
+     // Decode parameters: every key must be one we understand and
+     // must carry a value we can handle.
+
+     QPDFObjectHandle decode_obj =
+         this->stream_dict.getKey("/DecodeParms");
+     if (! decode_obj.isNull())
+     {
+         if (! decode_obj.isDictionary())
+         {
+             throw QPDFExc(qpdf->getFilename(), this->offset,
+                           "invalid decode parameters object type for this stream");
+         }
+
+         std::set<std::string> parm_keys = decode_obj.getKeys();
+         for (std::set<std::string>::iterator parm = parm_keys.begin();
+              parm != parm_keys.end(); ++parm)
+         {
+             std::string const& key = *parm;
+             if (key == "/Predictor")
+             {
+                 QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
+                 if (! predictor_obj.isInteger())
+                 {
+                     supported = false;
+                 }
+                 else
+                 {
+                     predictor = predictor_obj.getIntValue();
+                     // Only predictors 1 and 12 are handled.
+                     if ((predictor != 1) && (predictor != 12))
+                     {
+                         supported = false;
+                     }
+                 }
+             }
+             else if (key == "/EarlyChange")
+             {
+                 QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
+                 if (! earlychange_obj.isInteger())
+                 {
+                     supported = false;
+                 }
+                 else
+                 {
+                     int const earlychange = earlychange_obj.getIntValue();
+                     early_code_change = (earlychange == 1);
+                     if ((earlychange != 0) && (earlychange != 1))
+                     {
+                         supported = false;
+                     }
+                 }
+             }
+             else if (key == "/Columns")
+             {
+                 QPDFObjectHandle columns_obj = decode_obj.getKey(key);
+                 if (! columns_obj.isInteger())
+                 {
+                     supported = false;
+                 }
+                 else
+                 {
+                     columns = columns_obj.getIntValue();
+                 }
+             }
+             else
+             {
+                 // Unrecognized decode parameter
+                 supported = false;
+             }
+         }
+     }
+
+     if ((predictor > 1) && (columns == 0))
+     {
+         // A predictor above 1 without a column count is invalid.
+         supported = false;
+     }
+
+     if (! supported)
+     {
+         return false;
+     }
+
+     // Collect the filter names.
+
+     QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
+     bool bad_filter_object = false;
+
+     if (filter_obj.isNull())
+     {
+         // No filters
+     }
+     else if (filter_obj.isName())
+     {
+         // One filter
+         filters.push_back(filter_obj.getName());
+     }
+     else if (filter_obj.isArray())
+     {
+         // Potentially multiple filters
+         int const n_filters = filter_obj.getArrayNItems();
+         for (int i = 0; i < n_filters; ++i)
+         {
+             QPDFObjectHandle item = filter_obj.getArrayItem(i);
+             if (! item.isName())
+             {
+                 bad_filter_object = true;
+             }
+             else
+             {
+                 filters.push_back(item.getName());
+             }
+         }
+     }
+     else
+     {
+         bad_filter_object = true;
+     }
+
+     if (bad_filter_object)
+     {
+         QTC::TC("qpdf", "QPDF_Stream invalid filter");
+         throw QPDFExc(qpdf->getFilename(), this->offset,
+                       "invalid filter object type for this stream");
+     }
+
+     // `filters' now lists the filters in the order they are to be
+     // applied.  A single unsupported one makes the stream
+     // unfilterable.
+
+     for (std::vector<std::string>::size_type i = 0;
+          i < filters.size(); ++i)
+     {
+         std::string const& name = filters[i];
+         bool const known =
+             ((name == "/FlateDecode") ||
+              (name == "/LZWDecode") ||
+              (name == "/ASCII85Decode") ||
+              (name == "/ASCIIHexDecode"));
+         if (! known)
+         {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ // Sends the stream data down `pipeline'.  When `filter' is
+ // requested and filterable() agrees, decoding stages for the
+ // stream's filters are placed in front of `pipeline', preceded
+ // (on the output side) by an optional normalizer and an optional
+ // deflate stage.  Returns whether filtering was applied; with a
+ // null pipeline nothing is written and only that answer is
+ // returned.
+ bool
+ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
+                             bool normalize, bool compress)
+ {
+     std::vector<std::string> filter_names;
+     int predictor = 1;
+     int columns = 0;
+     bool early_code_change = true;
+     if (filter)
+     {
+         filter = filterable(filter_names, predictor, columns,
+                             early_code_change);
+     }
+
+     if (pipeline == 0)
+     {
+         QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
+         return filter;
+     }
+
+     // Stages are built from the output end backwards.  Each stage
+     // created here is handed to `owned' so that it is deleted when
+     // this function finishes.
+     std::vector<PointerHolder<Pipeline> > owned;
+
+     if (filter)
+     {
+         if (compress)
+         {
+             pipeline = new Pl_Flate("compress object stream", pipeline,
+                                     Pl_Flate::a_deflate);
+             owned.push_back(pipeline);
+         }
+
+         if (normalize)
+         {
+             pipeline = new Pl_QPDFTokenizer("normalizer", pipeline);
+             owned.push_back(pipeline);
+         }
+
+         // Walk the filter list from last to first.
+         for (std::vector<std::string>::size_type remaining =
+                  filter_names.size();
+              remaining > 0; --remaining)
+         {
+             std::string const& name = filter_names[remaining - 1];
+             if (name == "/LZWDecode")
+             {
+                 pipeline = new Pl_LZWDecoder("lzw decode", pipeline,
+                                              early_code_change);
+                 owned.push_back(pipeline);
+             }
+             else if (name == "/ASCIIHexDecode")
+             {
+                 pipeline = new Pl_ASCIIHexDecoder("asciiHex decode", pipeline);
+                 owned.push_back(pipeline);
+             }
+             else if (name == "/ASCII85Decode")
+             {
+                 pipeline = new Pl_ASCII85Decoder("ascii85 decode", pipeline);
+                 owned.push_back(pipeline);
+             }
+             else if (name == "/FlateDecode")
+             {
+                 if (predictor == 12)
+                 {
+                     // The PNG decode stage receives the inflated data.
+                     QTC::TC("qpdf", "QPDF_Stream PNG filter");
+                     pipeline = new Pl_PNGFilter(
+                         "png decode", pipeline, Pl_PNGFilter::a_decode,
+                         columns, 0 /* not used */);
+                     owned.push_back(pipeline);
+                 }
+
+                 pipeline = new Pl_Flate("stream inflate",
+                                         pipeline, Pl_Flate::a_inflate);
+                 owned.push_back(pipeline);
+             }
+             else
+             {
+                 throw QEXC::Internal("QPDFStream: unknown filter "
+                                      "encountered after check");
+             }
+         }
+     }
+
+     QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
+                                this->offset, this->length,
+                                this->stream_dict, pipeline);
+
+     return filter;
+ }
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
new file mode 100644
index 00000000..cc8ca042
--- /dev/null
+++ b/libqpdf/QPDF_String.cc
@@ -0,0 +1,178 @@
+
+#include <qpdf/QPDF_String.hh>
+
+#include <qpdf/QUtil.hh>
+// DO NOT USE ctype -- it is locale dependent for some things, and
+// it's not worth the risk of including it in case it may accidentally
+// be used.
+#include <string.h>
+
+ // Hand-coded instead of using ctype (see above): true for bytes in
+ // 32..126 and for bytes of 160 and up.
+ static bool is_iso_latin1_printable(unsigned char ch)
+ {
+     if (ch >= 160)
+     {
+         return true;
+     }
+     return ((ch >= 32) && (ch <= 126));
+ }
+
+ QPDF_String::QPDF_String(std::string const& val) : // stores a copy of the given bytes
+ val(val)
+ {
+ }
+
+ QPDF_String::~QPDF_String() // empty body: no explicit cleanup is performed here
+ {
+ }
+
+ // Unparses without forcing hex notation.
+ std::string
+ QPDF_String::unparse()
+ {
+     bool const force_binary = false;
+     return unparse(force_binary);
+ }
+
+ // Returns the string either as a hex string "<...>" or as a
+ // literal string "(...)".  Hex is used when force_binary is set or
+ // when the content looks binary (see below).  In literal form,
+ // newline, carriage return, tab, backspace, form feed, both
+ // parentheses and backslash are backslash-escaped; any other byte
+ // that is not ISO-Latin-1 printable becomes a three-digit octal
+ // escape.
+ std::string
+ QPDF_String::unparse(bool force_binary)
+ {
+     static char const hex_digits[] = "0123456789abcdef";
+     std::string::size_type const len = this->val.length();
+
+     bool as_hex = force_binary;
+     if (! as_hex)
+     {
+         // Printability is judged against ISO-Latin-1 by hand, never
+         // through the locale: PDF strings may hold arbitrary binary
+         // data.
+         unsigned int binary_count = 0;
+         int printable_run = 0;
+         for (std::string::size_type i = 0; i < len; ++i)
+         {
+             char ch = this->val[i];
+             bool const texty =
+                 (ch != 0) &&
+                 (is_iso_latin1_printable(ch) ||
+                  (strchr("\n\r\t\b\f", ch) != 0));
+             if (texty)
+             {
+                 if (++printable_run > 5)
+                 {
+                     // More than five printable characters in a row:
+                     // show the string as text regardless of the rest.
+                     binary_count = 0;
+                     break;
+                 }
+             }
+             else
+             {
+                 ++binary_count;
+                 printable_run = 0;
+             }
+         }
+
+         // Switch to hex when more than 20% of the bytes counted
+         // above are non-printable.
+         if (5 * binary_count > len)
+         {
+             as_hex = true;
+         }
+     }
+
+     std::string result;
+     if (as_hex)
+     {
+         result += "<";
+         for (std::string::size_type i = 0; i < len; ++i)
+         {
+             // Two lowercase hex digits per byte
+             unsigned char const byte = (unsigned char) this->val[i];
+             result += hex_digits[byte >> 4];
+             result += hex_digits[byte & 0x0f];
+         }
+         result += ">";
+     }
+     else
+     {
+         result += "(";
+         for (std::string::size_type i = 0; i < len; ++i)
+         {
+             char ch = this->val[i];
+             if (ch == '\n')
+             {
+                 result += "\\n";
+             }
+             else if (ch == '\r')
+             {
+                 result += "\\r";
+             }
+             else if (ch == '\t')
+             {
+                 result += "\\t";
+             }
+             else if (ch == '\b')
+             {
+                 result += "\\b";
+             }
+             else if (ch == '\f')
+             {
+                 result += "\\f";
+             }
+             else if (ch == '(')
+             {
+                 result += "\\(";
+             }
+             else if (ch == ')')
+             {
+                 result += "\\)";
+             }
+             else if (ch == '\\')
+             {
+                 result += "\\\\";
+             }
+             else if (is_iso_latin1_printable(ch))
+             {
+                 result += ch;
+             }
+             else
+             {
+                 // Backslash plus exactly three octal digits
+                 unsigned char const byte = (unsigned char) ch;
+                 result += '\\';
+                 result += (char) ('0' + ((byte >> 6) & 7));
+                 result += (char) ('0' + ((byte >> 3) & 7));
+                 result += (char) ('0' + (byte & 7));
+             }
+         }
+         result += ")";
+     }
+
+     return result;
+ }
+
+ // Returns a copy of the raw stored bytes.
+ std::string
+ QPDF_String::getVal() const
+ {
+     std::string const copy(this->val);
+     return copy;
+ }
+
+ // Returns the string converted to UTF-8 through QUtil::toUTF8.  A
+ // value of even length that starts with the bytes FE FF is read as
+ // big-endian 16-bit units; anything else is converted one byte at
+ // a time.
+ std::string
+ QPDF_String::getUTF8Val() const
+ {
+     unsigned int const len = this->val.length();
+     bool const has_bom =
+         ((len >= 2) && (len % 2 == 0) &&
+          (this->val[0] == '\xfe') && (this->val[1] == '\xff'));
+
+     std::string utf8;
+     if (! has_bom)
+     {
+         for (unsigned int i = 0; i < len; ++i)
+         {
+             utf8 += QUtil::toUTF8((unsigned char) this->val[i]);
+         }
+         return utf8;
+     }
+
+     // Big-endian UTF-16.  Not fully correct: each 16-bit unit is
+     // converted on its own, so characters past 0xffff are not
+     // handled properly.
+     unsigned int i = 2;
+     while (i < len)
+     {
+         unsigned char const high = (unsigned char) this->val[i];
+         unsigned char const low = (unsigned char) this->val[i + 1];
+         utf8 += QUtil::toUTF8((high << 8) + low);
+         i += 2;
+     }
+     return utf8;
+ }
diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc
new file mode 100644
index 00000000..e5e2d8be
--- /dev/null
+++ b/libqpdf/QPDF_encryption.cc
@@ -0,0 +1,441 @@
+// This file implements methods from the QPDF class that involve
+// encryption.
+
+#include <qpdf/QPDF.hh>
+
+#include <qpdf/QPDFExc.hh>
+
+#include <qpdf/QUtil.hh>
+#include <qpdf/Pl_RC4.hh>
+#include <qpdf/RC4.hh>
+#include <qpdf/MD5.hh>
+
+ // The 32 bytes used to pad (or stand in for) passwords.  Written
+ // as character literals: integer constants above 0x7f in a char
+ // brace-initializer are narrowing conversions, which C++11 and
+ // later reject.  The byte values are unchanged.
+ static char const padding_string[] = {
+     '\x28', '\xbf', '\x4e', '\x5e', '\x4e', '\x75', '\x8a', '\x41',
+     '\x64', '\x00', '\x4e', '\x56', '\xff', '\xfa', '\x01', '\x08',
+     '\x2e', '\x2e', '\x00', '\xb6', '\xd0', '\x68', '\x3e', '\x80',
+     '\x2f', '\x0c', '\xa9', '\xfe', '\x64', '\x53', '\x69', '\x7a'
+ };
+
+ // Byte counts shared by the routines in this file.
+ static const unsigned int O_key_bytes = sizeof(MD5::Digest); // size of an MD5 digest
+ static const unsigned int id_bytes = 16;   // required length of the first /ID string
+ static const unsigned int key_bytes = 32;  // length of a padded password
+
+ // Fills k1 with exactly key_bytes bytes: the first key_bytes bytes
+ // of password, followed by as much of the start of padding_string
+ // as is needed to fill the rest.
+ void
+ pad_or_truncate_password(std::string const& password, char k1[key_bytes])
+ {
+     // Both std::min arguments must have the same type; key_bytes is
+     // unsigned int while length() is size_t, so deduction fails
+     // wherever those types differ unless one side is cast.
+     size_t const password_bytes =
+         std::min((size_t) key_bytes, password.length());
+     size_t const pad_bytes = key_bytes - password_bytes;
+     memcpy(k1, password.c_str(), password_bytes);
+     memcpy(k1 + password_bytes, padding_string, pad_bytes);
+ }
+
+ // Although unnecessary, this routine trims the padding string from
+ // the end of a user password.  Its only purpose is for recovery of
+ // user passwords, which is done in the test suite.
+ //
+ // Passwords shorter than key_bytes are left alone.  Otherwise the
+ // password is cut at the first 0x28 byte from which the rest of the
+ // string matches the beginning of padding_string.  When no such
+ // position exists the password is left unchanged.
+ void
+ QPDF::trim_user_password(std::string& user_password)
+ {
+     size_t const len = user_password.length();
+     if (len < key_bytes)
+     {
+         return;
+     }
+
+     // 0x28 is the first byte of padding_string.  The search resumes
+     // after each candidate that does not match; without that step a
+     // 0x28 belonging to the password itself would be examined
+     // forever.
+     std::string::size_type pos = user_password.find('\x28');
+     while (pos != std::string::npos)
+     {
+         size_t const remaining = len - pos;
+         // A tail longer than the padding string cannot match it;
+         // the length check also keeps memcmp inside padding_string.
+         if ((remaining <= key_bytes) &&
+             (memcmp(user_password.data() + pos,
+                     padding_string, remaining) == 0))
+         {
+             user_password.erase(pos);
+             return;
+         }
+         pos = user_password.find('\x28', pos + 1);
+     }
+ }
+
+ // Convenience form: returns the key_bytes-long padded or truncated
+ // password as a std::string.
+ static std::string
+ pad_or_truncate_password(std::string const& password)
+ {
+     char padded[key_bytes];
+     pad_or_truncate_password(password, padded);
+     std::string const result(padded, key_bytes);
+     return result;
+ }
+
+ // Stores the digest of md5 in `digest', then `iterations' times
+ // replaces `digest' with the MD5 digest of itself.
+ static void
+ iterate_md5_digest(MD5& md5, MD5::Digest& digest, int iterations)
+ {
+     md5.digest(digest);
+
+     int rounds_left = iterations;
+     while (rounds_left > 0)
+     {
+         MD5 round;
+         round.encodeDataIncrementally((char*)digest, sizeof(MD5::Digest));
+         round.digest(digest);
+         --rounds_left;
+     }
+ }
+
+
+ // Runs RC4 over `data' in place `iterations' times.  Each round
+ // uses okey with every byte XORed with a round value: the round
+ // number when `reverse' is false, or the round numbers in
+ // descending order when it is true.
+ static void
+ iterate_rc4(unsigned char* data, int data_len,
+             unsigned char* okey, int key_len,
+             int iterations, bool reverse)
+ {
+     unsigned char* round_key = new unsigned char[key_len];
+     for (int round = 0; round < iterations; ++round)
+     {
+         int mask = round;
+         if (reverse)
+         {
+             mask = iterations - 1 - round;
+         }
+         for (int k = 0; k < key_len; ++k)
+         {
+             round_key[k] = okey[k] ^ mask;
+         }
+         RC4 cipher(round_key, key_len);
+         cipher.process(data, data_len);
+     }
+     delete [] round_key;
+ }
+
+ // Algorithm 3.1 from the PDF 1.4 Reference Manual: derives the key
+ // for one object from the file encryption key, the object id and
+ // the generation.
+ std::string
+ QPDF::compute_data_key(std::string const& encryption_key,
+                        int objid, int generation)
+ {
+     // Low three bytes of the object id, then low two bytes of the
+     // generation, least significant byte first.
+     char const suffix[5] = {
+         (char) (objid & 0xff),
+         (char) ((objid >> 8) & 0xff),
+         (char) ((objid >> 16) & 0xff),
+         (char) (generation & 0xff),
+         (char) ((generation >> 8) & 0xff)
+     };
+
+     std::string keyed = encryption_key;
+     keyed.append(suffix, 5);
+
+     MD5 md5;
+     md5.encodeDataIncrementally(keyed.c_str(), keyed.length());
+     MD5::Digest digest;
+     md5.digest(digest);
+
+     // Keep as many digest bytes as the extended key is long, capped
+     // at 16.
+     size_t key_size = keyed.length();
+     if (key_size > (size_t) 16)
+     {
+         key_size = 16;
+     }
+     return std::string((char*) digest, key_size);
+ }
+
+ // Algorithm 3.2 from the PDF 1.4 Reference Manual: hashes the
+ // padded password, the first key_bytes bytes of O, the four bytes
+ // of P (least significant first) and the first id_bytes bytes of
+ // id1; the result is the first Length_bytes bytes of the digest.
+ std::string
+ QPDF::compute_encryption_key(
+     std::string const& password, EncryptionData const& data)
+ {
+     MD5 md5;
+
+     std::string const padded = pad_or_truncate_password(password);
+     md5.encodeDataIncrementally(padded.c_str(), key_bytes);
+     md5.encodeDataIncrementally(data.O.c_str(), key_bytes);
+
+     char pbytes[4];
+     for (int i = 0; i < 4; ++i)
+     {
+         pbytes[i] = (char) ((data.P >> (8 * i)) & 0xff);
+     }
+     md5.encodeDataIncrementally(pbytes, 4);
+     md5.encodeDataIncrementally(data.id1.c_str(), id_bytes);
+
+     // Revision 3 re-hashes the digest 50 more times.
+     int const extra_rounds = ((data.R == 3) ? 50 : 0);
+     MD5::Digest digest;
+     iterate_md5_digest(md5, digest, extra_rounds);
+     return std::string((char*)digest, data.Length_bytes);
+ }
+
+ // Computes the RC4 key used for the O value: the MD5 digest of the
+ // padded owner password (the user password when the owner password
+ // is empty), re-hashed 50 more times for revision 3.  The first
+ // O_key_bytes bytes of the digest are copied to `key'.
+ static void
+ compute_O_rc4_key(std::string const& user_password,
+                   std::string const& owner_password,
+                   QPDF::EncryptionData const& data,
+                   unsigned char key[O_key_bytes])
+ {
+     std::string const password =
+         (owner_password.empty() ? user_password : owner_password);
+     std::string const padded = pad_or_truncate_password(password);
+
+     MD5 md5;
+     md5.encodeDataIncrementally(padded.c_str(), key_bytes);
+
+     int const extra_rounds = ((data.R == 3) ? 50 : 0);
+     MD5::Digest digest;
+     iterate_md5_digest(md5, digest, extra_rounds);
+     memcpy(key, digest, O_key_bytes);
+ }
+
+ // Algorithm 3.3 from the PDF 1.4 Reference Manual: RC4-encrypts
+ // the padded user password with the key from compute_O_rc4_key,
+ // using 20 rounds for revision 3 and one round otherwise.
+ static std::string
+ compute_O_value(std::string const& user_password,
+                 std::string const& owner_password,
+                 QPDF::EncryptionData const& data)
+ {
+     unsigned char rc4_key[O_key_bytes];
+     compute_O_rc4_key(user_password, owner_password, data, rc4_key);
+
+     char padded_user[key_bytes];
+     pad_or_truncate_password(user_password, padded_user);
+
+     int const rounds = ((data.R == 3) ? 20 : 1);
+     iterate_rc4((unsigned char*) padded_user, key_bytes,
+                 rc4_key, data.Length_bytes, rounds, false);
+     return std::string(padded_user, key_bytes);
+ }
+
+ // Algorithm 3.4 from the PDF 1.4 Reference Manual: one round of
+ // RC4 over the bare padding string, keyed with the encryption key
+ // derived from the user password.
+ static
+ std::string
+ compute_U_value_R2(std::string const& user_password,
+                    QPDF::EncryptionData const& data)
+ {
+     std::string encryption_key =
+         QPDF::compute_encryption_key(user_password, data);
+
+     // Padding an empty password yields the padding string itself.
+     char buffer[key_bytes];
+     pad_or_truncate_password("", buffer);
+
+     iterate_rc4((unsigned char*) buffer, key_bytes,
+                 (unsigned char*)encryption_key.c_str(),
+                 data.Length_bytes, 1, false);
+     return std::string(buffer, key_bytes);
+ }
+
+ // Algorithm 3.5 from the PDF 1.4 Reference Manual: MD5 of the
+ // padding string followed by id1, run through 20 rounds of RC4
+ // keyed with the encryption key.  The digest fills the first bytes
+ // of the result; the rest is deterministic filler.
+ static
+ std::string
+ compute_U_value_R3(std::string const& user_password,
+                    QPDF::EncryptionData const& data)
+ {
+     std::string encryption_key =
+         QPDF::compute_encryption_key(user_password, data);
+
+     std::string const padding = pad_or_truncate_password("");
+     MD5 md5;
+     md5.encodeDataIncrementally(padding.c_str(), key_bytes);
+     md5.encodeDataIncrementally(data.id1.c_str(), data.id1.length());
+     MD5::Digest digest;
+     md5.digest(digest);
+
+     iterate_rc4(digest, sizeof(MD5::Digest),
+                 (unsigned char*) encryption_key.c_str(),
+                 data.Length_bytes, 20, false);
+
+     char u_value[key_bytes];
+     memcpy(u_value, digest, sizeof(MD5::Digest));
+     // The remaining bytes are arbitrary; a fixed pattern keeps the
+     // output consistent for the sake of testing.
+     unsigned int fill = sizeof(MD5::Digest);
+     while (fill < key_bytes)
+     {
+         u_value[fill] = (char)((fill * fill) % 0xff);
+         ++fill;
+     }
+     return std::string(u_value, key_bytes);
+ }
+
+ // Picks the U computation by revision: the R3 form when data.R is
+ // 3, the R2 form for anything else.
+ static std::string
+ compute_U_value(std::string const& user_password,
+                 QPDF::EncryptionData const& data)
+ {
+     if (data.R != 3)
+     {
+         return compute_U_value_R2(user_password, data);
+     }
+     return compute_U_value_R3(user_password, data);
+ }
+
+ // Algorithm 3.6 from the PDF 1.4 Reference Manual: recomputes U
+ // from the candidate password and compares it with data.U.  For
+ // revision 3 only the digest-sized prefix is compared; otherwise
+ // all key_bytes bytes are.
+ static bool
+ check_user_password(std::string const& user_password,
+                     QPDF::EncryptionData const& data)
+ {
+     std::string const expected = compute_U_value(user_password, data);
+
+     int to_compare = key_bytes;
+     if (data.R == 3)
+     {
+         to_compare = sizeof(MD5::Digest);
+     }
+     int const difference =
+         memcmp(data.U.c_str(), expected.c_str(), to_compare);
+     return (difference == 0);
+ }
+
+ static bool
+ check_owner_password(std::string& user_password,
+                      std::string const& owner_password,
+                      QPDF::EncryptionData const& data)
+ {
+     // Algorithm 3.7 from the PDF 1.4 Reference Manual: run the stored
+     // /O value through RC4 keyed from the candidate owner password and
+     // test whether the result is a valid user password.  On success,
+     // user_password is overwritten with the recovered value; on
+     // failure it is left untouched.
+
+     unsigned char rc4_key[O_key_bytes];
+     compute_O_rc4_key(user_password, owner_password, data, rc4_key);
+
+     unsigned char candidate[key_bytes];
+     memcpy(candidate, (unsigned char*) data.O.c_str(), key_bytes);
+     int const iterations = (data.R == 3) ? 20 : 1;
+     iterate_rc4(candidate, key_bytes, rc4_key, data.Length_bytes,
+                 iterations, true);
+
+     std::string const recovered((char*)candidate, key_bytes);
+     if (! check_user_password(recovered, data))
+     {
+         return false;
+     }
+     user_password = recovered;
+     return true;
+ }
+
+ void
+ QPDF::initializeEncryption()
+ {
+     // Validates the trailer's /ID and /Encrypt entries, checks the
+     // provided password against them (first as an owner password, then
+     // as a user password), and stores the resulting file encryption
+     // key.  Runs at most once per object; later calls return
+     // immediately.  Throws QPDFExc if the encryption parameters are
+     // malformed or unsupported, or if the password is rejected.
+
+     if (this->encryption_initialized)
+     {
+         return;
+     }
+     this->encryption_initialized = true;
+
+     // After we initialize encryption parameters, we must use stored
+     // key information and never look at /Encrypt again.  Otherwise,
+     // things could go wrong if someone mutates the encryption
+     // dictionary.
+
+     if (! this->trailer.hasKey("/Encrypt"))
+     {
+         // Not an encrypted file; this->encrypted is left unchanged.
+         return;
+     }
+
+     QPDFObjectHandle id_obj = this->trailer.getKey("/ID");
+     if (! (id_obj.isArray() &&
+            (id_obj.getArrayNItems() == 2) &&
+            id_obj.getArrayItem(0).isString()))
+     {
+         throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                       "invalid /ID in trailer dictionary");
+     }
+
+     std::string id1 = id_obj.getArrayItem(0).getStringValue();
+     if (id1.length() != id_bytes)
+     {
+         throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                       "first /ID string in trailer dictionary has "
+                       "incorrect length");
+     }
+
+     QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt");
+     if (! encryption_dict.isDictionary())
+     {
+         throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                       "/Encrypt in trailer dictionary is not a dictionary");
+     }
+
+     // Only the standard security handler is supported.
+     if (! (encryption_dict.getKey("/Filter").isName() &&
+            (encryption_dict.getKey("/Filter").getName() == "/Standard")))
+     {
+         throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                       "unsupported encryption filter");
+     }
+
+     if (! (encryption_dict.getKey("/V").isInteger() &&
+            encryption_dict.getKey("/R").isInteger() &&
+            encryption_dict.getKey("/O").isString() &&
+            encryption_dict.getKey("/U").isString() &&
+            encryption_dict.getKey("/P").isInteger()))
+     {
+         throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                       "some encryption dictionary parameters are missing "
+                       "or the wrong type");
+     }
+
+     int V = encryption_dict.getKey("/V").getIntValue();
+     int R = encryption_dict.getKey("/R").getIntValue();
+     std::string O = encryption_dict.getKey("/O").getStringValue();
+     std::string U = encryption_dict.getKey("/U").getStringValue();
+     unsigned int P = (unsigned int) encryption_dict.getKey("/P").getIntValue();
+
+     if (! (((R == 2) || (R == 3)) &&
+            ((V == 1) || (V == 2))))
+     {
+         throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                       "Unsupported /R or /V in encryption dictionary");
+     }
+
+     // The password-checking helpers read exactly key_bytes from /O
+     // and /U, so both must have that length.  (The message names /U
+     // because /U, not /P, is what is checked here.)
+     if (! ((O.length() == key_bytes) && (U.length() == key_bytes)))
+     {
+         throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                       "incorrect length for /O and/or /U in "
+                       "encryption dictionary");
+     }
+
+     // /Length is in bits; 40 is used when the key is absent or not an
+     // integer.
+     int Length = 40;
+     if (encryption_dict.getKey("/Length").isInteger())
+     {
+         Length = encryption_dict.getKey("/Length").getIntValue();
+         if ((Length % 8) || (Length < 40) || (Length > 128))
+         {
+             throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
+                           "invalid /Length value in encryption dictionary");
+         }
+     }
+
+     EncryptionData data(V, R, Length / 8, P, O, U, id1);
+     if (check_owner_password(this->user_password, this->provided_password, data))
+     {
+         // password supplied was owner password; user_password has
+         // been initialized
+     }
+     else if (check_user_password(this->provided_password, data))
+     {
+         this->user_password = this->provided_password;
+     }
+     else
+     {
+         throw QPDFExc(this->file.getName() + ": invalid password");
+     }
+
+     this->encrypted = true;
+     this->encryption_key = compute_encryption_key(this->user_password, data);
+ }
+
+ std::string
+ QPDF::getKeyForObject(int objid, int generation)
+ {
+     // Returns the per-object key derived from the file encryption key
+     // for (objid, generation).  The most recently computed key is
+     // cached, so repeated requests for the same object are cheap.
+     // Throws QEXC::Internal if the file is not encrypted.
+
+     if (! this->encrypted)
+     {
+         throw QEXC::Internal("request for encryption key in non-encrypted PDF");
+     }
+
+     bool const cache_hit =
+         (objid == this->cached_key_objid) &&
+         (generation == this->cached_key_generation);
+     if (! cache_hit)
+     {
+         this->cached_object_encryption_key =
+             compute_data_key(this->encryption_key, objid, generation);
+         this->cached_key_objid = objid;
+         this->cached_key_generation = generation;
+     }
+
+     return this->cached_object_encryption_key;
+ }
+
+ void
+ QPDF::decryptString(std::string& str, int objid, int generation)
+ {
+     // Decrypts str in place using the RC4 key for the given object.
+     // Strings with objid == 0 are returned unchanged.  Any exception
+     // from getKeyForObject() propagates.
+
+     if (objid == 0)
+     {
+         return;
+     }
+     std::string key = getKeyForObject(objid, generation);
+     unsigned int vlen = str.length();
+     if (vlen == 0)
+     {
+         // Nothing to process, and &buf[0] would be invalid on an
+         // empty vector.
+         return;
+     }
+
+     // Work in a vector rather than a manually managed array so the
+     // buffer is released even if something below throws.
+     std::vector<char> buf(str.begin(), str.end());
+     RC4 rc4((unsigned char const*)key.c_str(), key.length());
+     rc4.process((unsigned char*)&buf[0], vlen);
+     str.assign(&buf[0], vlen);
+ }
+
+ void
+ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
+                     std::vector<PointerHolder<Pipeline> >& heap)
+ {
+     // Puts an RC4 decryption stage in front of the caller's pipeline.
+     // On return, pipeline points at the new stage, which has also
+     // been appended to heap.
+
+     std::string const object_key = getKeyForObject(objid, generation);
+     unsigned char* const key_data = (unsigned char*) object_key.c_str();
+     Pipeline* const downstream = pipeline;
+     pipeline = new Pl_RC4("stream decryption", downstream,
+                           key_data, object_key.length());
+     heap.push_back(pipeline);
+ }
+
+ void
+ QPDF::compute_encryption_O_U(
+     char const* user_password, char const* owner_password,
+     int V, int R, int key_len, unsigned long P,
+     std::string const& id1, std::string& O, std::string& U)
+ {
+     // Computes the /O and /U encryption dictionary values for the
+     // given passwords and parameters.  /O is computed first because
+     // the /U computation reads it from the encryption data.
+
+     EncryptionData params(V, R, key_len, P, "", "", id1);
+     std::string const o_value =
+         compute_O_value(user_password, owner_password, params);
+     params.O = o_value;
+     O = o_value;
+     U = compute_U_value(user_password, params);
+ }
+
+ std::string const&
+ QPDF::getUserPassword() const
+ {
+     // Accessor for the stored user password; returns a reference to
+     // the member, not a copy.
+     std::string const& stored = this->user_password;
+     return stored;
+ }
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
new file mode 100644
index 00000000..6c0cf3be
--- /dev/null
+++ b/libqpdf/QPDF_linearization.cc
@@ -0,0 +1,2103 @@
+// See doc/linearization.
+
+#include <qpdf/QPDF.hh>
+
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QTC.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/PCRE.hh>
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/Pl_Flate.hh>
+#include <qpdf/Pl_Count.hh>
+#include <qpdf/BitWriter.hh>
+#include <qpdf/BitStream.hh>
+
+#include <iostream>
+#include <algorithm>
+#include <assert.h>
+#include <math.h>
+
+ template <class T>
+ static void
+ load_vector_int(BitStream& bit_stream, int nitems, std::vector<T>& vec,
+                 int bits_wanted, int T::*field)
+ {
+     // For each of the first nitems elements of vec, read bits_wanted
+     // bits from bit_stream and store the value in the member selected
+     // by field.  vec must already hold at least nitems elements.
+
+     int idx = 0;
+     while (idx < nitems)
+     {
+         T& item = vec[idx];
+         item.*field = bit_stream.getBits(bits_wanted);
+         ++idx;
+     }
+
+     // The PDF spec says that each hint table starts at a byte
+     // boundary.  Each "row" actually must start on a byte boundary.
+     bit_stream.skipToNextByte();
+ }
+
+ template <class T>
+ static void
+ load_vector_vector(BitStream& bit_stream,
+                    int nitems1, std::vector<T>& vec1, int T::*nitems2,
+                    int bits_wanted, std::vector<int> T::*vec2)
+ {
+     // For each of the first nitems1 elements of vec1, read as many
+     // bits_wanted-bit values as that element's nitems2 member says
+     // and append them to that element's vec2 member.
+
+     for (int outer = 0; outer < nitems1; ++outer)
+     {
+         T& item = vec1[outer];
+         int const count = item.*nitems2;
+         std::vector<int>& values = item.*vec2;
+         for (int inner = 0; inner < count; ++inner)
+         {
+             values.push_back(bit_stream.getBits(bits_wanted));
+         }
+     }
+
+     // Realign to a byte boundary after the whole table is read.
+     bit_stream.skipToNextByte();
+ }
+
+ bool
+ QPDF::checkLinearization()
+ {
+     // Loads the linearization data and runs the consistency checks.
+     // A QPDFExc raised along the way is printed to standard output
+     // and reported as failure.
+     try
+     {
+         readLinearizationData();
+         return checkLinearizationInternal();
+     }
+     catch (QPDFExc& e)
+     {
+         std::cout << e.what() << std::endl;
+     }
+     return false;
+ }
+
+ bool
+ QPDF::isLinearized()
+ {
+ // If the first object in the file is a dictionary with a suitable
+ // /Linearized key and has an /L key that accurately indicates the
+ // file size, initialize this->lindict and return true.
+ // Otherwise return false. Note that when /L is present but is not
+ // an integer, no file size comparison is made and
+ // this->linp.file_size is not set by this function.
+
+ // A linearized PDF's first object will be contained within
+ // the first 1024 bytes of the file and will be a dictionary with
+ // a valid /Linearized key. This routine looks for that and does
+ // no additional validation.
+
+ // The PDF spec says the linearization dictionary must be
+ // completely contained within the first 1024 bytes of the file.
+ // Add a byte for a null terminator.
+ static int const tbuf_size = 1025;
+
+ char* buf = new char[tbuf_size];
+ this->file.seek(0, SEEK_SET);
+ PointerHolder<char> b(buf); // guarantee deletion
+ // Zero the whole buffer and read at most tbuf_size - 1 bytes, so
+ // the final byte is always a null terminator.
+ memset(buf, '\0', tbuf_size);
+ this->file.read(buf, tbuf_size - 1);
+
+ // Matches "<digits> 0 obj <<", capturing the object number.
+ static PCRE lindict_re("(?s:(\\d+)\\s+0\\s+obj\\s*<<)");
+
+ // NOTE(review): offset is assigned below but never read in this
+ // function.
+ off_t offset = -1;
+ int lindict_obj = 0;
+ char* p = buf;
+ // The regex match is a C-string match and so stops at the first
+ // null byte. If nothing matches, skip past the next run of null
+ // bytes and search again from there, giving up at the end of the
+ // buffer.
+ while (lindict_obj == 0)
+ {
+ PCRE::Match m(lindict_re.match(p));
+ if (m)
+ {
+ offset = m.getOffset(0) + (p - buf);
+ lindict_obj = atoi(m.getMatch(1).c_str());
+ if (m.getMatch(0).find('\n') != std::string::npos)
+ {
+ QTC::TC("qpdf", "QPDF lindict found newline");
+ }
+ }
+ else
+ {
+ // The last byte of buf is always null, so this memchr finds
+ // a null as long as p is still inside the buffer.
+ if ((p = (char*)memchr(p, '\0', tbuf_size - (p - buf))) != 0)
+ {
+ QTC::TC("qpdf", "QPDF lindict null found");
+ while ((p - buf < tbuf_size) && (*p == 0))
+ {
+ ++p;
+ }
+ if ((p - buf) == tbuf_size)
+ {
+ break;
+ }
+ QTC::TC("qpdf", "QPDF lindict searching after null");
+ }
+ }
+ }
+
+ // No object header was found in the buffer (an object number of 0
+ // is treated the same way).
+ if (lindict_obj == 0)
+ {
+ return false;
+ }
+
+ // The candidate is looked up as generation 0 of the object number
+ // found above.
+ QPDFObjectHandle candidate = QPDFObjectHandle::Factory::newIndirect(
+ this, lindict_obj, 0);
+ if (! candidate.isDictionary())
+ {
+ return false;
+ }
+
+ // /Linearized must be numeric with an integer part of 1.
+ QPDFObjectHandle linkey = candidate.getKey("/Linearized");
+ if (! (linkey.isNumber() && ((int)floor(linkey.getNumericValue()) == 1)))
+ {
+ return false;
+ }
+
+ // When /L is an integer it must equal the actual file size, which
+ // is obtained by seeking to the end of the file.
+ QPDFObjectHandle L = candidate.getKey("/L");
+ if (L.isInteger())
+ {
+ int Li = L.getIntValue();
+ this->file.seek(0, SEEK_END);
+ if (Li != this->file.tell())
+ {
+ QTC::TC("qpdf", "QPDF /L mismatch");
+ return false;
+ }
+ else
+ {
+ this->linp.file_size = Li;
+ }
+ }
+
+ this->lindict = candidate;
+
+ return true;
+ }
+
+ void
+ QPDF::readLinearizationData()
+ {
+     // Loads the linearization parameter dictionary into this->linp
+     // and parses the page offset, shared object, and (if present)
+     // outline hint tables from the hint stream(s).
+     //
+     // This function throws an exception (which is trapped by
+     // checkLinearization()) for any errors that prevent loading.
+
+     // Hint table parsing code needs at least 32 bits in a long.
+     assert(sizeof(long) >= 4);
+
+     if (! isLinearized())
+     {
+         throw QPDFExc(this->file.getName() + " is not linearized");
+     }
+
+     // /L is read and stored in linp by isLinearized()
+     QPDFObjectHandle H = lindict.getKey("/H");
+     QPDFObjectHandle O = lindict.getKey("/O");
+     QPDFObjectHandle E = lindict.getKey("/E");
+     QPDFObjectHandle N = lindict.getKey("/N");
+     QPDFObjectHandle T = lindict.getKey("/T");
+     QPDFObjectHandle P = lindict.getKey("/P");
+
+     if (! (H.isArray() &&
+            O.isInteger() &&
+            E.isInteger() &&
+            N.isInteger() &&
+            T.isInteger() &&
+            (P.isInteger() || P.isNull())))
+     {
+         throw QPDFExc("some keys in linearization dictionary are of "
+                       "the wrong type");
+     }
+
+     // Hint table array: offset length [ offset length ]
+     unsigned int n_H_items = H.getArrayNItems();
+     if (! ((n_H_items == 2) || (n_H_items == 4)))
+     {
+         throw QPDFExc("H has the wrong number of items");
+     }
+
+     std::vector<int> H_items;
+     for (unsigned int i = 0; i < n_H_items; ++i)
+     {
+         QPDFObjectHandle oh(H.getArrayItem(i));
+         if (oh.isInteger())
+         {
+             H_items.push_back(oh.getIntValue());
+         }
+         else
+         {
+             throw QPDFExc("some H items are of the wrong type");
+         }
+     }
+
+     // H: hint table offset/length for primary and overflow hint tables
+     int H0_offset = H_items[0];
+     int H0_length = H_items[1];
+     int H1_offset = 0;
+     int H1_length = 0;
+     if (H_items.size() == 4)
+     {
+         // Acrobat doesn't read or write these (as PDF 1.4), so we
+         // don't have a way to generate a test case.
+         // QTC::TC("qpdf", "QPDF overflow hint table");
+         H1_offset = H_items[2];
+         H1_length = H_items[3];
+     }
+
+     // P: first page number
+     int first_page = 0;
+     if (P.isInteger())
+     {
+         QTC::TC("qpdf", "QPDF P present in lindict");
+         first_page = P.getIntValue();
+     }
+     else
+     {
+         QTC::TC("qpdf", "QPDF P absent in lindict");
+     }
+
+     // Store linearization parameter data
+
+     // file_size initialized by isLinearized()
+     this->linp.first_page_object = O.getIntValue();
+     this->linp.first_page_end = E.getIntValue();
+     this->linp.npages = N.getIntValue();
+     this->linp.xref_zero_offset = T.getIntValue();
+     this->linp.first_page = first_page;
+     this->linp.H_offset = H0_offset;
+     this->linp.H_length = H0_length;
+
+     // Read hint streams.  Both streams are piped into the same
+     // buffer, so the overflow stream's data follows the primary's.
+
+     Pl_Buffer pb("hint buffer");
+     QPDFObjectHandle H0 = readHintStream(pb, H0_offset, H0_length);
+     if (H1_offset)
+     {
+         (void) readHintStream(pb, H1_offset, H1_length);
+     }
+
+     // PDF 1.4 hint tables that we ignore:
+
+     //  /T    thumbnail
+     //  /A    thread information
+     //  /E    named destination
+     //  /V    interactive form
+     //  /I    information dictionary
+     //  /C    logical structure
+     //  /L    page label
+
+     // Individual hint table offsets
+     QPDFObjectHandle HS = H0.getKey("/S"); // shared object
+     QPDFObjectHandle HO = H0.getKey("/O"); // outline
+
+     PointerHolder<Buffer> hbp = pb.getBuffer();
+     Buffer* hb = hbp.getPointer();
+     unsigned char const* h_buf = hb->getBuffer();
+     int h_size = hb->getSize();
+
+     readHPageOffset(BitStream(h_buf, h_size));
+
+     // /S and /O come from the file and are used as byte offsets into
+     // the hint buffer, so check their type and range before using
+     // them to build a BitStream.
+     if (! HS.isInteger())
+     {
+         throw QPDFExc("/S (shared object hint table offset) in hint "
+                       "stream dictionary is missing or not an integer");
+     }
+     int HSi = HS.getIntValue();
+     if ((HSi < 0) || (HSi >= h_size))
+     {
+         throw QPDFExc("/S (shared object hint table offset) in hint "
+                       "stream dictionary is out of range");
+     }
+     readHSharedObject(BitStream(h_buf + HSi, h_size - HSi));
+
+     if (HO.isInteger())
+     {
+         int HOi = HO.getIntValue();
+         if ((HOi < 0) || (HOi >= h_size))
+         {
+             throw QPDFExc("/O (outline hint table offset) in hint "
+                           "stream dictionary is out of range");
+         }
+         readHGeneric(BitStream(h_buf + HOi, h_size - HOi),
+                      this->outline_hints);
+     }
+ }
+
+ QPDFObjectHandle
+ QPDF::readHintStream(Pipeline& pl, off_t offset, size_t length)
+ {
+     // Reads the hint stream object found at offset, checks that
+     // offset + length lands within the recorded end-of-object range,
+     // pipes the stream data into pl, and returns the stream's
+     // dictionary.  Throws QPDFExc if the object is not a stream or the
+     // length does not agree.
+
+     int obj;
+     int gen;
+     QPDFObjectHandle H = readObjectAtOffset(offset, 0, 0, obj, gen);
+     ObjCache& stream_cache = this->obj_cache[ObjGen(obj, gen)];
+     off_t earliest_end = stream_cache.end_before_space;
+     off_t latest_end = stream_cache.end_after_space;
+     if (! H.isStream())
+     {
+         throw QPDFExc("hint table is not a stream");
+     }
+
+     QPDFObjectHandle Hdict = H.getDict();
+
+     // Some versions of Acrobat make /Length indirect and place it
+     // immediately after the stream, increasing length to cover it,
+     // even though the specification says all objects in the
+     // linearization parameter dictionary must be direct.  We have to
+     // get the file position of the end of length in this case.
+     QPDFObjectHandle length_obj = Hdict.getKey("/Length");
+     if (! length_obj.isIndirect())
+     {
+         QTC::TC("qpdf", "QPDF hint table length direct");
+     }
+     else
+     {
+         QTC::TC("qpdf", "QPDF hint table length indirect");
+         // Force resolution
+         (void) length_obj.getIntValue();
+         ObjGen const length_og(length_obj.getObjectID(),
+                                length_obj.getGeneration());
+         ObjCache& length_cache = this->obj_cache[length_og];
+         earliest_end = length_cache.end_before_space;
+         latest_end = length_cache.end_after_space;
+     }
+
+     off_t computed_end = offset + length;
+     bool const in_range =
+         (computed_end >= earliest_end) && (computed_end <= latest_end);
+     if (! in_range)
+     {
+         std::cout << "expected = " << computed_end
+                   << "; actual = " << earliest_end << ".."
+                   << latest_end << std::endl;
+         throw QPDFExc("hint table length mismatch");
+     }
+     H.pipeStreamData(&pl, true, false, false);
+     return Hdict;
+ }
+
+ void
+ QPDF::readHPageOffset(BitStream h)
+ {
+     // Parses the page offset hint table from h into
+     // this->page_offset_hints: a fixed header followed by one
+     // per-page row for each of this->linp.npages pages.
+     //
+     // All comments referring to the PDF spec refer to the spec for
+     // version 1.4.
+
+     HPageOffset& hints = this->page_offset_hints;
+
+     // Header items, in file order.
+     hints.min_nobjects = h.getBits(32);               // 1
+     hints.first_page_offset = h.getBits(32);          // 2
+     hints.nbits_delta_nobjects = h.getBits(16);       // 3
+     hints.min_page_length = h.getBits(32);            // 4
+     hints.nbits_delta_page_length = h.getBits(16);    // 5
+     hints.min_content_offset = h.getBits(32);         // 6
+     hints.nbits_delta_content_offset = h.getBits(16); // 7
+     hints.min_content_length = h.getBits(32);         // 8
+     hints.nbits_delta_content_length = h.getBits(16); // 9
+     hints.nbits_nshared_objects = h.getBits(16);      // 10
+     hints.nbits_shared_identifier = h.getBits(16);    // 11
+     hints.nbits_shared_numerator = h.getBits(16);     // 12
+     hints.shared_denominator = h.getBits(16);         // 13
+
+     unsigned int npages = this->linp.npages;
+     hints.entries = std::vector<HPageOffsetEntry>(npages);
+     std::vector<HPageOffsetEntry>& rows = hints.entries;
+
+     // Per-page items are stored column by column: each field is read
+     // for every page before the next field starts.
+     load_vector_int(h, npages, rows,
+                     hints.nbits_delta_nobjects,
+                     &HPageOffsetEntry::delta_nobjects);
+     load_vector_int(h, npages, rows,
+                     hints.nbits_delta_page_length,
+                     &HPageOffsetEntry::delta_page_length);
+     load_vector_int(h, npages, rows,
+                     hints.nbits_nshared_objects,
+                     &HPageOffsetEntry::nshared_objects);
+     load_vector_vector(h, npages, rows,
+                        &HPageOffsetEntry::nshared_objects,
+                        hints.nbits_shared_identifier,
+                        &HPageOffsetEntry::shared_identifiers);
+     load_vector_vector(h, npages, rows,
+                        &HPageOffsetEntry::nshared_objects,
+                        hints.nbits_shared_numerator,
+                        &HPageOffsetEntry::shared_numerators);
+     load_vector_int(h, npages, rows,
+                     hints.nbits_delta_content_offset,
+                     &HPageOffsetEntry::delta_content_offset);
+     load_vector_int(h, npages, rows,
+                     hints.nbits_delta_content_length,
+                     &HPageOffsetEntry::delta_content_length);
+ }
+
+ void
+ QPDF::readHSharedObject(BitStream h)
+ {
+     // Parses the shared object hint table from h into
+     // this->shared_object_hints: a fixed header followed by
+     // nshared_total group entries.
+
+     HSharedObject& hints = this->shared_object_hints;
+
+     // Header items, in file order.
+     hints.first_shared_obj = h.getBits(32);         // 1
+     hints.first_shared_offset = h.getBits(32);      // 2
+     hints.nshared_first_page = h.getBits(32);       // 3
+     hints.nshared_total = h.getBits(32);            // 4
+     hints.nbits_nobjects = h.getBits(16);           // 5
+     hints.min_group_length = h.getBits(32);         // 6
+     hints.nbits_delta_group_length = h.getBits(16); // 7
+
+     QTC::TC("qpdf", "QPDF lin nshared_total > nshared_first_page",
+             (hints.nshared_total > hints.nshared_first_page) ? 1 : 0);
+
+     int ngroups = hints.nshared_total;
+     hints.entries = std::vector<HSharedObjectEntry>(ngroups);
+     std::vector<HSharedObjectEntry>& groups = hints.entries;
+
+     load_vector_int(h, ngroups, groups,
+                     hints.nbits_delta_group_length,
+                     &HSharedObjectEntry::delta_group_length);
+     load_vector_int(h, ngroups, groups,
+                     1, &HSharedObjectEntry::signature_present);
+     for (int g = 0; g < ngroups; ++g)
+     {
+         if (! groups[g].signature_present)
+         {
+             continue;
+         }
+         // Skip 128-bit MD5 hash.  These are not supported by
+         // acrobat, so they should probably never be there.  We
+         // have no test case for this.
+         for (int word = 0; word < 4; ++word)
+         {
+             (void) h.getBits(32);
+         }
+     }
+     load_vector_int(h, ngroups, groups,
+                     hints.nbits_nobjects,
+                     &HSharedObjectEntry::nobjects_minus_one);
+ }
+
+ void
+ QPDF::readHGeneric(BitStream h, HGeneric& t)
+ {
+ // Fill t from the start of h: four consecutive 32-bit values,
+ // stored in the order they appear in the stream. The caller
+ // positions h at the beginning of the table.
+ t.first_object = h.getBits(32); // 1
+ t.first_object_offset = h.getBits(32); // 2
+ t.nobjects = h.getBits(32); // 3
+ t.group_length = h.getBits(32); // 4
+ }
+
+ bool
+ QPDF::checkLinearizationInternal()
+ {
+     // Compares the linearization parameters and hint tables that were
+     // read from the file against values computed from the file's
+     // actual contents.  Every discrepancy is printed to standard
+     // output as an ERROR or a WARNING.  Returns true only if there
+     // were neither errors nor warnings.
+     //
+     // All comments referring to the PDF spec refer to the spec for
+     // version 1.4.
+
+     std::list<std::string> errors;
+     std::list<std::string> warnings;
+
+     // Check all values in linearization parameter dictionary
+
+     LinParameters& p = this->linp;
+
+     // L: file size in bytes -- checked by isLinearized
+
+     // O: object number of first page
+     std::vector<QPDFObjectHandle> const& pages = getAllPages();
+     if (p.first_page_object != pages[0].getObjectID())
+     {
+         QTC::TC("qpdf", "QPDF err /O mismatch");
+         errors.push_back("first page object (/O) mismatch");
+     }
+
+     // N: number of pages
+     int npages = pages.size();
+     if (p.npages != npages)
+     {
+         // Not tested in the test suite
+         errors.push_back("page count (/N) mismatch");
+     }
+
+     // Page dictionaries must not live inside object streams (xref
+     // entry type 2).
+     for (int i = 0; i < npages; ++i)
+     {
+         QPDFObjectHandle const& page = pages[i];
+         ObjGen og(page.getObjectID(), page.getGeneration());
+         if (this->xref_table[og].getType() == 2)
+         {
+             errors.push_back("page dictionary for page " +
+                              QUtil::int_to_string(i) + " is compressed");
+         }
+     }
+
+     // T: offset of whitespace character preceding xref entry for object 0
+     this->file.seek(p.xref_zero_offset, SEEK_SET);
+     while (1)
+     {
+         // Give ch a non-whitespace value on every pass.  If read()
+         // stores nothing (presumably what happens at end of file --
+         // confirm against the file class), the loop then ends instead
+         // of testing an indeterminate value, possibly forever.
+         char ch = '\0';
+         this->file.read(&ch, 1);
+         if (! ((ch == ' ') || (ch == '\r') || (ch == '\n')))
+         {
+             this->file.seek(-1, SEEK_CUR);
+             break;
+         }
+     }
+     if (this->file.tell() != this->first_xref_item_offset)
+     {
+         QTC::TC("qpdf", "QPDF err /T mismatch");
+         errors.push_back("space before first xref item (/T) mismatch "
+                          "(computed = " +
+                          QUtil::int_to_string(this->first_xref_item_offset) +
+                          "; file = " +
+                          QUtil::int_to_string(this->file.tell()) + ")");
+     }
+
+     // P: first page number -- Implementation note 124 says Acrobat
+     // ignores this value, so we will too.
+
+     // Check numbering of compressed objects in each xref section.
+     // For linearized files, all compressed objects are supposed to be
+     // at the end of the containing xref section if any object streams
+     // are in use.
+
+     if (this->uncompressed_after_compressed)
+     {
+         errors.push_back("linearized file contains an uncompressed object"
+                          " after a compressed one in a cross-reference stream");
+     }
+
+     // Further checking requires optimization and order calculation.
+     // Don't allow optimization to make changes.  If it has to, then
+     // the file is not properly linearized.  We use the xref table to
+     // figure out which objects are compressed and which are
+     // uncompressed.
+     { // local scope
+         std::map<int, int> object_stream_data;
+         for (std::map<ObjGen, QPDFXRefEntry>::const_iterator iter =
+                  this->xref_table.begin();
+              iter != this->xref_table.end(); ++iter)
+         {
+             ObjGen const& og = (*iter).first;
+             QPDFXRefEntry const& entry = (*iter).second;
+             if (entry.getType() == 2)
+             {
+                 object_stream_data[og.obj] = entry.getObjStreamNumber();
+             }
+         }
+         optimize(object_stream_data, false);
+         calculateLinearizationData(object_stream_data);
+     }
+
+     // E: offset of end of first page -- Implementation note 123 says
+     // Acrobat includes one extra object here by mistake.  pdlin fails
+     // to place thumbnail images in section 9, so when thumbnails are
+     // present, it also gets the wrong value for /E.  It also doesn't
+     // count outlines here when it should even though it places them
+     // in part 6.  This code fails to put thread information
+     // dictionaries in part 9, so it actually gets the wrong value for
+     // E when threads are present.  In that case, it would probably
+     // agree with pdlin.  As of this writing, the test suite doesn't
+     // contain any files with threads.
+
+     // Both bounds are maxima over the part 6 objects: min_E is the
+     // largest end_before_space and max_E the largest end_after_space,
+     // so together they bracket the end of the last part 6 object.
+     assert(! this->part6.empty());
+     int min_E = -1;
+     int max_E = -1;
+     for (std::vector<QPDFObjectHandle>::iterator iter = this->part6.begin();
+          iter != this->part6.end(); ++iter)
+     {
+         ObjGen og((*iter).getObjectID(), (*iter).getGeneration());
+         // All objects have to have been dereferenced to be classified.
+         assert(this->obj_cache.count(og) > 0);
+         ObjCache const& oc = this->obj_cache[og];
+         min_E = std::max(min_E, (int)oc.end_before_space);
+         max_E = std::max(max_E, (int)oc.end_after_space);
+     }
+     if ((p.first_page_end < min_E) || (p.first_page_end > max_E))
+     {
+         QTC::TC("qpdf", "QPDF warn /E mismatch");
+         warnings.push_back("end of first page section (/E) mismatch: /E = " +
+                            QUtil::int_to_string(p.first_page_end) +
+                            "; computed = " +
+                            QUtil::int_to_string(min_E) + ".." +
+                            QUtil::int_to_string(max_E));
+     }
+
+     // Check hint tables.  The shared object check runs first because
+     // it fills in shared_idx_to_obj, which the page offset check
+     // reads.
+
+     std::map<int, int> shared_idx_to_obj;
+     checkHSharedObject(errors, warnings, pages, shared_idx_to_obj);
+     checkHPageOffset(errors, warnings, pages, shared_idx_to_obj);
+     checkHOutlines(warnings);
+
+     // Report errors
+
+     bool result = true;
+
+     if (! errors.empty())
+     {
+         result = false;
+         for (std::list<std::string>::iterator iter = errors.begin();
+              iter != errors.end(); ++iter)
+         {
+             std::cout << "ERROR: " << (*iter) << std::endl;
+         }
+     }
+
+     // Warnings also make the overall result false.
+     if (! warnings.empty())
+     {
+         result = false;
+         for (std::list<std::string>::iterator iter = warnings.begin();
+              iter != warnings.end(); ++iter)
+         {
+             std::cout << "WARNING: " << (*iter) << std::endl;
+         }
+     }
+
+     return result;
+ }
+
+ int
+ QPDF::maxEnd(ObjUser const& ou)
+ {
+     // Returns the largest end_after_space offset among the cached
+     // objects recorded for ou.  Both ou and each of its objects must
+     // already be known (asserted).
+
+     assert(this->obj_user_to_objects.count(ou) > 0);
+     std::set<ObjGen> const& members = this->obj_user_to_objects[ou];
+
+     int latest = 0;
+     std::set<ObjGen>::iterator it = members.begin();
+     while (it != members.end())
+     {
+         ObjGen const& og = *it;
+         assert(this->obj_cache.count(og) > 0);
+         int const candidate = (int)(this->obj_cache[og].end_after_space);
+         latest = std::max(latest, candidate);
+         ++it;
+     }
+     return latest;
+ }
+
+ int
+ QPDF::getLinearizationOffset(ObjGen const& og)
+ {
+     // Returns the file offset to use for og when checking
+     // linearization.  Throws QPDFExc if og's xref entry is neither
+     // type 1 nor type 2.
+
+     QPDFXRefEntry entry = this->xref_table[og];
+     int const type = entry.getType();
+
+     if (type == 1)
+     {
+         return entry.getOffset();
+     }
+
+     if (type == 2)
+     {
+         // For compressed objects, return the offset of the object
+         // stream that contains them.
+         return getLinearizationOffset(
+             ObjGen(entry.getObjStreamNumber(), 0));
+     }
+
+     throw QPDFExc(
+         this->file.getName(), 0,
+         "getLinearizationOffset called for xref entry not of type 1 or 2");
+ }
+
+ QPDFObjectHandle
+ QPDF::getUncompressedObject(QPDFObjectHandle& obj,
+                             std::map<int, int> const& object_stream_data)
+ {
+     // If obj's object ID has an entry in object_stream_data, return an
+     // indirect reference to the mapped object number (generation 0).
+     // Null objects and objects without an entry are returned as is.
+
+     if (obj.isNull())
+     {
+         return obj;
+     }
+
+     std::map<int, int>::const_iterator found =
+         object_stream_data.find(obj.getObjectID());
+     if (found == object_stream_data.end())
+     {
+         return obj;
+     }
+
+     int const stream_number = (*found).second;
+     return objGenToIndirect(ObjGen(stream_number, 0));
+ }
+
+ int
+ QPDF::lengthNextN(int first_object, int n,
+                   std::list<std::string>& errors)
+ {
+     // Sums, over the n consecutive object numbers starting at
+     // first_object (generation 0), the distance from each object's
+     // linearization offset to its end_after_space.  An object with no
+     // xref entry adds an error message and nothing to the sum.
+
+     int total = 0;
+     for (int step = 0; step < n; ++step)
+     {
+         int const objid = first_object + step;
+         ObjGen og(objid, 0);
+         if (this->xref_table.count(og) == 0)
+         {
+             errors.push_back(
+                 "no xref table entry for " +
+                 QUtil::int_to_string(objid) + " 0");
+             continue;
+         }
+
+         assert(this->obj_cache.count(og) > 0);
+         total += this->obj_cache[og].end_after_space -
+             getLinearizationOffset(og);
+     }
+     return total;
+ }
+
+ void
+ QPDF::checkHPageOffset(std::list<std::string>& errors,
+                        std::list<std::string>& warnings,
+                        std::vector<QPDFObjectHandle> const& pages,
+                        std::map<int, int>& shared_idx_to_obj)
+ {
+     // Compares the page offset hint table read from the file
+     // (this->page_offset_hints) with the computed data
+     // (this->c_page_offset_data), page by page, appending to errors
+     // and warnings.  shared_idx_to_obj maps shared object indexes from
+     // the hint table to object numbers; it is filled in by
+     // checkHSharedObject().
+
+     // Implementation note 126 says Acrobat always sets
+     // delta_content_offset and delta_content_length in the page
+     // offset header dictionary to 0.  It also states that
+     // min_content_offset in the per-page information is always 0,
+     // which is an incorrect value.
+
+     // Implementation note 127 explains that Acrobat always sets item
+     // 8 (min_content_length) to zero, item 9
+     // (nbits_delta_content_length) to the value of item 5
+     // (nbits_delta_page_length), and item 7 of each per-page hint
+     // table (delta_content_length) to item 2 (delta_page_length) of
+     // that entry.  Acrobat ignores these values when reading files.
+
+     // Empirically, it also seems that Acrobat sometimes puts items
+     // under a page's /Resources dictionary in with shared objects
+     // even when they are private.
+
+     unsigned int npages = pages.size();
+     int table_offset = adjusted_offset(
+         this->page_offset_hints.first_page_offset);
+     ObjGen first_page_og(pages[0].getObjectID(), pages[0].getGeneration());
+     assert(this->xref_table.count(first_page_og) > 0);
+     int offset = getLinearizationOffset(first_page_og);
+     if (table_offset != offset)
+     {
+         warnings.push_back("first page object offset mismatch");
+     }
+
+     for (unsigned int pageno = 0; pageno < npages; ++pageno)
+     {
+         // The hint table was sized from /N in the linearization
+         // dictionary, which may be smaller than the real page count.
+         // Stop rather than index past the end of the table.
+         if (pageno >= this->page_offset_hints.entries.size())
+         {
+             errors.push_back(
+                 "page offset hint table has no entry for page " +
+                 QUtil::int_to_string(pageno));
+             break;
+         }
+
+         ObjGen page_og(pages[pageno].getObjectID(),
+                        pages[pageno].getGeneration());
+         int first_object = page_og.obj;
+         assert(this->xref_table.count(page_og) > 0);
+         offset = getLinearizationOffset(page_og);
+
+         HPageOffsetEntry& he = this->page_offset_hints.entries[pageno];
+         CHPageOffsetEntry& ce = this->c_page_offset_data.entries[pageno];
+         int h_nobjects = he.delta_nobjects +
+             this->page_offset_hints.min_nobjects;
+         if (h_nobjects != ce.nobjects)
+         {
+             // This happens with pdlin when there are thumbnails.
+             warnings.push_back(
+                 "object count mismatch for page " +
+                 QUtil::int_to_string(pageno) + ": hint table = " +
+                 QUtil::int_to_string(h_nobjects) + "; computed = " +
+                 QUtil::int_to_string(ce.nobjects));
+         }
+
+         // Use value for number of objects in hint table rather than
+         // computed value if there is a discrepancy.
+         int length = lengthNextN(first_object, h_nobjects, errors);
+         int h_length = he.delta_page_length +
+             this->page_offset_hints.min_page_length;
+         if (length != h_length)
+         {
+             // This condition almost certainly indicates a bad hint
+             // table or a bug in this code.
+             errors.push_back(
+                 "page length mismatch for page " +
+                 QUtil::int_to_string(pageno) + ": hint table = " +
+                 QUtil::int_to_string(h_length) + "; computed length = " +
+                 QUtil::int_to_string(length) + " (offset = " +
+                 QUtil::int_to_string(offset) + ")");
+         }
+
+         offset += h_length;
+
+         // Translate shared object indexes to object numbers.
+         std::set<int> hint_shared;
+         std::set<int> computed_shared;
+
+         if ((pageno == 0) && (he.nshared_objects > 0))
+         {
+             // pdlin and Acrobat both do this even though the spec
+             // states clearly and unambiguously that they should not.
+             warnings.push_back("page 0 has shared identifier entries");
+         }
+
+         for (int i = 0; i < he.nshared_objects; ++i)
+         {
+             int idx = he.shared_identifiers[i];
+             assert(shared_idx_to_obj.count(idx) > 0);
+             hint_shared.insert(shared_idx_to_obj[idx]);
+         }
+
+         for (int i = 0; i < ce.nshared_objects; ++i)
+         {
+             int idx = ce.shared_identifiers[i];
+             assert(idx < this->c_shared_object_data.nshared_total);
+             int obj = this->c_shared_object_data.entries[idx].object;
+             computed_shared.insert(obj);
+         }
+
+         // Report the difference between the two sets in both
+         // directions.
+         for (std::set<int>::iterator iter = hint_shared.begin();
+              iter != hint_shared.end(); ++iter)
+         {
+             if (! computed_shared.count(*iter))
+             {
+                 // pdlin puts thumbnails here even though it shouldn't
+                 warnings.push_back(
+                     "page " + QUtil::int_to_string(pageno) +
+                     ": shared object " + QUtil::int_to_string(*iter) +
+                     ": in hint table but not computed list");
+             }
+         }
+
+         for (std::set<int>::iterator iter = computed_shared.begin();
+              iter != computed_shared.end(); ++iter)
+         {
+             if (! hint_shared.count(*iter))
+             {
+                 // Acrobat does not put some things including at least
+                 // built-in fonts and procsets here, at least in some
+                 // cases.
+                 warnings.push_back(
+                     "page " + QUtil::int_to_string(pageno) +
+                     ": shared object " + QUtil::int_to_string(*iter) +
+                     ": in computed list but not hint table");
+             }
+         }
+     }
+ }
+
+ void
+ QPDF::checkHSharedObject(std::list<std::string>& errors,
+ std::list<std::string>& warnings,
+ std::vector<QPDFObjectHandle> const& pages,
+ std::map<int, int>& idx_to_obj)
+ {
+ // Check the shared object hint table (this->shared_object_hints)
+ // against the file, appending any problems to errors. As a side
+ // effect, fill idx_to_obj with the first object number of each
+ // shared object group, keyed by group index. The warnings
+ // parameter is not used in this function.
+
+ // Implementation note 125 says shared object groups always
+ // contain only one object. Implementation note 128 says that
+ // Acrobat always sets nbits_nobjects to zero. Implementation note 130
+ // says that Acrobat does not support more than one shared object
+ // per group. These are all consistent.
+
+ // Implementation note 129 states that MD5 signatures are not
+ // implemented in Acrobat, so signature_present must always be
+ // zero.
+
+ // Implementation note 131 states that first_shared_obj and
+ // first_shared_offset have meaningless values for single-page
+ // files.
+
+ // Empirically, Acrobat and pdlin generate incorrect values for
+ // these whenever there are no shared objects not referenced by
+ // the first page (i.e., nshared_total == nshared_first_page).
+
+ HSharedObject& so = this->shared_object_hints;
+ if (so.nshared_total < so.nshared_first_page)
+ {
+ errors.push_back("shared object hint table: ntotal < nfirst_page");
+ }
+ else
+ {
+ // The first nshared_first_page objects are consecutive
+ // objects starting with the first page object. The rest are
+ // consecutive starting from the first_shared_obj object.
+ int cur_object = pages[0].getObjectID();
+ for (int i = 0; i < so.nshared_total; ++i)
+ {
+ // At the boundary between first-page groups and the remaining
+ // groups, check first_shared_obj and first_shared_offset and
+ // restart numbering from first_shared_obj.
+ if (i == so.nshared_first_page)
+ {
+ QTC::TC("qpdf", "QPDF lin check shared past first page");
+ if (this->part8.empty())
+ {
+ errors.push_back(
+ "part 8 is empty but nshared_total > "
+ "nshared_first_page");
+ }
+ else
+ {
+ int obj = this->part8[0].getObjectID();
+ if (obj != so.first_shared_obj)
+ {
+ errors.push_back(
+ "first shared object number mismatch: "
+ "hint table = " +
+ QUtil::int_to_string(so.first_shared_obj) +
+ "; computed = " +
+ QUtil::int_to_string(obj));
+ }
+ }
+
+ // The hint table's value is used from here on even if it
+ // disagreed with the computed one above.
+ cur_object = so.first_shared_obj;
+
+ ObjGen og(cur_object, 0);
+ assert(this->xref_table.count(og) > 0);
+ int offset = getLinearizationOffset(og);
+ int h_offset = adjusted_offset(so.first_shared_offset);
+ if (offset != h_offset)
+ {
+ errors.push_back(
+ "first shared object offset mismatch: hint table = " +
+ QUtil::int_to_string(h_offset) + "; computed = " +
+ QUtil::int_to_string(offset));
+ }
+ }
+
+ // Record this group's first object, then compare the group's
+ // length from the hint table with the length computed from the
+ // objects it covers.
+ idx_to_obj[i] = cur_object;
+ HSharedObjectEntry& se = so.entries[i];
+ int nobjects = se.nobjects_minus_one + 1;
+ int length = lengthNextN(cur_object, nobjects, errors);
+ int h_length = so.min_group_length + se.delta_group_length;
+ if (length != h_length)
+ {
+ errors.push_back(
+ "shared object " + QUtil::int_to_string(i) +
+ " length mismatch: hint table = " +
+ QUtil::int_to_string(h_length) + "; computed = " +
+ QUtil::int_to_string(length));
+ }
+ cur_object += nobjects;
+ }
+ }
+ }
+
+void
+QPDF::checkHOutlines(std::list<std::string>& warnings)
+{
+    // Compare the outline hint table stored in outline_hints against
+    // the values held in c_outline_data, appending one message to
+    // warnings for each discrepancy found.
+    //
+    // Empirically, Acrobat generates the correct value for the object
+    // number but incorrectly stores the next object number's offset
+    // as the offset, at least when outlines appear in part 6.  It
+    // also generates an incorrect value for length (specifically, the
+    // length that would cover the correct number of objects from the
+    // wrong starting place).  pdlin appears to generate correct
+    // values in those cases.
+
+    HGeneric& computed = this->c_outline_data;
+    HGeneric& hinted = this->outline_hints;
+
+    if (computed.nobjects != hinted.nobjects)
+    {
+        warnings.push_back("incorrect object count in outline hint table");
+        return;
+    }
+    if (computed.nobjects == 0)
+    {
+        // Both sides agree that there are no outline objects, so
+        // there is nothing further to compare.
+        return;
+    }
+    if (computed.first_object != hinted.first_object)
+    {
+        warnings.push_back("incorrect first object number in outline "
+                           "hints table.");
+        return;
+    }
+
+    // Counts and first object agree, so check offset and length.
+    // Acrobat gets these wrong.
+    QPDFObjectHandle outlines = getRoot().getKey("/Outlines");
+    ObjGen outlines_og(outlines.getObjectID(), outlines.getGeneration());
+    assert(this->xref_table.count(outlines_og) > 0);
+    int const computed_offset = getLinearizationOffset(outlines_og);
+    ObjUser outlines_user(ObjUser::ou_root_key, "/Outlines");
+    int const computed_length = maxEnd(outlines_user) - computed_offset;
+
+    int const hinted_offset = adjusted_offset(hinted.first_object_offset);
+    if (computed_offset != hinted_offset)
+    {
+        warnings.push_back(
+            "incorrect offset in outlines table: hint table = " +
+            QUtil::int_to_string(hinted_offset) +
+            "; computed = " + QUtil::int_to_string(computed_offset));
+    }
+
+    int const hinted_length = hinted.group_length;
+    if (computed_length != hinted_length)
+    {
+        warnings.push_back(
+            "incorrect length in outlines table: hint table = " +
+            QUtil::int_to_string(hinted_length) +
+            "; computed = " + QUtil::int_to_string(computed_length));
+    }
+}
+
+void
+QPDF::showLinearizationData()
+{
+    // Read, check, and then dump the linearization data.  If any of
+    // those steps throws a QPDFExc, print its message to standard
+    // output instead of propagating it.
+    try
+    {
+        readLinearizationData();
+        checkLinearizationInternal();
+        dumpLinearizationDataInternal();
+    }
+    catch (QPDFExc& exc)
+    {
+        std::cout << exc.what() << std::endl;
+    }
+}
+
+void
+QPDF::dumpLinearizationDataInternal()
+{
+    // Write the linearization parameters followed by each hint table
+    // to standard output.  The outlines hint table is printed only
+    // when it describes at least one object.
+
+    std::cout << this->file.getName() << ": linearization data:" << std::endl;
+    std::cout << std::endl;
+
+    // Linearization parameters, one per line.
+    std::cout << "file_size: " << this->linp.file_size << std::endl;
+    std::cout << "first_page_object: " << this->linp.first_page_object
+              << std::endl;
+    std::cout << "first_page_end: " << this->linp.first_page_end << std::endl;
+    std::cout << "npages: " << this->linp.npages << std::endl;
+    std::cout << "xref_zero_offset: " << this->linp.xref_zero_offset
+              << std::endl;
+    std::cout << "first_page: " << this->linp.first_page << std::endl;
+    std::cout << "H_offset: " << this->linp.H_offset << std::endl;
+    std::cout << "H_length: " << this->linp.H_length << std::endl;
+    std::cout << std::endl;
+
+    std::cout << "Page Offsets Hint Table" << std::endl;
+    std::cout << std::endl;
+    dumpHPageOffset();
+
+    std::cout << std::endl;
+    std::cout << "Shared Objects Hint Table" << std::endl;
+    std::cout << std::endl;
+    dumpHSharedObject();
+
+    if (this->outline_hints.nobjects <= 0)
+    {
+        return;
+    }
+    std::cout << std::endl;
+    std::cout << "Outlines Hint Table" << std::endl;
+    std::cout << std::endl;
+    dumpHGeneric(this->outline_hints);
+}
+
+int
+QPDF::adjusted_offset(int offset)
+{
+    // Hint table location values disregard the hint table itself, so
+    // any offset at or beyond H_offset has to be shifted forward by
+    // H_length.  Offsets before H_offset are returned unchanged.
+    int const shift =
+        (offset >= this->linp.H_offset) ? this->linp.H_length : 0;
+    return offset + shift;
+}
+
+
+void
+QPDF::dumpHPageOffset()
+{
+    // Print the page offset hint table to standard output: first the
+    // header fields, then one group of lines per page for each of
+    // the linp.npages pages.
+    HPageOffset& hints = this->page_offset_hints;
+
+    std::cout << "min_nobjects: " << hints.min_nobjects << std::endl;
+    // The stored offset disregards the hint table, so adjust it.
+    std::cout << "first_page_offset: "
+              << adjusted_offset(hints.first_page_offset) << std::endl;
+    std::cout << "nbits_delta_nobjects: " << hints.nbits_delta_nobjects
+              << std::endl;
+    std::cout << "min_page_length: " << hints.min_page_length << std::endl;
+    std::cout << "nbits_delta_page_length: "
+              << hints.nbits_delta_page_length << std::endl;
+    std::cout << "min_content_offset: " << hints.min_content_offset
+              << std::endl;
+    std::cout << "nbits_delta_content_offset: "
+              << hints.nbits_delta_content_offset << std::endl;
+    std::cout << "min_content_length: " << hints.min_content_length
+              << std::endl;
+    std::cout << "nbits_delta_content_length: "
+              << hints.nbits_delta_content_length << std::endl;
+    std::cout << "nbits_nshared_objects: " << hints.nbits_nshared_objects
+              << std::endl;
+    std::cout << "nbits_shared_identifier: "
+              << hints.nbits_shared_identifier << std::endl;
+    std::cout << "nbits_shared_numerator: " << hints.nbits_shared_numerator
+              << std::endl;
+    std::cout << "shared_denominator: " << hints.shared_denominator
+              << std::endl;
+
+    for (int pageno = 0; pageno < this->linp.npages; ++pageno)
+    {
+        HPageOffsetEntry& page = hints.entries[pageno];
+
+        // Per-page values are stored as deltas from the table-wide
+        // minimum, so add the minimum back before printing.
+        std::cout << "Page " << pageno << ":" << std::endl;
+        std::cout << " nobjects: "
+                  << page.delta_nobjects + hints.min_nobjects << std::endl;
+        std::cout << " length: "
+                  << page.delta_page_length + hints.min_page_length
+                  << std::endl;
+        // content offset is relative to page, not file
+        std::cout << " content_offset: "
+                  << page.delta_content_offset + hints.min_content_offset
+                  << std::endl;
+        std::cout << " content_length: "
+                  << page.delta_content_length + hints.min_content_length
+                  << std::endl;
+        std::cout << " nshared_objects: " << page.nshared_objects
+                  << std::endl;
+
+        for (int shared = 0; shared < page.nshared_objects; ++shared)
+        {
+            std::cout << " identifier " << shared << ": "
+                      << page.shared_identifiers[shared] << std::endl;
+            std::cout << " numerator " << shared << ": "
+                      << page.shared_numerators[shared] << std::endl;
+        }
+    }
+}
+
+void
+QPDF::dumpHSharedObject()
+{
+    // Print the shared object hint table to standard output: the
+    // header fields followed by one group of lines per entry.
+    HSharedObject& hints = this->shared_object_hints;
+
+    std::cout << "first_shared_obj: " << hints.first_shared_obj << std::endl;
+    // The stored offset disregards the hint table, so adjust it.
+    std::cout << "first_shared_offset: "
+              << adjusted_offset(hints.first_shared_offset) << std::endl;
+    std::cout << "nshared_first_page: " << hints.nshared_first_page
+              << std::endl;
+    std::cout << "nshared_total: " << hints.nshared_total << std::endl;
+    std::cout << "nbits_nobjects: " << hints.nbits_nobjects << std::endl;
+    std::cout << "min_group_length: " << hints.min_group_length << std::endl;
+    std::cout << "nbits_delta_group_length: "
+              << hints.nbits_delta_group_length << std::endl;
+
+    for (int idx = 0; idx < hints.nshared_total; ++idx)
+    {
+        HSharedObjectEntry& entry = hints.entries[idx];
+        std::cout << "Shared Object " << idx << ":" << std::endl;
+        std::cout << " group length: "
+                  << entry.delta_group_length + hints.min_group_length
+                  << std::endl;
+
+        // The PDF spec says that signature_present and
+        // nobjects_minus_one are always 0, so mention them only when
+        // they hold a non-zero value.
+        if (entry.signature_present)
+        {
+            std::cout << " signature present" << std::endl;
+        }
+        if (entry.nobjects_minus_one != 0)
+        {
+            std::cout << " nobjects: "
+                      << entry.nobjects_minus_one + 1 << std::endl;
+        }
+    }
+}
+
+void
+QPDF::dumpHGeneric(HGeneric& t)
+{
+    // Print the four fields of a generic hint table to standard
+    // output, one per line.  The offset is passed through
+    // adjusted_offset() before printing.
+    std::cout << "first_object: " << t.first_object << std::endl;
+    std::cout << "first_object_offset: "
+              << adjusted_offset(t.first_object_offset) << std::endl;
+    std::cout << "nobjects: " << t.nobjects << std::endl;
+    std::cout << "group_length: " << t.group_length << std::endl;
+}
+
+QPDFObjectHandle
+QPDF::objGenToIndirect(ObjGen const& og)
+{
+    // Look up the object handle for the given object/generation pair
+    // by way of getObjectByID().
+    QPDFObjectHandle handle = getObjectByID(og.obj, og.gen);
+    return handle;
+}
+
+void
+QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
+{
+ // Calculate the ordering of objects, divide them into the
+ // appropriate parts (part4, part6, part7, part8, part9), and compute
+ // values for the linearization parameter dictionary and hint tables
+ // (c_linp, c_page_offset_data, c_shared_object_data). The file must be
+ // optimized (via calling optimize()) prior to calling this function.
+ // Actual offsets and lengths are not computed here, but anything related to object ordering is.
+
+ if (this->object_to_obj_users.empty())
+ {
+ // Note that we can't call optimize here because we don't know
+ // whether it should be called with or without allow changes.
+ throw QEXC::Internal("QPDF::calculateLinearizationData "
+ "called before optimize()");
+ }
+
+ // Separate objects into the categories sufficient for us to
+ // determine which part of the linearized file should contain the
+ // object. This categorization is useful for other purposes as
+ // well. Part numbers refer to version 1.4 of the PDF spec.
+
+ // Parts 1, 3, 5, 10, and 11 don't contain any objects from the
+ // original file (except the trailer dictionary in part 11).
+
+ // Part 4 is the document catalog (root) and the following root
+ // keys: /ViewerPreferences, /PageMode, /Threads, /OpenAction,
+ // /AcroForm, as well as the trailer's /Encrypt. Note that Thread
+ // information dictionaries are supposed to appear in part 9, but we
+ // are disregarding that recommendation for now.
+
+ // Part 6 is the first page section. It includes all remaining
+ // objects referenced by the first page including shared objects
+ // but not including thumbnails. Additionally, if /PageMode is
+ // /UseOutlines, then information from /Outlines also appears here.
+
+ // Part 7 contains remaining objects private to pages other than
+ // the first page.
+
+ // Part 8 contains all remaining shared objects except those that
+ // are shared only within thumbnails.
+
+ // Part 9 contains all remaining objects.
+
+ // We sort objects into the following categories:
+
+ // * open_document: part 4
+
+ // * first_page_private: part 6
+
+ // * first_page_shared: part 6
+
+ // * other_page_private: part 7
+
+ // * other_page_shared: part 8
+
+ // * thumbnail_private: part 9
+
+ // * thumbnail_shared: part 9
+
+ // * other: part 9
+
+ // * outlines: part 6 or 9
+
+ QPDFObjectHandle root = getRoot();
+ bool outlines_in_first_page = false;
+ QPDFObjectHandle pagemode = root.getKey("/PageMode");
+ QTC::TC("qpdf", "QPDF categorize pagemode present",
+ pagemode.isName() ? 1 : 0);
+ if (pagemode.isName())
+ {
+ if (pagemode.getName() == "/UseOutlines")
+ {
+ if (root.hasKey("/Outlines"))
+ {
+ outlines_in_first_page = true;
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF UseOutlines but no Outlines");
+ }
+ }
+ QTC::TC("qpdf", "QPDF categorize pagemode outlines",
+ outlines_in_first_page ? 1 : 0);
+ }
+
+ std::set<std::string> open_document_keys;
+ open_document_keys.insert("/ViewerPreferences");
+ open_document_keys.insert("/PageMode");
+ open_document_keys.insert("/Threads");
+ open_document_keys.insert("/OpenAction");
+ open_document_keys.insert("/AcroForm");
+
+ std::set<ObjGen> lc_open_document;
+ std::set<ObjGen> lc_first_page_private;
+ std::set<ObjGen> lc_first_page_shared;
+ std::set<ObjGen> lc_other_page_private;
+ std::set<ObjGen> lc_other_page_shared;
+ std::set<ObjGen> lc_thumbnail_private;
+ std::set<ObjGen> lc_thumbnail_shared;
+ std::set<ObjGen> lc_other;
+ std::set<ObjGen> lc_outlines;
+ std::set<ObjGen> lc_root;
+
+ for (std::map<ObjGen, std::set<ObjUser> >::iterator oiter =
+ this->object_to_obj_users.begin();
+ oiter != this->object_to_obj_users.end(); ++oiter)
+ {
+ ObjGen const& og = (*oiter).first;
+
+ std::set<ObjUser>& ous = (*oiter).second;
+
+ bool in_open_document = false;
+ bool in_first_page = false;
+ int other_pages = 0;
+ int thumbs = 0;
+ int others = 0;
+ bool in_outlines = false;
+ bool is_root = false;
+
+ for (std::set<ObjUser>::iterator uiter = ous.begin();
+ uiter != ous.end(); ++uiter)
+ {
+ ObjUser const& ou = *uiter;
+ switch (ou.ou_type)
+ {
+ case ObjUser::ou_trailer_key:
+ if (ou.key == "/Encrypt")
+ {
+ in_open_document = true;
+ }
+ else
+ {
+ ++others;
+ }
+ break;
+
+ case ObjUser::ou_thumb:
+ ++thumbs;
+ break;
+
+ case ObjUser::ou_root_key:
+ if (open_document_keys.count(ou.key) > 0)
+ {
+ in_open_document = true;
+ }
+ else if (ou.key == "/Outlines")
+ {
+ in_outlines = true;
+ }
+ else
+ {
+ ++others;
+ }
+ break;
+
+ case ObjUser::ou_page:
+ if (ou.pageno == 0)
+ {
+ in_first_page = true;
+ }
+ else
+ {
+ ++other_pages;
+ }
+ break;
+
+ case ObjUser::ou_root:
+ is_root = true;
+ break;
+
+ case ObjUser::ou_bad:
+ throw QEXC::Internal("QPDF::calculateLinearizationData: "
+ "invalid user type");
+ break;
+ }
+ }
+ // Assign the object to exactly one category; the first matching test below wins.
+ if (is_root)
+ {
+ lc_root.insert(og);
+ }
+ else if (in_outlines)
+ {
+ lc_outlines.insert(og);
+ }
+ else if (in_open_document)
+ {
+ lc_open_document.insert(og);
+ }
+ else if ((in_first_page) &&
+ (others == 0) && (other_pages == 0) && (thumbs == 0))
+ {
+ lc_first_page_private.insert(og);
+ }
+ else if (in_first_page)
+ {
+ lc_first_page_shared.insert(og);
+ }
+ else if ((other_pages == 1) && (others == 0) && (thumbs == 0))
+ {
+ lc_other_page_private.insert(og);
+ }
+ else if (other_pages > 1)
+ {
+ lc_other_page_shared.insert(og);
+ }
+ else if ((thumbs == 1) && (others == 0))
+ {
+ lc_thumbnail_private.insert(og);
+ }
+ else if (thumbs > 1)
+ {
+ lc_thumbnail_shared.insert(og);
+ }
+ else
+ {
+ lc_other.insert(og);
+ }
+ }
+
+ // Generate ordering for objects in the output file. Sometimes we
+ // just dump right from a set into a vector. Rather than
+ // optimizing this by going straight into the vector, we'll leave
+ // these phases separate for now. That way, this section can be
+ // concerned only with ordering, and the above section can be
+ // concerned only with categorization. Note that sets of ObjGens
+ // are sorted by ObjGen. In a linearized file, objects appear in
+ // sequence with the possible exception of hints tables which we
+ // won't see here anyway. That means that running
+ // calculateLinearizationData() on a linearized file should give
+ // results identical to the original file ordering.
+
+ // We seem to traverse the page tree a lot in this code, but we
+ // can address this for a future code optimization if necessary.
+ // Premature optimization is the root of all evil.
+ std::vector<QPDFObjectHandle> pages;
+ { // local scope
+ // Map all page objects to the containing object stream. This
+ // should be a no-op in a properly linearized file.
+ std::vector<QPDFObjectHandle> t = getAllPages();
+ for (std::vector<QPDFObjectHandle>::iterator iter = t.begin();
+ iter != t.end(); ++iter)
+ {
+ pages.push_back(getUncompressedObject(*iter, object_stream_data));
+ }
+ }
+ unsigned int npages = pages.size();
+
+ // We will be initializing some values of the computed hint
+ // tables. Specifically, we can initialize any items that deal
+ // with object numbers or counts but not any items that deal with
+ // lengths or offsets. The code that writes linearized files will
+ // have to fill in these values during the first pass. The
+ // validation code can compute them relatively easily given the
+ // rest of the information.
+
+ this->c_linp.npages = npages;
+ this->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(npages);
+
+ // Part 4: open document objects. The root object goes first; beyond that we don't care about the order.
+
+ assert(lc_root.size() == 1);
+ this->part4.push_back(objGenToIndirect(*(lc_root.begin())));
+ for (std::set<ObjGen>::iterator iter = lc_open_document.begin();
+ iter != lc_open_document.end(); ++iter)
+ {
+ this->part4.push_back(objGenToIndirect(*iter));
+ }
+
+ // Part 6: first page objects. Note: implementation note 124
+ // states that Acrobat always treats page 0 as the first page for
+ // linearization regardless of /OpenAction. pdlin doesn't provide
+ // any option to set this and also disregards /OpenAction. We
+ // will do the same.
+
+ // First, place the actual first page object itself.
+ ObjGen first_page_og(pages[0].getObjectID(), pages[0].getGeneration());
+ if (! lc_first_page_private.count(first_page_og))
+ {
+ throw QEXC::Internal("QPDF::calculateLinearizationData: first page "
+ "object not in lc_first_page_private");
+ }
+ lc_first_page_private.erase(first_page_og);
+ this->c_linp.first_page_object = pages[0].getObjectID();
+ this->part6.push_back(pages[0]);
+
+ // The PDF spec "recommends" an order for the rest of the objects,
+ // but we are going to disregard it except to the extent that it
+ // groups private and shared objects contiguously for the sake of
+ // hint tables.
+
+ for (std::set<ObjGen>::iterator iter = lc_first_page_private.begin();
+ iter != lc_first_page_private.end(); ++iter)
+ {
+ this->part6.push_back(objGenToIndirect(*iter));
+ }
+
+ for (std::set<ObjGen>::iterator iter = lc_first_page_shared.begin();
+ iter != lc_first_page_shared.end(); ++iter)
+ {
+ this->part6.push_back(objGenToIndirect(*iter));
+ }
+
+ // Place the outline dictionary if it goes in the first page section.
+ if (outlines_in_first_page)
+ {
+ pushOutlinesToPart(this->part6, lc_outlines, object_stream_data);
+ }
+
+ // Fill in page offset hint table information for the first page.
+ // The PDF spec says that nshared_objects should be zero for the
+ // first page. pdlin does not appear to obey this, but it fills
+ // in garbage values for all the shared object identifiers on the
+ // first page.
+
+ this->c_page_offset_data.entries[0].nobjects = this->part6.size();
+
+ // Part 7: other pages' private objects
+
+ // For each page in order:
+ for (unsigned int i = 1; i < npages; ++i)
+ {
+ // Place this page's page object
+
+ ObjGen page_og(pages[i].getObjectID(), pages[i].getGeneration());
+ if (! lc_other_page_private.count(page_og))
+ {
+ throw QEXC::Internal(
+ "QPDF::calculateLinearizationData: page object for page " +
+ QUtil::int_to_string(i) + " not in lc_other_page_private");
+ }
+ lc_other_page_private.erase(page_og);
+ this->part7.push_back(pages[i]);
+
+ // Place all non-shared objects referenced by this page,
+ // updating the page object count for the hint table.
+
+ this->c_page_offset_data.entries[i].nobjects = 1;
+
+ ObjUser ou(ObjUser::ou_page, i);
+ assert(this->obj_user_to_objects.count(ou) > 0);
+ std::set<ObjGen> ogs = this->obj_user_to_objects[ou];
+ for (std::set<ObjGen>::iterator iter = ogs.begin();
+ iter != ogs.end(); ++iter)
+ {
+ ObjGen const& og = (*iter);
+ if (lc_other_page_private.count(og))
+ {
+ lc_other_page_private.erase(og);
+ this->part7.push_back(objGenToIndirect(og));
+ ++this->c_page_offset_data.entries[i].nobjects;
+ }
+ }
+ }
+ // That should have covered all part7 objects.
+ if (! lc_other_page_private.empty())
+ {
+ throw QEXC::Internal(
+ "QPDF::calculateLinearizationData: lc_other_page_private is "
+ "not empty after generation of part7");
+ }
+
+ // Part 8: other pages' shared objects
+
+ // Order is unimportant.
+ for (std::set<ObjGen>::iterator iter = lc_other_page_shared.begin();
+ iter != lc_other_page_shared.end(); ++iter)
+ {
+ this->part8.push_back(objGenToIndirect(*iter));
+ }
+
+ // Part 9: other objects
+
+ // The PDF specification makes recommendations on ordering here.
+ // We follow them only to a limited extent. Specifically, we put
+ // the pages tree first, then private thumbnail objects in page
+ // order, then shared thumbnail objects, and then outlines (unless
+ // in part 6). After that, we throw all remaining objects in
+ // arbitrary order.
+
+ // Place the pages tree.
+ std::set<ObjGen> pages_ogs =
+ this->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")];
+ assert(! pages_ogs.empty());
+ for (std::set<ObjGen>::iterator iter = pages_ogs.begin();
+ iter != pages_ogs.end(); ++iter)
+ {
+ ObjGen const& og = *iter;
+ if (lc_other.count(og))
+ {
+ lc_other.erase(og);
+ this->part9.push_back(objGenToIndirect(og));
+ }
+ }
+
+ // Place private thumbnail images in page order. Slightly more
+ // information would be required if we were going to bother with
+ // thumbnail hint tables.
+ for (unsigned int i = 0; i < npages; ++i)
+ {
+ QPDFObjectHandle thumb = pages[i].getKey("/Thumb");
+ thumb = getUncompressedObject(thumb, object_stream_data);
+ if (! thumb.isNull())
+ {
+ // Output the thumbnail itself
+ ObjGen thumb_og(thumb.getObjectID(), thumb.getGeneration());
+ if (lc_thumbnail_private.count(thumb_og))
+ {
+ lc_thumbnail_private.erase(thumb_og);
+ this->part9.push_back(thumb);
+ }
+ else
+ {
+ // No internal error this time...there's nothing to
+ // stop this object from having been referred to
+ // somewhere else outside of a page's /Thumb, and if
+ // it had been, there's nothing to prevent it from
+ // having been in some set other than
+ // lc_thumbnail_private.
+ }
+ std::set<ObjGen>& ogs =
+ this->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, i)];
+ for (std::set<ObjGen>::iterator iter = ogs.begin();
+ iter != ogs.end(); ++iter)
+ {
+ ObjGen const& og = *iter;
+ if (lc_thumbnail_private.count(og))
+ {
+ lc_thumbnail_private.erase(og);
+ this->part9.push_back(objGenToIndirect(og));
+ }
+ }
+ }
+ }
+ if (! lc_thumbnail_private.empty())
+ {
+ throw QEXC::Internal(
+ "QPDF::calculateLinearizationData: lc_thumbnail_private "
+ "not empty after placing thumbnails");
+ }
+
+ // Place shared thumbnail objects
+ for (std::set<ObjGen>::iterator iter = lc_thumbnail_shared.begin();
+ iter != lc_thumbnail_shared.end(); ++iter)
+ {
+ this->part9.push_back(objGenToIndirect(*iter));
+ }
+
+ // Place outlines unless in first page
+ if (! outlines_in_first_page)
+ {
+ pushOutlinesToPart(this->part9, lc_outlines, object_stream_data);
+ }
+
+ // Place all remaining objects
+ for (std::set<ObjGen>::iterator iter = lc_other.begin();
+ iter != lc_other.end(); ++iter)
+ {
+ this->part9.push_back(objGenToIndirect(*iter));
+ }
+
+ // Make sure we got everything exactly once.
+
+ unsigned int num_placed = this->part4.size() + this->part6.size() +
+ this->part7.size() + this->part8.size() + this->part9.size();
+ unsigned int num_wanted = this->object_to_obj_users.size();
+ if (num_placed != num_wanted)
+ {
+ throw QEXC::Internal("QPDF::calculateLinearizationData: wrong "
+ "number of objects placed (num_placed = " +
+ QUtil::int_to_string(num_placed) +
+ "; number of objects: " +
+ QUtil::int_to_string(num_wanted));
+ }
+
+ // Calculate shared object hint table information including
+ // references to shared objects from page offset hint data.
+
+ // The shared object hint table consists of all part 6 (whether
+ // shared or not) in order followed by all part 8 objects in
+ // order. Add the objects to shared object data keeping a map of
+ // object number to index. Then populate the shared object
+ // information for the pages.
+
+ // Note that two objects never have the same object number, so we
+ // can map from object number only without regards to generation.
+ std::map<int, int> obj_to_index;
+
+ this->c_shared_object_data.nshared_first_page = this->part6.size();
+ this->c_shared_object_data.nshared_total =
+ this->c_shared_object_data.nshared_first_page +
+ this->part8.size();
+
+ std::vector<CHSharedObjectEntry>& shared =
+ this->c_shared_object_data.entries;
+ for (std::vector<QPDFObjectHandle>::iterator iter = this->part6.begin();
+ iter != this->part6.end(); ++iter)
+ {
+ QPDFObjectHandle& oh = *iter;
+ int obj = oh.getObjectID();
+ obj_to_index[obj] = shared.size();
+ shared.push_back(CHSharedObjectEntry(obj));
+ }
+ QTC::TC("qpdf", "QPDF lin part 8 empty", this->part8.empty() ? 1 : 0);
+ if (! this->part8.empty())
+ {
+ this->c_shared_object_data.first_shared_obj =
+ this->part8[0].getObjectID();
+ for (std::vector<QPDFObjectHandle>::iterator iter =
+ this->part8.begin();
+ iter != this->part8.end(); ++iter)
+ {
+ QPDFObjectHandle& oh = *iter;
+ int obj = oh.getObjectID();
+ obj_to_index[obj] = shared.size();
+ shared.push_back(CHSharedObjectEntry(obj));
+ }
+ }
+ assert(this->c_shared_object_data.nshared_total ==
+ (int) this->c_shared_object_data.entries.size());
+
+ // Now compute the list of shared objects for each page after the
+ // first page. An object is listed for a page when it has more than one user and is in part 6 or part 8.
+
+ for (unsigned int i = 1; i < npages; ++i)
+ {
+ CHPageOffsetEntry& pe = this->c_page_offset_data.entries[i];
+ ObjUser ou(ObjUser::ou_page, i);
+ assert(this->obj_user_to_objects.count(ou) > 0);
+ std::set<ObjGen> const& ogs = this->obj_user_to_objects[ou];
+ for (std::set<ObjGen>::const_iterator iter = ogs.begin();
+ iter != ogs.end(); ++iter)
+ {
+ ObjGen const& og = *iter;
+ if ((this->object_to_obj_users[og].size() > 1) &&
+ (obj_to_index.count(og.obj) > 0))
+ {
+ int idx = obj_to_index[og.obj];
+ ++pe.nshared_objects;
+ pe.shared_identifiers.push_back(idx);
+ }
+ }
+ }
+}
+
+void
+QPDF::pushOutlinesToPart(
+    std::vector<QPDFObjectHandle>& part,
+    std::set<ObjGen>& lc_outlines,
+    std::map<int, int> const& object_stream_data)
+{
+    // Append the /Outlines object followed by every other object in
+    // lc_outlines to part, recording the first object number and the
+    // object count in c_outline_data.  Does nothing when the root
+    // has no /Outlines key.  The outlines object itself is removed
+    // from lc_outlines; the remaining members are left in place.
+
+    QPDFObjectHandle outlines = getRoot().getKey("/Outlines");
+    if (outlines.isNull())
+    {
+        return;
+    }
+    outlines = getUncompressedObject(outlines, object_stream_data);
+    ObjGen outlines_og(outlines.getObjectID(), outlines.getGeneration());
+
+    // Coverage tag: 0 when filling part6, 1 when filling part9.
+    int part_tag = 9999; // can't happen
+    if (&part == &this->part6)
+    {
+        part_tag = 0;
+    }
+    else if (&part == &this->part9)
+    {
+        part_tag = 1;
+    }
+    QTC::TC("qpdf", "QPDF lin outlines in part", part_tag);
+
+    // The outlines object goes first and counts as object number one.
+    this->c_outline_data.first_object = outlines_og.obj;
+    this->c_outline_data.nobjects = 1;
+    lc_outlines.erase(outlines_og);
+    part.push_back(outlines);
+
+    std::set<ObjGen>::iterator cur = lc_outlines.begin();
+    while (cur != lc_outlines.end())
+    {
+        part.push_back(objGenToIndirect(*cur));
+        ++this->c_outline_data.nobjects;
+        ++cur;
+    }
+}
+
+void
+QPDF::getLinearizedParts(
+    std::map<int, int> const& object_stream_data,
+    std::vector<QPDFObjectHandle>& part4,
+    std::vector<QPDFObjectHandle>& part6,
+    std::vector<QPDFObjectHandle>& part7,
+    std::vector<QPDFObjectHandle>& part8,
+    std::vector<QPDFObjectHandle>& part9)
+{
+    // Run calculateLinearizationData() and then copy each of the
+    // member part vectors into the caller's vector of the same name.
+    // The parameters shadow the members, hence the explicit this->.
+    calculateLinearizationData(object_stream_data);
+
+    part4 = this->part4;
+    part6 = this->part6;
+    part7 = this->part7;
+    part8 = this->part8;
+    part9 = this->part9;
+}
+
+// Return the number of bits needed to represent val: 0 for 0,
+// otherwise the position of the highest set bit counting from 1.
+//
+// The value is examined through its unsigned representation.  The
+// earlier recursive form shifted the signed value, which never
+// reaches zero for a negative argument and so recursed without
+// bound; with the unsigned form a negative argument simply yields
+// the full width of unsigned int.  Results for non-negative
+// arguments are unchanged.
+static inline int nbits(int val)
+{
+    unsigned int remaining = static_cast<unsigned int>(val);
+    int count = 0;
+    while (remaining != 0)
+    {
+        ++count;
+        remaining >>= 1;
+    }
+    return count;
+}
+
+int
+QPDF::outputLengthNextN(
+    int in_object, int n,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Add up the lengths of n consecutively numbered output objects.
+    // The first of them is whatever output object number
+    // obj_renumber gives for in_object, an object number from the
+    // input file.  Every object involved must be present in the
+    // corresponding map; this is checked only by assertion.
+
+    std::map<int, int>::const_iterator renumbered =
+        obj_renumber.find(in_object);
+    assert(renumbered != obj_renumber.end());
+    int const first_out = renumbered->second;
+
+    int total = 0;
+    for (int k = 0; k < n; ++k)
+    {
+        std::map<int, size_t>::const_iterator entry =
+            lengths.find(first_out + k);
+        assert(entry != lengths.end());
+        total += entry->second;
+    }
+    return total;
+}
+
+void
+QPDF::calculateHPageOffset(
+    std::map<int, QPDFXRefEntry> const& xref,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Fill in page_offset_hints (the page offset hint table) from
+    // the per-page counts in c_page_offset_data together with the
+    // output offsets (xref), output object lengths (lengths), and
+    // the input-to-output object number map (obj_renumber).
+    //
+    // Some fields are purposely left at their initial zero values.
+
+    std::vector<QPDFObjectHandle> const& pages = getAllPages();
+    unsigned int const page_count = pages.size();
+    CHPageOffset& computed = this->c_page_offset_data;
+    std::vector<CHPageOffsetEntry>& computed_entries = computed.entries;
+
+    // Seed the running minimum and maximum values from page 0.
+    int fewest_objects = computed_entries[0].nobjects;
+    int most_objects = fewest_objects;
+    int shortest_page = outputLengthNextN(
+        pages[0].getObjectID(), fewest_objects, lengths, obj_renumber);
+    int longest_page = shortest_page;
+    int most_shared = computed_entries[0].nshared_objects;
+
+    HPageOffset& hints = this->page_offset_hints;
+    std::vector<HPageOffsetEntry>& hint_entries = hints.entries;
+    hint_entries = std::vector<HPageOffsetEntry>(page_count);
+
+    // First pass: store absolute values in the delta fields while
+    // tracking the extremes.  Page 0 is deliberately processed again
+    // so that its entry is assigned by the same code as the others.
+    for (unsigned int pg = 0; pg < page_count; ++pg)
+    {
+        int const object_count = computed_entries[pg].nobjects;
+        int const page_length = outputLengthNextN(
+            pages[pg].getObjectID(), object_count, lengths, obj_renumber);
+        int const shared_count = computed_entries[pg].nshared_objects;
+
+        if (object_count < fewest_objects)
+        {
+            fewest_objects = object_count;
+        }
+        if (object_count > most_objects)
+        {
+            most_objects = object_count;
+        }
+        if (page_length < shortest_page)
+        {
+            shortest_page = page_length;
+        }
+        if (page_length > longest_page)
+        {
+            longest_page = page_length;
+        }
+        if (shared_count > most_shared)
+        {
+            most_shared = shared_count;
+        }
+
+        HPageOffsetEntry& out = hint_entries[pg];
+        out.delta_nobjects = object_count;
+        out.delta_page_length = page_length;
+        out.nshared_objects = shared_count;
+    }
+
+    // Header fields.
+    hints.min_nobjects = fewest_objects;
+    int const in_page0_id = pages[0].getObjectID();
+    int const out_page0_id = (*(obj_renumber.find(in_page0_id))).second;
+    hints.first_page_offset =
+        (*(xref.find(out_page0_id))).second.getOffset();
+    hints.nbits_delta_nobjects = nbits(most_objects - fewest_objects);
+    hints.min_page_length = shortest_page;
+    hints.nbits_delta_page_length = nbits(longest_page - shortest_page);
+    hints.nbits_nshared_objects = nbits(most_shared);
+    hints.nbits_shared_identifier =
+        nbits(this->c_shared_object_data.nshared_total);
+    hints.shared_denominator = 4; // doesn't matter
+
+    // It isn't clear how to compute content offset and content
+    // length.  Since page objects are not interleaved with the
+    // content stream, content length reuses the page length values.
+    // Content offset stays 0 because this is what Adobe does
+    // (implementation note 127) and pdlin as well.
+    hints.nbits_delta_content_length = hints.nbits_delta_page_length;
+    hints.min_content_length = hints.min_page_length;
+
+    // Second pass: turn the absolute values into deltas from the
+    // minimum and copy over the shared object identifiers.
+    for (unsigned int pg = 0; pg < page_count; ++pg)
+    {
+        HPageOffsetEntry& out = hint_entries[pg];
+        assert(out.delta_nobjects >= fewest_objects);
+        assert(out.delta_page_length >= shortest_page);
+        out.delta_nobjects -= fewest_objects;
+        out.delta_page_length -= shortest_page;
+        out.delta_content_length = out.delta_page_length;
+
+        CHPageOffsetEntry& in = computed_entries[pg];
+        for (int s = 0; s < in.nshared_objects; ++s)
+        {
+            out.shared_identifiers.push_back(in.shared_identifiers[s]);
+            out.shared_numerators.push_back(0);
+        }
+    }
+}
+
+void
+QPDF::calculateHSharedObject(
+    std::map<int, QPDFXRefEntry> const& xref,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Fill in shared_object_hints (the shared object hint table)
+    // from c_shared_object_data together with the output offsets
+    // (xref), output object lengths (lengths), and the
+    // input-to-output object number map (obj_renumber).  Each entry
+    // describes a group of exactly one object.
+
+    CHSharedObject& computed = this->c_shared_object_data;
+    std::vector<CHSharedObjectEntry>& computed_entries = computed.entries;
+    HSharedObject& hints = this->shared_object_hints;
+    std::vector<HSharedObjectEntry>& hint_entries = hints.entries;
+    hint_entries = std::vector<HSharedObjectEntry>(computed.nshared_total);
+
+    // Seed the running minimum and maximum from the first entry.
+    int shortest = outputLengthNextN(
+        computed_entries[0].object, 1, lengths, obj_renumber);
+    int longest = shortest;
+
+    // First pass: store absolute lengths in the delta field while
+    // tracking the extremes.
+    for (int idx = 0; idx < computed.nshared_total; ++idx)
+    {
+        int const group_length = outputLengthNextN(
+            computed_entries[idx].object, 1, lengths, obj_renumber);
+        if (group_length < shortest)
+        {
+            shortest = group_length;
+        }
+        if (group_length > longest)
+        {
+            longest = group_length;
+        }
+        hint_entries[idx].delta_group_length = group_length;
+    }
+
+    hints.nshared_total = computed.nshared_total;
+    hints.nshared_first_page = computed.nshared_first_page;
+    if (hints.nshared_total > hints.nshared_first_page)
+    {
+        // There are shared objects beyond the first page's, so the
+        // first_shared_obj fields can be filled in.
+        hints.first_shared_obj =
+            (*(obj_renumber.find(computed.first_shared_obj))).second;
+        hints.first_shared_offset =
+            (*(xref.find(hints.first_shared_obj))).second.getOffset();
+    }
+    hints.min_group_length = shortest;
+    hints.nbits_delta_group_length = nbits(longest - shortest);
+
+    // Second pass: turn absolute lengths into deltas from the minimum.
+    for (int idx = 0; idx < computed.nshared_total; ++idx)
+    {
+        HSharedObjectEntry& out = hint_entries[idx];
+        assert(out.delta_group_length >= shortest);
+        out.delta_group_length -= shortest;
+    }
+}
+
+void
+QPDF::calculateHOutline(
+    std::map<int, QPDFXRefEntry> const& xref,
+    std::map<int, size_t> const& lengths,
+    std::map<int, int> const& obj_renumber)
+{
+    // Fill in outline_hints from c_outline_data, translating the
+    // first object number through obj_renumber and taking its offset
+    // from xref.  Nothing is written when c_outline_data describes
+    // no objects.
+
+    HGeneric& computed = this->c_outline_data;
+    if (computed.nobjects == 0)
+    {
+        return;
+    }
+
+    HGeneric& hints = this->outline_hints;
+
+    int const out_first =
+        (*(obj_renumber.find(computed.first_object))).second;
+    hints.first_object = out_first;
+    hints.first_object_offset =
+        (*(xref.find(hints.first_object))).second.getOffset();
+    hints.nobjects = computed.nobjects;
+    hints.group_length = outputLengthNextN(
+        computed.first_object, hints.nobjects, lengths, obj_renumber);
+}
+
+// For each of the first nitems elements of vec, write the low
+// `bits` bits of the int member selected by `field` to w, then
+// flush w.
+template <class T>
+static void
+write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec,
+                 int bits, int T::*field)
+{
+    int idx = 0;
+    while (idx < nitems)
+    {
+        T& item = vec[idx];
+        w.writeBits(item.*field, bits);
+        ++idx;
+    }
+    // The PDF spec says that each hint table starts at a byte
+    // boundary.  Each "row" actually must start on a byte boundary,
+    // hence the flush after every run of values.
+    w.flush();
+}
+
+// For each of the first nitems1 elements of vec1, read that
+// element's count from the int member selected by nitems2 and write
+// that many values, `bits` bits each, from the element's vector
+// member selected by vec2.  w is flushed once after everything has
+// been written.
+template <class T>
+static void
+write_vector_vector(BitWriter& w,
+                    int nitems1, std::vector<T>& vec1, int T::*nitems2,
+                    int bits, std::vector<int> T::*vec2)
+{
+    for (int outer = 0; outer < nitems1; ++outer)
+    {
+        T& item = vec1[outer];
+        std::vector<int>& values = item.*vec2;
+        for (int inner = 0; inner < item.*nitems2; ++inner)
+        {
+            w.writeBits(values[inner], bits);
+        }
+    }
+    w.flush();
+}
+
+
+// Serialize the page offset hint table (this->page_offset_hints):
+// thirteen fixed-width header fields, then the per-page entry
+// fields, each written for every page returned by getAllPages().
+void
+QPDF::writeHPageOffset(BitWriter& w)
+{
+    HPageOffset& hints = this->page_offset_hints;
+
+    // Header.  The trailing numbers are the item numbers of the
+    // header fields, in the order they are written.
+    w.writeBits(hints.min_nobjects, 32);                // 1
+    w.writeBits(hints.first_page_offset, 32);           // 2
+    w.writeBits(hints.nbits_delta_nobjects, 16);        // 3
+    w.writeBits(hints.min_page_length, 32);             // 4
+    w.writeBits(hints.nbits_delta_page_length, 16);     // 5
+    w.writeBits(hints.min_content_offset, 32);          // 6
+    w.writeBits(hints.nbits_delta_content_offset, 16);  // 7
+    w.writeBits(hints.min_content_length, 32);          // 8
+    w.writeBits(hints.nbits_delta_content_length, 16);  // 9
+    w.writeBits(hints.nbits_nshared_objects, 16);       // 10
+    w.writeBits(hints.nbits_shared_identifier, 16);     // 11
+    w.writeBits(hints.nbits_shared_numerator, 16);      // 12
+    w.writeBits(hints.shared_denominator, 16);          // 13
+
+    unsigned int npages = getAllPages().size();
+    std::vector<HPageOffsetEntry>& rows = hints.entries;
+
+    // Per-page data; the width of each field was announced in the
+    // header above.
+    write_vector_int(w, npages, rows, hints.nbits_delta_nobjects,
+                     &HPageOffsetEntry::delta_nobjects);
+    write_vector_int(w, npages, rows, hints.nbits_delta_page_length,
+                     &HPageOffsetEntry::delta_page_length);
+    write_vector_int(w, npages, rows, hints.nbits_nshared_objects,
+                     &HPageOffsetEntry::nshared_objects);
+    write_vector_vector(w, npages, rows,
+                        &HPageOffsetEntry::nshared_objects,
+                        hints.nbits_shared_identifier,
+                        &HPageOffsetEntry::shared_identifiers);
+    write_vector_vector(w, npages, rows,
+                        &HPageOffsetEntry::nshared_objects,
+                        hints.nbits_shared_numerator,
+                        &HPageOffsetEntry::shared_numerators);
+    write_vector_int(w, npages, rows, hints.nbits_delta_content_offset,
+                     &HPageOffsetEntry::delta_content_offset);
+    write_vector_int(w, npages, rows, hints.nbits_delta_content_length,
+                     &HPageOffsetEntry::delta_content_length);
+}
+
+// Serialize the shared object hint table
+// (this->shared_object_hints): seven header fields followed by
+// three per-group fields written for each of nshared_total entries.
+void
+QPDF::writeHSharedObject(BitWriter& w)
+{
+    HSharedObject& hints = this->shared_object_hints;
+
+    // Header
+    w.writeBits(hints.first_shared_obj, 32);            // 1
+    w.writeBits(hints.first_shared_offset, 32);         // 2
+    w.writeBits(hints.nshared_first_page, 32);          // 3
+    w.writeBits(hints.nshared_total, 32);               // 4
+    w.writeBits(hints.nbits_nobjects, 16);              // 5
+    w.writeBits(hints.min_group_length, 32);            // 6
+    w.writeBits(hints.nbits_delta_group_length, 16);    // 7
+
+    QTC::TC("qpdf", "QPDF lin write nshared_total > nshared_first_page",
+            (hints.nshared_total > hints.nshared_first_page) ? 1 : 0);
+
+    int ngroups = hints.nshared_total;
+    std::vector<HSharedObjectEntry>& entries = hints.entries;
+
+    write_vector_int(w, ngroups, entries, hints.nbits_delta_group_length,
+                     &HSharedObjectEntry::delta_group_length);
+    write_vector_int(w, ngroups, entries, 1,
+                     &HSharedObjectEntry::signature_present);
+
+    // A set signature flag would have to be followed by a 128-bit
+    // hash, which this writer never emits, so insist that no entry
+    // has it set.
+    for (int i = 0; i < ngroups; ++i)
+    {
+        assert(entries[i].signature_present == 0);
+    }
+
+    write_vector_int(w, ngroups, entries, hints.nbits_nobjects,
+                     &HSharedObjectEntry::nobjects_minus_one);
+}
+
+// Serialize a generic hint table: its four fields are written in
+// order, each as a 32-bit quantity.
+void
+QPDF::writeHGeneric(BitWriter& w, HGeneric& t)
+{
+    static int const field_bits = 32;
+
+    w.writeBits(t.first_object, field_bits);            // 1
+    w.writeBits(t.first_object_offset, field_bits);     // 2
+    w.writeBits(t.nobjects, field_bits);                // 3
+    w.writeBits(t.group_length, field_bits);            // 4
+}
+
+// Compute all hint tables and write them, deflated, into
+// hint_buffer.  On return S holds the byte count (before
+// compression) at which the shared object table starts, and O holds
+// the count at which the outline table starts, or 0 if no outline
+// table was written.
+void
+QPDF::generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
+                         std::map<int, size_t> const& lengths,
+                         std::map<int, int> const& obj_renumber,
+                         PointerHolder<Buffer>& hint_buffer,
+                         int& S, int& O)
+{
+    // Fill in the hint tables with their final values.
+    calculateHPageOffset(xref, lengths, obj_renumber);
+    calculateHSharedObject(xref, lengths, obj_renumber);
+    calculateHOutline(xref, lengths, obj_renumber);
+
+    // Pipeline: bit writer -> byte counter -> deflate -> memory
+    // buffer.  The counter sits in front of the compressor so that
+    // the offsets we record refer to uncompressed data.
+    Pl_Buffer sink("hint stream");
+    Pl_Flate deflater("compress hint stream", &sink, Pl_Flate::a_deflate);
+    Pl_Count counter("count", &deflater);
+    BitWriter writer(&counter);
+
+    writeHPageOffset(writer);
+    S = counter.getCount();
+    writeHSharedObject(writer);
+
+    if (this->outline_hints.nobjects > 0)
+    {
+        O = counter.getCount();
+        writeHGeneric(writer, this->outline_hints);
+    }
+    else
+    {
+        O = 0;
+    }
+    counter.finish();
+
+    hint_buffer = sink.getBuffer();
+}
diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc
new file mode 100644
index 00000000..8797445c
--- /dev/null
+++ b/libqpdf/QPDF_optimization.cc
@@ -0,0 +1,490 @@
+// See doc/optimization.
+
+#include <qpdf/QPDF.hh>
+
+#include <qpdf/QTC.hh>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QPDF_Dictionary.hh>
+#include <qpdf/QPDF_Array.hh>
+#include <assert.h>
+
+// Default-constructed users are tagged ou_bad.
+QPDF::ObjUser::ObjUser()
+    : ou_type(ou_bad), pageno(0)
+{
+}
+
+// User that carries neither a page number nor a key; only ou_root
+// is accepted.
+QPDF::ObjUser::ObjUser(user_e type)
+    : ou_type(type), pageno(0)
+{
+    assert(type == ou_root);
+}
+
+// User identified by a page number; only ou_page and ou_thumb are
+// accepted.
+QPDF::ObjUser::ObjUser(user_e type, int pageno)
+    : ou_type(type), pageno(pageno)
+{
+    assert((type == ou_page) || (type == ou_thumb));
+}
+
+// User identified by a dictionary key; only ou_trailer_key and
+// ou_root_key are accepted.
+QPDF::ObjUser::ObjUser(user_e type, std::string const& key)
+    : ou_type(type), pageno(0), key(key)
+{
+    assert((type == ou_trailer_key) || (type == ou_root_key));
+}
+
+// Strict ordering over (ou_type, pageno, key), compared in that
+// order, so that ObjUser can be used as a key in ordered containers.
+bool
+QPDF::ObjUser::operator<(ObjUser const& rhs) const
+{
+    if (this->ou_type != rhs.ou_type)
+    {
+        return (this->ou_type < rhs.ou_type);
+    }
+    if (this->pageno != rhs.pageno)
+    {
+        return (this->pageno < rhs.pageno);
+    }
+    return (this->key < rhs.key);
+}
+
+// Walk everything reachable from the trailer, breadth first, and
+// replace each scalar found inside an array or dictionary (or a
+// stream's dictionary) with a direct copy.  Arrays, dictionaries
+// and streams are queued for traversal instead; indirect ones are
+// visited only once.
+//
+// Throws QEXC::Internal if the traversal itself arrives at an
+// indirect scalar or if a dictionary reports a key whose value is
+// null.
+void
+QPDF::flattenScalarReferences()
+{
+    std::set<ObjGen> seen;
+    std::list<QPDFObjectHandle> pending;
+    pending.push_back(this->trailer);
+
+    while (! pending.empty())
+    {
+        QPDFObjectHandle node = pending.front();
+        pending.pop_front();
+
+        if (node.isIndirect())
+        {
+            if (node.isScalar())
+            {
+                throw QEXC::Internal(
+                    "flattenScalarReferences landed at indirect scalar");
+            }
+            // insert() reports whether the object was new to the set
+            ObjGen og(node.getObjectID(), node.getGeneration());
+            if (! seen.insert(og).second)
+            {
+                continue;
+            }
+        }
+
+        if (node.isArray())
+        {
+            int const count = node.getArrayNItems();
+            for (int idx = 0; idx < count; ++idx)
+            {
+                QPDFObjectHandle item = node.getArrayItem(idx);
+                if (! item.isScalar())
+                {
+                    pending.push_back(item);
+                    continue;
+                }
+                QTC::TC("qpdf", "QPDF opt flatten array scalar");
+                item.makeDirect();
+                node.setArrayItem(idx, item);
+            }
+            continue;
+        }
+
+        if (! (node.isDictionary() || node.isStream()))
+        {
+            continue;
+        }
+
+        // For streams, it is the stream dictionary that gets
+        // flattened.
+        QPDFObjectHandle dict = node.isStream() ? node.getDict() : node;
+        std::set<std::string> keys = dict.getKeys();
+        for (std::set<std::string>::iterator ki = keys.begin();
+             ki != keys.end(); ++ki)
+        {
+            std::string const& key = *ki;
+            QPDFObjectHandle value = dict.getKey(key);
+            if (value.isNull())
+            {
+                // getKeys() is not supposed to return keys whose
+                // values are null.
+                throw QEXC::Internal("dictionary with null key found");
+            }
+            if (value.isScalar())
+            {
+                QTC::TC("qpdf", "QPDF opt flatten dict scalar");
+                value.makeDirect();
+                dict.replaceKey(key, value);
+            }
+            else
+            {
+                pending.push_back(value);
+            }
+        }
+    }
+}
+
+// Build the two-way mapping between object users and the objects
+// they use (obj_user_to_objects / object_to_obj_users).  The pages
+// tree is traversed first, which also pushes inherited page
+// attributes down to the pages; then the trailer keys other than
+// /Root, the keys of the root dictionary, and finally the root
+// object itself are recorded.  The maps are then filtered through
+// object_stream_data.  Does nothing if the maps are already
+// populated.
+void
+QPDF::optimize(std::map<int, int> const& object_stream_data,
+               bool allow_changes)
+{
+    if (! this->obj_user_to_objects.empty())
+    {
+        // A previous call has already done the work.
+        return;
+    }
+
+    // key_ancestors maps each inheritable page attribute key to the
+    // stack of values supplied by enclosing /Pages nodes; pageno is
+    // the zero-based sequence number of the page being visited.
+    std::map<std::string, std::vector<QPDFObjectHandle> > key_ancestors;
+    int pageno = 0;
+    QPDFObjectHandle pages_root =
+        this->trailer.getKey("/Root").getKey("/Pages");
+    optimizePagesTree(pages_root, key_ancestors, pageno, allow_changes);
+    assert(key_ancestors.empty());
+
+    // Document-level items hanging off the trailer.  /Root is dealt
+    // with separately below.
+    std::set<std::string> trailer_keys = this->trailer.getKeys();
+    for (std::set<std::string>::iterator ti = trailer_keys.begin();
+         ti != trailer_keys.end(); ++ti)
+    {
+        if (*ti != "/Root")
+        {
+            updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, *ti),
+                             this->trailer.getKey(*ti));
+        }
+    }
+
+    // Technically, /I keys from /Thread dictionaries are supposed
+    // to be handled separately, but that part of the specification
+    // is disregarded for now.  There is plenty of evidence that
+    // pdlin and Acrobat disregard things like this from time to
+    // time, so this is very unlikely to cause problems.
+    QPDFObjectHandle root = getRoot();
+    std::set<std::string> root_keys = root.getKeys();
+    for (std::set<std::string>::iterator ri = root_keys.begin();
+         ri != root_keys.end(); ++ri)
+    {
+        updateObjectMaps(ObjUser(ObjUser::ou_root_key, *ri),
+                         root.getKey(*ri));
+    }
+
+    // The root object itself is used by the ou_root user.
+    ObjUser root_ou = ObjUser(ObjUser::ou_root);
+    ObjGen root_og = ObjGen(root.getObjectID(), root.getGeneration());
+    obj_user_to_objects[root_ou].insert(root_og);
+    object_to_obj_users[root_og].insert(root_ou);
+
+    filterCompressedObjects(object_stream_data);
+}
+
+// Recursively walk the pages tree rooted at cur_pages.
+//
+// For a /Pages node, every key other than /Type, /Parent, /Kids and
+// /Count is treated as an inheritable attribute: its value is pushed
+// onto the stack kept for that key in key_ancestors, the key is
+// removed from the node, and the value is popped again once all of
+// the node's kids have been visited.  For a /Page node, the
+// innermost value of every attribute in key_ancestors that the page
+// does not define itself is copied onto the page, the page's
+// objects are recorded under ObjUser(ou_page, pageno), and pageno
+// is incremented.
+//
+// Throws QPDFExc if an inheritable attribute is found while
+// allow_changes is false, or if a node's /Type is neither /Pages
+// nor /Page.
+void
+QPDF::optimizePagesTree(
+    QPDFObjectHandle cur_pages,
+    std::map<std::string, std::vector<QPDFObjectHandle> >& key_ancestors,
+    int& pageno, bool allow_changes)
+{
+    std::string const node_type = cur_pages.getKey("/Type").getName();
+
+    if (node_type == "/Page")
+    {
+        // Give this page every inherited attribute it does not
+        // already define for itself.
+        typedef std::map<std::string,
+                         std::vector<QPDFObjectHandle> >::iterator
+            ancestor_iter;
+        for (ancestor_iter ai = key_ancestors.begin();
+             ai != key_ancestors.end(); ++ai)
+        {
+            std::string const& key = ai->first;
+            if (cur_pages.hasKey(key))
+            {
+                QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
+            }
+            else
+            {
+                QTC::TC("qpdf", "QPDF opt resource inherited");
+                cur_pages.replaceKey(key, ai->second.back());
+            }
+        }
+
+        // Record which objects this page uses, then advance pageno
+        // so that it is right for the next page.
+        updateObjectMaps(ObjUser(ObjUser::ou_page, pageno), cur_pages);
+        ++pageno;
+        return;
+    }
+
+    if (node_type != "/Pages")
+    {
+        throw QPDFExc(this->file.getName() + ": invalid Type in page tree");
+    }
+
+    // /Pages node.  Collect its inheritable attributes, pushing each
+    // value onto the stack for its key.
+    std::set<std::string> pushed_keys;
+    std::set<std::string> node_keys = cur_pages.getKeys();
+    for (std::set<std::string>::iterator ki = node_keys.begin();
+         ki != node_keys.end(); ++ki)
+    {
+        std::string const& key = *ki;
+        bool const structural =
+            (key == "/Type") || (key == "/Parent") ||
+            (key == "/Kids") || (key == "/Count");
+        if (structural)
+        {
+            continue;
+        }
+
+        if (! allow_changes)
+        {
+            throw QPDFExc(this->file.getName() +
+                          ": optimize detected an inheritable resource");
+        }
+
+        pushed_keys.insert(key);
+        QPDFObjectHandle value = cur_pages.getKey(key);
+        QTC::TC("qpdf", "QPDF opt direct pages resource",
+                value.isIndirect() ? 0 : 1);
+        if (! value.isIndirect())
+        {
+            if (value.isScalar())
+            {
+                // Leave direct scalars alone so as not to undo
+                // flattenScalarReferences, which would already have
+                // been called by this time.
+                QTC::TC("qpdf", "QPDF opt inherited scalar");
+            }
+            else
+            {
+                // Turn shared direct non-scalar values into indirect
+                // objects to avoid copying large structures around.
+                cur_pages.replaceKey(key, makeIndirectObject(value));
+                value = cur_pages.getKey(key);
+            }
+        }
+
+        std::vector<QPDFObjectHandle>& stack = key_ancestors[key];
+        stack.push_back(value);
+        if (stack.size() > 1)
+        {
+            QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
+        }
+
+        // The attribute is reattached at the page level, so take it
+        // off this node.
+        cur_pages.removeKey(key);
+    }
+
+    // Descend into the kids.
+    QPDFObjectHandle kids = cur_pages.getKey("/Kids");
+    int const nkids = kids.getArrayNItems();
+    for (int idx = 0; idx < nkids; ++idx)
+    {
+        optimizePagesTree(kids.getArrayItem(idx), key_ancestors, pageno,
+                          allow_changes);
+    }
+
+    // Pop what this node pushed.  A stack that becomes empty is
+    // removed from the map altogether, which maintains the invariant
+    // that the keys of key_ancestors are exactly the keys for which
+    // an inherited value is currently available.
+    if (pushed_keys.empty())
+    {
+        QTC::TC("qpdf", "QPDF opt no inheritable keys");
+    }
+    else
+    {
+        QTC::TC("qpdf", "QPDF opt inheritable keys");
+        for (std::set<std::string>::iterator pi = pushed_keys.begin();
+             pi != pushed_keys.end(); ++pi)
+        {
+            std::string const& key = *pi;
+            std::vector<QPDFObjectHandle>& stack = key_ancestors[key];
+            stack.pop_back();
+            if (stack.empty())
+            {
+                QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
+                key_ancestors.erase(key);
+            }
+        }
+    }
+}
+
+// Record every object reachable from oh as being used by ou.  This
+// starts a fresh traversal with an empty visited set and oh marked
+// as the top-level object.
+void
+QPDF::updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh)
+{
+    std::set<ObjGen> seen;
+    bool const is_top = true;
+    updateObjectMapsInternal(ou, oh, seen, is_top);
+}
+
+// Recursive worker for updateObjectMaps.  Every indirect object
+// reached from oh is entered into obj_user_to_objects and
+// object_to_obj_users for ou.  The traversal does not cross page
+// boundaries: a /Page dictionary is only entered when it is the top
+// object, its /Parent key is never followed, and its /Thumb is
+// recorded under a separate ou_thumb user.  `visited` prevents
+// indirect objects from being processed twice.
+void
+QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
+                               std::set<ObjGen>& visited, bool top)
+{
+    bool is_page_node = false;
+    if (oh.isDictionary() && oh.hasKey("/Type"))
+    {
+        is_page_node = (oh.getKey("/Type").getName() == "/Page");
+    }
+    if (is_page_node && (! top))
+    {
+        // Some other page; it belongs to a different user.
+        return;
+    }
+
+    if (oh.isIndirect())
+    {
+        ObjGen og(oh.getObjectID(), oh.getGeneration());
+        // insert() reports whether the object was new to the set
+        if (! visited.insert(og).second)
+        {
+            QTC::TC("qpdf", "QPDF opt loop detected");
+            return;
+        }
+        this->obj_user_to_objects[ou].insert(og);
+        this->object_to_obj_users[og].insert(ou);
+    }
+
+    if (oh.isArray())
+    {
+        int const count = oh.getArrayNItems();
+        for (int idx = 0; idx < count; ++idx)
+        {
+            updateObjectMapsInternal(ou, oh.getArrayItem(idx), visited, false);
+        }
+        return;
+    }
+
+    if (! (oh.isDictionary() || oh.isStream()))
+    {
+        return;
+    }
+
+    // For streams, traverse the stream dictionary.
+    QPDFObjectHandle dict = oh.isStream() ? oh.getDict() : oh;
+    std::set<std::string> keys = dict.getKeys();
+    for (std::set<std::string>::iterator ki = keys.begin();
+         ki != keys.end(); ++ki)
+    {
+        std::string const& key = *ki;
+        if (is_page_node && (key == "/Parent"))
+        {
+            // Never walk back up the page tree.
+            continue;
+        }
+        if (is_page_node && (key == "/Thumb"))
+        {
+            // Page thumbnails are a special case: they get their own
+            // user and their own traversal.
+            updateObjectMaps(ObjUser(ObjUser::ou_thumb, ou.pageno),
+                             dict.getKey(key));
+            continue;
+        }
+        updateObjectMapsInternal(ou, dict.getKey(key), visited, false);
+    }
+}
+
+// Rewrite obj_user_to_objects and object_to_obj_users so that any
+// object whose number is a key of object_stream_data is replaced by
+// ObjGen(mapped value, 0).  In other words, a user of a compressed
+// object is treated as a user of the object stream containing it.
+// Nothing is changed when object_stream_data is empty.
+void
+QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data)
+{
+    if (object_stream_data.empty())
+    {
+        return;
+    }
+
+    std::map<ObjUser, std::set<ObjGen> > filtered_by_user;
+    std::map<ObjGen, std::set<ObjUser> > filtered_by_object;
+
+    // user -> objects
+    for (std::map<ObjUser, std::set<ObjGen> >::iterator ui =
+             this->obj_user_to_objects.begin();
+         ui != this->obj_user_to_objects.end(); ++ui)
+    {
+        ObjUser const& ou = ui->first;
+        std::set<ObjGen> const& objects = ui->second;
+        for (std::set<ObjGen>::const_iterator oi = objects.begin();
+             oi != objects.end(); ++oi)
+        {
+            std::map<int, int>::const_iterator container =
+                object_stream_data.find(oi->obj);
+            ObjGen const target =
+                (container == object_stream_data.end())
+                ? *oi
+                : ObjGen(container->second, 0);
+            filtered_by_user[ou].insert(target);
+        }
+    }
+
+    // object -> users
+    for (std::map<ObjGen, std::set<ObjUser> >::iterator oi =
+             this->object_to_obj_users.begin();
+         oi != this->object_to_obj_users.end(); ++oi)
+    {
+        ObjGen const& og = oi->first;
+        std::set<ObjUser> const& users = oi->second;
+
+        // The replacement depends only on the object, so work it
+        // out once per object rather than once per user.
+        std::map<int, int>::const_iterator container =
+            object_stream_data.find(og.obj);
+        ObjGen const target =
+            (container == object_stream_data.end())
+            ? og
+            : ObjGen(container->second, 0);
+
+        for (std::set<ObjUser>::const_iterator ui = users.begin();
+             ui != users.end(); ++ui)
+        {
+            filtered_by_object[target].insert(*ui);
+        }
+    }
+
+    this->obj_user_to_objects = filtered_by_user;
+    this->object_to_obj_users = filtered_by_object;
+}
diff --git a/libqpdf/QTC.cc b/libqpdf/QTC.cc
new file mode 100644
index 00000000..b8328b2e
--- /dev/null
+++ b/libqpdf/QTC.cc
@@ -0,0 +1,46 @@
+
+#include <qpdf/QTC.hh>
+
+#include <set>
+#include <stdio.h>
+#include <qpdf/QUtil.hh>
+
+// Test coverage is active for a scope only when the TC_SCOPE
+// environment variable is set and equal to that scope.
+static bool tc_active(char const* const scope)
+{
+    std::string configured_scope;
+    if (! QUtil::get_env("TC_SCOPE", &configured_scope))
+    {
+        return false;
+    }
+    return (configured_scope == scope);
+}
+
+// Record that test coverage case `ccase` was reached with value n.
+// Nothing happens unless coverage is active for `scope` and the
+// environment names an output file (TC_WIN_FILENAME on Windows,
+// TC_FILENAME elsewhere).  Each distinct (ccase, n) pair is
+// appended to that file as "ccase n" at most once per process.
+void QTC::TC(char const* const scope, char const* const ccase, int n)
+{
+    // (case, n) pairs that have already been written out
+    static std::set<std::pair<std::string, int> > cache;
+
+#ifdef _WIN32
+    char const* const filename_var = "TC_WIN_FILENAME";
+#else
+    char const* const filename_var = "TC_FILENAME";
+#endif
+
+    if (! tc_active(scope))
+    {
+        return;
+    }
+
+    std::string filename;
+    if (! QUtil::get_env(filename_var, &filename))
+    {
+        return;
+    }
+
+    // insert() reports whether the pair was new to the cache
+    if (! cache.insert(std::make_pair(ccase, n)).second)
+    {
+        return;
+    }
+
+    FILE* tc =
+        QUtil::fopen_wrapper("open test coverage file (" + filename + ")",
+                             fopen(filename.c_str(), "ab"));
+    fprintf(tc, "%s %d\n", ccase, n);
+    fclose(tc);
+}
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
new file mode 100644
index 00000000..c0de95f7
--- /dev/null
+++ b/libqpdf/QUtil.cc
@@ -0,0 +1,198 @@
+
+#include <qpdf/QUtil.hh>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _WIN32
+#include <Windows.h>
+#include <direct.h>
+#else
+#include <unistd.h>
+#endif
+
+// Render num as decimal text.  A non-zero fullpad is passed to
+// sprintf as the field width of a "%0*d" conversion; zero means no
+// padding.  Throws QEXC::Internal when the magnitude of fullpad
+// would not fit the local buffer.
+std::string
+QUtil::int_to_string(int num, int fullpad)
+{
+    // This routine has to be recompiled if an int can ever be
+    // longer than 49 digits.
+    char buf[50];
+
+    // One byte is reserved for the terminating NUL and, for
+    // negative numbers, one more for the sign.
+    int const reserved = (num < 0) ? 2 : 1;
+    if (abs(fullpad) > (int)sizeof(buf) - reserved)
+    {
+        throw QEXC::Internal("Util::int_to_string has been called with "
+                             "a padding value greater than its internal "
+                             "limit");
+    }
+
+    if (fullpad == 0)
+    {
+        sprintf(buf, "%d", num);
+    }
+    else
+    {
+        sprintf(buf, "%0*d", fullpad, num);
+    }
+
+    return std::string(buf);
+}
+
+// Format num in fixed-point notation.  A non-zero decimal_places is
+// passed as the precision of a "%.*f" conversion; zero selects the
+// plain "%f" conversion.
+//
+// Throws QEXC::Internal if the formatted text (plus its terminating
+// NUL) does not fit the internal 100-byte buffer.
+std::string
+QUtil::double_to_string(double num, int decimal_places)
+{
+    // This routine will need to be recompiled if a formatted double
+    // can legitimately be longer than 99 characters.
+    char t[100];
+
+    // Format with an explicit bound and let snprintf report how much
+    // room the text needs.  The length used to be estimated up front
+    // from (int)num, i.e. the digits on the left hand side of the
+    // dot; that cast is undefined for values outside the range of
+    // int, and the estimate ignored the default precision of "%f",
+    // so a large value such as 1e300 could overrun the buffer.
+    int written;
+    if (decimal_places)
+    {
+        written = snprintf(t, sizeof(t), "%.*f", decimal_places, num);
+    }
+    else
+    {
+        written = snprintf(t, sizeof(t), "%f", num);
+    }
+
+    // A negative result is an encoding error; a result of sizeof(t)
+    // or more means the text was truncated.
+    if ((written < 0) || (written >= (int)sizeof(t)))
+    {
+        throw QEXC::Internal("Util::double_to_string has been called with "
+                             "a number and a decimal places specification "
+                             "that would break an internal limit");
+    }
+    return std::string(t);
+}
+
+// Pass through the result of a system call that signals failure by
+// returning -1.  On -1, throw QEXC::System built from description
+// and the current errno; otherwise return status unchanged.
+int
+QUtil::os_wrapper(std::string const& description, int status) throw (QEXC::System)
+{
+    if (status != -1)
+    {
+        return status;
+    }
+    throw QEXC::System(description, errno);
+}
+
+// Pass through the result of an fopen-style call.  On a null FILE*,
+// throw QEXC::System built from description and the current errno;
+// otherwise return f unchanged.
+FILE*
+QUtil::fopen_wrapper(std::string const& description, FILE* f) throw (QEXC::System)
+{
+    if (f != 0)
+    {
+        return f;
+    }
+    throw QEXC::System(description, errno);
+}
+
+// Return a copy of str in a buffer allocated with new[]; the copy
+// is NUL-terminated.  The caller is responsible for delete []-ing
+// the result.
+char*
+QUtil::copy_string(std::string const& str)
+{
+    std::string::size_type const len = str.length();
+    char* copy = new char[len + 1];
+    // memcpy rather than strcpy so that embedded NUL characters in
+    // the string are carried over too
+    memcpy(copy, str.c_str(), len);
+    copy[len] = '\0';
+    return copy;
+}
+
+// Look up environment variable var.  Returns false if it is not
+// set.  Otherwise returns true and, when value is not null, stores
+// the variable's value in *value.
+bool
+QUtil::get_env(std::string const& var, std::string* value)
+{
+    // The approach was basically lifted from wxWindows.
+#ifdef _WIN32
+    // Ask for the required buffer size first; a result of zero is
+    // taken to mean that there is no such variable.
+    DWORD needed = ::GetEnvironmentVariable(var.c_str(), NULL, 0);
+    if (needed == 0)
+    {
+        return false;
+    }
+
+    if (value != 0)
+    {
+        char* buf = new char[needed + 1];
+        ::GetEnvironmentVariable(var.c_str(), buf, needed);
+        *value = buf;
+        delete [] buf;
+    }
+
+    return true;
+#else
+    char* found = getenv(var.c_str());
+    if (found == 0)
+    {
+        return false;
+    }
+    if (value != 0)
+    {
+        *value = found;
+    }
+
+    return true;
+#endif
+}
+
+// Encode uval as a UTF-8 byte sequence.
+//
+// Values up to 127 encode as a single byte.  For larger values, the
+// first byte has '1' in each of its n highest bits and '0' in the
+// next bit, where n is the total number of bytes; each following
+// byte starts with '10' and carries six bits of the value.  For
+// example, an 11-bit value is stored as 110zzzzz 10zzzzzz.
+//
+// Throws QEXC::General if uval is greater than 0x7fffffff.
+std::string
+QUtil::toUTF8(unsigned long uval)
+{
+    if (uval > 0x7fffffff)
+    {
+        throw QEXC::General("bounds error in QUtil::toUTF8");
+    }
+
+    std::string result;
+    if (uval < 128)
+    {
+        result += (char)(uval);
+        return result;
+    }
+
+    // The sequence is built from its last byte backwards inside a
+    // NUL-terminated scratch buffer.
+    unsigned char encoded[7];
+    encoded[6] = '\0';
+    unsigned char* pos = &encoded[5];
+
+    // Largest value that still fits in the leading byte for the
+    // number of bytes used so far; starts at six bits.
+    unsigned char lead_max = 0x3f;
+
+    while (uval > lead_max)
+    {
+        // Store the low six bits, tagged with 10000000, in the
+        // lowest unused position and drop them from the value.
+        *pos = (unsigned char) (0x80 + (uval & 0x3f));
+        uval >>= 6;
+        // Each extra byte costs the leading byte one payload bit.
+        lead_max >>= 1;
+        // Move one byte to the left.
+        --pos;
+        if (pos < encoded)
+        {
+            throw QEXC::Internal("QUtil::toUTF8: overflow error");
+        }
+    }
+    // If lead_max is k bits long, the high (7 - k) bits of the
+    // leading byte must be set.
+    *pos = (unsigned char)((0xff - (1 + (lead_max << 1))) + uval);
+
+    result += (char*)pos;
+    return result;
+}
diff --git a/libqpdf/RC4.cc b/libqpdf/RC4.cc
new file mode 100644
index 00000000..74b538b5
--- /dev/null
+++ b/libqpdf/RC4.cc
@@ -0,0 +1,56 @@
+
+#include <qpdf/RC4.hh>
+
+#include <string.h>
+
+// Exchange the values of the two bytes.
+static void swap_byte(unsigned char &a, unsigned char &b)
+{
+    unsigned char const saved = a;
+    a = b;
+    b = saved;
+}
+
+// Set up the RC4 state from the given key.  A key_len of -1 means
+// that key_data is NUL-terminated and its length is obtained with
+// strlen.
+RC4::RC4(unsigned char const* key_data, int key_len)
+{
+    if (key_len == -1)
+    {
+        key_len = strlen((char*)key_data);
+    }
+
+    // Start from the identity permutation with both indexes at 0.
+    for (int i = 0; i < 256; ++i)
+    {
+        key.state[i] = i;
+    }
+    key.x = 0;
+    key.y = 0;
+
+    // Mix the key into the permutation, cycling through the key
+    // bytes as often as needed.
+    int key_pos = 0;
+    int swap_pos = 0;
+    for (int i = 0; i < 256; ++i)
+    {
+        swap_pos = (key_data[key_pos] + key.state[i] + swap_pos) % 256;
+        swap_byte(key.state[i], key.state[swap_pos]);
+        key_pos = (key_pos + 1) % key_len;
+    }
+}
+
+// Run len bytes of in_data through the cipher, advancing the key
+// state.  The result goes to out_data, or back into in_data when
+// out_data is null.
+void
+RC4::process(unsigned char *in_data, int len, unsigned char* out_data)
+{
+    // A null output pointer requests in-place conversion.
+    unsigned char* dest = (out_data == 0) ? in_data : out_data;
+
+    for (int pos = 0; pos < len; ++pos)
+    {
+        key.x = (key.x + 1) % 256;
+        key.y = (key.state[key.x] + key.y) % 256;
+        swap_byte(key.state[key.x], key.state[key.y]);
+        int const xor_index = (key.state[key.x] + key.state[key.y]) % 256;
+        dest[pos] = in_data[pos] ^ key.state[xor_index];
+    }
+}
diff --git a/libqpdf/bits.icc b/libqpdf/bits.icc
new file mode 100644
index 00000000..465bf5b9
--- /dev/null
+++ b/libqpdf/bits.icc
@@ -0,0 +1,149 @@
+
+#ifndef __BITS_CC__
+#define __BITS_CC__
+
+#include <algorithm>
+#include <qpdf/QTC.hh>
+#include <qpdf/QEXC.hh>
+#include <qpdf/Pipeline.hh>
+
+// These functions may be run at places where the function call
+// overhead from test coverage testing would be too high. Therefore,
+// we make the test coverage cases conditional upon a preprocessor
+// symbol. BitStream.cc includes this file without defining the
+// symbol, and the specially designed test code that fully exercises
+// this code includes with the symbol defined.
+
+#ifdef BITS_READ
+// Read bits_wanted bits (at most 32) from the bit stream at p,
+// starting at bit number bit_offset of the first byte and moving
+// from higher-numbered bits to lower-numbered ones. The bits are
+// returned right-justified in the result. p, bit_offset, and
+// bits_available are updated to account for the bits consumed.
+//
+// Throws QEXC::General if bits_wanted exceeds bits_available and
+// QEXC::Internal if more than 32 bits are requested.
+static unsigned long
+read_bits(unsigned char const*& p, unsigned int& bit_offset,
+ unsigned int& bits_available, unsigned int bits_wanted)
+{
+ // View p as a stream of bits:
+
+ // 76543210 76543210 ....
+
+ // bit_offset is the bit number within the first byte that marks
+ // the first bit that we would read.
+
+ if (bits_wanted > bits_available)
+ {
+ throw QEXC::General("overflow reading bit stream");
+ }
+ if (bits_wanted > 32)
+ {
+ throw QEXC::Internal("read_bits: too many bits requested");
+ }
+
+ unsigned long result = 0;
+#ifdef BITS_TESTING
+ if (bits_wanted == 0)
+ {
+ QTC::TC("libtests", "bits zero bits wanted");
+ }
+#endif
+ // Each pass consumes whatever is still wanted from the current
+ // byte, up to the number of bits that byte has left.
+ while (bits_wanted > 0)
+ {
+ // Grab bits from the first byte clearing anything before
+ // bit_offset.
+ unsigned char byte = *p & ((1 << (bit_offset + 1)) - 1);
+
+ // There are bit_offset + 1 bits available in the first byte.
+ unsigned int to_copy = std::min(bits_wanted, bit_offset + 1);
+ // leftover is the number of unread bits that remain in this
+ // byte after this pass.
+ unsigned int leftover = (bit_offset + 1) - to_copy;
+
+#ifdef BITS_TESTING
+ QTC::TC("libtests", "bits bit_offset",
+ ((bit_offset == 0) ? 0 :
+ (bit_offset == 7) ? 1 :
+ 2));
+ QTC::TC("libtests", "bits leftover", (leftover > 0) ? 1 : 0);
+#endif
+
+ // Right shift so that all the bits we want are right justified.
+ byte >>= leftover;
+
+ // Copy the bits into result
+ result <<= to_copy;
+ result |= byte;
+
+ // Update pointers
+ if (leftover)
+ {
+ // Stay in this byte; the next unread bit is leftover - 1.
+ bit_offset = leftover - 1;
+ }
+ else
+ {
+ // This byte is used up; continue at the top bit of the next.
+ bit_offset = 7;
+ ++p;
+ }
+ bits_wanted -= to_copy;
+ bits_available -= to_copy;
+
+#ifdef BITS_TESTING
+ QTC::TC("libtests", "bits iterations",
+ ((bits_wanted > 8) ? 0 :
+ (bits_wanted > 0) ? 1 :
+ 2));
+#endif
+ }
+
+ return result;
+}
+#endif
+
+#ifdef BITS_WRITE
+// Append the low `bits` bits of val (at most 32), highest bit
+// first, to the partially filled byte ch. bit_offset + 1 is the
+// number of unused bits remaining in ch. Whenever ch becomes full it
+// is written to the pipeline, ch is reset to 0, and bit_offset is
+// reset to 7; a partially filled ch is left for the caller.
+//
+// Throws QEXC::Internal if more than 32 bits are requested.
+static void
+write_bits(unsigned char& ch, unsigned int& bit_offset,
+ unsigned long val, unsigned bits, Pipeline* pipeline)
+{
+ if (bits > 32)
+ {
+ throw QEXC::Internal("write_bits: too many bits requested");
+ }
+
+ // bit_offset + 1 is the number of bits left in ch
+#ifdef BITS_TESTING
+ if (bits == 0)
+ {
+ QTC::TC("libtests", "bits write zero bits");
+ }
+#endif
+ while (bits > 0)
+ {
+ // Write as many bits as are still pending, limited by the room
+ // left in ch.
+ int bits_to_write = std::min(bits, bit_offset + 1);
+ // Take the highest bits_to_write of the bits not yet written.
+ unsigned char newval =
+ (val >> (bits - bits_to_write)) & ((1 << bits_to_write) - 1);
+ int bits_left_in_ch = bit_offset + 1 - bits_to_write;
+ // Line the new bits up just below the bits already in ch.
+ newval <<= bits_left_in_ch;
+ ch |= newval;
+ if (bits_left_in_ch == 0)
+ {
+#ifdef BITS_TESTING
+ QTC::TC("libtests", "bits write pipeline");
+#endif
+ // ch is full: emit it and start a fresh byte.
+ pipeline->write(&ch, 1);
+ bit_offset = 7;
+ ch = 0;
+ }
+ else
+ {
+#ifdef BITS_TESTING
+ QTC::TC("libtests", "bits write leftover");
+#endif
+ bit_offset -= bits_to_write;
+ }
+ bits -= bits_to_write;
+#ifdef BITS_TESTING
+ QTC::TC("libtests", "bits write iterations",
+ ((bits > 8) ? 0 :
+ (bits > 0) ? 1 :
+ 2));
+#endif
+ }
+
+}
+#endif
+
+
+#endif // __BITS_CC__
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
new file mode 100644
index 00000000..9733cb9f
--- /dev/null
+++ b/libqpdf/build.mk
@@ -0,0 +1,73 @@
+# Build rules for the libqpdf directory: the library target, the
+# include directories, the list of sources, the rule that compiles
+# each source to a .lo object, and the rule that links libqpdf.la.
+TARGETS_libqpdf = \
+ libqpdf/$(OUTPUT_DIR)/libqpdf.la
+
+INCLUDES_libqpdf = include libqpdf
+
+# Every source file that goes into the library. A new .cc file has
+# to be added here to be compiled and linked.
+SRCS_libqpdf = \
+ libqpdf/BitStream.cc \
+ libqpdf/BitWriter.cc \
+ libqpdf/Buffer.cc \
+ libqpdf/MD5.cc \
+ libqpdf/PCRE.cc \
+ libqpdf/Pipeline.cc \
+ libqpdf/Pl_ASCII85Decoder.cc \
+ libqpdf/Pl_ASCIIHexDecoder.cc \
+ libqpdf/Pl_Buffer.cc \
+ libqpdf/Pl_Count.cc \
+ libqpdf/Pl_Discard.cc \
+ libqpdf/Pl_Flate.cc \
+ libqpdf/Pl_LZWDecoder.cc \
+ libqpdf/Pl_MD5.cc \
+ libqpdf/Pl_PNGFilter.cc \
+ libqpdf/Pl_QPDFTokenizer.cc \
+ libqpdf/Pl_RC4.cc \
+ libqpdf/Pl_StdioFile.cc \
+ libqpdf/QEXC.cc \
+ libqpdf/QPDF.cc \
+ libqpdf/QPDFExc.cc \
+ libqpdf/QPDFObject.cc \
+ libqpdf/QPDFObjectHandle.cc \
+ libqpdf/QPDFTokenizer.cc \
+ libqpdf/QPDFWriter.cc \
+ libqpdf/QPDFXRefEntry.cc \
+ libqpdf/QPDF_Array.cc \
+ libqpdf/QPDF_Bool.cc \
+ libqpdf/QPDF_Dictionary.cc \
+ libqpdf/QPDF_Integer.cc \
+ libqpdf/QPDF_Name.cc \
+ libqpdf/QPDF_Null.cc \
+ libqpdf/QPDF_Real.cc \
+ libqpdf/QPDF_Stream.cc \
+ libqpdf/QPDF_String.cc \
+ libqpdf/QPDF_encryption.cc \
+ libqpdf/QPDF_linearization.cc \
+ libqpdf/QPDF_optimization.cc \
+ libqpdf/QTC.cc \
+ libqpdf/QUtil.cc \
+ libqpdf/RC4.cc
+
+# -----
+
+OBJS_libqpdf = $(call src_to_lobj,$(SRCS_libqpdf))
+
+# Dependency files are only pulled in when GENDEPS is 1.
+ifeq ($(GENDEPS),1)
+-include $(call lobj_to_dep,$(OBJS_libqpdf))
+endif
+
+# Static pattern rule: each object is built from the .cc file of the
+# same name.
+$(OBJS_libqpdf): libqpdf/$(OUTPUT_DIR)/%.lo: libqpdf/%.cc
+ $(call libcompile,$<,$(INCLUDES_libqpdf))
+
+# Last three arguments to makelib are CURRENT,REVISION,AGE.
+#
+# * If any interfaces have been removed or changed, we are not binary
+# compatible. Increment CURRENT, and set AGE and REVISION to 0.
+#
+# * Otherwise, if any interfaces have been added since the last
+# public release, then increment CURRENT and AGE, and set REVISION
+# to 0.
+#
+# * Otherwise, increment REVISION
+
+libqpdf/$(OUTPUT_DIR)/libqpdf.la: $(OBJS_libqpdf)
+ $(call makelib,$(OBJS_libqpdf),$@,1,0,0)
+
diff --git a/libqpdf/qpdf/BitStream.hh b/libqpdf/qpdf/BitStream.hh
new file mode 100644
index 00000000..d02eea42
--- /dev/null
+++ b/libqpdf/qpdf/BitStream.hh
@@ -0,0 +1,23 @@
+// Read bits from a bit stream. See BitWriter for writing.
+
+#ifndef __BITSTREAM_HH__
+#define __BITSTREAM_HH__
+
+// Reader that hands out values of arbitrary bit width from a buffer
+// of bytes. Only the declaration is visible here; the notes on the
+// individual members are inferred from their names and types and
+// should be confirmed against BitStream.cc.
+class BitStream
+{
+ public:
+ // Read from the nbytes bytes starting at p.
+ BitStream(unsigned char const* p, int nbytes);
+ // Presumably rewinds to the start of the buffer -- confirm.
+ void reset();
+ // Presumably returns the next nbits bits of the stream -- confirm.
+ unsigned long getBits(int nbits);
+ // Presumably discards the remainder of a partially read byte --
+ // confirm.
+ void skipToNextByte();
+
+ private:
+ // Presumably the buffer as given to the constructor -- confirm.
+ unsigned char const* start;
+ int nbytes;
+
+ // Presumably the current read position: byte pointer, bit number
+ // within that byte, and number of bits not yet read -- confirm.
+ unsigned char const* p;
+ unsigned int bit_offset;
+ unsigned int bits_available;
+};
+
+#endif // __BITSTREAM_HH__
diff --git a/libqpdf/qpdf/BitWriter.hh b/libqpdf/qpdf/BitWriter.hh
new file mode 100644
index 00000000..1efd498a
--- /dev/null
+++ b/libqpdf/qpdf/BitWriter.hh
@@ -0,0 +1,24 @@
+// Write bits into a bit stream. See BitStream for reading.
+
+#ifndef __BITWRITER_HH__
+#define __BITWRITER_HH__
+
+// The include guard used to be the placeholder __THIS_FILE_Q__,
+// which would silently suppress any other header carrying the same
+// placeholder.  It is now named after this file, as in the sibling
+// headers.
+
+class Pipeline;
+
+// Writes values of arbitrary bit width to a Pipeline.
+class BitWriter
+{
+  public:
+    // Write bits to the pipeline.  It is the caller's responsibility
+    // to eventually call finish on the pipeline.
+    BitWriter(Pipeline* pl);
+    // Write `bits` bits taken from val.
+    void writeBits(unsigned long val, int bits);
+    // Force any partial byte to be written to the pipeline.
+    void flush();
+
+  private:
+    // Destination pipeline
+    Pipeline* pl;
+    // Byte currently being assembled and the bit position within it
+    // (presumably the same convention as write_bits in bits.icc --
+    // confirm against BitWriter.cc)
+    unsigned char ch;
+    unsigned int bit_offset;
+};
+
+#endif // __BITWRITER_HH__
diff --git a/libqpdf/qpdf/MD5.hh b/libqpdf/qpdf/MD5.hh
new file mode 100644
index 00000000..0ae15da9
--- /dev/null
+++ b/libqpdf/qpdf/MD5.hh
@@ -0,0 +1,73 @@
+
+#ifndef __MD5_HH__
+#define __MD5_HH__
+
+#include <string>
+#include <qpdf/QEXC.hh>
+
+// MD5 message digest calculator. Data is supplied through the
+// encode/append methods and the result is retrieved with digest(),
+// print(), or unparse(). The static convenience functions compute
+// or check a checksum in one call.
+class MD5
+{
+ public:
+ // Raw 16-byte digest
+ typedef unsigned char Digest[16];
+
+ MD5();
+ void reset();
+
+ // encodes string and finalizes
+ void encodeString(char const* input_string);
+
+ // encodes file and finalizes
+ void encodeFile(char const* filename, int up_to_size = -1)
+ throw(QEXC::System);
+
+ // appends string to current md5 object
+ void appendString(char const* input_string);
+
+ // appends arbitrary data to current md5 object
+ void encodeDataIncrementally(char const* input_data, int len);
+
+ // computes a raw digest
+ void digest(Digest);
+
+ // prints the digest to stdout terminated with \r\n (primarily for
+ // testing)
+ void print();
+
+ // returns the digest as a hexadecimal string
+ std::string unparse();
+
+ // Convenience functions
+ static std::string getDataChecksum(char const* buf, int len);
+ static std::string getFileChecksum(char const* filename, int up_to_size = -1);
+ static bool checkDataChecksum(char const* const checksum,
+ char const* buf, int len);
+ static bool checkFileChecksum(char const* const checksum,
+ char const* filename, int up_to_size = -1);
+
+ private:
+ // POINTER defines a generic pointer type
+ typedef void *POINTER;
+
+ // UINT2 defines a two byte word
+ typedef unsigned short int UINT2;
+
+ // UINT4 defines a four byte word
+ // NOTE(review): unsigned long is eight bytes on LP64 platforms, so
+ // this is not a four byte type everywhere -- confirm that MD5.cc
+ // does not depend on 32-bit wraparound.
+ typedef unsigned long int UINT4;
+
+ void init();
+ void update(unsigned char *, unsigned int);
+ void final();
+
+ static void transform(UINT4 [4], unsigned char [64]);
+ static void encode(unsigned char *, UINT4 *, unsigned int);
+ static void decode(UINT4 *, unsigned char *, unsigned int);
+
+ UINT4 state[4]; // state (ABCD)
+ UINT4 count[2]; // number of bits, modulo 2^64 (lsb first)
+ unsigned char buffer[64]; // input buffer
+
+ // Presumably set once the digest has been computed into
+ // digest_val -- confirm against MD5.cc.
+ bool finalized;
+ Digest digest_val;
+};
+
+#endif // __MD5_HH__
diff --git a/libqpdf/qpdf/PCRE.hh b/libqpdf/qpdf/PCRE.hh
new file mode 100644
index 00000000..a226aa19
--- /dev/null
+++ b/libqpdf/qpdf/PCRE.hh
@@ -0,0 +1,107 @@
+// This is a C++ wrapper class around Philip Hazel's perl-compatible
+// regular expressions library.
+//
+
+#ifndef __PCRE_HH__
+#define __PCRE_HH__
+
+#include <pcre.h>
+#include <string>
+
+#include <qpdf/QEXC.hh>
+
+// Note: this class does not encapsulate all features of the PCRE
+// package -- only those that I actually need right now are here.
+
+class PCRE // wraps one pcre* pattern handle; match results are reported through the nested Match class
+{
+ public:
+ class Exception: public QEXC::General
+ {
+ public:
+ Exception(std::string const& message);
+ virtual ~Exception() throw() {}
+ };
+
+ // This is thrown when an attempt is made to access a non-existent
+ // back reference.
+ class NoBackref: public Exception
+ {
+ public:
+ NoBackref();
+ virtual ~NoBackref() throw() {}
+ };
+
+ class Match
+ {
+ friend class PCRE;
+ public:
+ Match(int nbackrefs, char const* subject);
+ Match(Match const&);
+ Match& operator=(Match const&);
+ ~Match();
+ operator bool(); // NOTE(review): presumably true when the pattern matched -- confirm in PCRE.cc
+
+ // All the back reference accessing routines may throw the
+ // special exception NoBackref (derived from Exception) if the
+ // back reference does not exist. Exception will be thrown
+ // for other error conditions. This allows callers to trap
+ // this condition explicitly when they care about the
+ // difference between a backreference matching an empty string
+ // and not matching at all.
+
+ // see getMatch flags below
+ std::string getMatch(int n, int flags = 0)
+ throw(QEXC::General, Exception);
+ void getOffsetLength(int n, int& offset, int& length) throw(Exception);
+ int getOffset(int n) throw(Exception);
+ int getLength(int n) throw(Exception);
+
+ // nMatches returns the number of available matches including
+ // match 0 which is the whole string. In other words, if you
+ // have one backreference in your expression and the
+ // expression matches, nMatches() will return 2, getMatch(0)
+ // will return the whole string, getMatch(1) will return the
+ // text that matched the backreference, and getMatch(2) will
+ // throw an exception because it is out of range.
+ int nMatches() const;
+
+ // Flags for getMatch
+
+ // getMatch on a substring that didn't match should return
+ // empty string instead of throwing an exception
+ static int const gm_no_substring_returns_empty = (1 << 0);
+
+ private:
+ void init(int nmatches, int nbackrefs, char const* subject);
+ void copy(Match const&);
+ void destroy();
+
+ int nbackrefs;
+ char const* subject; // NOTE(review): stored as a raw pointer; presumably the caller's string must outlive the Match -- confirm in PCRE.cc
+ int* ovector; // NOTE(review): presumably the offset vector handed to pcre_exec -- confirm in PCRE.cc
+ int ovecsize; // NOTE(review): presumably the number of ints in ovector -- confirm in PCRE.cc
+ int nmatches;
+ };
+
+ // The value passed in as options is passed to pcre_exec. See man
+ // pcreapi for details. NOTE(review): for the constructor the options may instead go to pcre_compile -- confirm in PCRE.cc
+ PCRE(char const* pattern, int options = 0) throw(Exception);
+ ~PCRE();
+
+ Match match(char const* subject, int options = 0, int startoffset = 0,
+ int size = -1) // NOTE(review): size of -1 presumably means "use the length of subject" -- confirm in PCRE.cc
+ throw(QEXC::General, Exception);
+
+ static void test(int n = 0); // NOTE(review): the meaning of n is not visible in this header
+
+ private:
+ // prohibit copying and assignment
+ PCRE(PCRE const&);
+ PCRE& operator=(PCRE const&);
+
+ pcre* code; // handle type comes from <pcre.h>; NOTE(review): presumably the compiled pattern -- confirm in PCRE.cc
+ int nbackrefs;
+};
+
+#endif // __PCRE_HH__
diff --git a/libqpdf/qpdf/Pl_ASCII85Decoder.hh b/libqpdf/qpdf/Pl_ASCII85Decoder.hh
new file mode 100644
index 00000000..9883a58e
--- /dev/null
+++ b/libqpdf/qpdf/Pl_ASCII85Decoder.hh
@@ -0,0 +1,23 @@
+
+#ifndef __PL_ASCII85DECODER_HH__
+#define __PL_ASCII85DECODER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_ASCII85Decoder: public Pipeline // NOTE(review): presumably decodes ASCII85 input and forwards the result to next -- confirm in Pl_ASCII85Decoder.cc
+{
+ public:
+ Pl_ASCII85Decoder(char const* identifier, Pipeline* next);
+ virtual ~Pl_ASCII85Decoder();
+ virtual void write(unsigned char* buf, int len);
+ virtual void finish();
+
+ private:
+ void flush();
+
+ char inbuf[5]; // NOTE(review): presumably one group of up to five encoded characters -- confirm
+ int pos; // NOTE(review): presumably the next free index in inbuf -- confirm
+ int eod; // an int here, a bool in Pl_ASCIIHexDecoder; NOTE(review): presumably counts end-of-data marker characters -- confirm
+};
+
+#endif // __PL_ASCII85DECODER_HH__
diff --git a/libqpdf/qpdf/Pl_ASCIIHexDecoder.hh b/libqpdf/qpdf/Pl_ASCIIHexDecoder.hh
new file mode 100644
index 00000000..36272328
--- /dev/null
+++ b/libqpdf/qpdf/Pl_ASCIIHexDecoder.hh
@@ -0,0 +1,23 @@
+
+#ifndef __PL_ASCIIHEXDECODER_HH__
+#define __PL_ASCIIHEXDECODER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_ASCIIHexDecoder: public Pipeline // NOTE(review): presumably decodes ASCII hex input and forwards the result to next -- confirm in Pl_ASCIIHexDecoder.cc
+{
+ public:
+ Pl_ASCIIHexDecoder(char const* identifier, Pipeline* next);
+ virtual ~Pl_ASCIIHexDecoder();
+ virtual void write(unsigned char* buf, int len);
+ virtual void finish();
+
+ private:
+ void flush();
+
+ char inbuf[3]; // NOTE(review): presumably two hex digits plus a terminator -- confirm
+ int pos; // NOTE(review): presumably the next free index in inbuf -- confirm
+ bool eod; // NOTE(review): presumably set once the end-of-data marker has been seen -- confirm
+};
+
+#endif // __PL_ASCIIHEXDECODER_HH__
diff --git a/libqpdf/qpdf/Pl_LZWDecoder.hh b/libqpdf/qpdf/Pl_LZWDecoder.hh
new file mode 100644
index 00000000..95ec55b3
--- /dev/null
+++ b/libqpdf/qpdf/Pl_LZWDecoder.hh
@@ -0,0 +1,40 @@
+
+#ifndef __PL_LZWDECODER_HH__
+#define __PL_LZWDECODER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/Buffer.hh>
+#include <vector>
+
+class Pl_LZWDecoder: public Pipeline // NOTE(review): presumably decompresses LZW input and forwards the result to next -- confirm in Pl_LZWDecoder.cc
+{
+ public:
+ Pl_LZWDecoder(char const* identifier, Pipeline* next,
+ bool early_code_change);
+ virtual ~Pl_LZWDecoder();
+ virtual void write(unsigned char* buf, int len);
+ virtual void finish();
+
+ private:
+ void sendNextCode();
+ void handleCode(int code);
+ unsigned char getFirstChar(int code);
+ void addToTable(unsigned char next);
+
+ // members used for converting bits to codes
+ unsigned char buf[3];
+ int code_size; // NOTE(review): presumably the current code width in bits -- confirm
+ int next;
+ int byte_pos;
+ int bit_pos; // left to right: 01234567
+ int bits_available;
+
+ // members used for handling LZW decompression
+ bool code_change_delta; // NOTE(review): presumably derived from the early_code_change constructor argument -- confirm
+ bool eod;
+ std::vector<Buffer> table;
+ int last_code;
+};
+
+#endif // __PL_LZWDECODER_HH__
diff --git a/libqpdf/qpdf/Pl_MD5.hh b/libqpdf/qpdf/Pl_MD5.hh
new file mode 100644
index 00000000..2d9d11fd
--- /dev/null
+++ b/libqpdf/qpdf/Pl_MD5.hh
@@ -0,0 +1,30 @@
+
+#ifndef __PL_MD5_HH__
+#define __PL_MD5_HH__
+
+// This pipeline sends its output to its successor unmodified. After
+// calling finish, the MD5 checksum of the data that passed through
+// the pipeline is available.
+
+// This pipeline is reusable; i.e., it is safe to call write() after
+// calling finish(). The first call to write() after a call to
+// finish() initializes a new MD5 object.
+
+#include <qpdf/Pipeline.hh>
+#include <qpdf/MD5.hh>
+
+class Pl_MD5: public Pipeline // pass-through pipeline that also checksums the data; see the comments at the top of this header
+{
+ public:
+ Pl_MD5(char const* identifier, Pipeline* next);
+ virtual ~Pl_MD5();
+ virtual void write(unsigned char*, int);
+ virtual void finish();
+ std::string getHexDigest(); // the checksum is available after finish(), per the comments at the top of this header
+
+ private:
+ bool in_progress; // NOTE(review): presumably true between the first write() and the next finish() -- confirm in Pl_MD5.cc
+ MD5 md5;
+};
+
+#endif // __PL_MD5_HH__
diff --git a/libqpdf/qpdf/Pl_PNGFilter.hh b/libqpdf/qpdf/Pl_PNGFilter.hh
new file mode 100644
index 00000000..1ecc7060
--- /dev/null
+++ b/libqpdf/qpdf/Pl_PNGFilter.hh
@@ -0,0 +1,62 @@
+
+#ifndef __PL_PNGFILTER_HH__
+#define __PL_PNGFILTER_HH__
+
+// This pipeline applies or reverses the application of a PNG filter
+// as described in the PNG specification.
+
+// NOTE: In its initial implementation, it only encodes and decodes
+// filters "none" and "up". The primary motivation of this code is to
+// encode and decode PDF 1.5+ XRef streams which are often encoded
+// with Flate predictor 12, which corresponds to the PNG up filter.
+// At present, the bytes_per_pixel parameter is ignored, and an
+// exception is thrown if any row of the file has a filter of other
+// than 0 or 2. Finishing the implementation would not be difficult.
+// See chapter 6 of the PNG specification for a description of the
+// filter algorithms.
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_PNGFilter: public Pipeline // applies or reverses a PNG filter; see the note at the top of this header for current limits
+{
+ public:
+ class Exception: public Pipeline::Exception
+ {
+ public:
+ Exception(std::string const& message) :
+ Pipeline::Exception(message)
+ {
+ }
+
+ virtual ~Exception() throw ()
+ {
+ }
+ };
+
+ // Encoding is not presently supported (NOTE(review): the note at the top of this header says "none" and "up" are both encoded and decoded -- confirm which is current)
+ enum action_e { a_encode, a_decode };
+
+ Pl_PNGFilter(char const* identifier, Pipeline* next,
+ action_e action, unsigned int columns,
+ unsigned int bytes_per_pixel); // bytes_per_pixel is ignored at present, per the note at the top of this header
+ virtual ~Pl_PNGFilter();
+
+ virtual void write(unsigned char* data, int len);
+ virtual void finish();
+
+ private:
+ void processRow();
+ void encodeRow();
+ void decodeRow();
+
+ action_e action;
+ unsigned int columns;
+ unsigned char* cur_row;
+ unsigned char* prev_row;
+ unsigned char* buf1; // NOTE(review): buf1 and buf2 are presumably the row buffers that cur_row and prev_row point into -- confirm
+ unsigned char* buf2;
+ int pos;
+ int incoming;
+};
+
+#endif // __PL_PNGFILTER_HH__
diff --git a/libqpdf/qpdf/Pl_QPDFTokenizer.hh b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
new file mode 100644
index 00000000..448dbb18
--- /dev/null
+++ b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
@@ -0,0 +1,40 @@
+
+#ifndef __PL_QPDFTOKENIZER_HH__
+#define __PL_QPDFTOKENIZER_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/QPDFTokenizer.hh>
+
+//
+// Treat incoming text as a stream consisting of valid PDF tokens, but
+// output bad tokens just the same. The idea here is to be able to
+// use pipeline for content streams to normalize newlines without
+// interfering with meaningful newlines such as those that occur
+// inside of strings.
+//
+
+class Pl_QPDFTokenizer: public Pipeline // token-aware pipeline for content streams; see the comment block above
+{
+ public:
+ Pl_QPDFTokenizer(char const* identifier, Pipeline* next);
+ virtual ~Pl_QPDFTokenizer();
+ virtual void write(unsigned char* buf, int len);
+ virtual void finish();
+
+ private:
+ void processChar(char ch);
+ void checkUnread();
+ void writeNext(char const*, int len);
+ void writeToken(QPDFTokenizer::Token&);
+
+ QPDFTokenizer tokenizer;
+ bool newline_after_next_token;
+ bool just_wrote_nl;
+ bool last_char_was_cr;
+ bool unread_char; // NOTE(review): presumably set while char_to_unread holds a pending character -- confirm in Pl_QPDFTokenizer.cc
+ char char_to_unread;
+ bool pass_through; // NOTE(review): presumably switches to copying input unchanged -- confirm in Pl_QPDFTokenizer.cc
+};
+
+#endif // __PL_QPDFTOKENIZER_HH__
diff --git a/libqpdf/qpdf/Pl_RC4.hh b/libqpdf/qpdf/Pl_RC4.hh
new file mode 100644
index 00000000..6bebe5aa
--- /dev/null
+++ b/libqpdf/qpdf/Pl_RC4.hh
@@ -0,0 +1,42 @@
+
+#ifndef __PL_RC4_HH__
+#define __PL_RC4_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <qpdf/RC4.hh>
+
+class Pl_RC4: public Pipeline // pipeline holding an RC4 object; NOTE(review): presumably runs written data through it before passing it to next -- confirm in Pl_RC4.cc
+{
+ public:
+ class Exception: public Pipeline::Exception
+ {
+ public:
+ Exception(std::string const& message) :
+ Pipeline::Exception(message)
+ {
+ }
+
+ virtual ~Exception() throw()
+ {
+ }
+ };
+
+ static int const def_bufsize = 65536; // default value of the out_bufsize constructor argument
+
+ // key_len of -1 means treat key_data as a null-terminated string
+ Pl_RC4(char const* identifier, Pipeline* next,
+ unsigned char const* key_data, int key_len = -1,
+ int out_bufsize = def_bufsize);
+ virtual ~Pl_RC4();
+
+ virtual void write(unsigned char* data, int len);
+ virtual void finish();
+
+ private:
+ unsigned char* outbuf; // NOTE(review): presumably a buffer of out_bufsize bytes owned by this object -- confirm in Pl_RC4.cc
+ int out_bufsize;
+ RC4 rc4;
+};
+
+#endif // __PL_RC4_HH__
diff --git a/libqpdf/qpdf/QPDF_Array.hh b/libqpdf/qpdf/QPDF_Array.hh
new file mode 100644
index 00000000..371be50e
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Array.hh
@@ -0,0 +1,24 @@
+
+#ifndef __QPDF_ARRAY_HH__
+#define __QPDF_ARRAY_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+#include <vector>
+#include <qpdf/QPDFObjectHandle.hh>
+
+class QPDF_Array: public QPDFObject // QPDFObject subclass holding a vector of QPDFObjectHandle items
+{
+ public:
+ QPDF_Array(std::vector<QPDFObjectHandle> const& items);
+ virtual ~QPDF_Array();
+ virtual std::string unparse();
+ int getNItems() const;
+ QPDFObjectHandle getItem(int n) const; // NOTE(review): behavior for an out-of-range n is not visible in this header
+ void setItem(int, QPDFObjectHandle const&); // NOTE(review): behavior for an out-of-range index is not visible in this header
+
+ private:
+ std::vector<QPDFObjectHandle> items;
+};
+
+#endif // __QPDF_ARRAY_HH__
diff --git a/libqpdf/qpdf/QPDF_Bool.hh b/libqpdf/qpdf/QPDF_Bool.hh
new file mode 100644
index 00000000..06aca822
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Bool.hh
@@ -0,0 +1,19 @@
+
+#ifndef __QPDF_BOOL_HH__
+#define __QPDF_BOOL_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Bool: public QPDFObject // QPDFObject subclass holding a single bool
+{
+ public:
+ QPDF_Bool(bool val);
+ virtual ~QPDF_Bool();
+ virtual std::string unparse(); // NOTE(review): the exact text produced is defined in QPDF_Bool.cc, not visible here
+ bool getVal() const;
+
+ private:
+ bool val;
+};
+
+#endif // __QPDF_BOOL_HH__
diff --git a/libqpdf/qpdf/QPDF_Dictionary.hh b/libqpdf/qpdf/QPDF_Dictionary.hh
new file mode 100644
index 00000000..6a79fb69
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Dictionary.hh
@@ -0,0 +1,35 @@
+
+#ifndef __QPDF_DICTIONARY_HH__
+#define __QPDF_DICTIONARY_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+#include <set>
+#include <map>
+
+#include <qpdf/QPDFObjectHandle.hh>
+
+class QPDF_Dictionary: public QPDFObject // QPDFObject subclass holding a map from string keys to QPDFObjectHandle values
+{
+ public:
+ QPDF_Dictionary(std::map<std::string, QPDFObjectHandle> const& items);
+ virtual ~QPDF_Dictionary();
+ virtual std::string unparse();
+
+ // hasKey() and getKeys() treat keys with null values as if they
+ // aren't there. getKey() returns null for the value of a
+ // non-existent key. This is as per the PDF spec.
+ bool hasKey(std::string const&);
+ QPDFObjectHandle getKey(std::string const&);
+ std::set<std::string> getKeys();
+
+ // Replace value of key, adding it if it does not exist
+ void replaceKey(std::string const& key, QPDFObjectHandle const&);
+ // Remove key, doing nothing if key does not exist
+ void removeKey(std::string const& key);
+
+ private:
+ std::map<std::string, QPDFObjectHandle> items;
+};
+
+#endif // __QPDF_DICTIONARY_HH__
diff --git a/libqpdf/qpdf/QPDF_Integer.hh b/libqpdf/qpdf/QPDF_Integer.hh
new file mode 100644
index 00000000..fb6360b2
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Integer.hh
@@ -0,0 +1,19 @@
+
+#ifndef __QPDF_INTEGER_HH__
+#define __QPDF_INTEGER_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Integer: public QPDFObject // QPDFObject subclass holding a single int
+{
+ public:
+ QPDF_Integer(int val);
+ virtual ~QPDF_Integer();
+ virtual std::string unparse(); // NOTE(review): the exact text produced is defined in QPDF_Integer.cc, not visible here
+ int getVal() const;
+
+ private:
+ int val; // plain int, so the representable range is that of int on the build platform
+};
+
+#endif // __QPDF_INTEGER_HH__
diff --git a/libqpdf/qpdf/QPDF_Name.hh b/libqpdf/qpdf/QPDF_Name.hh
new file mode 100644
index 00000000..a32f6f4f
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Name.hh
@@ -0,0 +1,22 @@
+
+#ifndef __QPDF_NAME_HH__
+#define __QPDF_NAME_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Name: public QPDFObject // QPDFObject subclass holding a name as a std::string
+{
+ public:
+ QPDF_Name(std::string const& name);
+ virtual ~QPDF_Name();
+ virtual std::string unparse(); // NOTE(review): presumably applies normalizeName() to the stored name -- confirm in QPDF_Name.cc
+ std::string getName() const;
+
+ // Put # into strings with characters unsuitable for name token
+ static std::string normalizeName(std::string const& name);
+
+ private:
+ std::string name;
+};
+
+#endif // __QPDF_NAME_HH__
diff --git a/libqpdf/qpdf/QPDF_Null.hh b/libqpdf/qpdf/QPDF_Null.hh
new file mode 100644
index 00000000..60c1ae35
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Null.hh
@@ -0,0 +1,14 @@
+
+#ifndef __QPDF_NULL_HH__
+#define __QPDF_NULL_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Null: public QPDFObject // QPDFObject subclass with no data members and no declared constructor
+{
+ public:
+ virtual ~QPDF_Null();
+ std::string unparse(); // not marked virtual here, unlike most sibling classes; NOTE(review): presumably still overrides QPDFObject::unparse -- confirm in QPDFObject.hh
+};
+
+#endif // __QPDF_NULL_HH__
diff --git a/libqpdf/qpdf/QPDF_Real.hh b/libqpdf/qpdf/QPDF_Real.hh
new file mode 100644
index 00000000..b950c569
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Real.hh
@@ -0,0 +1,20 @@
+
+#ifndef __QPDF_REAL_HH__
+#define __QPDF_REAL_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+class QPDF_Real: public QPDFObject // QPDFObject subclass holding a real number in its string form
+{
+ public:
+ QPDF_Real(std::string const& val);
+ virtual ~QPDF_Real();
+ std::string unparse(); // not marked virtual here, unlike most sibling classes; NOTE(review): presumably still overrides QPDFObject::unparse -- confirm in QPDFObject.hh
+ std::string getVal(); // returns the value as a string, matching how it is stored; not const, unlike getVal() in QPDF_Bool and QPDF_Integer
+
+ private:
+ // Store reals as strings to avoid roundoff errors.
+ std::string val;
+};
+
+#endif // __QPDF_REAL_HH__
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
new file mode 100644
index 00000000..71381fd3
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -0,0 +1,42 @@
+
+#ifndef __QPDF_STREAM_HH__
+#define __QPDF_STREAM_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+#include <qpdf/QPDFObjectHandle.hh>
+
+class Pipeline;
+class QPDF;
+
+class QPDF_Stream: public QPDFObject // QPDFObject subclass for streams: a dictionary plus an offset and length, with a QPDF* back-pointer
+{
+ public:
+ QPDF_Stream(QPDF*, int objid, int generation,
+ QPDFObjectHandle stream_dict,
+ off_t offset, int length);
+ virtual ~QPDF_Stream();
+ virtual std::string unparse();
+ QPDFObjectHandle getDict() const;
+
+ // See comments in QPDFObjectHandle.hh
+ bool pipeStreamData(Pipeline*, bool filter,
+ bool normalize, bool compress);
+
+ // See comments in QPDFObjectHandle.hh
+ PointerHolder<Buffer> getStreamData();
+
+ private:
+ bool filterable(std::vector<std::string>& filters,
+ int& predictor, int& columns, bool& early_code_change); // all four arguments are non-const references; NOTE(review): presumably outputs -- confirm in QPDF_Stream.cc
+
+
+ QPDF* qpdf; // raw pointer; NOTE(review): presumably not owned by the stream -- confirm in QPDF_Stream.cc
+ int objid;
+ int generation;
+ QPDFObjectHandle stream_dict;
+ off_t offset; // NOTE(review): presumably where the stream data starts in the input file -- confirm in QPDF_Stream.cc
+ int length; // NOTE(review): presumably the size of the stream data in bytes -- confirm in QPDF_Stream.cc
+};
+
+#endif // __QPDF_STREAM_HH__
diff --git a/libqpdf/qpdf/QPDF_String.hh b/libqpdf/qpdf/QPDF_String.hh
new file mode 100644
index 00000000..f3063c50
--- /dev/null
+++ b/libqpdf/qpdf/QPDF_String.hh
@@ -0,0 +1,23 @@
+
+#ifndef __QPDF_STRING_HH__
+#define __QPDF_STRING_HH__
+
+#include <qpdf/QPDFObject.hh>
+
+// QPDF_Strings may include embedded null characters.
+
+class QPDF_String: public QPDFObject // QPDFObject subclass holding string data in a std::string
+{
+ public:
+ QPDF_String(std::string const& val);
+ virtual ~QPDF_String();
+ virtual std::string unparse();
+ std::string unparse(bool force_binary); // NOTE(review): the effect of force_binary on the output form is defined in QPDF_String.cc, not visible here
+ std::string getVal() const;
+ std::string getUTF8Val() const; // NOTE(review): presumably val converted to UTF-8; the source encoding handling is not visible here
+
+ private:
+ std::string val;
+};
+
+#endif // __QPDF_STRING_HH__
diff --git a/libqpdf/qpdf/RC4.hh b/libqpdf/qpdf/RC4.hh
new file mode 100644
index 00000000..657bf35b
--- /dev/null
+++ b/libqpdf/qpdf/RC4.hh
@@ -0,0 +1,26 @@
+
+#ifndef __RC4_HH__
+#define __RC4_HH__
+
+class RC4 // RC4 cipher object: built from key bytes, then transforms buffers through process()
+{
+ public:
+ // key_len of -1 means treat key_data as a null-terminated string
+ RC4(unsigned char const* key_data, int key_len = -1);
+
+ // out_data = 0 means to encrypt/decrypt in place
+ void process(unsigned char* in_data, int len, unsigned char* out_data = 0);
+
+ private:
+ class RC4Key
+ {
+ public:
+ unsigned char state[256]; // NOTE(review): presumably the RC4 permutation table -- confirm in RC4.cc
+ unsigned char x; // NOTE(review): x and y are presumably the two running stream indices -- confirm in RC4.cc
+ unsigned char y;
+ };
+
+ RC4Key key; // NOTE(review): presumably initialised by the constructor and advanced by each process() call -- confirm in RC4.cc
+};
+
+#endif // __RC4_HH__