aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2008-04-29 14:55:25 +0200
committerJay Berkenbilt <ejb@ql.org>2008-04-29 14:55:25 +0200
commit9a0b88bf7777c153dc46ace22db74ef24d51583a (patch)
treef567ac1cf2bf5071a611eb49323a935b6ac938ff /include
downloadqpdf-9a0b88bf7777c153dc46ace22db74ef24d51583a.tar.zst
update release date to actual daterelease-qpdf-2.0
git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649
Diffstat (limited to 'include')
-rw-r--r--include/qpdf/Buffer.hh32
-rw-r--r--include/qpdf/Pipeline.hh73
-rw-r--r--include/qpdf/Pl_Buffer.hh46
-rw-r--r--include/qpdf/Pl_Count.hh34
-rw-r--r--include/qpdf/Pl_Discard.hh28
-rw-r--r--include/qpdf/Pl_Flate.hh53
-rw-r--r--include/qpdf/Pl_StdioFile.hh49
-rw-r--r--include/qpdf/PointerHolder.hh170
-rw-r--r--include/qpdf/QEXC.hh119
-rw-r--r--include/qpdf/QPDF.hh750
-rw-r--r--include/qpdf/QPDFExc.hh22
-rw-r--r--include/qpdf/QPDFObject.hh20
-rw-r--r--include/qpdf/QPDFObjectHandle.hh221
-rw-r--r--include/qpdf/QPDFTokenizer.hh141
-rw-r--r--include/qpdf/QPDFWriter.hh243
-rw-r--r--include/qpdf/QPDFXRefEntry.hh34
-rw-r--r--include/qpdf/QTC.hh16
-rw-r--r--include/qpdf/QUtil.hh45
18 files changed, 2096 insertions, 0 deletions
diff --git a/include/qpdf/Buffer.hh b/include/qpdf/Buffer.hh
new file mode 100644
index 00000000..703dee3e
--- /dev/null
+++ b/include/qpdf/Buffer.hh
@@ -0,0 +1,32 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __BUFFER_HH__
+#define __BUFFER_HH__
+
+class Buffer // holds an unsigned char buffer pointer together with a size
+{
+ public:
+ Buffer(); // default constructor (defined out of line)
+ Buffer(unsigned long size); // NOTE(review): presumably creates a buffer of `size` bytes -- confirm in Buffer.cc
+ Buffer(Buffer const&); // copy constructor
+ Buffer& operator=(Buffer const&); // copy assignment
+ ~Buffer();
+ unsigned long getSize() const; // accessor; presumably returns the size member -- confirm
+ unsigned char const* getBuffer() const; // read-only pointer, selected for const objects
+ unsigned char* getBuffer(); // writable pointer, selected for non-const objects
+
+ private:
+ void init(unsigned long size); // private helpers; NOTE(review): presumably shared by the
+ void copy(Buffer const&); // constructors, assignment operator and destructor above --
+ void destroy(); // confirm against the implementation file
+
+ unsigned long size; // NOTE(review): presumably the length of buf in bytes -- confirm
+ unsigned char* buf; // raw pointer; how it is allocated/freed is not visible in this header
+};
+
+#endif // __BUFFER_HH__
diff --git a/include/qpdf/Pipeline.hh b/include/qpdf/Pipeline.hh
new file mode 100644
index 00000000..acbc2d98
--- /dev/null
+++ b/include/qpdf/Pipeline.hh
@@ -0,0 +1,73 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+// Generalized Pipeline interface. By convention, subclasses of
+// Pipeline are called Pl_Something.
+//
+// When an instance of Pipeline is created with a pointer to a next
+// pipeline, that pipeline writes its data to the next one when it
+// finishes with it. In order to make possible a usage style in which
+// a pipeline may be passed to a function which may stick other
+// pipelines in front of it, the allocator of a pipeline is
+// responsible for its destruction. In other words, one pipeline
+// object does not attempt to manage the memory of its successor.
+//
+// The client is required to call finish() before destroying a
+// Pipeline in order to avoid loss of data. A Pipeline class should
+// not throw an exception in the destructor if this hasn't been done
+// though since doing so causes too much trouble when deleting
+// pipelines during error conditions.
+//
+// Some pipelines are reusable (i.e., you can call write() after
+// calling finish() and can call finish() multiple times) while others
+// are not. It is up to the caller to use a pipeline according to its
+// own restrictions.
+
+#ifndef __PIPELINE_HH__
+#define __PIPELINE_HH__
+
+#include <qpdf/QEXC.hh>
+
+class Pipeline // abstract base class: write() and finish() are pure virtual
+{
+ public:
+ class Exception: public QEXC::General // exception type nested in Pipeline
+ {
+ public:
+ Exception(std::string const& message) : // forwards message unchanged to the base class
+ QEXC::General(message)
+ {
+ }
+
+ virtual ~Exception() throw() // empty destructor with a no-throw specification
+ {
+ }
+ };
+
+ Pipeline(char const* identifier, Pipeline* next); // NOTE(review): presumably stores both arguments in the members below -- confirm
+
+ virtual ~Pipeline(); // virtual so subclasses can be deleted through a Pipeline*
+
+ // Subclasses should implement write and finish to do their jobs
+ // and then, if they are not end-of-line pipelines, call
+ // getNext()->write or getNext()->finish.
+ virtual void write(unsigned char* data, int len) = 0;
+ virtual void finish() = 0;
+
+ protected:
+ Pipeline* getNext(bool allow_null = false); // NOTE(review): allow_null presumably permits a null next without an error -- confirm in Pipeline.cc
+ std::string identifier; // protected, so subclasses can read it directly
+
+ private:
+ // Do not implement copy or assign
+ Pipeline(Pipeline const&); // declared private and left undefined: copying is disabled
+ Pipeline& operator=(Pipeline const&);
+
+ Pipeline* next; // private; subclasses reach it through getNext()
+};
+
+#endif // __PIPELINE_HH__
diff --git a/include/qpdf/Pl_Buffer.hh b/include/qpdf/Pl_Buffer.hh
new file mode 100644
index 00000000..e78b5a17
--- /dev/null
+++ b/include/qpdf/Pl_Buffer.hh
@@ -0,0 +1,46 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __PL_BUFFER_HH__
+#define __PL_BUFFER_HH__
+
+// This pipeline accumulates the data passed to it into a memory
+// buffer. Each subsequent use of this buffer appends to the data
+// accumulated so far. getBuffer() may be called only after calling
+// finish() and before calling any subsequent write(). At that point,
+// a dynamically allocated Buffer object is returned and the internal
+// buffer is reset. The caller is responsible for deleting the
+// returned Buffer.
+//
+// For this pipeline, "next" may be null. If a next pointer is
+// provided, this pipeline will also pass the data through to it.
+
+#include <qpdf/Pipeline.hh>
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/Buffer.hh>
+#include <list>
+
+class Pl_Buffer: public Pipeline // pipeline that accumulates written data in memory
+{
+ public:
+ Pl_Buffer(char const* identifier, Pipeline* next = 0); // next defaults to null (no downstream pipeline)
+ virtual ~Pl_Buffer();
+ virtual void write(unsigned char*, int); // implements Pipeline::write (data pointer, length)
+ virtual void finish(); // implements Pipeline::finish
+
+ // Each call to getBuffer() resets this object -- see notes above.
+ // The caller is responsible for deleting the returned Buffer
+ // object.
+ Buffer* getBuffer();
+
+ private:
+ bool ready; // NOTE(review): presumably marks that finish() ran and getBuffer() is allowed -- confirm in Pl_Buffer.cc
+ std::list<PointerHolder<Buffer> > data; // list of reference-counted Buffers; presumably one per write() -- confirm
+ size_t total_size; // NOTE(review): presumably the combined size of the Buffers in data -- confirm
+};
+
+#endif // __PL_BUFFER_HH__
diff --git a/include/qpdf/Pl_Count.hh b/include/qpdf/Pl_Count.hh
new file mode 100644
index 00000000..287b8297
--- /dev/null
+++ b/include/qpdf/Pl_Count.hh
@@ -0,0 +1,34 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __PL_COUNT_HH__
+#define __PL_COUNT_HH__
+
+// This pipeline is reusable; i.e., it is safe to call write() after
+// calling finish().
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_Count: public Pipeline // pipeline that exposes a byte count and the last byte written
+{
+ public:
+ Pl_Count(char const* identifier, Pipeline* next); // next has no default argument here
+ virtual ~Pl_Count();
+ virtual void write(unsigned char*, int); // implements Pipeline::write (data pointer, length)
+ virtual void finish(); // implements Pipeline::finish
+ // Returns the number of bytes written
+ int getCount() const; // note: the count is a plain int
+ // Returns the last character written, or '\0' if no characters
+ // have been written (in which case getCount() returns 0)
+ unsigned char getLastChar() const;
+
+ private:
+ int count; // backing store for getCount(), per the comments above
+ unsigned char last_char; // backing store for getLastChar(), per the comments above
+};
+
+#endif // __PL_COUNT_HH__
diff --git a/include/qpdf/Pl_Discard.hh b/include/qpdf/Pl_Discard.hh
new file mode 100644
index 00000000..cd0865a8
--- /dev/null
+++ b/include/qpdf/Pl_Discard.hh
@@ -0,0 +1,28 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __PL_DISCARD_HH__
+#define __PL_DISCARD_HH__
+
+// This pipeline discards its output. It is an end-of-line pipeline
+// (with no next).
+
+// This pipeline is reusable; i.e., it is safe to call write() after
+// calling finish().
+
+#include <qpdf/Pipeline.hh>
+
+class Pl_Discard: public Pipeline // pipeline that discards its output (see notes above)
+{
+ public:
+ Pl_Discard(); // takes neither an identifier nor a next pipeline
+ virtual ~Pl_Discard();
+ virtual void write(unsigned char*, int); // implements Pipeline::write (data pointer, length)
+ virtual void finish(); // implements Pipeline::finish
+};
+
+#endif // __PL_DISCARD_HH__
diff --git a/include/qpdf/Pl_Flate.hh b/include/qpdf/Pl_Flate.hh
new file mode 100644
index 00000000..16058d37
--- /dev/null
+++ b/include/qpdf/Pl_Flate.hh
@@ -0,0 +1,53 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __PL_FLATE_HH__
+#define __PL_FLATE_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <zlib.h>
+
+class Pl_Flate: public Pipeline // pipeline built on zlib (holds a z_stream)
+{
+ public:
+ class Exception: public Pipeline::Exception // exception type nested in Pl_Flate
+ {
+ public:
+ Exception(std::string const& message) : // forwards message unchanged to Pipeline::Exception
+ Pipeline::Exception(message)
+ {
+ }
+
+ virtual ~Exception() throw () // empty destructor with a no-throw specification
+ {
+ }
+ };
+
+ static int const def_bufsize = 65536; // 64 KiB; default value of the out_bufsize parameter
+
+ enum action_e { a_inflate, a_deflate }; // selects zlib inflate or deflate
+
+ Pl_Flate(char const* identifier, Pipeline* next, // out_bufsize may be omitted and then
+ action_e action, int out_bufsize = def_bufsize); // defaults to def_bufsize
+ virtual ~Pl_Flate();
+
+ virtual void write(unsigned char* data, int len); // implements Pipeline::write
+ virtual void finish(); // implements Pipeline::finish
+
+ private:
+ void handleData(unsigned char* data, int len, int flush); // NOTE(review): flush is presumably a zlib flush constant -- confirm in Pl_Flate.cc
+ void checkError(char const* prefix, int error_code); // NOTE(review): presumably reports a zlib return code as an Exception -- confirm
+
+ unsigned char* outbuf; // NOTE(review): presumably an output buffer of out_bufsize bytes -- confirm
+ int out_bufsize; // size passed to the constructor (default def_bufsize)
+ action_e action; // direction chosen at construction
+ bool initialized; // NOTE(review): presumably tracks whether zstream has been set up -- confirm
+ z_stream zstream; // zlib stream state (type from <zlib.h>)
+};
+
+#endif // __PL_FLATE_HH__
diff --git a/include/qpdf/Pl_StdioFile.hh b/include/qpdf/Pl_StdioFile.hh
new file mode 100644
index 00000000..d74ded3a
--- /dev/null
+++ b/include/qpdf/Pl_StdioFile.hh
@@ -0,0 +1,49 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+// End-of-line pipeline that simply writes its data to a stdio FILE* object.
+
+#ifndef __PL_STDIOFILE_HH__
+#define __PL_STDIOFILE_HH__
+
+#include <qpdf/Pipeline.hh>
+
+#include <stdio.h>
+
+//
+// This pipeline is reusable.
+//
+
+class Pl_StdioFile: public Pipeline // pipeline that writes to a stdio FILE* (see notes above)
+{
+ public:
+ class Exception: public Pipeline::Exception // exception type nested in Pl_StdioFile
+ {
+ public:
+ Exception(std::string const& message) : // forwards message unchanged to Pipeline::Exception
+ Pipeline::Exception(message)
+ {
+ }
+
+ virtual ~Exception() throw () // empty destructor with a no-throw specification
+ {
+ }
+ };
+
+ // f is externally maintained; this class just writes to and
+ // flushes it. It does not close it.
+ Pl_StdioFile(char const* identifier, FILE* f); // takes no next pipeline argument
+ virtual ~Pl_StdioFile();
+
+ virtual void write(unsigned char* buf, int len); // implements Pipeline::write
+ virtual void finish(); // implements Pipeline::finish
+
+ private:
+ FILE* file; // the externally maintained stream; not closed by this class (see above)
+};
+
+#endif // __PL_STDIOFILE_HH__
diff --git a/include/qpdf/PointerHolder.hh b/include/qpdf/PointerHolder.hh
new file mode 100644
index 00000000..b4e9bb64
--- /dev/null
+++ b/include/qpdf/PointerHolder.hh
@@ -0,0 +1,170 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __POINTERHOLDER_HH__
+#define __POINTERHOLDER_HH__
+
+#include <iostream>
+
+// This class is basically boost::shared_ptr but predates that by
+// several years.
+
+// This class expects to be initialized with a dynamically allocated
+// object pointer. It keeps a reference count and deletes this once
+// the reference count goes to zero. PointerHolder objects are
+// explicitly safe for use in STL containers.
+
+// It is very important that a client who pulls the pointer out of
+// this holder does not let the holder go out of scope until it is
+// finished with the pointer. It is also important that exactly one
+// instance of this object ever gets initialized with a given pointer.
+// Otherwise, the pointer will be deleted twice, and before that, some
+// objects will be left with a pointer to a deleted object. In other
+// words, the only legitimate way for two PointerHolder objects to
+// contain the same pointer is for one to be a copy of the other.
+// Copy and assignment semantics are well-defined and essentially
+// allow you to use PointerHolder as a means to get pass-by-reference
+// semantics in a pass-by-value environment without having to worry
+// about memory management details.
+
+// Comparison (== and <) are defined and operate on the internally
+// stored pointers, not on the data. This makes it possible to store
+// PointerHolder objects in sorted lists or to find them in STL
+// containers just as one would be able to store pointers. Comparing
+// the underlying pointers provides a well-defined, if not
+// particularly meaningful, ordering.
+
+template <class T>
+class PointerHolder // reference-counted holder for a dynamically allocated T
+{
+ private:
+ class Data // shared record: the pointer plus its reference count
+ {
+ public:
+ Data(T* pointer, bool tracing) :
+ pointer(pointer),
+ tracing(tracing),
+ refcount(0) // starts at 0; PointerHolder::init() performs the first increment
+ {
+ static int next_id = 0; // function-local static: ids are numbered separately per T
+ this->unique_id = ++next_id; // first id handed out is 1; used only in trace output
+ }
+ ~Data()
+ {
+ if (this->tracing)
+ {
+ std::cerr << "PointerHolder deleting pointer "
+ << (void*)pointer
+ << std::endl;
+ }
+ delete this->pointer; // scalar delete: the pointer must not come from new[]
+ if (this->tracing)
+ {
+ std::cerr << "PointerHolder done deleting pointer "
+ << (void*)pointer // only the address is printed; nothing is dereferenced
+ << std::endl;
+ }
+ }
+ T* pointer; // the managed object (may be null)
+ bool tracing; // when true, lifetime events are logged to std::cerr
+ int refcount; // number of PointerHolder objects sharing this Data
+ int unique_id; // identifies this Data in trace messages
+ private:
+ Data(Data const&); // declared private and not defined here: Data is not copyable
+ Data& operator=(Data const&);
+ };
+
+ public:
+ PointerHolder(T* pointer = 0, bool tracing = false) // not explicit: a T* converts implicitly
+ {
+ this->init(new Data(pointer, tracing)); // a fresh Data is allocated even for a null pointer
+ }
+ PointerHolder(PointerHolder const& rhs)
+ {
+ this->copy(rhs); // share rhs's Data and bump its refcount
+ }
+ PointerHolder& operator=(PointerHolder const& rhs)
+ {
+ if (this != &rhs) // self-assignment guard
+ {
+ this->destroy(); // drop our current reference first (may delete the old object)
+ this->copy(rhs); // then share rhs's Data
+ }
+ return *this;
+ }
+ ~PointerHolder()
+ {
+ this->destroy(); // drop our reference; the last holder frees the Data
+ }
+ bool operator==(PointerHolder const& rhs) const
+ {
+ return this->data->pointer == rhs.data->pointer; // compares addresses, not pointed-to values
+ }
+ bool operator<(PointerHolder const& rhs) const
+ {
+ return this->data->pointer < rhs.data->pointer; // orders by address, not by pointed-to values
+ }
+
+ // NOTE: The pointer returned by getPointer turns into a pumpkin
+ // when the last PointerHolder that contains it disappears.
+ T* getPointer()
+ {
+ return this->data->pointer; // raw pointer; ownership stays with the holders
+ }
+ T const* getPointer() const
+ {
+ return this->data->pointer; // const overload returns a pointer to const T
+ }
+ int getRefcount() const
+ {
+ return this->data->refcount; // current number of holders sharing the pointer
+ }
+
+ private:
+ void init(Data* data) // attach to data and count this holder as a reference
+ {
+ this->data = data;
+ { // bare block: only introduces a scope
+ ++this->data->refcount;
+ if (this->data->tracing)
+ {
+ std::cerr << "PointerHolder " << this->data->unique_id
+ << " refcount increased to " << this->data->refcount
+ << std::endl;
+ }
+ }
+ }
+ void copy(PointerHolder const& rhs)
+ {
+ this->init(rhs.data); // reuse rhs's Data rather than allocating a new one
+ }
+ void destroy() // release this holder's reference
+ {
+ bool gone = false; // set when this was the last reference
+ { // bare block: only introduces a scope
+ if (--this->data->refcount == 0) // decrement, then test for the last holder
+ {
+ gone = true;
+ }
+ if (this->data->tracing)
+ {
+ std::cerr << "PointerHolder " << this->data->unique_id
+ << " refcount decreased to "
+ << this->data->refcount
+ << std::endl;
+ }
+ }
+ if (gone)
+ {
+ delete this->data; // runs ~Data(), which deletes the managed object
+ }
+ }
+
+ Data* data; // shared record; assigned in init(), so set by every constructor
+};
+
+#endif // __POINTERHOLDER_HH__
diff --git a/include/qpdf/QEXC.hh b/include/qpdf/QEXC.hh
new file mode 100644
index 00000000..b94edf7a
--- /dev/null
+++ b/include/qpdf/QEXC.hh
@@ -0,0 +1,119 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QEXC_HH__
+#define __QEXC_HH__
+
+#include <string>
+#include <exception>
+#include <errno.h>
+
+namespace QEXC
+{
+ // This namespace contains all exception classes used by the
+ // library.
+
+ // The class hierarchy is as follows:
+
+ // std::exception
+ // |
+ // +-> QEXC::Base
+ // |
+ // +-> QEXC::General (QEXC::System, declared below, derives from General)
+ // |
+ // +-> QEXC::Internal
+
+ // QEXC::General is the base class of all standard user-defined
+ // exceptions and "expected" error conditions raised by QClass.
+ // Applications or libraries using QClass are encouraged to derive
+ // their own exceptions from these classes if they wish. It is
+ // entirely reasonable for code to catch QEXC::General or specific
+ // subclasses of it as part of normal error handling.
+
+ // QEXC::Internal is reserved for internal errors. These should
+ // be used only for situations that indicate a likely bug in the
+ // software itself. This may include improper use of a library
+ // function. Operator errors should not be able to cause Internal
+ // errors. (There may be some exceptions to this such as users
+ // invoking programs that were intended only to be invoked by
+ // other programs.) QEXC::Internal should generally not be
+ // trapped except in terminate handlers or top-level exception
+ // handlers which will want to translate them into error messages
+ // and cause the program to exit. Such top-level handlers may
+ // want to catch std::exception instead.
+
+ // All subclasses of QEXC::Base implement a const unparse() method
+ // which returns a std::string const&. They also override
+ // std::exception::what() to return a char* with the same value.
+ // unparse() should be implemented in such a way that a program
+ // catching QEXC::Base or std::exception can use the text returned
+ // by unparse() (or what()) without any exception-specific
+ // adornment. (The program may prefix the program name or other
+ // general information.) Note that std::exception::what() is a
+ // const method that returns a const char*. For this reason, it
+ // is essential that unparse() return a const reference to a
+ // string so that what() can be implemented by calling unparse().
+ // This means that the string that unparse() returns a reference
+ // to must not be allocated on the stack in the call to unparse().
+ // The recommended way to do this is for derived exception classes
+ // to store their string descriptions by calling the protected
+ // setMessage() method and then to not override unparse().
+
+ class Base: public std::exception
+ {
+ // This is the common base class for all exceptions in qclass.
+ // Application/library code should not generally catch this
+ // directly. See above for caveats.
+ public:
+ Base(); // constructs without a message argument
+ Base(std::string const& message);
+ virtual ~Base() throw() {} // empty inline destructor with a no-throw specification
+ virtual std::string const& unparse() const; // returns a reference, so the string must outlive the call (see above)
+ virtual const char* what() const throw(); // overrides std::exception::what()
+
+ protected:
+ void setMessage(std::string const& message); // recommended way for subclasses to set their text (see above)
+
+ private:
+ std::string message; // private; subclasses change it through setMessage()
+ };
+
+ class General: public Base
+ {
+ // This is the base class for normal user/library-defined
+ // error conditions.
+ public:
+ General();
+ General(std::string const& message);
+ virtual ~General() throw() {}; // the ';' after the body is redundant but permitted
+ };
+
+ // Note that Internal is not derived from General. Internal
+ // errors are too severe. We don't want internal errors
+ // accidentally trapped as part of QEXC::General. If you are
+ // going to deal with internal errors, you have to do so
+ // explicitly.
+ class Internal: public Base
+ {
+ public:
+ Internal(std::string const& message); // no default constructor is declared: a message is required
+ virtual ~Internal() throw() {}; // the ';' after the body is redundant but permitted
+ };
+
+ class System: public General // General subclass that also carries an errno value
+ {
+ public:
+ System(std::string const& prefix, int sys_errno); // NOTE(review): presumably builds the message from prefix plus the errno text -- confirm in QEXC.cc
+ virtual ~System() throw() {}; // the ';' after the body is redundant but permitted
+ int getErrno() const; // accessor; presumably returns sys_errno -- confirm
+
+ private:
+ int sys_errno; // errno value supplied to the constructor
+ };
+}; // closes namespace QEXC; the ';' is an empty declaration that pedantic C++98 builds may flag
+
+#endif // __QEXC_HH__
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
new file mode 100644
index 00000000..d311b3c8
--- /dev/null
+++ b/include/qpdf/QPDF.hh
@@ -0,0 +1,750 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QPDF_HH__
+#define __QPDF_HH__
+
+#include <stdio.h>
+#include <string>
+#include <map>
+#include <list>
+
+#include <qpdf/QPDFXRefEntry.hh>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/QPDFTokenizer.hh>
+#include <qpdf/Buffer.hh>
+
+class QPDF_Stream;
+class BitStream;
+class BitWriter;
+class QPDFExc;
+
+class QPDF
+{
+ public:
+ QPDF();
+ ~QPDF();
+
+ // Associate a file with a QPDF object and do initial parsing of
+ // the file. PDF objects are not read until they are needed. A
+ // QPDF object may be associated with only one file in its
+ // lifetime. This method must be called before any methods that
+ // potentially ask for information about the PDF file are called.
+ // Prior to calling this, the only methods that are allowed are
+ // those that set parameters.
+ void processFile(char const* filename, char const* password = "");
+
+ // Parameter settings
+
+ // If true, ignore any cross-reference streams in a hybrid file
+ // (one that contains both cross-reference streams and
+ // cross-reference tables). This can be useful for testing to
+ // ensure that a hybrid file would work with an older reader.
+ void setIgnoreXRefStreams(bool);
+
+ // By default, any warnings are issued to stderr as they are
+ // encountered. If this is called with a true value, reporting of
+ // warnings is suppressed. You may still retrieve warnings by
+ // calling getWarnings.
+ void setSuppressWarnings(bool);
+
+ // By default, QPDF will try to recover if it finds certain types
+ // of errors in PDF files. If turned off, it will throw an
+ // exception on the first such problem it finds without attempting
+ // recovery.
+ void setAttemptRecovery(bool);
+
+ // Other public methods
+
+ // Return the list of warnings that have been issued so far and
+ // clear the list. This method may be called even if processFile
+ // throws an exception. Note that if setSuppressWarnings was not
+ // called or was called with a false value, any warnings retrieved
+ // here will have already been issued to stderr.
+ std::vector<std::string> getWarnings();
+
+ std::string getFilename() const;
+ std::string getPDFVersion() const;
+ QPDFObjectHandle getTrailer();
+ QPDFObjectHandle getRoot();
+
+ // Install this object handle as an indirect object and return an
+ // indirect reference to it.
+ QPDFObjectHandle makeIndirectObject(QPDFObjectHandle);
+
+ // Retrieve an object by object ID and generation. Returns an
+ // indirect reference to it.
+ QPDFObjectHandle getObjectByID(int objid, int generation);
+
+ // Encryption support
+
+ struct EncryptionData
+ {
+ // This class holds data read from the encryption dictionary.
+ EncryptionData(int V, int R, int Length_bytes, long P,
+ std::string const& O, std::string const& U,
+ std::string const& id1) :
+ V(V),
+ R(R),
+ Length_bytes(Length_bytes),
+ P(P),
+ O(O),
+ U(U),
+ id1(id1)
+ {
+ }
+
+ int V;
+ int R;
+ int Length_bytes;
+ long P;
+ std::string O;
+ std::string U;
+ std::string id1;
+ };
+
+ static void trim_user_password(std::string& user_password);
+ static std::string compute_data_key(
+ std::string const& encryption_key, int objid, int generation);
+ static std::string compute_encryption_key(
+ std::string const& password, EncryptionData const& data);
+
+ static void compute_encryption_O_U(
+ char const* user_password, char const* owner_password,
+ int V, int R, int key_len, unsigned long P,
+ std::string const& id1,
+ std::string& O, std::string& U);
+ std::string const& getUserPassword() const;
+
+ // Linearization support
+
+ // Returns true iff the file starts with a linearization parameter
+ // dictionary. Does no additional validation.
+ bool isLinearized();
+
+ // Performs various sanity checks on a linearized file. Return
+ // true if no errors or warnings. Otherwise, return false and
+ // output errors and warnings to stdout.
+ bool checkLinearization();
+
+ // Calls checkLinearization() and, if possible, prints normalized
+ // contents of some of the hints tables to stdout. Normalization
+ // includes adding min values to delta values and adjusting
+ // offsets based on the location and size of the primary hint
+ // stream.
+ void showLinearizationData();
+
+ // Shows the contents of the cross-reference table
+ void showXRefTable();
+
+ // Optimization support -- see doc/optimization. Implemented in
+ // QPDF_optimization.cc
+
+ // The object_stream_data map maps from a "compressed" object to
+ // the object stream that contains it. This enables optimize to
+ // populate the object <-> user maps with only uncompressed
+ // objects. If allow_changes is false, an exception will be
+ // thrown if any changes are made during the optimization process.
+ // This is available so that the test suite can make sure that a
+ // linearized file is already optimized. When called in this way,
+ // optimize() still populates the object <-> user maps
+ void optimize(std::map<int, int> const& object_stream_data,
+ bool allow_changes = true);
+
+ // Replace all references to indirect objects that are "scalars"
+ // (i.e., things that don't have children: not arrays, streams, or
+ // dictionaries) with direct objects.
+ void flattenScalarReferences();
+
+ // For QPDFWriter:
+
+ // Remove /ID, /Encrypt, and /Prev keys from the trailer
+ // dictionary since these are regenerated during write.
+ void trimTrailerForWrite();
+
+ // Get lists of all objects in order according to the part of a
+ // linearized file that they belong to.
+ void getLinearizedParts(
+ std::map<int, int> const& object_stream_data,
+ std::vector<QPDFObjectHandle>& part4,
+ std::vector<QPDFObjectHandle>& part6,
+ std::vector<QPDFObjectHandle>& part7,
+ std::vector<QPDFObjectHandle>& part8,
+ std::vector<QPDFObjectHandle>& part9);
+
+ void generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
+ std::map<int, size_t> const& lengths,
+ std::map<int, int> const& obj_renumber,
+ PointerHolder<Buffer>& hint_stream,
+ int& S, int& O);
+
+ // Map object to object stream that contains it
+ void getObjectStreamData(std::map<int, int>&);
+ // Get a list of objects that would be permitted in an object
+ // stream
+ std::vector<int> getCompressibleObjects();
+
+ // Convenience routines for common functions. See also
+ // QPDFObjectHandle.hh for additional convenience routines.
+
+ // Traverse page tree and return all /Page objects.
+ std::vector<QPDFObjectHandle> const& getAllPages();
+
+ // Resolver class is restricted to QPDFObjectHandle so that only
+ // it can resolve indirect references.
+ class Resolver
+ {
+ friend class QPDFObjectHandle;
+ private:
+ static PointerHolder<QPDFObject> resolve(
+ QPDF* qpdf, int objid, int generation)
+ {
+ return qpdf->resolve(objid, generation);
+ }
+ };
+ friend class Resolver;
+
+ // Pipe class is restricted to QPDF_Stream
+ class Pipe
+ {
+ friend class QPDF_Stream;
+ private:
+ static void pipeStreamData(QPDF* qpdf, int objid, int generation,
+ off_t offset, size_t length,
+ QPDFObjectHandle dict,
+ Pipeline* pipeline)
+ {
+ qpdf->pipeStreamData(
+ objid, generation, offset, length, dict, pipeline);
+ }
+ };
+ friend class Pipe;
+
+ private:
+ class InputSource
+ {
+ public:
+ InputSource() :
+ last_offset(0)
+ {
+ }
+ virtual ~InputSource()
+ {
+ }
+
+ void setLastOffset(off_t);
+ off_t getLastOffset() const;
+ std::string readLine();
+
+ virtual std::string const& getName() const = 0;
+ virtual off_t tell() = 0;
+ virtual void seek(off_t offset, int whence) = 0;
+ virtual void rewind() = 0;
+ virtual size_t read(char* buffer, int length) = 0;
+ virtual void unreadCh(char ch) = 0;
+
+ protected:
+ off_t last_offset;
+ };
+
+ class FileInputSource: public InputSource
+ {
+ public:
+ FileInputSource();
+ void setFilename(char const* filename);
+ virtual ~FileInputSource();
+ virtual std::string const& getName() const;
+ virtual off_t tell();
+ virtual void seek(off_t offset, int whence);
+ virtual void rewind();
+ virtual size_t read(char* buffer, int length);
+ virtual void unreadCh(char ch);
+
+ private:
+ FileInputSource(FileInputSource const&);
+ FileInputSource& operator=(FileInputSource const&);
+
+ void destroy();
+
+ std::string filename;
+ FILE* file;
+ };
+
+ class BufferInputSource: public InputSource
+ {
+ public:
+ BufferInputSource(std::string const& description, Buffer* buf);
+ virtual ~BufferInputSource();
+ virtual std::string const& getName() const;
+ virtual off_t tell();
+ virtual void seek(off_t offset, int whence);
+ virtual void rewind();
+ virtual size_t read(char* buffer, int length);
+ virtual void unreadCh(char ch);
+
+ private:
+ std::string description;
+ Buffer* buf;
+ off_t cur_offset;
+ };
+
+ class ObjGen
+ {
+ public:
+ ObjGen();
+ ObjGen(int obj, int gen);
+ bool operator<(ObjGen const&) const;
+
+ int obj;
+ int gen;
+ };
+
+ class ObjCache
+ {
+ public:
+ ObjCache() :
+ end_before_space(0),
+ end_after_space(0)
+ {
+ }
+ ObjCache(PointerHolder<QPDFObject> object,
+ off_t end_before_space,
+ off_t end_after_space) :
+ object(object),
+ end_before_space(end_before_space),
+ end_after_space(end_after_space)
+ {
+ }
+
+ PointerHolder<QPDFObject> object;
+ off_t end_before_space;
+ off_t end_after_space;
+ };
+
+ void parse();
+ void warn(QPDFExc const& e);
+ void setTrailer(QPDFObjectHandle obj);
+ void read_xref(off_t offset);
+ void reconstruct_xref(QPDFExc& e);
+ int read_xrefTable(off_t offset);
+ int read_xrefStream(off_t offset);
+ int processXRefStream(off_t offset, QPDFObjectHandle& xref_stream);
+ void insertXrefEntry(int obj, int f0, int f1, int f2);
+ QPDFObjectHandle readObject(
+ InputSource*, int objid, int generation,
+ bool in_object_stream);
+ QPDFObjectHandle readObjectInternal(
+ InputSource* input, int objid, int generation,
+ bool in_object_stream,
+ bool in_array, bool in_dictionary);
+ int recoverStreamLength(
+ InputSource* input, int objid, int generation, off_t stream_offset);
+ QPDFTokenizer::Token readToken(InputSource*);
+
+ QPDFObjectHandle readObjectAtOffset(
+ off_t offset,
+ int exp_objid, int exp_generation,
+ int& act_objid, int& act_generation);
+ PointerHolder<QPDFObject> resolve(int objid, int generation);
+ void resolveObjectsInStream(int obj_stream_number);
+
+ // Calls finish() on the pipeline when done but does not delete it
+ void pipeStreamData(int objid, int generation,
+ off_t offset, size_t length,
+ QPDFObjectHandle dict,
+ Pipeline* pipeline);
+ void getAllPagesInternal(QPDFObjectHandle cur_pages,
+ std::vector<QPDFObjectHandle>& result);
+
+ // methods to support encryption -- implemented in QPDF_encryption.cc
+ void initializeEncryption();
+ std::string getKeyForObject(int objid, int generation);
+ void decryptString(std::string&, int objid, int generation);
+ void decryptStream(Pipeline*& pipeline, int objid, int generation,
+ std::vector<PointerHolder<Pipeline> >& heap);
+
+ // Linearization Hint table structures.
+ // Naming conventions:
+
+ // HSomething is the Something Hint Table or table header
+ // HSomethingEntry is an entry in the Something table
+
+ // delta_something + min_something = something
+ // nbits_something = number of bits required for something
+
+ // something_offset is the pre-adjusted offset in the file. If >=
+ // H0_offset, H0_length must be added to get an actual file
+ // offset.
+
+ // PDF 1.4: Table F.4
+ struct HPageOffsetEntry
+ {
+ HPageOffsetEntry() :
+ delta_nobjects(0),
+ delta_page_length(0),
+ nshared_objects(0),
+ delta_content_offset(0),
+ delta_content_length(0)
+ {
+ }
+
+ int delta_nobjects; // 1
+ int delta_page_length; // 2
+ int nshared_objects; // 3
+ // vectors' sizes = nshared_objects
+ std::vector<int> shared_identifiers; // 4
+ std::vector<int> shared_numerators; // 5
+ int delta_content_offset; // 6
+ int delta_content_length; // 7
+ };
+
+ // PDF 1.4: Table F.3
+ // Header of the page offset hint table plus its per-page entries.
+ // The number trailing each member is its item number in the table.
+ // Following the naming conventions used for these structures,
+ // min_* members are the minimum values the per-entry deltas are
+ // added to and nbits_* members are bit widths. The constructor
+ // sets every integer member to 0; entries starts out empty.
+ struct HPageOffset
+ {
+ HPageOffset() :
+ min_nobjects(0),
+ first_page_offset(0),
+ nbits_delta_nobjects(0),
+ min_page_length(0),
+ nbits_delta_page_length(0),
+ min_content_offset(0),
+ nbits_delta_content_offset(0),
+ min_content_length(0),
+ nbits_delta_content_length(0),
+ nbits_nshared_objects(0),
+ nbits_shared_identifier(0),
+ nbits_shared_numerator(0),
+ shared_denominator(0)
+ {
+ }
+
+ int min_nobjects; // 1
+ int first_page_offset; // 2
+ int nbits_delta_nobjects; // 3
+ int min_page_length; // 4
+ int nbits_delta_page_length; // 5
+ int min_content_offset; // 6
+ int nbits_delta_content_offset; // 7
+ int min_content_length; // 8
+ int nbits_delta_content_length; // 9
+ int nbits_nshared_objects; // 10
+ int nbits_shared_identifier; // 11
+ int nbits_shared_numerator; // 12
+ int shared_denominator; // 13
+ // vector size is npages
+ std::vector<HPageOffsetEntry> entries;
+ };
+
+ // PDF 1.4: Table F.6
+ // One entry of the shared object hint table (stored in
+ // HSharedObject::entries). The number trailing each member is its
+ // item number in the table; item 3 has no member here. The
+ // constructor sets all three members to 0.
+ struct HSharedObjectEntry
+ {
+ HSharedObjectEntry() :
+ delta_group_length(0),
+ signature_present(0),
+ nobjects_minus_one(0)
+ {
+ }
+
+ // Item 3 is a 128-bit signature (unsupported by Acrobat)
+ int delta_group_length; // 1
+ int signature_present; // 2 -- always 0
+ int nobjects_minus_one; // 4 -- always 0
+ };
+
+ // PDF 1.4: Table F.5
+ // Header of the shared object hint table plus its entries. The
+ // number trailing each member is its item number in the table.
+ // The constructor sets every integer member to 0; entries starts
+ // out empty.
+ struct HSharedObject
+ {
+ HSharedObject() :
+ first_shared_obj(0),
+ first_shared_offset(0),
+ nshared_first_page(0),
+ nshared_total(0),
+ nbits_nobjects(0),
+ min_group_length(0),
+ nbits_delta_group_length(0)
+ {
+ }
+
+ int first_shared_obj; // 1
+ int first_shared_offset; // 2
+ int nshared_first_page; // 3
+ int nshared_total; // 4
+ int nbits_nobjects; // 5
+ int min_group_length; // 6
+ int nbits_delta_group_length; // 7
+ // vector size is nshared_total
+ std::vector<HSharedObjectEntry> entries;
+ };
+
+ // PDF 1.4: Table F.9
+ // Generic hint table with four integer items, numbered in the
+ // trailing comments. The enclosing class uses this type for both
+ // outline_hints and c_outline_data. The constructor sets every
+ // member to 0.
+ struct HGeneric
+ {
+ HGeneric() :
+ first_object(0),
+ first_object_offset(0),
+ nobjects(0),
+ group_length(0)
+ {
+ }
+
+ int first_object; // 1
+ int first_object_offset; // 2
+ int nobjects; // 3
+ int group_length; // 4
+ };
+
+ // Other linearization data structures
+
+ // Initialized from Linearization Parameter dictionary
+ // The trailing comment on each of the first six members names the
+ // dictionary key it corresponds to. The enclosing class holds two
+ // instances: linp and c_linp. The constructor sets every member
+ // to 0.
+ // NOTE(review): file sizes and offsets are stored as int --
+ // confirm that this range is sufficient for the files supported.
+ struct LinParameters
+ {
+ LinParameters() :
+ file_size(0),
+ first_page_object(0),
+ first_page_end(0),
+ npages(0),
+ xref_zero_offset(0),
+ first_page(0),
+ H_offset(0),
+ H_length(0)
+ {
+ }
+
+ int file_size; // /L
+ int first_page_object; // /O
+ int first_page_end; // /E
+ int npages; // /N
+ int xref_zero_offset; // /T
+ int first_page; // /P
+ int H_offset; // offset of primary hint stream
+ int H_length; // length of primary hint stream
+ };
+
+ // Computed hint table value data structures. These tables
+ // contain the computed values on which the hint table values are
+ // based. They exclude things like number of bits and store
+ // actual values instead of mins and deltas. File offsets are
+ // also absolute rather than being offset by the size of the
+ // primary hint table. We populate the hint table structures from
+ // these during writing and compare the hint table values with
+ // these during validation. We ignore some values for various
+ // reasons described in the code. Those values are omitted from
+ // these structures. Note also that object numbers are object
+ // numbers from the input file, not the output file.
+
+ // Naming convention: CHSomething is analogous to HSomething
+ // above. "CH" is computed hint.
+
+ // Computed counterpart of HPageOffsetEntry: holds actual values
+ // rather than deltas. The constructor sets both counters to 0;
+ // shared_identifiers starts out empty.
+ struct CHPageOffsetEntry
+ {
+ CHPageOffsetEntry() :
+ nobjects(0),
+ nshared_objects(0)
+ {
+ }
+
+ int nobjects;
+ int nshared_objects;
+ // vectors' sizes = nshared_objects
+ std::vector<int> shared_identifiers;
+ };
+
+ // Computed counterpart of HPageOffset. It carries only the
+ // per-page entries; no constructor is declared, so entries is
+ // default-constructed (empty).
+ struct CHPageOffset
+ {
+ // vector size is npages
+ std::vector<CHPageOffsetEntry> entries;
+ };
+
+ // Computed counterpart of HSharedObjectEntry: stores a single
+ // object number. There is no default constructor, so an object
+ // number must always be supplied.
+ // NOTE(review): the constructor is not explicit, so an int
+ // converts implicitly to CHSharedObjectEntry -- confirm that this
+ // is intended.
+ struct CHSharedObjectEntry
+ {
+ CHSharedObjectEntry(int object) :
+ object(object)
+ {
+ }
+
+ int object;
+ };
+
+ // PDF 1.4: Table F.5
+ // Computed counterpart of HSharedObject. The constructor sets the
+ // three integer members to 0; entries starts out empty.
+ struct CHSharedObject
+ {
+ CHSharedObject() :
+ first_shared_obj(0),
+ nshared_first_page(0),
+ nshared_total(0)
+ {
+ }
+
+ int first_shared_obj;
+ int nshared_first_page;
+ int nshared_total;
+ // vector size is nshared_total
+ std::vector<CHSharedObjectEntry> entries;
+ };
+
+ // No need for CHGeneric -- HGeneric is fine as is.
+
+
+ // Data structures to support optimization -- implemented in
+ // QPDF_optimization.cc
+
+ // Identifies a user of an object by kind (user_e) plus, depending
+ // on the kind, a page number or a key. The enclosing class uses
+ // ObjUser as the key type of obj_user_to_objects and as the
+ // element type of the sets in object_to_obj_users, which is why
+ // operator< is declared. All members are public; the constructors
+ // and operator< are only declared here.
+ class ObjUser
+ {
+ public:
+ enum user_e
+ {
+ ou_bad,
+ ou_page,
+ ou_thumb,
+ ou_trailer_key,
+ ou_root_key,
+ ou_root
+ };
+
+ // type is set to ou_bad
+ ObjUser();
+
+ // type must be ou_root
+ ObjUser(user_e type);
+
+ // type must be one of ou_page or ou_thumb
+ ObjUser(user_e type, int pageno);
+
+ // type must be one of ou_trailer_key or ou_root_key
+ ObjUser(user_e type, std::string const& key);
+
+ bool operator<(ObjUser const&) const;
+
+ user_e ou_type;
+ // NOTE(review): the constructor taking pageno accepts ou_page or
+ // ou_thumb, so pageno is presumably meaningful for ou_thumb as
+ // well -- confirm.
+ int pageno; // if ou_page;
+ std::string key; // if ou_trailer_key or ou_root_key
+ };
+
+ // methods to support linearization checking -- implemented in
+ // QPDF_linearization.cc
+ // Everything in this group is a declaration only. By name the
+ // functions fall into families that operate on the hint table
+ // structures declared in this class: read*, check*, dump*,
+ // calculate* and write*.
+ void readLinearizationData();
+ bool checkLinearizationInternal();
+ void dumpLinearizationDataInternal();
+ QPDFObjectHandle readHintStream(Pipeline&, off_t offset, size_t length);
+ // NOTE(review): the read* functions take BitStream by value while
+ // the write* functions below take BitWriter by reference --
+ // confirm that the copy is intended.
+ void readHPageOffset(BitStream);
+ void readHSharedObject(BitStream);
+ void readHGeneric(BitStream, HGeneric&);
+ int maxEnd(ObjUser const& ou);
+ int getLinearizationOffset(ObjGen const&);
+ QPDFObjectHandle getUncompressedObject(
+ QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
+ int lengthNextN(int first_object, int n,
+ std::list<std::string>& errors);
+ void checkHPageOffset(std::list<std::string>& errors,
+ std::list<std::string>& warnings,
+ std::vector<QPDFObjectHandle> const& pages,
+ std::map<int, int>& idx_to_obj);
+ // NOTE(review): the first two parameters are named warnings then
+ // errors, the reverse of checkHPageOffset above. Both have the
+ // same type, so swapping them at a call site would compile
+ // silently -- confirm the order against the definition and the
+ // callers.
+ void checkHSharedObject(std::list<std::string>& warnings,
+ std::list<std::string>& errors,
+ std::vector<QPDFObjectHandle> const& pages,
+ std::map<int, int>& idx_to_obj);
+ void checkHOutlines(std::list<std::string>& warnings);
+ void dumpHPageOffset();
+ void dumpHSharedObject();
+ void dumpHGeneric(HGeneric&);
+ int adjusted_offset(int offset);
+ QPDFObjectHandle objGenToIndirect(ObjGen const&);
+ void calculateLinearizationData(
+ std::map<int, int> const& object_stream_data);
+ void pushOutlinesToPart(
+ std::vector<QPDFObjectHandle>& part,
+ std::set<ObjGen>& lc_outlines,
+ std::map<int, int> const& object_stream_data);
+ int outputLengthNextN(
+ int in_object, int n,
+ std::map<int, size_t> const& lengths,
+ std::map<int, int> const& obj_renumber);
+ // The three calculateH* functions share one parameter list: an
+ // xref map, a map of lengths and an object renumbering map, all
+ // keyed by int and passed by const reference.
+ void calculateHPageOffset(
+ std::map<int, QPDFXRefEntry> const& xref,
+ std::map<int, size_t> const& lengths,
+ std::map<int, int> const& obj_renumber);
+ void calculateHSharedObject(
+ std::map<int, QPDFXRefEntry> const& xref,
+ std::map<int, size_t> const& lengths,
+ std::map<int, int> const& obj_renumber);
+ void calculateHOutline(
+ std::map<int, QPDFXRefEntry> const& xref,
+ std::map<int, size_t> const& lengths,
+ std::map<int, int> const& obj_renumber);
+ void writeHPageOffset(BitWriter&);
+ void writeHSharedObject(BitWriter&);
+ void writeHGeneric(BitWriter&, HGeneric&);
+
+
+ // Methods to support optimization
+ // Declarations only.
+
+ // pageno is passed by non-const reference, so it is presumably
+ // updated as pages are visited -- confirm against the definition.
+ void optimizePagesTree(
+ QPDFObjectHandle,
+ std::map<std::string, std::vector<QPDFObjectHandle> >&,
+ int& pageno, bool allow_changes);
+ // updateObjectMapsInternal takes the same ObjUser and object
+ // handle as updateObjectMaps plus a visited set and a top flag;
+ // presumably it is the recursive worker behind updateObjectMaps --
+ // confirm.
+ void updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh);
+ void updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
+ std::set<ObjGen>& visited, bool top);
+ void filterCompressedObjects(std::map<int, int> const& object_stream_data);
+
+
+ QPDFTokenizer tokenizer;
+ FileInputSource file;
+ bool encrypted;
+ bool encryption_initialized;
+ bool ignore_xref_streams;
+ bool suppress_warnings;
+ bool attempt_recovery;
+ std::string provided_password;
+ std::string user_password;
+ std::string encryption_key;
+ std::string cached_object_encryption_key;
+ int cached_key_objid;
+ int cached_key_generation;
+ std::string pdf_version;
+ std::map<ObjGen, QPDFXRefEntry> xref_table;
+ std::set<int> deleted_objects;
+ std::map<ObjGen, ObjCache> obj_cache;
+ QPDFObjectHandle trailer;
+ std::vector<QPDFObjectHandle> all_pages;
+ std::vector<std::string> warnings;
+
+ // Linearization data
+ int first_xref_item_offset; // actual value from file
+ bool uncompressed_after_compressed;
+
+ // Linearization parameter dictionary and hint table data: may be
+ // read from file or computed prior to writing a linearized file
+ QPDFObjectHandle lindict;
+ LinParameters linp;
+ HPageOffset page_offset_hints;
+ HSharedObject shared_object_hints;
+ HGeneric outline_hints;
+
+ // Computed linearization data: used to populate above tables
+ // during writing and to compare with them during validation. c_
+ // means computed.
+ LinParameters c_linp;
+ CHPageOffset c_page_offset_data;
+ CHSharedObject c_shared_object_data;
+ HGeneric c_outline_data;
+
+ // Object ordering data for linearized files: initialized by
+ // calculateLinearizationData(). Part numbers refer to the PDF
+ // 1.4 specification.
+ std::vector<QPDFObjectHandle> part4;
+ std::vector<QPDFObjectHandle> part6;
+ std::vector<QPDFObjectHandle> part7;
+ std::vector<QPDFObjectHandle> part8;
+ std::vector<QPDFObjectHandle> part9;
+
+ // Optimization data
+ std::map<ObjUser, std::set<ObjGen> > obj_user_to_objects;
+ std::map<ObjGen, std::set<ObjUser> > object_to_obj_users;
+};
+
+#endif // __QPDF_HH__
diff --git a/include/qpdf/QPDFExc.hh b/include/qpdf/QPDFExc.hh
new file mode 100644
index 00000000..d3efb3b9
--- /dev/null
+++ b/include/qpdf/QPDFExc.hh
@@ -0,0 +1,22 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QPDFEXC_HH__
+#define __QPDFEXC_HH__
+
+#include <qpdf/QEXC.hh>
+
+// Exception type for qpdf, publicly derived from QEXC::General. It
+// can be constructed from a message alone or from a filename, an
+// offset and a message. The constructors and the destructor are only
+// declared here.
+// NOTE(review): offset is an int while other headers in this library
+// use off_t for file offsets -- confirm the intended range.
+class QPDFExc: public QEXC::General
+{
+ public:
+ QPDFExc(std::string const& message);
+ QPDFExc(std::string const& filename, int offset,
+ std::string const& message);
+ // Declared with an empty exception specification.
+ virtual ~QPDFExc() throw ();
+};
+
+#endif // __QPDFEXC_HH__
diff --git a/include/qpdf/QPDFObject.hh b/include/qpdf/QPDFObject.hh
new file mode 100644
index 00000000..1597e20e
--- /dev/null
+++ b/include/qpdf/QPDFObject.hh
@@ -0,0 +1,20 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QPDFOBJECT_HH__
+#define __QPDFOBJECT_HH__
+
+#include <string>
+
+// Abstract base class for PDF object implementations. unparse() is
+// pure virtual, so QPDFObject itself cannot be instantiated. The
+// virtual destructor makes deletion through a QPDFObject pointer
+// safe.
+class QPDFObject
+{
+ public:
+ virtual ~QPDFObject() {}
+ // Returns a std::string; presumably the textual PDF
+ // representation of the object -- confirm in the subclasses.
+ virtual std::string unparse() = 0;
+};
+
+#endif // __QPDFOBJECT_HH__
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
new file mode 100644
index 00000000..e38eb116
--- /dev/null
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -0,0 +1,221 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QPDFOBJECTHANDLE_HH__
+#define __QPDFOBJECTHANDLE_HH__
+
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/Buffer.hh>
+
+#include <qpdf/QPDFObject.hh>
+
+class Pipeline;
+class QPDF;
+
+// Handle through which PDF objects are queried and modified. It
+// declares type queries, public factory methods for direct objects,
+// typed accessors, mutators and stream helpers. Everything is defined
+// outside this header except the inline forwarding functions in the
+// nested Factory and ObjAccessor classes, which give class QPDF
+// access to selected private functionality.
+class QPDFObjectHandle
+{
+ public:
+ // NOTE(review): presumably creates a handle for which
+ // isInitialized() returns false -- confirm against the definition.
+ QPDFObjectHandle();
+ bool isInitialized() const;
+
+ // Exactly one of these will return true for any object.
+ bool isBool();
+ bool isNull();
+ bool isInteger();
+ bool isReal();
+ bool isName();
+ bool isString();
+ bool isArray();
+ bool isDictionary();
+ bool isStream();
+
+ // This returns true in addition to the query for the specific
+ // type for indirect objects.
+ bool isIndirect();
+
+ // True for everything except array, dictionary, and stream
+ bool isScalar();
+
+ // Public factory methods
+
+ static QPDFObjectHandle newNull();
+ static QPDFObjectHandle newBool(bool value);
+ static QPDFObjectHandle newInteger(int value);
+ // The value of a real is supplied as a string (compare
+ // getRealValue(), which also returns a string).
+ static QPDFObjectHandle newReal(std::string const& value);
+ static QPDFObjectHandle newName(std::string const& name);
+ static QPDFObjectHandle newString(std::string const& str);
+ static QPDFObjectHandle newArray(
+ std::vector<QPDFObjectHandle> const& items);
+ static QPDFObjectHandle newDictionary(
+ std::map<std::string, QPDFObjectHandle> const& items);
+
+ // Accessor methods. If an accessor method that is valid for only
+ // a particular object type is called on an object of the wrong
+ // type, an exception is thrown.
+
+ // Methods for bool objects
+ bool getBoolValue();
+
+ // Methods for integer objects
+ int getIntValue();
+
+ // Methods for real objects
+ std::string getRealValue();
+
+ // Methods that work for both integer and real objects
+ bool isNumber();
+ double getNumericValue();
+
+ // Methods for name objects
+ std::string getName();
+
+ // Methods for string objects
+ std::string getStringValue();
+ std::string getUTF8Value();
+
+ // Methods for array objects
+ int getArrayNItems();
+ QPDFObjectHandle getArrayItem(int n);
+
+ // Methods for dictionary objects
+ bool hasKey(std::string const&);
+ QPDFObjectHandle getKey(std::string const&);
+ std::set<std::string> getKeys();
+
+ // Mutator methods. Use with caution.
+
+ // Recursively copy this object, making it direct. Throws an
+ // exception if a loop is detected or any sub-object is a stream.
+ void makeDirect();
+
+ // Mutator methods for array objects
+ void setArrayItem(int, QPDFObjectHandle const&);
+
+ // Mutator methods for dictionary objects
+
+ // Replace value of key, adding it if it does not exist
+ void replaceKey(std::string const& key, QPDFObjectHandle const&);
+ // Remove key, doing nothing if key does not exist
+ void removeKey(std::string const& key);
+
+ // Methods for stream objects
+ QPDFObjectHandle getDict();
+
+ // Returns filtered (uncompressed) stream data. Throws an
+ // exception if the stream is filtered and we can't decode it.
+ PointerHolder<Buffer> getStreamData();
+
+ // Write stream data through the given pipeline. A null pipeline
+ // value may be used if all you want to do is determine whether a
+ // stream is filterable. If filter is false, write raw stream
+ // data and return false. If filter is true, then attempt to
+ // apply all the decoding filters to the stream data. If we are
+ // successful, return true. Otherwise, return false and write raw
+ // data. If filtering is requested and successfully performed,
+ // then the normalize and compress flags are used to determine
+ // whether stream data should be normalized and compressed. In
+ // all cases, if this function returns false, raw data has been
+ // written. If it returns true, then any requested filtering has
+ // been performed. Note that if the original stream data has no
+ // filters applied to it, the return value will be equal to the
+ // value of the filter parameter. Callers may use the return
+ // value of this function to determine whether or not the /Filter
+ // and /DecodeParms keys in the stream dictionary should be
+ // replaced if writing a new stream object.
+ bool pipeStreamData(Pipeline*, bool filter,
+ bool normalize, bool compress);
+
+ // return 0 for direct objects
+ int getObjectID() const;
+ int getGeneration() const;
+
+ std::string unparse();
+ std::string unparseResolved();
+
+ // Convenience routines for commonly performed functions
+
+ // Throws an exception if this is not a Page object. Returns an
+ // empty map if there are no images or no resources. This
+ // function does not presently support inherited resources. See
+ // comment in the source for details. Return value is a map from
+ // XObject name to the image object, which is always a stream.
+ std::map<std::string, QPDFObjectHandle> getPageImages();
+
+ // Throws an exception if this is not a Page object. Returns a
+ // vector of stream objects representing the content streams for
+ // the given page. This routine allows the caller to not care
+ // whether there are one or more than one content streams for a
+ // page.
+ std::vector<QPDFObjectHandle> getPageContents();
+
+ // Initializers for objects. This Factory class gives the QPDF
+ // class specific permission to call factory methods without
+ // making it a friend of the whole QPDFObjectHandle class.
+ // Both functions are private and simply forward to the private
+ // static functions of the same name in QPDFObjectHandle.
+ class Factory
+ {
+ friend class QPDF;
+ private:
+ static QPDFObjectHandle newIndirect(QPDF* qpdf,
+ int objid, int generation)
+ {
+ return QPDFObjectHandle::newIndirect(qpdf, objid, generation);
+ }
+ // object must be dictionary object
+ // NOTE(review): off_t is used here, but none of the standard
+ // headers this file includes directly is guaranteed to declare it
+ // -- confirm which header provides it. length is an int, whereas
+ // QPDF::pipeStreamData declares a size_t length -- confirm the
+ // difference is intended.
+ static QPDFObjectHandle newStream(
+ QPDF* qpdf, int objid, int generation,
+ QPDFObjectHandle stream_dict, off_t offset, int length)
+ {
+ return QPDFObjectHandle::newStream(
+ qpdf, objid, generation, stream_dict, offset, length);
+ }
+ };
+ friend class Factory;
+
+ // Accessor for raw underlying object -- only QPDF is allowed to
+ // call this.
+ class ObjAccessor
+ {
+ friend class QPDF;
+ private:
+ // Calls dereference() on the handle before returning its obj
+ // member, so the handle is taken by non-const reference.
+ static PointerHolder<QPDFObject> getObject(QPDFObjectHandle& o)
+ {
+ o.dereference();
+ return o.obj;
+ }
+ };
+ friend class ObjAccessor;
+
+ private:
+ // Private constructors: one taking the owning QPDF with an object
+ // id/generation, one taking a raw QPDFObject pointer.
+ // NOTE(review): obj is a PointerHolder<QPDFObject>, so the second
+ // constructor presumably takes ownership of the pointer --
+ // confirm against the definition.
+ QPDFObjectHandle(QPDF*, int objid, int generation);
+ QPDFObjectHandle(QPDFObject*);
+
+ // Private object factory methods
+ static QPDFObjectHandle newIndirect(QPDF*, int objid, int generation);
+ static QPDFObjectHandle newStream(
+ QPDF* qpdf, int objid, int generation,
+ QPDFObjectHandle stream_dict, off_t offset, int length);
+
+ void assertInitialized() const;
+ void assertType(char const* type_name, bool istype);
+ void assertPageObject();
+ void dereference();
+ void makeDirectInternal(std::set<int>& visited);
+
+ bool initialized;
+
+ QPDF* qpdf; // 0 for direct object
+ int objid; // 0 for direct object
+ int generation;
+ PointerHolder<QPDFObject> obj;
+};
+
+#endif // __QPDFOBJECTHANDLE_HH__
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
new file mode 100644
index 00000000..e921bfc5
--- /dev/null
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -0,0 +1,141 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QPDFTOKENIZER_HH__
+#define __QPDFTOKENIZER_HH__
+
+#include <string>
+#include <stdio.h>
+
+// Incremental tokenizer for PDF syntax. Characters are fed in one at
+// a time with presentCharacter() (and presentEOF() at end of input);
+// getToken() reports when a complete token is available. See "Mode
+// of operation" below.
+class QPDFTokenizer
+{
+  public:
+    // Kinds of token. tt_bad is also the type of a
+    // default-constructed Token.
+    enum token_type_e
+    {
+        tt_bad,
+        tt_array_close,
+        tt_array_open,
+        tt_brace_close,
+        tt_brace_open,
+        tt_dict_close,
+        tt_dict_open,
+        tt_integer,
+        tt_name,
+        tt_real,
+        tt_string,
+        tt_null,
+        tt_bool,
+        // No comma after the last enumerator: a trailing comma in an
+        // enumerator list is not valid before C++11.
+        tt_word
+    };
+
+    // Immutable value type describing one token: its type, its
+    // value, the raw value and an error message. Fields that are not
+    // supplied to the constructor are empty strings.
+    class Token
+    {
+      public:
+        // Creates a token of type tt_bad with empty strings.
+        Token() : type(tt_bad) {}
+
+        // Creates a token with the given type and value; the raw
+        // value and error message are left empty.
+        Token(token_type_e type, std::string const& value) :
+            type(type),
+            value(value)
+        {
+        }
+
+        // Creates a token with all four fields supplied. The strings
+        // are taken by const reference so that each is copied only
+        // once, into the corresponding member.
+        Token(token_type_e type, std::string const& value,
+              std::string const& raw_value,
+              std::string const& error_message) :
+            type(type),
+            value(value),
+            raw_value(raw_value),
+            error_message(error_message)
+        {
+        }
+        token_type_e getType() const
+        {
+            return this->type;
+        }
+        std::string const& getValue() const
+        {
+            return this->value;
+        }
+        std::string const& getRawValue() const
+        {
+            return this->raw_value;
+        }
+        std::string const& getErrorMessage() const
+        {
+            return this->error_message;
+        }
+        // Two tokens are equal when they have the same type and
+        // value and that type is not tt_bad; a tt_bad token is never
+        // equal to anything, itself included. This is a const member
+        // so that const tokens can be compared.
+        bool operator==(Token const& rhs) const
+        {
+            // Ignore fields other than type and value
+            return ((this->type != tt_bad) &&
+                    (this->type == rhs.type) &&
+                    (this->value == rhs.value));
+        }
+
+      private:
+        token_type_e type;
+        std::string value;
+        std::string raw_value;
+        std::string error_message;
+    };
+
+    QPDFTokenizer();
+
+    // PDF files with version < 1.2 allowed the pound character
+    // anywhere in a name.  Starting with version 1.2, the pound
+    // character was allowed only when followed by two hexadecimal
+    // digits.  This method should be called when parsing a PDF file
+    // whose version is older than 1.2.
+    void allowPoundAnywhereInName();
+
+    // Mode of operation:
+
+    // Keep presenting characters and calling getToken() until
+    // getToken() returns true.  When it does, be sure to check
+    // unread_char and to unread ch if it is true.
+
+    // If these are called when a token is available, an exception
+    // will be thrown.
+    void presentCharacter(char ch);
+    void presentEOF();
+
+    // If a token is available, return true and initialize token with
+    // the token, unread_char with whether or not we have to unread
+    // the last character, and if unread_char, ch with the character
+    // to unread.
+    bool getToken(Token& token, bool& unread_char, char& ch);
+
+    // This function returns true if the current character is between
+    // tokens (i.e., white space that is not part of a string) or is
+    // part of a comment.  A tokenizing filter can call this to
+    // determine whether to output the character.
+    bool betweenTokens();
+
+  private:
+    void reset();
+
+    // Lexer state
+    enum { st_top, st_in_comment, st_in_string, st_lt, st_gt,
+           st_literal, st_in_hexstring, st_token_ready } state;
+
+    bool pound_special_in_name;
+
+    // Current token accumulation
+    token_type_e type;
+    std::string val;
+    std::string raw_val;
+    std::string error_message;
+    bool unread_char;
+    char char_to_unread;
+
+    // State for strings
+    int string_depth;
+    bool string_ignoring_newline;
+    char bs_num_register[4];
+    bool last_char_was_bs;
+};
+
+#endif // __QPDFTOKENIZER_HH__
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
new file mode 100644
index 00000000..f332a227
--- /dev/null
+++ b/include/qpdf/QPDFWriter.hh
@@ -0,0 +1,243 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+// This class implements a simple writer for saving QPDF objects to
+// new PDF files. See comments throughout the header file for
+// additional details.
+
+#ifndef __QPDFWRITER_HH__
+#define __QPDFWRITER_HH__
+
+#include <stdio.h>
+#include <string>
+#include <list>
+#include <vector>
+#include <set>
+#include <map>
+
+#include <qpdf/QPDFXRefEntry.hh>
+
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/Pipeline.hh>
+#include <qpdf/Buffer.hh>
+
+class QPDF;
+class QPDFObjectHandle;
+class Pl_Count;
+
+// Writes the objects of a QPDF instance to a new PDF file. The
+// public set* functions select output options; write() is declared
+// last in the public section. All member functions are only declared
+// here.
+class QPDFWriter
+{
+ public:
+ // Passing null as filename means write to stdout
+ QPDFWriter(QPDF& pdf, char const* filename);
+ ~QPDFWriter();
+
+ // Set the value of object stream mode. In disable mode, we never
+ // generate any object streams. In preserve mode, we preserve
+ // object stream structure from the original file. In generate
+ // mode, we generate our own object streams. In all cases, we
+ // generate a conventional cross-reference table if there are no
+ // object streams and a cross-reference stream if there are object
+ // streams. The default is o_preserve.
+ enum object_stream_e { o_disable, o_preserve, o_generate };
+ void setObjectStreamMode(object_stream_e);
+
+ // Set value of stream data mode. In uncompress mode, we attempt
+ // to uncompress any stream that we can. In preserve mode, we
+ // preserve any filtering applied to streams. In compress mode,
+ // if we can apply all filters and the stream is not already
+ // optimally compressed, recompress the stream.
+ enum stream_data_e { s_uncompress, s_preserve, s_compress };
+ void setStreamDataMode(stream_data_e);
+
+ // Set value of content stream normalization. The default is
+ // "false". If true, we attempt to normalize newlines inside of
+ // content streams. Some constructs such as inline images may
+ // thwart our efforts. There may be some cases where this can
+ // damage the content stream. This flag should be used only for
+ // debugging and experimenting with PDF content streams. Never
+ // use it for production files.
+ void setContentNormalization(bool);
+
+ // Set QDF mode. QDF mode causes special "pretty printing" of
+ // PDF objects, adds comments for easier perusing of files.
+ // Resulting PDF files can be edited in a text editor and then run
+ // through fix-qdf to update cross reference tables and stream
+ // lengths.
+ void setQDFMode(bool);
+
+ // Cause a static /ID value to be generated. Use only in test
+ // suites.
+ void setStaticID(bool);
+
+ // Preserve encryption. The default is true unless prefiltering,
+ // content normalization, or qdf mode has been selected in which
+ // case encryption is never preserved. Encryption is also not
+ // preserved if we explicitly set encryption parameters.
+ void setPreserveEncryption(bool);
+
+ // Set up for encrypted output. Disables stream prefiltering and
+ // content normalization. Note that setting R2 encryption
+ // parameters sets the PDF version to at least 1.3, and setting R3
+ // encryption parameters pushes the PDF version number to at least
+ // 1.4.
+ void setR2EncryptionParameters(
+ char const* user_password, char const* owner_password,
+ bool allow_print, bool allow_modify,
+ bool allow_extract, bool allow_annotate);
+ enum r3_print_e
+ {
+ r3p_full, // allow all printing
+ r3p_low, // allow only low-resolution printing
+ r3p_none // allow no printing
+ };
+ enum r3_modify_e
+ {
+ r3m_all, // allow all modification
+ r3m_annotate, // allow comment authoring and form operations
+ r3m_form, // allow form field fill-in or signing
+ r3m_assembly, // allow only document assembly
+ r3m_none // allow no modification
+ };
+ void setR3EncryptionParameters(
+ char const* user_password, char const* owner_password,
+ bool allow_accessibility, bool allow_extract,
+ r3_print_e print, r3_modify_e modify);
+
+ // Create linearized output. Disables qdf mode, content
+ // normalization, and stream prefiltering.
+ void setLinearization(bool);
+
+ void write();
+
+ private:
+ // flags used by unparseObject
+ // Each flag is a distinct bit so that flags can be combined.
+ // NOTE(review): the constants are int and unparseChild takes int
+ // flags, but unparseObject takes unsigned int flags -- confirm
+ // whether the types should be unified.
+ static int const f_stream = 1 << 0;
+ static int const f_filtered = 1 << 1;
+ static int const f_in_ostream = 1 << 2;
+
+ enum trailer_e { t_normal, t_lin_first, t_lin_second };
+
+ int bytesNeeded(unsigned long n);
+ void writeBinary(unsigned long val, unsigned int bytes);
+ void writeString(std::string const& str);
+ void writeBuffer(PointerHolder<Buffer>&);
+ void writeStringQDF(std::string const& str);
+ void writeStringNoQDF(std::string const& str);
+ void assignCompressedObjectNumbers(int objid);
+ void enqueueObject(QPDFObjectHandle object);
+ void writeObjectStreamOffsets(std::vector<int>& offsets, int first_obj);
+ void writeObjectStream(QPDFObjectHandle object);
+ void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
+ void writeTrailer(trailer_e which, int size,
+ bool xref_stream, int prev = 0);
+ // Two overloads: the second additionally takes the stream length
+ // and compress flag used for stream dictionaries.
+ void unparseObject(QPDFObjectHandle object, int level,
+ unsigned int flags);
+ void unparseObject(QPDFObjectHandle object, int level,
+ unsigned int flags,
+ // for stream dictionaries
+ int stream_length, bool compress);
+ void unparseChild(QPDFObjectHandle child, int level, int flags);
+ void initializeSpecialStreams();
+ void preserveObjectStreams();
+ void generateObjectStreams();
+ void generateID();
+ void setEncryptionParameters(
+ char const* user_password, char const* owner_password,
+ int V, int R, int key_len, std::set<int>& bits_to_clear);
+ void setEncryptionParametersInternal(
+ int V, int R, int key_len, long P,
+ std::string const& O, std::string const& U,
+ std::string const& id1, std::string const& user_password);
+ void copyEncryptionParameters();
+ void setDataKey(int objid);
+ int openObject(int objid = 0);
+ void closeObject(int objid);
+ void writeStandard();
+ void writeLinearized();
+ void enqueuePart(std::vector<QPDFObjectHandle>& part);
+ void writeEncryptionDictionary();
+ void writeHeader();
+ void writeHintStream(int hint_id);
+ // writeXRefTable and writeXRefStream each have a short overload
+ // and a longer one that adds the parameters needed for
+ // linearization.
+ int writeXRefTable(trailer_e which, int first, int last, int size);
+ int writeXRefTable(trailer_e which, int first, int last, int size,
+ // for linearization
+ int prev,
+ bool suppress_offsets,
+ int hint_id,
+ int hint_offset,
+ int hint_length);
+ int writeXRefStream(int objid, int max_id, int max_offset,
+ trailer_e which, int first, int last, int size);
+ int writeXRefStream(int objid, int max_id, int max_offset,
+ trailer_e which, int first, int last, int size,
+ // for linearization
+ int prev,
+ int hint_id,
+ int hint_offset,
+ int hint_length);
+
+ // When filtering subsections, push additional pipelines to the
+ // stack. When ready to switch, activate the pipeline stack.
+ // Pipelines passed to pushPipeline are deleted when
+ // clearPipelineStack is called.
+ Pipeline* pushPipeline(Pipeline*);
+ void activatePipelineStack();
+
+ // Calls finish on the current pipeline and pops the pipeline
+ // stack until the top of stack is a previous active top of stack,
+ // and restores the pipeline to that point. Deletes any pipelines
+ // that it pops. If the bp argument is non-null and any of the
+ // stack items are of type Pl_Buffer, the buffer is retrieved.
+ void popPipelineStack(PointerHolder<Buffer>* bp = 0);
+
+ void pushEncryptionFilter();
+ void pushDiscardFilter();
+
+ // Data members. Non-static data members are constructed in the
+ // order in which they are declared, so do not reorder them without
+ // checking the constructor's initializer list.
+ QPDF& pdf;
+ char const* filename;
+ FILE* file;
+ bool close_file;
+ bool normalize_content_set;
+ bool normalize_content;
+ bool stream_data_mode_set;
+ stream_data_e stream_data_mode;
+ bool qdf_mode;
+ bool static_id;
+ bool direct_stream_lengths;
+ bool encrypted;
+ bool preserve_encryption;
+ bool linearized;
+ object_stream_e object_stream_mode;
+ std::string encryption_key;
+ std::map<std::string, std::string> encryption_dictionary;
+
+ std::string id1; // for /ID key of
+ std::string id2; // trailer dictionary
+ std::string min_pdf_version;
+ int encryption_dict_objid;
+ std::string cur_data_key;
+ std::list<PointerHolder<Pipeline> > to_delete;
+ // NOTE(review): raw pointer; ownership is not visible in this
+ // header -- confirm whether to_delete or pipeline_stack is
+ // responsible for it.
+ Pl_Count* pipeline;
+ std::list<QPDFObjectHandle> object_queue;
+ std::map<int, int> obj_renumber;
+ std::map<int, QPDFXRefEntry> xref;
+ std::map<int, size_t> lengths;
+ int next_objid;
+ int cur_stream_length_id;
+ int cur_stream_length;
+ bool added_newline;
+ int max_ostream_index;
+ std::set<int> normalized_streams;
+ std::map<int, int> page_object_to_seq;
+ std::map<int, int> contents_to_page_seq;
+ std::map<int, int> object_to_object_stream;
+ std::map<int, std::set<int> > object_stream_to_objects;
+ std::list<Pipeline*> pipeline_stack;
+};
+
+#endif // __QPDFWRITER_HH__
diff --git a/include/qpdf/QPDFXRefEntry.hh b/include/qpdf/QPDFXRefEntry.hh
new file mode 100644
index 00000000..4b1db9a2
--- /dev/null
+++ b/include/qpdf/QPDFXRefEntry.hh
@@ -0,0 +1,34 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QPDFXREFENTRY_HH__
+#define __QPDFXREFENTRY_HH__
+
+// One cross-reference entry: a type and two integer fields whose
+// meaning depends on the type, as listed below. All member functions
+// are only declared here.
+class QPDFXRefEntry
+{
+ public:
+ // Type constants are from the PDF spec section
+ // "Cross-Reference Streams":
+ // 0 = free entry; not used
+ // 1 = "uncompressed"; field 1 = offset
+ // 2 = "compressed"; field 1 = object stream number, field 2 = index
+
+ // NOTE(review): the values the default constructor gives the three
+ // members are not visible in this header -- confirm against the
+ // definition.
+ QPDFXRefEntry();
+ QPDFXRefEntry(int type, int field1, int field2);
+
+ int getType() const;
+ int getOffset() const; // only for type 1
+ int getObjStreamNumber() const; // only for type 2
+ int getObjStreamIndex() const; // only for type 2
+
+ private:
+ int type;
+ int field1;
+ int field2;
+};
+
+#endif // __QPDFXREFENTRY_HH__
diff --git a/include/qpdf/QTC.hh b/include/qpdf/QTC.hh
new file mode 100644
index 00000000..3d9597d4
--- /dev/null
+++ b/include/qpdf/QTC.hh
@@ -0,0 +1,16 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QTC_HH__
+#define __QTC_HH__
+
+// Namespace holding a single function declaration.
+namespace QTC
+{
+ // Declaration only. NOTE(review): presumably records that the
+ // test coverage case ccase within scope was reached, with n as an
+ // optional numeric qualifier that defaults to 0 -- confirm against
+ // the implementation. The top-level const on the two pointer
+ // parameters does not affect the function's signature.
+ void TC(char const* const scope, char const* const ccase, int n = 0);
+};
+
+#endif // __QTC_HH__
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
new file mode 100644
index 00000000..f2b67d92
--- /dev/null
+++ b/include/qpdf/QUtil.hh
@@ -0,0 +1,45 @@
+// Copyright (c) 2005-2008 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __QUTIL_HH__
+#define __QUTIL_HH__
+
+#include <string>
+#include <list>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include <qpdf/QEXC.hh>
+
+// NOTE(review): os_wrapper and fopen_wrapper are declared with
+// dynamic exception specifications (throw (QEXC::System)). These are
+// deprecated in C++11 and removed in C++17; changing them would also
+// require changing the matching definitions.
+namespace QUtil
+{
+ // This is a collection of useful utility functions that don't
+ // really go anywhere else.
+ // NOTE(review): the optional second arguments both default to 0;
+ // presumably length is a minimum width and decimal_places a
+ // precision -- confirm against the implementation.
+ std::string int_to_string(int, int length = 0);
+ std::string double_to_string(double, int decimal_places = 0);
+
+ // If status is -1, convert the current value of errno to a
+ // QEXC::System exception. Otherwise, return status.
+ int os_wrapper(std::string const& description, int status)
+ throw (QEXC::System);
+
+ // NOTE(review): presumably the FILE* counterpart of os_wrapper
+ // (throws QEXC::System when the FILE* is null, otherwise returns
+ // it) -- confirm against the implementation.
+ FILE* fopen_wrapper(std::string const&, FILE*)
+ throw (QEXC::System);
+
+ // NOTE(review): returns a raw char*; presumably a newly allocated
+ // copy that the caller must free -- confirm how it is allocated.
+ char* copy_string(std::string const&);
+
+ // Get the value of an environment variable in a portable fashion.
+ // Returns true iff the variable is defined. If `value' is
+ // non-null, initializes it with the value of the variable.
+ bool get_env(std::string const& var, std::string* value = 0);
+
+ // Return a string containing the byte representation of the UTF-8
+ // encoding for the unicode value passed in.
+ std::string toUTF8(unsigned long uval);
+};
+
+#endif // __QUTIL_HH__