diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/qpdf/Buffer.hh | 32 | ||||
-rw-r--r-- | include/qpdf/Pipeline.hh | 73 | ||||
-rw-r--r-- | include/qpdf/Pl_Buffer.hh | 46 | ||||
-rw-r--r-- | include/qpdf/Pl_Count.hh | 34 | ||||
-rw-r--r-- | include/qpdf/Pl_Discard.hh | 28 | ||||
-rw-r--r-- | include/qpdf/Pl_Flate.hh | 53 | ||||
-rw-r--r-- | include/qpdf/Pl_StdioFile.hh | 49 | ||||
-rw-r--r-- | include/qpdf/PointerHolder.hh | 170 | ||||
-rw-r--r-- | include/qpdf/QEXC.hh | 119 | ||||
-rw-r--r-- | include/qpdf/QPDF.hh | 750 | ||||
-rw-r--r-- | include/qpdf/QPDFExc.hh | 22 | ||||
-rw-r--r-- | include/qpdf/QPDFObject.hh | 20 | ||||
-rw-r--r-- | include/qpdf/QPDFObjectHandle.hh | 221 | ||||
-rw-r--r-- | include/qpdf/QPDFTokenizer.hh | 141 | ||||
-rw-r--r-- | include/qpdf/QPDFWriter.hh | 243 | ||||
-rw-r--r-- | include/qpdf/QPDFXRefEntry.hh | 34 | ||||
-rw-r--r-- | include/qpdf/QTC.hh | 16 | ||||
-rw-r--r-- | include/qpdf/QUtil.hh | 45 |
18 files changed, 2096 insertions, 0 deletions
diff --git a/include/qpdf/Buffer.hh b/include/qpdf/Buffer.hh new file mode 100644 index 00000000..703dee3e --- /dev/null +++ b/include/qpdf/Buffer.hh @@ -0,0 +1,32 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __BUFFER_HH__ +#define __BUFFER_HH__ + +class Buffer +{ + public: + Buffer(); + Buffer(unsigned long size); + Buffer(Buffer const&); + Buffer& operator=(Buffer const&); + ~Buffer(); + unsigned long getSize() const; + unsigned char const* getBuffer() const; + unsigned char* getBuffer(); + + private: + void init(unsigned long size); + void copy(Buffer const&); + void destroy(); + + unsigned long size; + unsigned char* buf; +}; + +#endif // __BUFFER_HH__ diff --git a/include/qpdf/Pipeline.hh b/include/qpdf/Pipeline.hh new file mode 100644 index 00000000..acbc2d98 --- /dev/null +++ b/include/qpdf/Pipeline.hh @@ -0,0 +1,73 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +// Generalized Pipeline interface. By convention, subclasses of +// Pipeline are called Pl_Something. +// +// When an instance of Pipeline is created with a pointer to a next +// pipeline, that pipeline writes its data to the next one when it +// finishes with it. In order to make possible a usage style in which +// a pipeline may be passed to a function which may stick other +// pipelines in front of it, the allocator of a pipeline is +// responsible for its destruction. In other words, one pipeline +// object does not attempt to manage the memory of its successor. 
+// +// The client is required to call finish() before destroying a +// Pipeline in order to avoid loss of data. A Pipeline class should +// not throw an exception in the destructor if this hasn't been done +// though since doing so causes too much trouble when deleting +// pipelines during error conditions. +// +// Some pipelines are reusable (i.e., you can call write() after +// calling finish() and can call finish() multiple times) while others +// are not. It is up to the caller to use a pipeline according to its +// own restrictions. + +#ifndef __PIPELINE_HH__ +#define __PIPELINE_HH__ + +#include <qpdf/QEXC.hh> + +class Pipeline +{ + public: + class Exception: public QEXC::General + { + public: + Exception(std::string const& message) : + QEXC::General(message) + { + } + + virtual ~Exception() throw() + { + } + }; + + Pipeline(char const* identifier, Pipeline* next); + + virtual ~Pipeline(); + + // Subclasses should implement write and finish to do their jobs + // and then, if they are not end-of-line pipelines, call + // getNext()->write or getNext()->finish. + virtual void write(unsigned char* data, int len) = 0; + virtual void finish() = 0; + + protected: + Pipeline* getNext(bool allow_null = false); + std::string identifier; + + private: + // Do not implement copy or assign + Pipeline(Pipeline const&); + Pipeline& operator=(Pipeline const&); + + Pipeline* next; +}; + +#endif // __PIPELINE_HH__ diff --git a/include/qpdf/Pl_Buffer.hh b/include/qpdf/Pl_Buffer.hh new file mode 100644 index 00000000..e78b5a17 --- /dev/null +++ b/include/qpdf/Pl_Buffer.hh @@ -0,0 +1,46 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. 
+ +#ifndef __PL_BUFFER_HH__ +#define __PL_BUFFER_HH__ + +// This pipeline accumulates the data passed to it into a memory +// buffer. Each subsequent use of this buffer appends to the data +// accumulated so far. getBuffer() may be called only after calling +// finish() and before calling any subsequent write(). At that point, +// a dynamically allocated Buffer object is returned and the internal +// buffer is reset. The caller is responsible for deleting the +// returned Buffer. +// +// For this pipeline, "next" may be null. If a next pointer is +// provided, this pipeline will also pass the data through to it. + +#include <qpdf/Pipeline.hh> +#include <qpdf/PointerHolder.hh> +#include <qpdf/Buffer.hh> +#include <list> + +class Pl_Buffer: public Pipeline +{ + public: + Pl_Buffer(char const* identifier, Pipeline* next = 0); + virtual ~Pl_Buffer(); + virtual void write(unsigned char*, int); + virtual void finish(); + + // Each call to getBuffer() resets this object -- see notes above. + // The caller is responsible for deleting the returned Buffer + // object. + Buffer* getBuffer(); + + private: + bool ready; + std::list<PointerHolder<Buffer> > data; + size_t total_size; +}; + +#endif // __PL_BUFFER_HH__ diff --git a/include/qpdf/Pl_Count.hh b/include/qpdf/Pl_Count.hh new file mode 100644 index 00000000..287b8297 --- /dev/null +++ b/include/qpdf/Pl_Count.hh @@ -0,0 +1,34 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __PL_COUNT_HH__ +#define __PL_COUNT_HH__ + +// This pipeline is reusable; i.e., it is safe to call write() after +// calling finish(). 
+ +#include <qpdf/Pipeline.hh> + +class Pl_Count: public Pipeline +{ + public: + Pl_Count(char const* identifier, Pipeline* next); + virtual ~Pl_Count(); + virtual void write(unsigned char*, int); + virtual void finish(); + // Returns the number of bytes written + int getCount() const; + // Returns the last character written, or '\0' if no characters + // have been written (in which case getCount() returns 0) + unsigned char getLastChar() const; + + private: + int count; + unsigned char last_char; +}; + +#endif // __PL_COUNT_HH__ diff --git a/include/qpdf/Pl_Discard.hh b/include/qpdf/Pl_Discard.hh new file mode 100644 index 00000000..cd0865a8 --- /dev/null +++ b/include/qpdf/Pl_Discard.hh @@ -0,0 +1,28 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __PL_DISCARD_HH__ +#define __PL_DISCARD_HH__ + +// This pipeline discards its output. It is an end-of-line pipeline +// (with no next). + +// This pipeline is reusable; i.e., it is safe to call write() after +// calling finish(). + +#include <qpdf/Pipeline.hh> + +class Pl_Discard: public Pipeline +{ + public: + Pl_Discard(); + virtual ~Pl_Discard(); + virtual void write(unsigned char*, int); + virtual void finish(); +}; + +#endif // __PL_DISCARD_HH__ diff --git a/include/qpdf/Pl_Flate.hh b/include/qpdf/Pl_Flate.hh new file mode 100644 index 00000000..16058d37 --- /dev/null +++ b/include/qpdf/Pl_Flate.hh @@ -0,0 +1,53 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. 
+ +#ifndef __PL_FLATE_HH__ +#define __PL_FLATE_HH__ + +#include <qpdf/Pipeline.hh> + +#include <zlib.h> + +class Pl_Flate: public Pipeline +{ + public: + class Exception: public Pipeline::Exception + { + public: + Exception(std::string const& message) : + Pipeline::Exception(message) + { + } + + virtual ~Exception() throw () + { + } + }; + + static int const def_bufsize = 65536; + + enum action_e { a_inflate, a_deflate }; + + Pl_Flate(char const* identifier, Pipeline* next, + action_e action, int out_bufsize = def_bufsize); + virtual ~Pl_Flate(); + + virtual void write(unsigned char* data, int len); + virtual void finish(); + + private: + void handleData(unsigned char* data, int len, int flush); + void checkError(char const* prefix, int error_code); + + unsigned char* outbuf; + int out_bufsize; + action_e action; + bool initialized; + z_stream zstream; +}; + +#endif // __PL_FLATE_HH__ diff --git a/include/qpdf/Pl_StdioFile.hh b/include/qpdf/Pl_StdioFile.hh new file mode 100644 index 00000000..d74ded3a --- /dev/null +++ b/include/qpdf/Pl_StdioFile.hh @@ -0,0 +1,49 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +// End-of-line pipeline that simply writes its data to a stdio FILE* object. + +#ifndef __PL_STDIOFILE_HH__ +#define __PL_STDIOFILE_HH__ + +#include <qpdf/Pipeline.hh> + +#include <stdio.h> + +// +// This pipeline is reusable. +// + +class Pl_StdioFile: public Pipeline +{ + public: + class Exception: public Pipeline::Exception + { + public: + Exception(std::string const& message) : + Pipeline::Exception(message) + { + } + + virtual ~Exception() throw () + { + } + }; + + // f is externally maintained; this class just writes to and + // flushes it. It does not close it. 
+ Pl_StdioFile(char const* identifier, FILE* f); + virtual ~Pl_StdioFile(); + + virtual void write(unsigned char* buf, int len); + virtual void finish(); + + private: + FILE* file; +}; + +#endif // __PL_STDIOFILE_HH__ diff --git a/include/qpdf/PointerHolder.hh b/include/qpdf/PointerHolder.hh new file mode 100644 index 00000000..b4e9bb64 --- /dev/null +++ b/include/qpdf/PointerHolder.hh @@ -0,0 +1,170 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __POINTERHOLDER_HH__ +#define __POINTERHOLDER_HH__ + +#include <iostream> + +// This class is basically boost::shared_ptr but predates that by +// several years. + +// This class expects to be initialized with a dynamically allocated +// object pointer. It keeps a reference count and deletes this once +// the reference count goes to zero. PointerHolder objects are +// explicitly safe for use in STL containers. + +// It is very important that a client who pulls the pointer out of +// this holder does not let the holder go out of scope until it is +// finished with the pointer. It is also important that exactly one +// instance of this object ever gets initialized with a given pointer. +// Otherwise, the pointer will be deleted twice, and before that, some +// objects will be left with a pointer to a deleted object. In other +// words, the only legitimate way for two PointerHolder objects to +// contain the same pointer is for one to be a copy of the other. +// Copy and assignment semantics are well-defined and essentially +// allow you to use PointerHolder as a means to get pass-by-reference +// semantics in a pass-by-value environment without having to worry +// about memory management details. 
+ +// Comparison (== and <) are defined and operate on the internally +// stored pointers, not on the data. This makes it possible to store +// PointerHolder objects in sorted lists or to find them in STL +// containers just as one would be able to store pointers. Comparing +// the underlying pointers provides a well-defined, if not +// particularly meaningful, ordering. + +template <class T> +class PointerHolder +{ + private: + class Data + { + public: + Data(T* pointer, bool tracing) : + pointer(pointer), + tracing(tracing), + refcount(0) + { + static int next_id = 0; + this->unique_id = ++next_id; + } + ~Data() + { + if (this->tracing) + { + std::cerr << "PointerHolder deleting pointer " + << (void*)pointer + << std::endl; + } + delete this->pointer; + if (this->tracing) + { + std::cerr << "PointerHolder done deleting pointer " + << (void*)pointer + << std::endl; + } + } + T* pointer; + bool tracing; + int refcount; + int unique_id; + private: + Data(Data const&); + Data& operator=(Data const&); + }; + + public: + PointerHolder(T* pointer = 0, bool tracing = false) + { + this->init(new Data(pointer, tracing)); + } + PointerHolder(PointerHolder const& rhs) + { + this->copy(rhs); + } + PointerHolder& operator=(PointerHolder const& rhs) + { + if (this != &rhs) + { + this->destroy(); + this->copy(rhs); + } + return *this; + } + ~PointerHolder() + { + this->destroy(); + } + bool operator==(PointerHolder const& rhs) const + { + return this->data->pointer == rhs.data->pointer; + } + bool operator<(PointerHolder const& rhs) const + { + return this->data->pointer < rhs.data->pointer; + } + + // NOTE: The pointer returned by getPointer turns into a pumpkin + // when the last PointerHolder that contains it disappears. 
+ T* getPointer() + { + return this->data->pointer; + } + T const* getPointer() const + { + return this->data->pointer; + } + int getRefcount() const + { + return this->data->refcount; + } + + private: + void init(Data* data) + { + this->data = data; + { + ++this->data->refcount; + if (this->data->tracing) + { + std::cerr << "PointerHolder " << this->data->unique_id + << " refcount increased to " << this->data->refcount + << std::endl; + } + } + } + void copy(PointerHolder const& rhs) + { + this->init(rhs.data); + } + void destroy() + { + bool gone = false; + { + if (--this->data->refcount == 0) + { + gone = true; + } + if (this->data->tracing) + { + std::cerr << "PointerHolder " << this->data->unique_id + << " refcount decreased to " + << this->data->refcount + << std::endl; + } + } + if (gone) + { + delete this->data; + } + } + + Data* data; +}; + +#endif // __POINTERHOLDER_HH__ diff --git a/include/qpdf/QEXC.hh b/include/qpdf/QEXC.hh new file mode 100644 index 00000000..b94edf7a --- /dev/null +++ b/include/qpdf/QEXC.hh @@ -0,0 +1,119 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __QEXC_HH__ +#define __QEXC_HH__ + +#include <string> +#include <exception> +#include <errno.h> + +namespace QEXC +{ + // This namespace contains all exception classes used by the + // library. + + // The class hierarchy is as follows: + + // std::exception + // | + // +-> QEXC::Base + // | + // +-> QEXC::General + // | + // +-> QEXC::Internal + + // QEXC::General is the base class of all standard user-defined + // exceptions and "expected" error conditions raised by QClass. + // Applications or libraries using QClass are encouraged to derive + // their own exceptions from these classes if they wish. 
It is + // entirely reasonable for code to catch QEXC::General or specific + // subclasses of it as part of normal error handling. + + // QEXC::Internal is reserved for internal errors. These should + // be used only for situations that indicate a likely bug in the + // software itself. This may include improper use of a library + // function. Operator errors should not be able to cause Internal + // errors. (There may be some exceptions to this such as users + // invoking programs that were intended only to be invoked by + // other programs.) QEXC::Internal should generally not be + // trapped except in terminate handlers or top-level exception + // handlers which will want to translate them into error messages + // and cause the program to exit. Such top-level handlers may + // want to catch std::exception instead. + + // All subclasses of QEXC::Base implement a const unparse() method + // which returns a std::string const&. They also override + // std::exception::what() to return a char* with the same value. + // unparse() should be implemented in such a way that a program + // catching QEXC::Base or std::exception can use the text returned + // by unparse() (or what()) without any exception-specific + // adornment. (The program may prefix the program name or other + // general information.) Note that std::exception::what() is a + // const method that returns a const char*. For this reason, it + // is essential that unparse() return a const reference to a + // string so that what() can be implemented by calling unparse(). + // This means that the string that unparse() returns a reference + // to must not be allocated on the stack in the call to unparse(). + // The recommended way to do this is for derived exception classes + // to store their string descriptions by calling the protected + // setMessage() method and then to not override unparse(). + + class Base: public std::exception + { + // This is the common base class for all exceptions in qclass. 
+ // Application/library code should not generally catch this + // directly. See above for caveats. + public: + Base(); + Base(std::string const& message); + virtual ~Base() throw() {} + virtual std::string const& unparse() const; + virtual const char* what() const throw(); + + protected: + void setMessage(std::string const& message); + + private: + std::string message; + }; + + class General: public Base + { + // This is the base class for normal user/library-defined + // error conditions. + public: + General(); + General(std::string const& message); + virtual ~General() throw() {}; + }; + + // Note that Internal is not derived from General. Internal + // errors are too severe. We don't want internal errors + // accidentally trapped as part of QEXC::General. If you are + // going to deal with internal errors, you have to do so + // explicitly. + class Internal: public Base + { + public: + Internal(std::string const& message); + virtual ~Internal() throw() {}; + }; + + class System: public General + { + public: + System(std::string const& prefix, int sys_errno); + virtual ~System() throw() {}; + int getErrno() const; + + private: + int sys_errno; + }; +}; + +#endif // __QEXC_HH__ diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh new file mode 100644 index 00000000..d311b3c8 --- /dev/null +++ b/include/qpdf/QPDF.hh @@ -0,0 +1,750 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. 
+ +#ifndef __QPDF_HH__ +#define __QPDF_HH__ + +#include <stdio.h> +#include <string> +#include <map> +#include <list> + +#include <qpdf/QPDFXRefEntry.hh> +#include <qpdf/QPDFObjectHandle.hh> +#include <qpdf/QPDFTokenizer.hh> +#include <qpdf/Buffer.hh> + +class QPDF_Stream; +class BitStream; +class BitWriter; +class QPDFExc; + +class QPDF +{ + public: + QPDF(); + ~QPDF(); + + // Associate a file with a QPDF object and do initial parsing of + // the file. PDF objects are not read until they are needed. A + // QPDF object may be associated with only one file in its + // lifetime. This method must be called before any methods that + // potentially ask for information about the PDF file are called. + // Prior to calling this, the only methods that are allowed are + // those that set parameters. + void processFile(char const* filename, char const* password = ""); + + // Parameter settings + + // If true, ignore any cross-reference streams in a hybrid file + // (one that contains both cross-reference streams and + // cross-reference tables). This can be useful for testing to + // ensure that a hybrid file would work with an older reader. + void setIgnoreXRefStreams(bool); + + // By default, any warnings are issued to stderr as they are + // encountered. If this is called with a true value, reporting of + // warnings is suppressed. You may still retrieve warnings by + // calling getWarnings. + void setSuppressWarnings(bool); + + // By default, QPDF will try to recover if it finds certain types + // of errors in PDF files. If turned off, it will throw an + // exception on the first such problem it finds without attempting + // recovery. + void setAttemptRecovery(bool); + + // Other public methods + + // Return the list of warnings that have been issued so far and + // clear the list. This method may be called even if processFile + // throws an exception. 
Note that if setSuppressWarnings was not + // called or was called with a false value, any warnings retrieved + // here will have already been issued to stderr. + std::vector<std::string> getWarnings(); + + std::string getFilename() const; + std::string getPDFVersion() const; + QPDFObjectHandle getTrailer(); + QPDFObjectHandle getRoot(); + + // Install this object handle as an indirect object and return an + // indirect reference to it. + QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); + + // Retrieve an object by object ID and generation. Returns an + // indirect reference to it. + QPDFObjectHandle getObjectByID(int objid, int generation); + + // Encryption support + + struct EncryptionData + { + // This class holds data read from the encryption dictionary. + EncryptionData(int V, int R, int Length_bytes, long P, + std::string const& O, std::string const& U, + std::string const& id1) : + V(V), + R(R), + Length_bytes(Length_bytes), + P(P), + O(O), + U(U), + id1(id1) + { + } + + int V; + int R; + int Length_bytes; + long P; + std::string O; + std::string U; + std::string id1; + }; + + static void trim_user_password(std::string& user_password); + static std::string compute_data_key( + std::string const& encryption_key, int objid, int generation); + static std::string compute_encryption_key( + std::string const& password, EncryptionData const& data); + + static void compute_encryption_O_U( + char const* user_password, char const* owner_password, + int V, int R, int key_len, unsigned long P, + std::string const& id1, + std::string& O, std::string& U); + std::string const& getUserPassword() const; + + // Linearization support + + // Returns true iff the file starts with a linearization parameter + // dictionary. Does no additional validation. + bool isLinearized(); + + // Performs various sanity checks on a linearized file. Return + // true if no errors or warnings. Otherwise, return false and + // output errors and warnings to stdout. 
+ bool checkLinearization(); + + // Calls checkLinearization() and, if possible, prints normalized + // contents of some of the hints tables to stdout. Normalization + // includes adding min values to delta values and adjusting + // offsets based on the location and size of the primary hint + // stream. + void showLinearizationData(); + + // Shows the contents of the cross-reference table + void showXRefTable(); + + // Optimization support -- see doc/optimization. Implemented in + // QPDF_optimization.cc + + // The object_stream_data map maps from a "compressed" object to + // the object stream that contains it. This enables optimize to + // populate the object <-> user maps with only uncompressed + // objects. If allow_changes is false, an exception will be + // thrown if any changes are made during the optimization process. + // This is available so that the test suite can make sure that a + // linearized file is already optimized. When called in this way, + // optimize() still populates the object <-> user maps + void optimize(std::map<int, int> const& object_stream_data, + bool allow_changes = true); + + // Replace all references to indirect objects that are "scalars" + // (i.e., things that don't have children: not arrays, streams, or + // dictionaries) with direct objects. + void flattenScalarReferences(); + + // For QPDFWriter: + + // Remove /ID, /Encrypt, and /Prev keys from the trailer + // dictionary since these are regenerated during write. + void trimTrailerForWrite(); + + // Get lists of all objects in order according to the part of a + // linearized file that they belong to. 
+ void getLinearizedParts( + std::map<int, int> const& object_stream_data, + std::vector<QPDFObjectHandle>& part4, + std::vector<QPDFObjectHandle>& part6, + std::vector<QPDFObjectHandle>& part7, + std::vector<QPDFObjectHandle>& part8, + std::vector<QPDFObjectHandle>& part9); + + void generateHintStream(std::map<int, QPDFXRefEntry> const& xref, + std::map<int, size_t> const& lengths, + std::map<int, int> const& obj_renumber, + PointerHolder<Buffer>& hint_stream, + int& S, int& O); + + // Map object to object stream that contains it + void getObjectStreamData(std::map<int, int>&); + // Get a list of objects that would be permitted in an object + // stream + std::vector<int> getCompressibleObjects(); + + // Convenience routines for common functions. See also + // QPDFObjectHandle.hh for additional convenience routines. + + // Traverse page tree and return all /Page objects. + std::vector<QPDFObjectHandle> const& getAllPages(); + + // Resolver class is restricted to QPDFObjectHandle so that only + // it can resolve indirect references. 
+ class Resolver + { + friend class QPDFObjectHandle; + private: + static PointerHolder<QPDFObject> resolve( + QPDF* qpdf, int objid, int generation) + { + return qpdf->resolve(objid, generation); + } + }; + friend class Resolver; + + // Pipe class is restricted to QPDF_Stream + class Pipe + { + friend class QPDF_Stream; + private: + static void pipeStreamData(QPDF* qpdf, int objid, int generation, + off_t offset, size_t length, + QPDFObjectHandle dict, + Pipeline* pipeline) + { + qpdf->pipeStreamData( + objid, generation, offset, length, dict, pipeline); + } + }; + friend class Pipe; + + private: + class InputSource + { + public: + InputSource() : + last_offset(0) + { + } + virtual ~InputSource() + { + } + + void setLastOffset(off_t); + off_t getLastOffset() const; + std::string readLine(); + + virtual std::string const& getName() const = 0; + virtual off_t tell() = 0; + virtual void seek(off_t offset, int whence) = 0; + virtual void rewind() = 0; + virtual size_t read(char* buffer, int length) = 0; + virtual void unreadCh(char ch) = 0; + + protected: + off_t last_offset; + }; + + class FileInputSource: public InputSource + { + public: + FileInputSource(); + void setFilename(char const* filename); + virtual ~FileInputSource(); + virtual std::string const& getName() const; + virtual off_t tell(); + virtual void seek(off_t offset, int whence); + virtual void rewind(); + virtual size_t read(char* buffer, int length); + virtual void unreadCh(char ch); + + private: + FileInputSource(FileInputSource const&); + FileInputSource& operator=(FileInputSource const&); + + void destroy(); + + std::string filename; + FILE* file; + }; + + class BufferInputSource: public InputSource + { + public: + BufferInputSource(std::string const& description, Buffer* buf); + virtual ~BufferInputSource(); + virtual std::string const& getName() const; + virtual off_t tell(); + virtual void seek(off_t offset, int whence); + virtual void rewind(); + virtual size_t read(char* buffer, int length); 
+ virtual void unreadCh(char ch); + + private: + std::string description; + Buffer* buf; + off_t cur_offset; + }; + + class ObjGen + { + public: + ObjGen(); + ObjGen(int obj, int gen); + bool operator<(ObjGen const&) const; + + int obj; + int gen; + }; + + class ObjCache + { + public: + ObjCache() : + end_before_space(0), + end_after_space(0) + { + } + ObjCache(PointerHolder<QPDFObject> object, + off_t end_before_space, + off_t end_after_space) : + object(object), + end_before_space(end_before_space), + end_after_space(end_after_space) + { + } + + PointerHolder<QPDFObject> object; + off_t end_before_space; + off_t end_after_space; + }; + + void parse(); + void warn(QPDFExc const& e); + void setTrailer(QPDFObjectHandle obj); + void read_xref(off_t offset); + void reconstruct_xref(QPDFExc& e); + int read_xrefTable(off_t offset); + int read_xrefStream(off_t offset); + int processXRefStream(off_t offset, QPDFObjectHandle& xref_stream); + void insertXrefEntry(int obj, int f0, int f1, int f2); + QPDFObjectHandle readObject( + InputSource*, int objid, int generation, + bool in_object_stream); + QPDFObjectHandle readObjectInternal( + InputSource* input, int objid, int generation, + bool in_object_stream, + bool in_array, bool in_dictionary); + int recoverStreamLength( + InputSource* input, int objid, int generation, off_t stream_offset); + QPDFTokenizer::Token readToken(InputSource*); + + QPDFObjectHandle readObjectAtOffset( + off_t offset, + int exp_objid, int exp_generation, + int& act_objid, int& act_generation); + PointerHolder<QPDFObject> resolve(int objid, int generation); + void resolveObjectsInStream(int obj_stream_number); + + // Calls finish() on the pipeline when done but does not delete it + void pipeStreamData(int objid, int generation, + off_t offset, size_t length, + QPDFObjectHandle dict, + Pipeline* pipeline); + void getAllPagesInternal(QPDFObjectHandle cur_pages, + std::vector<QPDFObjectHandle>& result); + + // methods to support encryption -- implemented 
in QPDF_encryption.cc + void initializeEncryption(); + std::string getKeyForObject(int objid, int generation); + void decryptString(std::string&, int objid, int generation); + void decryptStream(Pipeline*& pipeline, int objid, int generation, + std::vector<PointerHolder<Pipeline> >& heap); + + // Linearization Hint table structures. + // Naming conventions: + + // HSomething is the Something Hint Table or table header + // HSomethingEntry is an entry in the Something table + + // delta_something + min_something = something + // nbits_something = number of bits required for something + + // something_offset is the pre-adjusted offset in the file. If >= + // H0_offset, H0_length must be added to get an actual file + // offset. + + // PDF 1.4: Table F.4 + struct HPageOffsetEntry + { + HPageOffsetEntry() : + delta_nobjects(0), + delta_page_length(0), + nshared_objects(0), + delta_content_offset(0), + delta_content_length(0) + { + } + + int delta_nobjects; // 1 + int delta_page_length; // 2 + int nshared_objects; // 3 + // vectors' sizes = nshared_objects + std::vector<int> shared_identifiers; // 4 + std::vector<int> shared_numerators; // 5 + int delta_content_offset; // 6 + int delta_content_length; // 7 + }; + + // PDF 1.4: Table F.3 + struct HPageOffset + { + HPageOffset() : + min_nobjects(0), + first_page_offset(0), + nbits_delta_nobjects(0), + min_page_length(0), + nbits_delta_page_length(0), + min_content_offset(0), + nbits_delta_content_offset(0), + min_content_length(0), + nbits_delta_content_length(0), + nbits_nshared_objects(0), + nbits_shared_identifier(0), + nbits_shared_numerator(0), + shared_denominator(0) + { + } + + int min_nobjects; // 1 + int first_page_offset; // 2 + int nbits_delta_nobjects; // 3 + int min_page_length; // 4 + int nbits_delta_page_length; // 5 + int min_content_offset; // 6 + int nbits_delta_content_offset; // 7 + int min_content_length; // 8 + int nbits_delta_content_length; // 9 + int nbits_nshared_objects; // 10 + int 
nbits_shared_identifier; // 11 + int nbits_shared_numerator; // 12 + int shared_denominator; // 13 + // vector size is npages + std::vector<HPageOffsetEntry> entries; + }; + + // PDF 1.4: Table F.6 + struct HSharedObjectEntry + { + HSharedObjectEntry() : + delta_group_length(0), + signature_present(0), + nobjects_minus_one(0) + { + } + + // Item 3 is a 128-bit signature (unsupported by Acrobat) + int delta_group_length; // 1 + int signature_present; // 2 -- always 0 + int nobjects_minus_one; // 4 -- always 0 + }; + + // PDF 1.4: Table F.5 + struct HSharedObject + { + HSharedObject() : + first_shared_obj(0), + first_shared_offset(0), + nshared_first_page(0), + nshared_total(0), + nbits_nobjects(0), + min_group_length(0), + nbits_delta_group_length(0) + { + } + + int first_shared_obj; // 1 + int first_shared_offset; // 2 + int nshared_first_page; // 3 + int nshared_total; // 4 + int nbits_nobjects; // 5 + int min_group_length; // 6 + int nbits_delta_group_length; // 7 + // vector size is nshared_total + std::vector<HSharedObjectEntry> entries; + }; + + // PDF 1.4: Table F.9 + struct HGeneric + { + HGeneric() : + first_object(0), + first_object_offset(0), + nobjects(0), + group_length(0) + { + } + + int first_object; // 1 + int first_object_offset; // 2 + int nobjects; // 3 + int group_length; // 4 + }; + + // Other linearization data structures + + // Initialized from Linearization Parameter dictionary + struct LinParameters + { + LinParameters() : + file_size(0), + first_page_object(0), + first_page_end(0), + npages(0), + xref_zero_offset(0), + first_page(0), + H_offset(0), + H_length(0) + { + } + + int file_size; // /L + int first_page_object; // /O + int first_page_end; // /E + int npages; // /N + int xref_zero_offset; // /T + int first_page; // /P + int H_offset; // offset of primary hint stream + int H_length; // length of primary hint stream + }; + + // Computed hint table value data structures. 
These tables + // contain the computed values on which the hint table values are + // based. They exclude things like number of bits and store + // actual values instead of mins and deltas. File offsets are + // also absolute rather than being offset by the size of the + // primary hint table. We populate the hint table structures from + // these during writing and compare the hint table values with + // these during validation. We ignore some values for various + // reasons described in the code. Those values are omitted from + // these structures. Note also that object numbers are object + // numbers from the input file, not the output file. + + // Naming convention: CHSomething is analogous to HSomething + // above. "CH" is computed hint. + + struct CHPageOffsetEntry + { + CHPageOffsetEntry() : + nobjects(0), + nshared_objects(0) + { + } + + int nobjects; + int nshared_objects; + // vectors' sizes = nshared_objects + std::vector<int> shared_identifiers; + }; + + struct CHPageOffset + { + // vector size is npages + std::vector<CHPageOffsetEntry> entries; + }; + + struct CHSharedObjectEntry + { + CHSharedObjectEntry(int object) : + object(object) + { + } + + int object; + }; + + // PDF 1.4: Table F.5 + struct CHSharedObject + { + CHSharedObject() : + first_shared_obj(0), + nshared_first_page(0), + nshared_total(0) + { + } + + int first_shared_obj; + int nshared_first_page; + int nshared_total; + // vector size is nshared_total + std::vector<CHSharedObjectEntry> entries; + }; + + // No need for CHGeneric -- HGeneric is fine as is. 
+ + + // Data structures to support optimization -- implemented in + // QPDF_optimization.cc + + class ObjUser + { + public: + enum user_e + { + ou_bad, + ou_page, + ou_thumb, + ou_trailer_key, + ou_root_key, + ou_root + }; + + // type is set to ou_bad + ObjUser(); + + // type must be ou_root + ObjUser(user_e type); + + // type must be one of ou_page or ou_thumb + ObjUser(user_e type, int pageno); + + // type must be one of ou_trailer_key or ou_root_key + ObjUser(user_e type, std::string const& key); + + bool operator<(ObjUser const&) const; + + user_e ou_type; + int pageno; // if ou_page; + std::string key; // if ou_trailer_key or ou_root_key + }; + + // methods to support linearization checking -- implemented in + // QPDF_linearization.cc + void readLinearizationData(); + bool checkLinearizationInternal(); + void dumpLinearizationDataInternal(); + QPDFObjectHandle readHintStream(Pipeline&, off_t offset, size_t length); + void readHPageOffset(BitStream); + void readHSharedObject(BitStream); + void readHGeneric(BitStream, HGeneric&); + int maxEnd(ObjUser const& ou); + int getLinearizationOffset(ObjGen const&); + QPDFObjectHandle getUncompressedObject( + QPDFObjectHandle&, std::map<int, int> const& object_stream_data); + int lengthNextN(int first_object, int n, + std::list<std::string>& errors); + void checkHPageOffset(std::list<std::string>& errors, + std::list<std::string>& warnings, + std::vector<QPDFObjectHandle> const& pages, + std::map<int, int>& idx_to_obj); + void checkHSharedObject(std::list<std::string>& warnings, + std::list<std::string>& errors, + std::vector<QPDFObjectHandle> const& pages, + std::map<int, int>& idx_to_obj); + void checkHOutlines(std::list<std::string>& warnings); + void dumpHPageOffset(); + void dumpHSharedObject(); + void dumpHGeneric(HGeneric&); + int adjusted_offset(int offset); + QPDFObjectHandle objGenToIndirect(ObjGen const&); + void calculateLinearizationData( + std::map<int, int> const& object_stream_data); + void 
pushOutlinesToPart( + std::vector<QPDFObjectHandle>& part, + std::set<ObjGen>& lc_outlines, + std::map<int, int> const& object_stream_data); + int outputLengthNextN( + int in_object, int n, + std::map<int, size_t> const& lengths, + std::map<int, int> const& obj_renumber); + void calculateHPageOffset( + std::map<int, QPDFXRefEntry> const& xref, + std::map<int, size_t> const& lengths, + std::map<int, int> const& obj_renumber); + void calculateHSharedObject( + std::map<int, QPDFXRefEntry> const& xref, + std::map<int, size_t> const& lengths, + std::map<int, int> const& obj_renumber); + void calculateHOutline( + std::map<int, QPDFXRefEntry> const& xref, + std::map<int, size_t> const& lengths, + std::map<int, int> const& obj_renumber); + void writeHPageOffset(BitWriter&); + void writeHSharedObject(BitWriter&); + void writeHGeneric(BitWriter&, HGeneric&); + + + // Methods to support optimization + + void optimizePagesTree( + QPDFObjectHandle, + std::map<std::string, std::vector<QPDFObjectHandle> >&, + int& pageno, bool allow_changes); + void updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh); + void updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, + std::set<ObjGen>& visited, bool top); + void filterCompressedObjects(std::map<int, int> const& object_stream_data); + + + QPDFTokenizer tokenizer; + FileInputSource file; + bool encrypted; + bool encryption_initialized; + bool ignore_xref_streams; + bool suppress_warnings; + bool attempt_recovery; + std::string provided_password; + std::string user_password; + std::string encryption_key; + std::string cached_object_encryption_key; + int cached_key_objid; + int cached_key_generation; + std::string pdf_version; + std::map<ObjGen, QPDFXRefEntry> xref_table; + std::set<int> deleted_objects; + std::map<ObjGen, ObjCache> obj_cache; + QPDFObjectHandle trailer; + std::vector<QPDFObjectHandle> all_pages; + std::vector<std::string> warnings; + + // Linearization data + int first_xref_item_offset; // actual value 
from file + bool uncompressed_after_compressed; + + // Linearization parameter dictionary and hint table data: may be + // read from file or computed prior to writing a linearized file + QPDFObjectHandle lindict; + LinParameters linp; + HPageOffset page_offset_hints; + HSharedObject shared_object_hints; + HGeneric outline_hints; + + // Computed linearization data: used to populate above tables + // during writing and to compare with them during validation. c_ + // means computed. + LinParameters c_linp; + CHPageOffset c_page_offset_data; + CHSharedObject c_shared_object_data; + HGeneric c_outline_data; + + // Object ordering data for linearized files: initialized by + // calculateLinearizationData(). Part numbers refer to the PDF + // 1.4 specification. + std::vector<QPDFObjectHandle> part4; + std::vector<QPDFObjectHandle> part6; + std::vector<QPDFObjectHandle> part7; + std::vector<QPDFObjectHandle> part8; + std::vector<QPDFObjectHandle> part9; + + // Optimization data + std::map<ObjUser, std::set<ObjGen> > obj_user_to_objects; + std::map<ObjGen, std::set<ObjUser> > object_to_obj_users; +}; + +#endif // __QPDF_HH__ diff --git a/include/qpdf/QPDFExc.hh b/include/qpdf/QPDFExc.hh new file mode 100644 index 00000000..d3efb3b9 --- /dev/null +++ b/include/qpdf/QPDFExc.hh @@ -0,0 +1,22 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. 
+ +#ifndef __QPDFEXC_HH__ +#define __QPDFEXC_HH__ + +#include <qpdf/QEXC.hh> + +class QPDFExc: public QEXC::General +{ + public: + QPDFExc(std::string const& message); + QPDFExc(std::string const& filename, int offset, + std::string const& message); + virtual ~QPDFExc() throw (); +}; + +#endif // __QPDFEXC_HH__ diff --git a/include/qpdf/QPDFObject.hh b/include/qpdf/QPDFObject.hh new file mode 100644 index 00000000..1597e20e --- /dev/null +++ b/include/qpdf/QPDFObject.hh @@ -0,0 +1,20 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __QPDFOBJECT_HH__ +#define __QPDFOBJECT_HH__ + +#include <string> + +class QPDFObject +{ + public: + virtual ~QPDFObject() {} + virtual std::string unparse() = 0; +}; + +#endif // __QPDFOBJECT_HH__ diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh new file mode 100644 index 00000000..e38eb116 --- /dev/null +++ b/include/qpdf/QPDFObjectHandle.hh @@ -0,0 +1,221 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __QPDFOBJECTHANDLE_HH__ +#define __QPDFOBJECTHANDLE_HH__ + +#include <string> +#include <vector> +#include <set> +#include <map> + +#include <qpdf/PointerHolder.hh> +#include <qpdf/Buffer.hh> + +#include <qpdf/QPDFObject.hh> + +class Pipeline; +class QPDF; + +class QPDFObjectHandle +{ + public: + QPDFObjectHandle(); + bool isInitialized() const; + + // Exactly one of these will return true for any object. 
+ bool isBool(); + bool isNull(); + bool isInteger(); + bool isReal(); + bool isName(); + bool isString(); + bool isArray(); + bool isDictionary(); + bool isStream(); + + // This returns true in addition to the query for the specific + // type for indirect objects. + bool isIndirect(); + + // True for everything except array, dictionary, and stream + bool isScalar(); + + // Public factory methods + + static QPDFObjectHandle newNull(); + static QPDFObjectHandle newBool(bool value); + static QPDFObjectHandle newInteger(int value); + static QPDFObjectHandle newReal(std::string const& value); + static QPDFObjectHandle newName(std::string const& name); + static QPDFObjectHandle newString(std::string const& str); + static QPDFObjectHandle newArray( + std::vector<QPDFObjectHandle> const& items); + static QPDFObjectHandle newDictionary( + std::map<std::string, QPDFObjectHandle> const& items); + + // Accessor methods. If an accessor method that is valid for only + // a particular object type is called on an object of the wrong + // type, an exception is thrown. + + // Methods for bool objects + bool getBoolValue(); + + // Methods for integer objects + int getIntValue(); + + // Methods for real objects + std::string getRealValue(); + + // Methods that work for both integer and real objects + bool isNumber(); + double getNumericValue(); + + // Methods for name objects + std::string getName(); + + // Methods for string objects + std::string getStringValue(); + std::string getUTF8Value(); + + // Methods for array objects + int getArrayNItems(); + QPDFObjectHandle getArrayItem(int n); + + // Methods for dictionary objects + bool hasKey(std::string const&); + QPDFObjectHandle getKey(std::string const&); + std::set<std::string> getKeys(); + + // Mutator methods. Use with caution. + + // Recursively copy this object, making it direct. Throws an + // exception if a loop is detected or any sub-object is a stream. 
+ void makeDirect(); + + // Mutator methods for array objects + void setArrayItem(int, QPDFObjectHandle const&); + + // Mutator methods for dictionary objects + + // Replace value of key, adding it if it does not exist + void replaceKey(std::string const& key, QPDFObjectHandle const&); + // Remove key, doing nothing if key does not exist + void removeKey(std::string const& key); + + // Methods for stream objects + QPDFObjectHandle getDict(); + + // Returns filtered (uncompressed) stream data. Throws an + // exception if the stream is filtered and we can't decode it. + PointerHolder<Buffer> getStreamData(); + + // Write stream data through the given pipeline. A null pipeline + // value may be used if all you want to do is determine whether a + // stream is filterable. If filter is false, write raw stream + // data and return false. If filter is true, then attempt to + // apply all the decoding filters to the stream data. If we are + // successful, return true. Otherwise, return false and write raw + // data. If filtering is requested and successfully performed, + // then the normalize and compress flags are used to determine + // whether stream data should be normalized and compressed. In + // all cases, if this function returns false, raw data has been + // written. If it returns true, then any requested filtering has + // been performed. Note that if the original stream data has no + // filters applied to it, the return value will be equal to the + // value of the filter parameter. Callers may use the return + // value of this function to determine whether or not the /Filter + // and /DecodeParms keys in the stream dictionary should be + // replaced if writing a new stream object. 
+ bool pipeStreamData(Pipeline*, bool filter, + bool normalize, bool compress); + + // return 0 for direct objects + int getObjectID() const; + int getGeneration() const; + + std::string unparse(); + std::string unparseResolved(); + + // Convenience routines for commonly performed functions + + // Throws an exception if this is not a Page object. Returns an + // empty map if there are no images or no resources. This + // function does not presently support inherited resources. See + // comment in the source for details. Return value is a map from + // XObject name to the image object, which is always a stream. + std::map<std::string, QPDFObjectHandle> getPageImages(); + + // Throws an exception if this is not a Page object. Returns a + // vector of stream objects representing the content streams for + // the given page. This routine allows the caller to not care + // whether there are one or more than one content streams for a + // page. + std::vector<QPDFObjectHandle> getPageContents(); + + // Initializers for objects. This Factory class gives the QPDF + // class specific permission to call factory methods without + // making it a friend of the whole QPDFObjectHandle class. + class Factory + { + friend class QPDF; + private: + static QPDFObjectHandle newIndirect(QPDF* qpdf, + int objid, int generation) + { + return QPDFObjectHandle::newIndirect(qpdf, objid, generation); + } + // object must be dictionary object + static QPDFObjectHandle newStream( + QPDF* qpdf, int objid, int generation, + QPDFObjectHandle stream_dict, off_t offset, int length) + { + return QPDFObjectHandle::newStream( + qpdf, objid, generation, stream_dict, offset, length); + } + }; + friend class Factory; + + // Accessor for raw underlying object -- only QPDF is allowed to + // call this. 
+ class ObjAccessor + { + friend class QPDF; + private: + static PointerHolder<QPDFObject> getObject(QPDFObjectHandle& o) + { + o.dereference(); + return o.obj; + } + }; + friend class ObjAccessor; + + private: + QPDFObjectHandle(QPDF*, int objid, int generation); + QPDFObjectHandle(QPDFObject*); + + // Private object factory methods + static QPDFObjectHandle newIndirect(QPDF*, int objid, int generation); + static QPDFObjectHandle newStream( + QPDF* qpdf, int objid, int generation, + QPDFObjectHandle stream_dict, off_t offset, int length); + + void assertInitialized() const; + void assertType(char const* type_name, bool istype); + void assertPageObject(); + void dereference(); + void makeDirectInternal(std::set<int>& visited); + + bool initialized; + + QPDF* qpdf; // 0 for direct object + int objid; // 0 for direct object + int generation; + PointerHolder<QPDFObject> obj; +}; + +#endif // __QPDFOBJECTHANDLE_HH__ diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh new file mode 100644 index 00000000..e921bfc5 --- /dev/null +++ b/include/qpdf/QPDFTokenizer.hh @@ -0,0 +1,141 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. 
+ +#ifndef __QPDFTOKENIZER_HH__ +#define __QPDFTOKENIZER_HH__ + +#include <string> +#include <stdio.h> + +class QPDFTokenizer +{ + public: + enum token_type_e + { + tt_bad, + tt_array_close, + tt_array_open, + tt_brace_close, + tt_brace_open, + tt_dict_close, + tt_dict_open, + tt_integer, + tt_name, + tt_real, + tt_string, + tt_null, + tt_bool, + tt_word, + }; + + class Token + { + public: + Token() : type(tt_bad) {} + + Token(token_type_e type, std::string const& value) : + type(type), + value(value) + { + } + + Token(token_type_e type, std::string const& value, + std::string raw_value, std::string error_message) : + type(type), + value(value), + raw_value(raw_value), + error_message(error_message) + { + } + token_type_e getType() const + { + return this->type; + } + std::string const& getValue() const + { + return this->value; + } + std::string const& getRawValue() const + { + return this->raw_value; + } + std::string const& getErrorMessage() const + { + return this->error_message; + } + bool operator==(Token const& rhs) + { + // Ignore fields other than type and value + return ((this->type != tt_bad) && + (this->type == rhs.type) && + (this->value == rhs.value)); + } + + private: + token_type_e type; + std::string value; + std::string raw_value; + std::string error_message; + }; + + QPDFTokenizer(); + + // PDF files with version < 1.2 allowed the pound character + // anywhere in a name. Starting with version 1.2, the pound + // character was allowed only when followed by two hexadecimal + // digits. This method should be called when parsing a PDF file + // whose version is older than 1.2. + void allowPoundAnywhereInName(); + + // Mode of operation: + + // Keep presenting characters and calling getToken() until + // getToken() returns true. When it does, be sure to check + // unread_char and to unread ch if it is true. + + // If these are called when a token is available, an exception + // will be thrown.
+ void presentCharacter(char ch); + void presentEOF(); + + // If a token is available, return true and initialize token with + // the token, unread_char with whether or not we have to unread + // the last character, and if unread_char, ch with the character + // to unread. + bool getToken(Token& token, bool& unread_char, char& ch); + + // This function returns true if the current character is between + // tokens (i.e., white space that is not part of a string) or is + // part of a comment. A tokenizing filter can call this to + // determine whether to output the character. + bool betweenTokens(); + + private: + void reset(); + + // Lexer state + enum { st_top, st_in_comment, st_in_string, st_lt, st_gt, + st_literal, st_in_hexstring, st_token_ready } state; + + bool pound_special_in_name; + + // Current token accumulation + token_type_e type; + std::string val; + std::string raw_val; + std::string error_message; + bool unread_char; + char char_to_unread; + + // State for strings + int string_depth; + bool string_ignoring_newline; + char bs_num_register[4]; + bool last_char_was_bs; +}; + +#endif // __QPDFTOKENIZER_HH__ diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh new file mode 100644 index 00000000..f332a227 --- /dev/null +++ b/include/qpdf/QPDFWriter.hh @@ -0,0 +1,243 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +// This class implements a simple writer for saving QPDF objects to +// new PDF files. See comments throughout the header file for additional +// details.
+ +#ifndef __QPDFWRITER_HH__ +#define __QPDFWRITER_HH__ + +#include <stdio.h> +#include <string> +#include <list> +#include <vector> +#include <set> +#include <map> + +#include <qpdf/QPDFXRefEntry.hh> + +#include <qpdf/PointerHolder.hh> +#include <qpdf/Pipeline.hh> +#include <qpdf/Buffer.hh> + +class QPDF; +class QPDFObjectHandle; +class Pl_Count; + +class QPDFWriter +{ + public: + // Passing null as filename means write to stdout + QPDFWriter(QPDF& pdf, char const* filename); + ~QPDFWriter(); + + // Set the value of object stream mode. In disable mode, we never + // generate any object streams. In preserve mode, we preserve + // object stream structure from the original file. In generate + // mode, we generate our own object streams. In all cases, we + // generate a conventional cross-reference table if there are no + // object streams and a cross-reference stream if there are object + // streams. The default is o_preserve. + enum object_stream_e { o_disable, o_preserve, o_generate }; + void setObjectStreamMode(object_stream_e); + + // Set value of stream data mode. In uncompress mode, we attempt + // to uncompress any stream that we can. In preserve mode, we + // preserve any filtering applied to streams. In compress mode, + // if we can apply all filters and the stream is not already + // optimally compressed, recompress the stream. + enum stream_data_e { s_uncompress, s_preserve, s_compress }; + void setStreamDataMode(stream_data_e); + + // Set value of content stream normalization. The default is + // "false". If true, we attempt to normalize newlines inside of + // content streams. Some constructs such as inline images may + // thwart our efforts. There may be some cases where this can + // damage the content stream. This flag should be used only for + // debugging and experimenting with PDF content streams. Never + // use it for production files. + void setContentNormalization(bool); + + // Set QDF mode. 
QDF mode causes special "pretty printing" of + // PDF objects, adds comments for easier perusing of files. + // Resulting PDF files can be edited in a text editor and then run + // through fix-qdf to update cross reference tables and stream + // lengths. + void setQDFMode(bool); + + // Cause a static /ID value to be generated. Use only in test + // suites. + void setStaticID(bool); + + // Preserve encryption. The default is true unless prefiltering, + // content normalization, or qdf mode has been selected in which + // case encryption is never preserved. Encryption is also not + // preserved if we explicitly set encryption parameters. + void setPreserveEncryption(bool); + + // Set up for encrypted output. Disables stream prefiltering and + // content normalization. Note that setting R2 encryption + // parameters sets the PDF version to at least 1.3, and setting R3 + // encryption parameters pushes the PDF version number to at least + // 1.4. + void setR2EncryptionParameters( + char const* user_password, char const* owner_password, + bool allow_print, bool allow_modify, + bool allow_extract, bool allow_annotate); + enum r3_print_e + { + r3p_full, // allow all printing + r3p_low, // allow only low-resolution printing + r3p_none // allow no printing + }; + enum r3_modify_e + { + r3m_all, // allow all modification + r3m_annotate, // allow comment authoring and form operations + r3m_form, // allow form field fill-in or signing + r3m_assembly, // allow only document assembly + r3m_none // allow no modification + }; + void setR3EncryptionParameters( + char const* user_password, char const* owner_password, + bool allow_accessibility, bool allow_extract, + r3_print_e print, r3_modify_e modify); + + // Create linearized output. Disables qdf mode, content + // normalization, and stream prefiltering.
+ void setLinearization(bool); + + void write(); + + private: + // flags used by unparseObject + static int const f_stream = 1 << 0; + static int const f_filtered = 1 << 1; + static int const f_in_ostream = 1 << 2; + + enum trailer_e { t_normal, t_lin_first, t_lin_second }; + + int bytesNeeded(unsigned long n); + void writeBinary(unsigned long val, unsigned int bytes); + void writeString(std::string const& str); + void writeBuffer(PointerHolder<Buffer>&); + void writeStringQDF(std::string const& str); + void writeStringNoQDF(std::string const& str); + void assignCompressedObjectNumbers(int objid); + void enqueueObject(QPDFObjectHandle object); + void writeObjectStreamOffsets(std::vector<int>& offsets, int first_obj); + void writeObjectStream(QPDFObjectHandle object); + void writeObject(QPDFObjectHandle object, int object_stream_index = -1); + void writeTrailer(trailer_e which, int size, + bool xref_stream, int prev = 0); + void unparseObject(QPDFObjectHandle object, int level, + unsigned int flags); + void unparseObject(QPDFObjectHandle object, int level, + unsigned int flags, + // for stream dictionaries + int stream_length, bool compress); + void unparseChild(QPDFObjectHandle child, int level, int flags); + void initializeSpecialStreams(); + void preserveObjectStreams(); + void generateObjectStreams(); + void generateID(); + void setEncryptionParameters( + char const* user_password, char const* owner_password, + int V, int R, int key_len, std::set<int>& bits_to_clear); + void setEncryptionParametersInternal( + int V, int R, int key_len, long P, + std::string const& O, std::string const& U, + std::string const& id1, std::string const& user_password); + void copyEncryptionParameters(); + void setDataKey(int objid); + int openObject(int objid = 0); + void closeObject(int objid); + void writeStandard(); + void writeLinearized(); + void enqueuePart(std::vector<QPDFObjectHandle>& part); + void writeEncryptionDictionary(); + void writeHeader(); + void 
writeHintStream(int hint_id); + int writeXRefTable(trailer_e which, int first, int last, int size); + int writeXRefTable(trailer_e which, int first, int last, int size, + // for linearization + int prev, + bool suppress_offsets, + int hint_id, + int hint_offset, + int hint_length); + int writeXRefStream(int objid, int max_id, int max_offset, + trailer_e which, int first, int last, int size); + int writeXRefStream(int objid, int max_id, int max_offset, + trailer_e which, int first, int last, int size, + // for linearization + int prev, + int hint_id, + int hint_offset, + int hint_length); + + // When filtering subsections, push additional pipelines to the + // stack. When ready to switch, activate the pipeline stack. + // Pipelines passed to pushPipeline are deleted when + // clearPipelineStack is called. + Pipeline* pushPipeline(Pipeline*); + void activatePipelineStack(); + + // Calls finish on the current pipeline and pops the pipeline + // stack until the top of stack is a previous active top of stack, + // and restores the pipeline to that point. Deletes any pipelines + // that it pops. If the bp argument is non-null and any of the + // stack items are of type Pl_Buffer, the buffer is retrieved.
+ void popPipelineStack(PointerHolder<Buffer>* bp = 0); + + void pushEncryptionFilter(); + void pushDiscardFilter(); + + QPDF& pdf; + char const* filename; + FILE* file; + bool close_file; + bool normalize_content_set; + bool normalize_content; + bool stream_data_mode_set; + stream_data_e stream_data_mode; + bool qdf_mode; + bool static_id; + bool direct_stream_lengths; + bool encrypted; + bool preserve_encryption; + bool linearized; + object_stream_e object_stream_mode; + std::string encryption_key; + std::map<std::string, std::string> encryption_dictionary; + + std::string id1; // for /ID key of + std::string id2; // trailer dictionary + std::string min_pdf_version; + int encryption_dict_objid; + std::string cur_data_key; + std::list<PointerHolder<Pipeline> > to_delete; + Pl_Count* pipeline; + std::list<QPDFObjectHandle> object_queue; + std::map<int, int> obj_renumber; + std::map<int, QPDFXRefEntry> xref; + std::map<int, size_t> lengths; + int next_objid; + int cur_stream_length_id; + int cur_stream_length; + bool added_newline; + int max_ostream_index; + std::set<int> normalized_streams; + std::map<int, int> page_object_to_seq; + std::map<int, int> contents_to_page_seq; + std::map<int, int> object_to_object_stream; + std::map<int, std::set<int> > object_stream_to_objects; + std::list<Pipeline*> pipeline_stack; +}; + +#endif // __QPDFWRITER_HH__ diff --git a/include/qpdf/QPDFXRefEntry.hh b/include/qpdf/QPDFXRefEntry.hh new file mode 100644 index 00000000..4b1db9a2 --- /dev/null +++ b/include/qpdf/QPDFXRefEntry.hh @@ -0,0 +1,34 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. 
+ +#ifndef __QPDFXREFENTRY_HH__ +#define __QPDFXREFENTRY_HH__ + +class QPDFXRefEntry +{ + public: + // Type constants are from the PDF spec section + // "Cross-Reference Streams": + // 0 = free entry; not used + // 1 = "uncompressed"; field 1 = offset + // 2 = "compressed"; field 1 = object stream number, field 2 = index + + QPDFXRefEntry(); + QPDFXRefEntry(int type, int field1, int field2); + + int getType() const; + int getOffset() const; // only for type 1 + int getObjStreamNumber() const; // only for type 2 + int getObjStreamIndex() const; // only for type 2 + + private: + int type; + int field1; + int field2; +}; + +#endif // __QPDFXREFENTRY_HH__ diff --git a/include/qpdf/QTC.hh b/include/qpdf/QTC.hh new file mode 100644 index 00000000..3d9597d4 --- /dev/null +++ b/include/qpdf/QTC.hh @@ -0,0 +1,16 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __QTC_HH__ +#define __QTC_HH__ + +namespace QTC +{ + void TC(char const* const scope, char const* const ccase, int n = 0); +}; + +#endif // __QTC_HH__ diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh new file mode 100644 index 00000000..f2b67d92 --- /dev/null +++ b/include/qpdf/QUtil.hh @@ -0,0 +1,45 @@ +// Copyright (c) 2005-2008 Jay Berkenbilt +// +// This file is part of qpdf. This software may be distributed under +// the terms of version 2 of the Artistic License which may be found +// in the source distribution. It is provided "as is" without express +// or implied warranty. + +#ifndef __QUTIL_HH__ +#define __QUTIL_HH__ + +#include <string> +#include <list> +#include <stdio.h> +#include <sys/stat.h> + +#include <qpdf/QEXC.hh> + +namespace QUtil +{ + // This is a collection of useful utility functions that don't + // really go anywhere else. 
+ std::string int_to_string(int, int length = 0); + std::string double_to_string(double, int decimal_places = 0); + + // If status is -1, convert the current value of errno to a + // QEXC::System exception. Otherwise, return status. + int os_wrapper(std::string const& description, int status) + throw (QEXC::System); + + FILE* fopen_wrapper(std::string const&, FILE*) + throw (QEXC::System); + + char* copy_string(std::string const&); + + // Get the value of an environment variable in a portable fashion. + // Returns true iff the variable is defined. If `value' is + // non-null, initializes it with the value of the variable. + bool get_env(std::string const& var, std::string* value = 0); + + // Return a string containing the byte representation of the UTF-8 + // encoding for the unicode value passed in. + std::string toUTF8(unsigned long uval); +}; + +#endif // __QUTIL_HH__ |