aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2012-07-21 15:00:06 +0200
committerJay Berkenbilt <ejb@ql.org>2012-07-21 15:06:10 +0200
commit6bbea4baa0c06b39b1b71f1aa6fc276789296556 (patch)
tree62198136a609c86029d124323be9e2ea72f88d9a
parentf3e267fce28c58039789379ba3488ad12c20a7f6 (diff)
downloadqpdf-6bbea4baa0c06b39b1b71f1aa6fc276789296556.tar.zst
Implement QPDFObjectHandle::parse
Move object parsing code from QPDF to QPDFObjectHandle and parameterize the parts of it that are specific to a QPDF object. Provide a version that can't handle indirect objects and that can be called on an arbitrary string. A side effect of this change is that the offset used when reporting invalid stream length has changed, but since the new value seems like a better value than the old one, the test suite has been updated rather than making the code backward compatible. This only affects the offset reported for invalid streams that lack /Length or have an invalid /Length key. Updated some test code and examples to use QPDFObjectHandle::parse. Supporting changes include adding a BufferInputSource constructor that takes a string.
-rw-r--r--ChangeLog10
-rw-r--r--TODO6
-rw-r--r--examples/pdf-create.cc36
-rw-r--r--include/qpdf/BufferInputSource.hh2
-rw-r--r--include/qpdf/QPDF.hh21
-rw-r--r--include/qpdf/QPDFObjectHandle.hh56
-rw-r--r--libqpdf/BufferInputSource.cc12
-rw-r--r--libqpdf/QPDF.cc496
-rw-r--r--libqpdf/QPDFObjectHandle.cc269
-rw-r--r--libqpdf/QPDF_Stream.cc15
-rw-r--r--libqpdf/qpdf/QPDF_Stream.hh2
-rw-r--r--qpdf/pdf_from_scratch.cc23
-rw-r--r--qpdf/qpdf.testcov10
-rw-r--r--qpdf/qtest/qpdf.test6
-rw-r--r--qpdf/qtest/qpdf/bad22.out2
-rw-r--r--qpdf/qtest/qpdf/bad23.out2
-rw-r--r--qpdf/qtest/qpdf/parse-object.out4
-rw-r--r--qpdf/test_driver.cc32
18 files changed, 618 insertions, 386 deletions
diff --git a/ChangeLog b/ChangeLog
index 1b76b011..2bb4564a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2012-07-21 Jay Berkenbilt <ejb@ql.org>
+
+ * Add new method QPDFObjectHandle::replaceDict to replace a
+ stream's dictionary. Use with caution; see comments in
+ QPDFObjectHandle.hh.
+
+ * Add new method QPDFObjectHandle::parse for creation of
+ QPDFObjectHandle objects from string representations of the
+ objects. Thanks to Tobias Hoffmann for the idea.
+
2012-07-15 Jay Berkenbilt <ejb@ql.org>
* add new QPDF::isEncrypted method that returns some additional
diff --git a/TODO b/TODO
index cf752daf..42d5e7a6 100644
--- a/TODO
+++ b/TODO
@@ -20,16 +20,14 @@ Next
* Make sure that the release notes call attention to the one API
breaking change: removal of length from replaceStreamData.
- * Add a way to create new QPDFObjectHandles with a string
- representation of them, such as
- QPDFObjectHandle::parse("<< /a 1 /b 2 >>");
-
* Document thread safety: One individual QPDF or QPDFWriter object
can only be used by one thread at a time, but multiple threads can
simultaneously use separate objects.
* Write some documentation about the design of copyForeignObject.
+ * Mention QPDFObjectHandle::parse in the documentation.
+
* copyForeignObject still to do:
- qpdf command
diff --git a/examples/pdf-create.cc b/examples/pdf-create.cc
index a9ad2389..902c6805 100644
--- a/examples/pdf-create.cc
+++ b/examples/pdf-create.cc
@@ -81,24 +81,28 @@ static void create_pdf(char const* filename)
// Add an indirect object to contain a font descriptor for the
// built-in Helvetica font.
QPDFObjectHandle font = pdf.makeIndirectObject(
- QPDFObjectHandle::newDictionary());
- font.replaceKey("/Type", newName("/Font"));
- font.replaceKey("/Subtype", newName("/Type1"));
- font.replaceKey("/Name", newName("/F1"));
- font.replaceKey("/BaseFont", newName("/Helvetica"));
- font.replaceKey("/Encoding", newName("/WinAnsiEncoding"));
+ QPDFObjectHandle::parse(
+ "<<"
+ " /Type /Font"
+ " /Subtype /Type1"
+ " /Name /F1"
+ " /BaseFont /Helvetica"
+ " /Encoding /WinAnsiEncoding"
+ ">>"));
// Create a stream to encode our image. We don't have to set the
// length or filters. QPDFWriter will fill in the length and
// compress the stream data using FlateDecode by default.
QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
- QPDFObjectHandle image_dict = image.getDict();
- image_dict.replaceKey("/Type", newName("/XObject"));
- image_dict.replaceKey("/Subtype", newName("/Image"));
- image_dict.replaceKey("/ColorSpace", newName("/DeviceRGB"));
- image_dict.replaceKey("/BitsPerComponent", newInteger(8));
- image_dict.replaceKey("/Width", newInteger(100));
- image_dict.replaceKey("/Height", newInteger(100));
+ image.replaceDict(QPDFObjectHandle::parse(
+ "<<"
+ " /Type /XObject"
+ " /Subtype /Image"
+ " /ColorSpace /DeviceRGB"
+ " /BitsPerComponent 8"
+ " /Width 100"
+ " /Height 100"
+ ">>"));
// Provide the stream data.
ImageProvider* p = new ImageProvider(100, 100);
PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
@@ -107,10 +111,8 @@ static void create_pdf(char const* filename)
QPDFObjectHandle::newNull());
// Create direct objects as needed by the page dictionary.
- QPDFObjectHandle procset = QPDFObjectHandle::newArray();
- procset.appendItem(newName("/PDF"));
- procset.appendItem(newName("/Text"));
- procset.appendItem(newName("/ImageC"));
+ QPDFObjectHandle procset = QPDFObjectHandle::parse(
+ "[/PDF /Text /ImageC]");
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
rfont.replaceKey("/F1", font);
diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh
index 63c14def..48f6e3ac 100644
--- a/include/qpdf/BufferInputSource.hh
+++ b/include/qpdf/BufferInputSource.hh
@@ -9,6 +9,8 @@ class BufferInputSource: public InputSource
public:
BufferInputSource(std::string const& description, Buffer* buf,
bool own_memory = false);
+ BufferInputSource(std::string const& description,
+ std::string const& contents);
virtual ~BufferInputSource();
virtual qpdf_offset_t findAndSkipNextEOL();
virtual std::string const& getName() const;
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index e6ff75b4..dc6e9090 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -531,6 +531,23 @@ class QPDF
std::map<ObjGen, QPDFObjectHandle> foreign_streams;
};
+ class StringDecrypter: public QPDFObjectHandle::StringDecrypter
+ {
+ friend class QPDF;
+
+ public:
+ StringDecrypter(QPDF* qpdf, int objid, int gen);
+ virtual ~StringDecrypter()
+ {
+ }
+ virtual void decryptString(std::string& val);
+
+ private:
+ QPDF* qpdf;
+ int objid;
+ int gen;
+ };
+
void parse(char const* password);
void warn(QPDFExc const& e);
void setTrailer(QPDFObjectHandle obj);
@@ -547,10 +564,6 @@ class QPDF
QPDFObjectHandle readObject(
PointerHolder<InputSource>, std::string const& description,
int objid, int generation, bool in_object_stream);
- QPDFObjectHandle readObjectInternal(
- PointerHolder<InputSource> input, int objid, int generation,
- bool in_object_stream,
- bool in_array, bool in_dictionary);
size_t recoverStreamLength(
PointerHolder<InputSource> input, int objid, int generation,
qpdf_offset_t stream_offset);
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index 22ded37e..a1819d99 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -18,6 +18,7 @@
#include <qpdf/PointerHolder.hh>
#include <qpdf/Buffer.hh>
+#include <qpdf/InputSource.hh>
#include <qpdf/QPDFObject.hh>
@@ -25,6 +26,7 @@ class Pipeline;
class QPDF;
class QPDF_Dictionary;
class QPDF_Array;
+class QPDFTokenizer;
class QPDFObjectHandle
{
@@ -57,6 +59,18 @@ class QPDFObjectHandle
Pipeline* pipeline) = 0;
};
+ // This class is used by parse to decrypt strings when reading an
+ // object that contains encrypted strings.
+ class StringDecrypter
+ {
+ public:
+ QPDF_DLL
+ virtual ~StringDecrypter()
+ {
+ }
+ virtual void decryptString(std::string& val) = 0;
+ };
+
QPDF_DLL
QPDFObjectHandle();
QPDF_DLL
@@ -95,6 +109,30 @@ class QPDFObjectHandle
// Public factory methods
+ // Construct an object of any type from a string representation of
+ // the object. Throws QPDFExc with an empty filename and an
+ // offset into the string if there is an error. Any indirect
+ // object syntax (obj gen R) will cause a logic_error exception to
+ // be thrown. If object_description is provided, it will appear
+ // in the message of any QPDFExc exception thrown for invalid
+ // syntax.
+ QPDF_DLL
+ static QPDFObjectHandle parse(std::string const& object_str,
+ std::string const& object_description = "");
+
+ // Construct an object as above by reading from the given
+ // InputSource at its current position and using the tokenizer you
+ // supply. Indirect objects and encrypted strings are permitted.
+ // This method is intended to be called by QPDF for parsing
+ // objects that are ready from the object's input stream.
+ QPDF_DLL
+ static QPDFObjectHandle parse(PointerHolder<InputSource> input,
+ std::string const& object_description,
+ QPDFTokenizer&, bool& empty,
+ StringDecrypter* decrypter,
+ QPDF* context);
+
+ // Type-specific factories
QPDF_DLL
static QPDFObjectHandle newNull();
QPDF_DLL
@@ -124,7 +162,8 @@ class QPDFObjectHandle
// object. A subsequent call must be made to replaceStreamData()
// to provide data for the stream. The stream's dictionary may be
// retrieved by calling getDict(), and the resulting dictionary
- // may be modified.
+ // may be modified. Alternatively, you can create a new
+ // dictionary and call replaceDict to install it.
QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf);
@@ -303,6 +342,15 @@ class QPDFObjectHandle
bool pipeStreamData(Pipeline*, bool filter,
bool normalize, bool compress);
+ // Replace a stream's dictionary. The new dictionary must be
+ // consistent with the stream's data. This is most appropriately
+ // used when creating streams from scratch that will use a stream
+ // data provider and therefore start with an empty dictionary. It
+ // may be more convenient in this case than calling getDict and
+ // modifying it for each key. The pdf-create example does this.
+ QPDF_DLL
+ void replaceDict(QPDFObjectHandle);
+
// Replace this stream's stream data with the given data buffer,
// and replace the /Filter and /DecodeParms keys in the stream
// dictionary with the given values. (If either value is empty,
@@ -489,6 +537,12 @@ class QPDFObjectHandle
void dereference();
void makeDirectInternal(std::set<int>& visited);
void releaseResolved();
+ static QPDFObjectHandle parseInternal(
+ PointerHolder<InputSource> input,
+ std::string const& object_description,
+ QPDFTokenizer& tokenizer, bool& empty,
+ StringDecrypter* decrypter, QPDF* context,
+ bool in_array, bool in_dictionary);
bool initialized;
diff --git a/libqpdf/BufferInputSource.cc b/libqpdf/BufferInputSource.cc
index 6909dce2..03439955 100644
--- a/libqpdf/BufferInputSource.cc
+++ b/libqpdf/BufferInputSource.cc
@@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description,
{
}
+BufferInputSource::BufferInputSource(std::string const& description,
+ std::string const& contents) :
+ own_memory(true),
+ description(description),
+ buf(0),
+ cur_offset(0)
+{
+ this->buf = new Buffer(contents.length());
+ unsigned char* bp = buf->getBuffer();
+ memcpy(bp, (char*)contents.c_str(), contents.length());
+}
+
BufferInputSource::~BufferInputSource()
{
if (own_memory)
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index e1ce5fc4..bee2f3ee 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream(
this->foreign_streams[local_og] = foreign_stream;
}
+QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) :
+ qpdf(qpdf),
+ objid(objid),
+ gen(gen)
+{
+}
+
+void
+QPDF::StringDecrypter::decryptString(std::string& val)
+{
+ qpdf->decryptString(val, objid, gen);
+}
std::string const&
QPDF::QPDFVersion()
@@ -940,358 +952,164 @@ QPDF::readObject(PointerHolder<InputSource> input,
{
setLastObjectDescription(description, objid, generation);
qpdf_offset_t offset = input->tell();
- QPDFObjectHandle object = readObjectInternal(
- input, objid, generation, in_object_stream, false, false);
- // Override last_offset so that it points to the beginning of the
- // object we just read
- input->setLastOffset(offset);
- return object;
-}
-
-QPDFObjectHandle
-QPDF::readObjectInternal(PointerHolder<InputSource> input,
- int objid, int generation,
- bool in_object_stream,
- bool in_array, bool in_dictionary)
-{
- if (in_dictionary && in_array)
- {
- // Although dictionaries and arrays arbitrarily nest, these
- // variables indicate what is at the top of the stack right
- // now, so they can, by definition, never both be true.
- throw std::logic_error(
- "INTERNAL ERROR: readObjectInternal: in_dict && in_array");
- }
-
- QPDFObjectHandle object;
- qpdf_offset_t offset = input->tell();
- std::vector<QPDFObjectHandle> olist;
- bool done = false;
- while (! done)
+ bool empty = false;
+ PointerHolder<StringDecrypter> decrypter_ph;
+ StringDecrypter* decrypter = 0;
+ if (this->encrypted && (! in_object_stream))
{
- object = QPDFObjectHandle();
-
- QPDFTokenizer::Token token = readToken(input);
-
- switch (token.getType())
- {
- case QPDFTokenizer::tt_brace_open:
- case QPDFTokenizer::tt_brace_close:
- // Don't know what to do with these for now
- QTC::TC("qpdf", "QPDF bad brace");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unexpected brace token");
- break;
-
- case QPDFTokenizer::tt_array_close:
- if (in_array)
- {
- done = true;
- }
- else
- {
- QTC::TC("qpdf", "QPDF bad array close");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unexpected array close token");
- }
- break;
-
- case QPDFTokenizer::tt_dict_close:
- if (in_dictionary)
- {
- done = true;
- }
- else
- {
- QTC::TC("qpdf", "QPDF bad dictionary close");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unexpected dictionary close token");
- }
- break;
-
- case QPDFTokenizer::tt_array_open:
- object = readObjectInternal(
- input, objid, generation, in_object_stream, true, false);
- break;
-
- case QPDFTokenizer::tt_dict_open:
- object = readObjectInternal(
- input, objid, generation, in_object_stream, false, true);
- break;
-
- case QPDFTokenizer::tt_bool:
- object = QPDFObjectHandle::newBool(
- (token.getValue() == "true"));
- break;
-
- case QPDFTokenizer::tt_null:
- object = QPDFObjectHandle::newNull();
- break;
-
- case QPDFTokenizer::tt_integer:
- object = QPDFObjectHandle::newInteger(
- QUtil::string_to_ll(token.getValue().c_str()));
- break;
-
- case QPDFTokenizer::tt_real:
- object = QPDFObjectHandle::newReal(token.getValue());
- break;
-
- case QPDFTokenizer::tt_name:
- object = QPDFObjectHandle::newName(token.getValue());
- break;
-
- case QPDFTokenizer::tt_word:
- {
- std::string const& value = token.getValue();
- if ((value == "R") && (in_array || in_dictionary) &&
- (olist.size() >= 2) &&
- (olist[olist.size() - 1].isInteger()) &&
- (olist[olist.size() - 2].isInteger()))
- {
- // Try to resolve indirect objects
- object = QPDFObjectHandle::Factory::newIndirect(
- this,
- olist[olist.size() - 2].getIntValue(),
- olist[olist.size() - 1].getIntValue());
- olist.pop_back();
- olist.pop_back();
- }
- else if ((value == "endobj") &&
- (! (in_array || in_dictionary)))
- {
- // Nothing in the PDF spec appears to allow empty
- // objects, but they have been encountered in
- // actual PDF files and Adobe Reader appears to
- // ignore them.
- warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "empty object treated as null"));
- object = QPDFObjectHandle::newNull();
- input->seek(input->getLastOffset(), SEEK_SET);
- }
- else
- {
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unknown token while reading object (" +
- value + ")");
- }
- }
- break;
-
- case QPDFTokenizer::tt_string:
- {
- std::string val = token.getValue();
- if (this->encrypted && (! in_object_stream))
- {
- decryptString(val, objid, generation);
- }
- object = QPDFObjectHandle::newString(val);
- }
- break;
-
- default:
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unknown token type while reading object");
- break;
- }
-
- if (in_dictionary || in_array)
- {
- if (! done)
- {
- olist.push_back(object);
- }
- }
- else if (! object.isInitialized())
- {
- throw std::logic_error(
- "INTERNAL ERROR: uninitialized object (token = " +
- QUtil::int_to_string(token.getType()) +
- ", " + token.getValue() + ")");
- }
- else
- {
- done = true;
- }
+ decrypter_ph = new StringDecrypter(this, objid, generation);
+ decrypter = decrypter_ph.getPointer();
}
-
- if (in_array)
+ QPDFObjectHandle object = QPDFObjectHandle::parse(
+ input, description, this->tokenizer, empty, decrypter, this);
+ if (empty)
{
- object = QPDFObjectHandle::newArray(olist);
+ // Nothing in the PDF spec appears to allow empty objects, but
+ // they have been encountered in actual PDF files and Adobe
+ // Reader appears to ignore them.
+ warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description,
+ input->getLastOffset(),
+ "empty object treated as null"));
}
- else if (in_dictionary)
+ else if (object.isDictionary() && (! in_object_stream))
{
- // Convert list to map. Alternating elements are keys.
- std::map<std::string, QPDFObjectHandle> dict;
- if (olist.size() % 2)
- {
- QTC::TC("qpdf", "QPDF dictionary odd number of elements");
- throw QPDFExc(
- qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description, input->getLastOffset(),
- "dictionary ending here has an odd number of elements");
- }
- for (unsigned int i = 0; i < olist.size(); i += 2)
- {
- QPDFObjectHandle key_obj = olist[i];
- QPDFObjectHandle val = olist[i + 1];
- if (! key_obj.isName())
- {
- throw QPDFExc(
- qpdf_e_damaged_pdf,
- input->getName(), this->last_object_description, offset,
- std::string("dictionary key not name (") +
- key_obj.unparse() + ")");
- }
- dict[key_obj.getName()] = val;
- }
- object = QPDFObjectHandle::newDictionary(dict);
+ // check for stream
+ qpdf_offset_t cur_offset = input->tell();
+ if (readToken(input) ==
+ QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
+ {
+ // The PDF specification states that the word "stream"
+ // should be followed by either a carriage return and
+ // a newline or by a newline alone. It specifically
+ // disallowed following it by a carriage return alone
+ // since, in that case, there would be no way to tell
+ // whether the NL in a CR NL sequence was part of the
+ // stream data. However, some readers, including
+ // Adobe reader, accept a carriage return by itself
+ // when followed by a non-newline character, so that's
+ // what we do here.
+ {
+ char ch;
+ if (input->read(&ch, 1) == 0)
+ {
+ // A premature EOF here will result in some
+ // other problem that will get reported at
+ // another time.
+ }
+ else if (ch == '\n')
+ {
+ // ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with NL only");
+ }
+ else if (ch == '\r')
+ {
+ // Read another character
+ if (input->read(&ch, 1) != 0)
+ {
+ if (ch == '\n')
+ {
+ // Ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with CRNL");
+ }
+ else
+ {
+ // Treat the \r by itself as the
+ // whitespace after stream and
+ // start reading stream data in spite
+ // of not having seen a newline.
+ QTC::TC("qpdf", "QPDF stream with CR only");
+ input->unreadCh(ch);
+ warn(QPDFExc(
+ qpdf_e_damaged_pdf,
+ input->getName(),
+ this->last_object_description,
+ input->tell(),
+ "stream keyword followed"
+ " by carriage return only"));
+ }
+ }
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF stream without newline");
+ warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description,
+ input->tell(),
+ "stream keyword not followed"
+ " by proper line terminator"));
+ }
+ }
- if (! in_object_stream)
- {
- // check for stream
- qpdf_offset_t cur_offset = input->tell();
- if (readToken(input) ==
- QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
- {
- // The PDF specification states that the word "stream"
- // should be followed by either a carriage return and
- // a newline or by a newline alone. It specifically
- // disallowed following it by a carriage return alone
- // since, in that case, there would be no way to tell
- // whether the NL in a CR NL sequence was part of the
- // stream data. However, some readers, including
- // Adobe reader, accept a carriage return by itself
- // when followed by a non-newline character, so that's
- // what we do here.
- {
- char ch;
- if (input->read(&ch, 1) == 0)
- {
- // A premature EOF here will result in some
- // other problem that will get reported at
- // another time.
- }
- else if (ch == '\n')
- {
- // ready to read stream data
- QTC::TC("qpdf", "QPDF stream with NL only");
- }
- else if (ch == '\r')
- {
- // Read another character
- if (input->read(&ch, 1) != 0)
- {
- if (ch == '\n')
- {
- // Ready to read stream data
- QTC::TC("qpdf", "QPDF stream with CRNL");
- }
- else
- {
- // Treat the \r by itself as the
- // whitespace after endstream and
- // start reading stream data in spite
- // of not having seen a newline.
- QTC::TC("qpdf", "QPDF stream with CR only");
- input->unreadCh(ch);
- warn(QPDFExc(
- qpdf_e_damaged_pdf,
- input->getName(),
- this->last_object_description,
- input->tell(),
- "stream keyword followed"
- " by carriage return only"));
- }
- }
- }
- else
- {
- QTC::TC("qpdf", "QPDF stream without newline");
- warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->tell(),
- "stream keyword not followed"
- " by proper line terminator"));
- }
- }
+ // Must get offset before accessing any additional
+ // objects since resolving a previously unresolved
+ // indirect object will change file position.
+ qpdf_offset_t stream_offset = input->tell();
+ size_t length = 0;
- // Must get offset before accessing any additional
- // objects since resolving a previously unresolved
- // indirect object will change file position.
- qpdf_offset_t stream_offset = input->tell();
- size_t length = 0;
+ try
+ {
+ std::map<std::string, QPDFObjectHandle> dict =
+ object.getDictAsMap();
- try
- {
- if (dict.count("/Length") == 0)
- {
- QTC::TC("qpdf", "QPDF stream without length");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description, offset,
- "stream dictionary lacks /Length key");
- }
+ if (dict.count("/Length") == 0)
+ {
+ QTC::TC("qpdf", "QPDF stream without length");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description, offset,
+ "stream dictionary lacks /Length key");
+ }
- QPDFObjectHandle length_obj = dict["/Length"];
- if (! length_obj.isInteger())
- {
- QTC::TC("qpdf", "QPDF stream length not integer");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description, offset,
- "/Length key in stream dictionary is not "
- "an integer");
- }
+ QPDFObjectHandle length_obj = dict["/Length"];
+ if (! length_obj.isInteger())
+ {
+ QTC::TC("qpdf", "QPDF stream length not integer");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description, offset,
+ "/Length key in stream dictionary is not "
+ "an integer");
+ }
- length = length_obj.getIntValue();
- input->seek(
- stream_offset + (qpdf_offset_t)length, SEEK_SET);
- if (! (readToken(input) ==
- QPDFTokenizer::Token(
- QPDFTokenizer::tt_word, "endstream")))
- {
- QTC::TC("qpdf", "QPDF missing endstream");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "expected endstream");
- }
- }
- catch (QPDFExc& e)
- {
- if (this->attempt_recovery)
- {
- // may throw an exception
- length = recoverStreamLength(
- input, objid, generation, stream_offset);
- }
- else
- {
- throw e;
- }
- }
- object = QPDFObjectHandle::Factory::newStream(
- this, objid, generation, object, stream_offset, length);
- }
- else
- {
- input->seek(cur_offset, SEEK_SET);
- }
- }
+ length = length_obj.getIntValue();
+ input->seek(
+ stream_offset + (qpdf_offset_t)length, SEEK_SET);
+ if (! (readToken(input) ==
+ QPDFTokenizer::Token(
+ QPDFTokenizer::tt_word, "endstream")))
+ {
+ QTC::TC("qpdf", "QPDF missing endstream");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description,
+ input->getLastOffset(),
+ "expected endstream");
+ }
+ }
+ catch (QPDFExc& e)
+ {
+ if (this->attempt_recovery)
+ {
+ // may throw an exception
+ length = recoverStreamLength(
+ input, objid, generation, stream_offset);
+ }
+ else
+ {
+ throw e;
+ }
+ }
+ object = QPDFObjectHandle::Factory::newStream(
+ this, objid, generation, object, stream_offset, length);
+ }
+ else
+ {
+ input->seek(cur_offset, SEEK_SET);
+ }
}
+ // Override last_offset so that it points to the beginning of the
+ // object we just read
+ input->setLastOffset(offset);
return object;
}
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 642dee69..6bb182e8 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -11,12 +11,15 @@
#include <qpdf/QPDF_Dictionary.hh>
#include <qpdf/QPDF_Stream.hh>
#include <qpdf/QPDF_Reserved.hh>
+#include <qpdf/BufferInputSource.hh>
+#include <qpdf/QPDFExc.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <stdexcept>
#include <stdlib.h>
+#include <ctype.h>
QPDFObjectHandle::QPDFObjectHandle() :
initialized(false),
@@ -398,6 +401,13 @@ QPDFObjectHandle::getDict()
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
}
+void
+QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
+{
+ assertStream();
+ dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict);
+}
+
PointerHolder<Buffer>
QPDFObjectHandle::getStreamData()
{
@@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved()
}
QPDFObjectHandle
+QPDFObjectHandle::parse(std::string const& object_str,
+ std::string const& object_description)
+{
+ PointerHolder<InputSource> input =
+ new BufferInputSource("parsed object", object_str);
+ QPDFTokenizer tokenizer;
+ bool empty = false;
+ QPDFObjectHandle result =
+ parse(input, object_description, tokenizer, empty, 0, 0);
+ size_t offset = (size_t) input->tell();
+ while (offset < object_str.length())
+ {
+ if (! isspace(object_str[offset]))
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "trailing data found parsing object from string");
+ }
+ ++offset;
+ }
+ return result;
+}
+
+QPDFObjectHandle
+QPDFObjectHandle::parse(PointerHolder<InputSource> input,
+ std::string const& object_description,
+ QPDFTokenizer& tokenizer, bool& empty,
+ StringDecrypter* decrypter, QPDF* context)
+{
+ return parseInternal(input, object_description, tokenizer, empty,
+ decrypter, context, false, false);
+}
+
+QPDFObjectHandle
+QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
+ std::string const& object_description,
+ QPDFTokenizer& tokenizer, bool& empty,
+ StringDecrypter* decrypter, QPDF* context,
+ bool in_array, bool in_dictionary)
+{
+ empty = false;
+ if (in_dictionary && in_array)
+ {
+ // Although dictionaries and arrays arbitrarily nest, these
+ // variables indicate what is at the top of the stack right
+ // now, so they can, by definition, never both be true.
+ throw std::logic_error(
+ "INTERNAL ERROR: parseInternal: in_dict && in_array");
+ }
+
+ QPDFObjectHandle object;
+
+ qpdf_offset_t offset = input->tell();
+ std::vector<QPDFObjectHandle> olist;
+ bool done = false;
+ while (! done)
+ {
+ object = QPDFObjectHandle();
+
+ QPDFTokenizer::Token token =
+ tokenizer.readToken(input, object_description);
+
+ switch (token.getType())
+ {
+ case QPDFTokenizer::tt_brace_open:
+ case QPDFTokenizer::tt_brace_close:
+ // Don't know what to do with these for now
+ QTC::TC("qpdf", "QPDFObjectHandle bad brace");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected brace token");
+ break;
+
+ case QPDFTokenizer::tt_array_close:
+ if (in_array)
+ {
+ done = true;
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle bad array close");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected array close token");
+ }
+ break;
+
+ case QPDFTokenizer::tt_dict_close:
+ if (in_dictionary)
+ {
+ done = true;
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected dictionary close token");
+ }
+ break;
+
+ case QPDFTokenizer::tt_array_open:
+ object = parseInternal(
+ input, object_description, tokenizer, empty,
+ decrypter, context, true, false);
+ break;
+
+ case QPDFTokenizer::tt_dict_open:
+ object = parseInternal(
+ input, object_description, tokenizer, empty,
+ decrypter, context, false, true);
+ break;
+
+ case QPDFTokenizer::tt_bool:
+ object = newBool((token.getValue() == "true"));
+ break;
+
+ case QPDFTokenizer::tt_null:
+ object = newNull();
+ break;
+
+ case QPDFTokenizer::tt_integer:
+ object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
+ break;
+
+ case QPDFTokenizer::tt_real:
+ object = newReal(token.getValue());
+ break;
+
+ case QPDFTokenizer::tt_name:
+ object = newName(token.getValue());
+ break;
+
+ case QPDFTokenizer::tt_word:
+ {
+ std::string const& value = token.getValue();
+ if ((value == "R") && (in_array || in_dictionary) &&
+ (olist.size() >= 2) &&
+ (olist[olist.size() - 1].isInteger()) &&
+ (olist[olist.size() - 2].isInteger()))
+ {
+ if (context == 0)
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle indirect without context");
+ throw std::logic_error(
+ "QPDFObjectHandle::parse called without context"
+ " on an object with indirect references");
+ }
+ // Try to resolve indirect objects
+ object = newIndirect(
+ context,
+ olist[olist.size() - 2].getIntValue(),
+ olist[olist.size() - 1].getIntValue());
+ olist.pop_back();
+ olist.pop_back();
+ }
+ else if ((value == "endobj") &&
+ (! (in_array || in_dictionary)))
+ {
+ // We just saw endobj without having read
+ // anything. Treat this as a null and do not move
+ // the input source's offset.
+ object = newNull();
+ input->seek(input->getLastOffset(), SEEK_SET);
+ empty = true;
+ }
+ else
+ {
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unknown token while reading object (" +
+ value + ")");
+ }
+ }
+ break;
+
+ case QPDFTokenizer::tt_string:
+ {
+ std::string val = token.getValue();
+ if (decrypter)
+ {
+ decrypter->decryptString(val);
+ }
+ object = QPDFObjectHandle::newString(val);
+ }
+
+ break;
+
+ default:
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unknown token type while reading object");
+ break;
+ }
+
+ if (in_dictionary || in_array)
+ {
+ if (! done)
+ {
+ olist.push_back(object);
+ }
+ }
+ else if (! object.isInitialized())
+ {
+ throw std::logic_error(
+ "INTERNAL ERROR: uninitialized object (token = " +
+ QUtil::int_to_string(token.getType()) +
+ ", " + token.getValue() + ")");
+ }
+ else
+ {
+ done = true;
+ }
+ }
+
+ if (in_array)
+ {
+ object = newArray(olist);
+ }
+ else if (in_dictionary)
+ {
+ // Convert list to map. Alternating elements are keys.
+ std::map<std::string, QPDFObjectHandle> dict;
+ if (olist.size() % 2)
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements");
+ throw QPDFExc(
+ qpdf_e_damaged_pdf, input->getName(),
+ object_description, input->getLastOffset(),
+ "dictionary ending here has an odd number of elements");
+ }
+ for (unsigned int i = 0; i < olist.size(); i += 2)
+ {
+ QPDFObjectHandle key_obj = olist[i];
+ QPDFObjectHandle val = olist[i + 1];
+ if (! key_obj.isName())
+ {
+ throw QPDFExc(
+ qpdf_e_damaged_pdf,
+ input->getName(), object_description, offset,
+ std::string("dictionary key not name (") +
+ key_obj.unparse() + ")");
+ }
+ dict[key_obj.getName()] = val;
+ }
+ object = newDictionary(dict);
+ }
+
+ return object;
+}
+
+QPDFObjectHandle
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
{
return QPDFObjectHandle(qpdf, objid, generation);
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index c089bcc1..970ee58b 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
"/Length", QPDFObjectHandle::newInteger((int)length));
}
}
+
+// Replace this stream's dictionary with new_dict and resynchronize the
+// cached length: an integer /Length entry is adopted as the new length,
+// and any non-integer result resets the length to 0.
+void
+QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
+{
+    // The dictionary is installed first, before /Length is consulted.
+    this->stream_dict = new_dict;
+
+    // Look /Length up on the handle that was passed in.
+    QPDFObjectHandle len_entry = new_dict.getKey("/Length");
+    this->length =
+        len_entry.isInteger() ? len_entry.getIntValue() : 0;
+}
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index ce46d994..34eaceeb 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject
QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms);
+ void replaceDict(QPDFObjectHandle new_dict);
+
// Replace object ID and generation. This may only be called if
// object ID and generation are 0. It is used by QPDFObjectHandle
// when adding streams to files.
diff --git a/qpdf/pdf_from_scratch.cc b/qpdf/pdf_from_scratch.cc
index 2f853c24..10ef5512 100644
--- a/qpdf/pdf_from_scratch.cc
+++ b/qpdf/pdf_from_scratch.cc
@@ -38,25 +38,20 @@ void runtest(int n)
// Create a minimal PDF from scratch.
QPDFObjectHandle font = pdf.makeIndirectObject(
- QPDFObjectHandle::newDictionary());
- font.replaceKey("/Type", newName("/Font"));
- font.replaceKey("/Subtype", newName("/Type1"));
- font.replaceKey("/Name", newName("/F1"));
- font.replaceKey("/BaseFont", newName("/Helvetica"));
- font.replaceKey("/Encoding", newName("/WinAnsiEncoding"));
+ QPDFObjectHandle::parse("<<"
+ " /Type /Font"
+ " /Subtype /Type1"
+ " /Name /F1"
+ " /BaseFont /Helvetica"
+ " /Encoding /WinAnsiEncoding"
+ ">>"));
QPDFObjectHandle procset = pdf.makeIndirectObject(
- QPDFObjectHandle::newArray());
- procset.appendItem(newName("/PDF"));
- procset.appendItem(newName("/Text"));
+ QPDFObjectHandle::parse("[/PDF /Text]"));
QPDFObjectHandle contents = createPageContents(pdf, "First Page");
- QPDFObjectHandle mediabox = QPDFObjectHandle::newArray();
- mediabox.appendItem(QPDFObjectHandle::newInteger(0));
- mediabox.appendItem(QPDFObjectHandle::newInteger(0));
- mediabox.appendItem(QPDFObjectHandle::newInteger(612));
- mediabox.appendItem(QPDFObjectHandle::newInteger(792));
+ QPDFObjectHandle mediabox = QPDFObjectHandle::parse("[0 0 612 792]");
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
rfont.replaceKey("/F1", font);
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index c51e527b..ae771c6d 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -60,13 +60,13 @@ QPDF missing trailer 0
QPDF trailer lacks size 0
QPDF trailer size not integer 0
QPDF trailer prev not integer 0
-QPDF bad brace 0
-QPDF bad array close 0
-QPDF dictionary odd number of elements 0
+QPDFObjectHandle bad brace 0
+QPDFObjectHandle bad array close 0
+QPDFObjectHandle dictionary odd number of elements 0
QPDF stream without length 0
QPDF stream length not integer 0
QPDF missing endstream 0
-QPDF bad dictionary close 0
+QPDFObjectHandle bad dictionary close 0
QPDF can't find xref 0
QPDF_Tokenizer bad ) 0
QPDF_Tokenizer bad > 0
@@ -235,3 +235,5 @@ QPDF not copying pages object 0
QPDF insert foreign page 0
QPDFWriter foreign object 0
QPDFWriter copy use_aes 1
+QPDFObjectHandle indirect without context 0
+QPDFObjectHandle trailing data in parse 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 1b979724..02a90736 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -149,7 +149,7 @@ $td->runtest("remove page we don't have",
$td->NORMALIZE_NEWLINES);
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 44;
+$n_tests += 45;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -370,6 +370,10 @@ $td->runtest("detect foreign object in write",
" copy-foreign-objects-in.pdf minimal.pdf"},
{$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
+$td->runtest("parse objects from string",
+ {$td->COMMAND => "test_driver 31 minimal.pdf"}, # file not used
+ {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
diff --git a/qpdf/qtest/qpdf/bad22.out b/qpdf/qtest/qpdf/bad22.out
index 2ff4de23..ec6d5f8e 100644
--- a/qpdf/qtest/qpdf/bad22.out
+++ b/qpdf/qtest/qpdf/bad22.out
@@ -1 +1 @@
-bad22.pdf (object 4 0, file position 317): stream dictionary lacks /Length key
+bad22.pdf (object 4 0, file position 314): stream dictionary lacks /Length key
diff --git a/qpdf/qtest/qpdf/bad23.out b/qpdf/qtest/qpdf/bad23.out
index 9ff20de3..b4cf25e8 100644
--- a/qpdf/qtest/qpdf/bad23.out
+++ b/qpdf/qtest/qpdf/bad23.out
@@ -1 +1 @@
-bad23.pdf (object 4 0, file position 317): /Length key in stream dictionary is not an integer
+bad23.pdf (object 4 0, file position 314): /Length key in stream dictionary is not an integer
diff --git a/qpdf/qtest/qpdf/parse-object.out b/qpdf/qtest/qpdf/parse-object.out
new file mode 100644
index 00000000..456e2f80
--- /dev/null
+++ b/qpdf/qtest/qpdf/parse-object.out
@@ -0,0 +1,4 @@
+[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
+logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
+trailing data: parsed object (trailing test): trailing data found parsing object from string
+test 31 done
diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc
index 3d2f0dca..6cbb7882 100644
--- a/qpdf/test_driver.cc
+++ b/qpdf/test_driver.cc
@@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2)
<< std::endl;
}
}
+ else if (n == 31)
+ {
+ // Test object parsing from a string. The input file is not used.
+
+ QPDFObjectHandle o1 =
+ QPDFObjectHandle::parse(
+ "[/name 16059 3.14159 false\n"
+ " << /key true /other [ (string1) (string2) ] >> null]");
+ std::cout << o1.unparse() << std::endl;
+ QPDFObjectHandle o2 = QPDFObjectHandle::parse(" 12345 \f ");
+ assert(o2.isInteger() && (o2.getIntValue() == 12345));
+ try
+ {
+ QPDFObjectHandle::parse("[1 0 R]", "indirect test");
+ std::cout << "oops -- didn't throw" << std::endl;
+ }
+ catch (std::logic_error e)
+ {
+ std::cout << "logic error parsing indirect: " << e.what()
+ << std::endl;
+ }
+ try
+ {
+ QPDFObjectHandle::parse("0 trailing", "trailing test");
+ std::cout << "oops -- didn't throw" << std::endl;
+ }
+ catch (std::runtime_error e)
+ {
+ std::cout << "trailing data: " << e.what()
+ << std::endl;
+ }
+ }
else
{
throw std::runtime_error(std::string("invalid test ") +