summary refs log tree commit diff stats
path: root/libqpdf
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2012-07-21 15:00:06 +0200
committer Jay Berkenbilt <ejb@ql.org> 2012-07-21 15:06:10 +0200
commit 6bbea4baa0c06b39b1b71f1aa6fc276789296556 (patch)
tree 62198136a609c86029d124323be9e2ea72f88d9a /libqpdf
parent f3e267fce28c58039789379ba3488ad12c20a7f6 (diff)
download qpdf-6bbea4baa0c06b39b1b71f1aa6fc276789296556.tar.zst
Implement QPDFObjectHandle::parse
Move object parsing code from QPDF to QPDFObjectHandle and parameterize the parts of it that are specific to a QPDF object. Provide a version that can't handle indirect objects and that can be called on an arbitrary string. A side effect of this change is that the offset used when reporting invalid stream length has changed, but since the new value seems like a better value than the old one, the test suite has been updated rather than making the code backward compatible. This only affects the offset reported for invalid streams that lack /Length or have an invalid /Length key. Updated some test code and examples to use QPDFObjectHandle::parse. Supporting changes include adding a BufferInputSource constructor that takes a string.
Diffstat (limited to 'libqpdf')
-rw-r--r-- libqpdf/BufferInputSource.cc 12
-rw-r--r-- libqpdf/QPDF.cc 496
-rw-r--r-- libqpdf/QPDFObjectHandle.cc 269
-rw-r--r-- libqpdf/QPDF_Stream.cc 15
-rw-r--r-- libqpdf/qpdf/QPDF_Stream.hh 2
5 files changed, 455 insertions, 339 deletions
diff --git a/libqpdf/BufferInputSource.cc b/libqpdf/BufferInputSource.cc
index 6909dce2..03439955 100644
--- a/libqpdf/BufferInputSource.cc
+++ b/libqpdf/BufferInputSource.cc
@@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description,
{
}
+BufferInputSource::BufferInputSource(std::string const& description,
+ std::string const& contents) :
+ own_memory(true),
+ description(description),
+ buf(0),
+ cur_offset(0)
+{
+ this->buf = new Buffer(contents.length());
+ unsigned char* bp = buf->getBuffer();
+ memcpy(bp, (char*)contents.c_str(), contents.length());
+}
+
BufferInputSource::~BufferInputSource()
{
if (own_memory)
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index e1ce5fc4..bee2f3ee 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream(
this->foreign_streams[local_og] = foreign_stream;
}
+QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) :
+ qpdf(qpdf),
+ objid(objid),
+ gen(gen)
+{
+}
+
+void
+QPDF::StringDecrypter::decryptString(std::string& val)
+{
+ qpdf->decryptString(val, objid, gen);
+}
std::string const&
QPDF::QPDFVersion()
@@ -940,358 +952,164 @@ QPDF::readObject(PointerHolder<InputSource> input,
{
setLastObjectDescription(description, objid, generation);
qpdf_offset_t offset = input->tell();
- QPDFObjectHandle object = readObjectInternal(
- input, objid, generation, in_object_stream, false, false);
- // Override last_offset so that it points to the beginning of the
- // object we just read
- input->setLastOffset(offset);
- return object;
-}
-
-QPDFObjectHandle
-QPDF::readObjectInternal(PointerHolder<InputSource> input,
- int objid, int generation,
- bool in_object_stream,
- bool in_array, bool in_dictionary)
-{
- if (in_dictionary && in_array)
- {
- // Although dictionaries and arrays arbitrarily nest, these
- // variables indicate what is at the top of the stack right
- // now, so they can, by definition, never both be true.
- throw std::logic_error(
- "INTERNAL ERROR: readObjectInternal: in_dict && in_array");
- }
-
- QPDFObjectHandle object;
- qpdf_offset_t offset = input->tell();
- std::vector<QPDFObjectHandle> olist;
- bool done = false;
- while (! done)
+ bool empty = false;
+ PointerHolder<StringDecrypter> decrypter_ph;
+ StringDecrypter* decrypter = 0;
+ if (this->encrypted && (! in_object_stream))
{
- object = QPDFObjectHandle();
-
- QPDFTokenizer::Token token = readToken(input);
-
- switch (token.getType())
- {
- case QPDFTokenizer::tt_brace_open:
- case QPDFTokenizer::tt_brace_close:
- // Don't know what to do with these for now
- QTC::TC("qpdf", "QPDF bad brace");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unexpected brace token");
- break;
-
- case QPDFTokenizer::tt_array_close:
- if (in_array)
- {
- done = true;
- }
- else
- {
- QTC::TC("qpdf", "QPDF bad array close");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unexpected array close token");
- }
- break;
-
- case QPDFTokenizer::tt_dict_close:
- if (in_dictionary)
- {
- done = true;
- }
- else
- {
- QTC::TC("qpdf", "QPDF bad dictionary close");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unexpected dictionary close token");
- }
- break;
-
- case QPDFTokenizer::tt_array_open:
- object = readObjectInternal(
- input, objid, generation, in_object_stream, true, false);
- break;
-
- case QPDFTokenizer::tt_dict_open:
- object = readObjectInternal(
- input, objid, generation, in_object_stream, false, true);
- break;
-
- case QPDFTokenizer::tt_bool:
- object = QPDFObjectHandle::newBool(
- (token.getValue() == "true"));
- break;
-
- case QPDFTokenizer::tt_null:
- object = QPDFObjectHandle::newNull();
- break;
-
- case QPDFTokenizer::tt_integer:
- object = QPDFObjectHandle::newInteger(
- QUtil::string_to_ll(token.getValue().c_str()));
- break;
-
- case QPDFTokenizer::tt_real:
- object = QPDFObjectHandle::newReal(token.getValue());
- break;
-
- case QPDFTokenizer::tt_name:
- object = QPDFObjectHandle::newName(token.getValue());
- break;
-
- case QPDFTokenizer::tt_word:
- {
- std::string const& value = token.getValue();
- if ((value == "R") && (in_array || in_dictionary) &&
- (olist.size() >= 2) &&
- (olist[olist.size() - 1].isInteger()) &&
- (olist[olist.size() - 2].isInteger()))
- {
- // Try to resolve indirect objects
- object = QPDFObjectHandle::Factory::newIndirect(
- this,
- olist[olist.size() - 2].getIntValue(),
- olist[olist.size() - 1].getIntValue());
- olist.pop_back();
- olist.pop_back();
- }
- else if ((value == "endobj") &&
- (! (in_array || in_dictionary)))
- {
- // Nothing in the PDF spec appears to allow empty
- // objects, but they have been encountered in
- // actual PDF files and Adobe Reader appears to
- // ignore them.
- warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "empty object treated as null"));
- object = QPDFObjectHandle::newNull();
- input->seek(input->getLastOffset(), SEEK_SET);
- }
- else
- {
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unknown token while reading object (" +
- value + ")");
- }
- }
- break;
-
- case QPDFTokenizer::tt_string:
- {
- std::string val = token.getValue();
- if (this->encrypted && (! in_object_stream))
- {
- decryptString(val, objid, generation);
- }
- object = QPDFObjectHandle::newString(val);
- }
- break;
-
- default:
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "unknown token type while reading object");
- break;
- }
-
- if (in_dictionary || in_array)
- {
- if (! done)
- {
- olist.push_back(object);
- }
- }
- else if (! object.isInitialized())
- {
- throw std::logic_error(
- "INTERNAL ERROR: uninitialized object (token = " +
- QUtil::int_to_string(token.getType()) +
- ", " + token.getValue() + ")");
- }
- else
- {
- done = true;
- }
+ decrypter_ph = new StringDecrypter(this, objid, generation);
+ decrypter = decrypter_ph.getPointer();
}
-
- if (in_array)
+ QPDFObjectHandle object = QPDFObjectHandle::parse(
+ input, description, this->tokenizer, empty, decrypter, this);
+ if (empty)
{
- object = QPDFObjectHandle::newArray(olist);
+ // Nothing in the PDF spec appears to allow empty objects, but
+ // they have been encountered in actual PDF files and Adobe
+ // Reader appears to ignore them.
+ warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description,
+ input->getLastOffset(),
+ "empty object treated as null"));
}
- else if (in_dictionary)
+ else if (object.isDictionary() && (! in_object_stream))
{
- // Convert list to map. Alternating elements are keys.
- std::map<std::string, QPDFObjectHandle> dict;
- if (olist.size() % 2)
- {
- QTC::TC("qpdf", "QPDF dictionary odd number of elements");
- throw QPDFExc(
- qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description, input->getLastOffset(),
- "dictionary ending here has an odd number of elements");
- }
- for (unsigned int i = 0; i < olist.size(); i += 2)
- {
- QPDFObjectHandle key_obj = olist[i];
- QPDFObjectHandle val = olist[i + 1];
- if (! key_obj.isName())
- {
- throw QPDFExc(
- qpdf_e_damaged_pdf,
- input->getName(), this->last_object_description, offset,
- std::string("dictionary key not name (") +
- key_obj.unparse() + ")");
- }
- dict[key_obj.getName()] = val;
- }
- object = QPDFObjectHandle::newDictionary(dict);
+ // check for stream
+ qpdf_offset_t cur_offset = input->tell();
+ if (readToken(input) ==
+ QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
+ {
+ // The PDF specification states that the word "stream"
+ // should be followed by either a carriage return and
+ // a newline or by a newline alone. It specifically
+ // disallowed following it by a carriage return alone
+ // since, in that case, there would be no way to tell
+ // whether the NL in a CR NL sequence was part of the
+ // stream data. However, some readers, including
+ // Adobe reader, accept a carriage return by itself
+ // when followed by a non-newline character, so that's
+ // what we do here.
+ {
+ char ch;
+ if (input->read(&ch, 1) == 0)
+ {
+ // A premature EOF here will result in some
+ // other problem that will get reported at
+ // another time.
+ }
+ else if (ch == '\n')
+ {
+ // ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with NL only");
+ }
+ else if (ch == '\r')
+ {
+ // Read another character
+ if (input->read(&ch, 1) != 0)
+ {
+ if (ch == '\n')
+ {
+ // Ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with CRNL");
+ }
+ else
+ {
+ // Treat the \r by itself as the
+ // whitespace after endstream and
+ // start reading stream data in spite
+ // of not having seen a newline.
+ QTC::TC("qpdf", "QPDF stream with CR only");
+ input->unreadCh(ch);
+ warn(QPDFExc(
+ qpdf_e_damaged_pdf,
+ input->getName(),
+ this->last_object_description,
+ input->tell(),
+ "stream keyword followed"
+ " by carriage return only"));
+ }
+ }
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF stream without newline");
+ warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description,
+ input->tell(),
+ "stream keyword not followed"
+ " by proper line terminator"));
+ }
+ }
- if (! in_object_stream)
- {
- // check for stream
- qpdf_offset_t cur_offset = input->tell();
- if (readToken(input) ==
- QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
- {
- // The PDF specification states that the word "stream"
- // should be followed by either a carriage return and
- // a newline or by a newline alone. It specifically
- // disallowed following it by a carriage return alone
- // since, in that case, there would be no way to tell
- // whether the NL in a CR NL sequence was part of the
- // stream data. However, some readers, including
- // Adobe reader, accept a carriage return by itself
- // when followed by a non-newline character, so that's
- // what we do here.
- {
- char ch;
- if (input->read(&ch, 1) == 0)
- {
- // A premature EOF here will result in some
- // other problem that will get reported at
- // another time.
- }
- else if (ch == '\n')
- {
- // ready to read stream data
- QTC::TC("qpdf", "QPDF stream with NL only");
- }
- else if (ch == '\r')
- {
- // Read another character
- if (input->read(&ch, 1) != 0)
- {
- if (ch == '\n')
- {
- // Ready to read stream data
- QTC::TC("qpdf", "QPDF stream with CRNL");
- }
- else
- {
- // Treat the \r by itself as the
- // whitespace after endstream and
- // start reading stream data in spite
- // of not having seen a newline.
- QTC::TC("qpdf", "QPDF stream with CR only");
- input->unreadCh(ch);
- warn(QPDFExc(
- qpdf_e_damaged_pdf,
- input->getName(),
- this->last_object_description,
- input->tell(),
- "stream keyword followed"
- " by carriage return only"));
- }
- }
- }
- else
- {
- QTC::TC("qpdf", "QPDF stream without newline");
- warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->tell(),
- "stream keyword not followed"
- " by proper line terminator"));
- }
- }
+ // Must get offset before accessing any additional
+ // objects since resolving a previously unresolved
+ // indirect object will change file position.
+ qpdf_offset_t stream_offset = input->tell();
+ size_t length = 0;
- // Must get offset before accessing any additional
- // objects since resolving a previously unresolved
- // indirect object will change file position.
- qpdf_offset_t stream_offset = input->tell();
- size_t length = 0;
+ try
+ {
+ std::map<std::string, QPDFObjectHandle> dict =
+ object.getDictAsMap();
- try
- {
- if (dict.count("/Length") == 0)
- {
- QTC::TC("qpdf", "QPDF stream without length");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description, offset,
- "stream dictionary lacks /Length key");
- }
+ if (dict.count("/Length") == 0)
+ {
+ QTC::TC("qpdf", "QPDF stream without length");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description, offset,
+ "stream dictionary lacks /Length key");
+ }
- QPDFObjectHandle length_obj = dict["/Length"];
- if (! length_obj.isInteger())
- {
- QTC::TC("qpdf", "QPDF stream length not integer");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description, offset,
- "/Length key in stream dictionary is not "
- "an integer");
- }
+ QPDFObjectHandle length_obj = dict["/Length"];
+ if (! length_obj.isInteger())
+ {
+ QTC::TC("qpdf", "QPDF stream length not integer");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description, offset,
+ "/Length key in stream dictionary is not "
+ "an integer");
+ }
- length = length_obj.getIntValue();
- input->seek(
- stream_offset + (qpdf_offset_t)length, SEEK_SET);
- if (! (readToken(input) ==
- QPDFTokenizer::Token(
- QPDFTokenizer::tt_word, "endstream")))
- {
- QTC::TC("qpdf", "QPDF missing endstream");
- throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
- this->last_object_description,
- input->getLastOffset(),
- "expected endstream");
- }
- }
- catch (QPDFExc& e)
- {
- if (this->attempt_recovery)
- {
- // may throw an exception
- length = recoverStreamLength(
- input, objid, generation, stream_offset);
- }
- else
- {
- throw e;
- }
- }
- object = QPDFObjectHandle::Factory::newStream(
- this, objid, generation, object, stream_offset, length);
- }
- else
- {
- input->seek(cur_offset, SEEK_SET);
- }
- }
+ length = length_obj.getIntValue();
+ input->seek(
+ stream_offset + (qpdf_offset_t)length, SEEK_SET);
+ if (! (readToken(input) ==
+ QPDFTokenizer::Token(
+ QPDFTokenizer::tt_word, "endstream")))
+ {
+ QTC::TC("qpdf", "QPDF missing endstream");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description,
+ input->getLastOffset(),
+ "expected endstream");
+ }
+ }
+ catch (QPDFExc& e)
+ {
+ if (this->attempt_recovery)
+ {
+ // may throw an exception
+ length = recoverStreamLength(
+ input, objid, generation, stream_offset);
+ }
+ else
+ {
+ throw e;
+ }
+ }
+ object = QPDFObjectHandle::Factory::newStream(
+ this, objid, generation, object, stream_offset, length);
+ }
+ else
+ {
+ input->seek(cur_offset, SEEK_SET);
+ }
}
+ // Override last_offset so that it points to the beginning of the
+ // object we just read
+ input->setLastOffset(offset);
return object;
}
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 642dee69..6bb182e8 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -11,12 +11,15 @@
#include <qpdf/QPDF_Dictionary.hh>
#include <qpdf/QPDF_Stream.hh>
#include <qpdf/QPDF_Reserved.hh>
+#include <qpdf/BufferInputSource.hh>
+#include <qpdf/QPDFExc.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <stdexcept>
#include <stdlib.h>
+#include <ctype.h>
QPDFObjectHandle::QPDFObjectHandle() :
initialized(false),
@@ -398,6 +401,13 @@ QPDFObjectHandle::getDict()
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
}
+void
+QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
+{
+ assertStream();
+ dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict);
+}
+
PointerHolder<Buffer>
QPDFObjectHandle::getStreamData()
{
@@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved()
}
QPDFObjectHandle
+QPDFObjectHandle::parse(std::string const& object_str,
+ std::string const& object_description)
+{
+ PointerHolder<InputSource> input =
+ new BufferInputSource("parsed object", object_str);
+ QPDFTokenizer tokenizer;
+ bool empty = false;
+ QPDFObjectHandle result =
+ parse(input, object_description, tokenizer, empty, 0, 0);
+ size_t offset = (size_t) input->tell();
+ while (offset < object_str.length())
+ {
+ if (! isspace(object_str[offset]))
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "trailing data found parsing object from string");
+ }
+ ++offset;
+ }
+ return result;
+}
+
+QPDFObjectHandle
+QPDFObjectHandle::parse(PointerHolder<InputSource> input,
+ std::string const& object_description,
+ QPDFTokenizer& tokenizer, bool& empty,
+ StringDecrypter* decrypter, QPDF* context)
+{
+ return parseInternal(input, object_description, tokenizer, empty,
+ decrypter, context, false, false);
+}
+
+QPDFObjectHandle
+QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
+ std::string const& object_description,
+ QPDFTokenizer& tokenizer, bool& empty,
+ StringDecrypter* decrypter, QPDF* context,
+ bool in_array, bool in_dictionary)
+{
+ empty = false;
+ if (in_dictionary && in_array)
+ {
+ // Although dictionaries and arrays arbitrarily nest, these
+ // variables indicate what is at the top of the stack right
+ // now, so they can, by definition, never both be true.
+ throw std::logic_error(
+ "INTERNAL ERROR: parseInternal: in_dict && in_array");
+ }
+
+ QPDFObjectHandle object;
+
+ qpdf_offset_t offset = input->tell();
+ std::vector<QPDFObjectHandle> olist;
+ bool done = false;
+ while (! done)
+ {
+ object = QPDFObjectHandle();
+
+ QPDFTokenizer::Token token =
+ tokenizer.readToken(input, object_description);
+
+ switch (token.getType())
+ {
+ case QPDFTokenizer::tt_brace_open:
+ case QPDFTokenizer::tt_brace_close:
+ // Don't know what to do with these for now
+ QTC::TC("qpdf", "QPDFObjectHandle bad brace");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected brace token");
+ break;
+
+ case QPDFTokenizer::tt_array_close:
+ if (in_array)
+ {
+ done = true;
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle bad array close");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected array close token");
+ }
+ break;
+
+ case QPDFTokenizer::tt_dict_close:
+ if (in_dictionary)
+ {
+ done = true;
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected dictionary close token");
+ }
+ break;
+
+ case QPDFTokenizer::tt_array_open:
+ object = parseInternal(
+ input, object_description, tokenizer, empty,
+ decrypter, context, true, false);
+ break;
+
+ case QPDFTokenizer::tt_dict_open:
+ object = parseInternal(
+ input, object_description, tokenizer, empty,
+ decrypter, context, false, true);
+ break;
+
+ case QPDFTokenizer::tt_bool:
+ object = newBool((token.getValue() == "true"));
+ break;
+
+ case QPDFTokenizer::tt_null:
+ object = newNull();
+ break;
+
+ case QPDFTokenizer::tt_integer:
+ object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
+ break;
+
+ case QPDFTokenizer::tt_real:
+ object = newReal(token.getValue());
+ break;
+
+ case QPDFTokenizer::tt_name:
+ object = newName(token.getValue());
+ break;
+
+ case QPDFTokenizer::tt_word:
+ {
+ std::string const& value = token.getValue();
+ if ((value == "R") && (in_array || in_dictionary) &&
+ (olist.size() >= 2) &&
+ (olist[olist.size() - 1].isInteger()) &&
+ (olist[olist.size() - 2].isInteger()))
+ {
+ if (context == 0)
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle indirect without context");
+ throw std::logic_error(
+ "QPDFObjectHandle::parse called without context"
+ " on an object with indirect references");
+ }
+ // Try to resolve indirect objects
+ object = newIndirect(
+ context,
+ olist[olist.size() - 2].getIntValue(),
+ olist[olist.size() - 1].getIntValue());
+ olist.pop_back();
+ olist.pop_back();
+ }
+ else if ((value == "endobj") &&
+ (! (in_array || in_dictionary)))
+ {
+ // We just saw endobj without having read
+ // anything. Treat this as a null and do not move
+ // the input source's offset.
+ object = newNull();
+ input->seek(input->getLastOffset(), SEEK_SET);
+ empty = true;
+ }
+ else
+ {
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unknown token while reading object (" +
+ value + ")");
+ }
+ }
+ break;
+
+ case QPDFTokenizer::tt_string:
+ {
+ std::string val = token.getValue();
+ if (decrypter)
+ {
+ decrypter->decryptString(val);
+ }
+ object = QPDFObjectHandle::newString(val);
+ }
+
+ break;
+
+ default:
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unknown token type while reading object");
+ break;
+ }
+
+ if (in_dictionary || in_array)
+ {
+ if (! done)
+ {
+ olist.push_back(object);
+ }
+ }
+ else if (! object.isInitialized())
+ {
+ throw std::logic_error(
+ "INTERNAL ERROR: uninitialized object (token = " +
+ QUtil::int_to_string(token.getType()) +
+ ", " + token.getValue() + ")");
+ }
+ else
+ {
+ done = true;
+ }
+ }
+
+ if (in_array)
+ {
+ object = newArray(olist);
+ }
+ else if (in_dictionary)
+ {
+ // Convert list to map. Alternating elements are keys.
+ std::map<std::string, QPDFObjectHandle> dict;
+ if (olist.size() % 2)
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements");
+ throw QPDFExc(
+ qpdf_e_damaged_pdf, input->getName(),
+ object_description, input->getLastOffset(),
+ "dictionary ending here has an odd number of elements");
+ }
+ for (unsigned int i = 0; i < olist.size(); i += 2)
+ {
+ QPDFObjectHandle key_obj = olist[i];
+ QPDFObjectHandle val = olist[i + 1];
+ if (! key_obj.isName())
+ {
+ throw QPDFExc(
+ qpdf_e_damaged_pdf,
+ input->getName(), object_description, offset,
+ std::string("dictionary key not name (") +
+ key_obj.unparse() + ")");
+ }
+ dict[key_obj.getName()] = val;
+ }
+ object = newDictionary(dict);
+ }
+
+ return object;
+}
+
+QPDFObjectHandle
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
{
return QPDFObjectHandle(qpdf, objid, generation);
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index c089bcc1..970ee58b 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
"/Length", QPDFObjectHandle::newInteger((int)length));
}
}
+
+void
+QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
+{
+ this->stream_dict = new_dict;
+ QPDFObjectHandle length_obj = new_dict.getKey("/Length");
+ if (length_obj.isInteger())
+ {
+ this->length = length_obj.getIntValue();
+ }
+ else
+ {
+ this->length = 0;
+ }
+}
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index ce46d994..34eaceeb 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject
QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms);
+ void replaceDict(QPDFObjectHandle new_dict);
+
// Replace object ID and generation. This may only be called if
// object ID and generation are 0. It is used by QPDFObjectHandle
// when adding streams to files.