about summary refs log tree commit diff stats
path: root/libqpdf/QPDFObjectHandle.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libqpdf/QPDFObjectHandle.cc')
-rw-r--r-- libqpdf/QPDFObjectHandle.cc 269
1 files changed, 269 insertions, 0 deletions
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 642dee69..6bb182e8 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -11,12 +11,15 @@
#include <qpdf/QPDF_Dictionary.hh>
#include <qpdf/QPDF_Stream.hh>
#include <qpdf/QPDF_Reserved.hh>
+#include <qpdf/BufferInputSource.hh>
+#include <qpdf/QPDFExc.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <stdexcept>
#include <stdlib.h>
+#include <ctype.h>
QPDFObjectHandle::QPDFObjectHandle() :
initialized(false),
@@ -398,6 +401,13 @@ QPDFObjectHandle::getDict()
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
}
+// Replace the dictionary of this stream object with new_dict.
+// assertStream() runs first; the held object is then downcast to
+// QPDF_Stream and the replacement is delegated to it.
+void
+QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
+{
+    assertStream();
+    QPDF_Stream* stream = dynamic_cast<QPDF_Stream*>(obj.getPointer());
+    stream->replaceDict(new_dict);
+}
+
PointerHolder<Buffer>
QPDFObjectHandle::getStreamData()
{
@@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved()
}
QPDFObjectHandle
+QPDFObjectHandle::parse(std::string const& object_str,
+ std::string const& object_description)
+{
+ PointerHolder<InputSource> input =
+ new BufferInputSource("parsed object", object_str);
+ QPDFTokenizer tokenizer;
+ bool empty = false;
+ QPDFObjectHandle result =
+ parse(input, object_description, tokenizer, empty, 0, 0);
+ size_t offset = (size_t) input->tell();
+ while (offset < object_str.length())
+ {
+ if (! isspace(object_str[offset]))
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "trailing data found parsing object from string");
+ }
+ ++offset;
+ }
+ return result;
+}
+
+// Parse one object from input, starting outside any array or
+// dictionary. All arguments are forwarded unchanged to
+// parseInternal(), which does the actual work and sets `empty`.
+QPDFObjectHandle
+QPDFObjectHandle::parse(PointerHolder<InputSource> input,
+                        std::string const& object_description,
+                        QPDFTokenizer& tokenizer, bool& empty,
+                        StringDecrypter* decrypter, QPDF* context)
+{
+    // A top-level parse is not inside any container.
+    bool const in_array = false;
+    bool const in_dictionary = false;
+    return parseInternal(input, object_description, tokenizer, empty,
+                         decrypter, context, in_array, in_dictionary);
+}
+
+QPDFObjectHandle // Reads tokens from input and builds one object; recurses for nested arrays and dictionaries.
+QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, // input: source the tokenizer reads from
+ std::string const& object_description, // passed to the tokenizer and used in QPDFExc errors
+ QPDFTokenizer& tokenizer, bool& empty, // empty: out-flag, set true only when a bare "endobj" is read outside any container
+ StringDecrypter* decrypter, QPDF* context, // decrypter: applied to string tokens when non-null; context: required for indirect references
+ bool in_array, bool in_dictionary) // which container, if any, this call is collecting elements for
+{
+ empty = false; // reset on every call, including recursive ones
+ if (in_dictionary && in_array)
+ {
+ // Although dictionaries and arrays arbitrarily nest, these
+ // variables indicate what is at the top of the stack right
+ // now, so they can, by definition, never both be true.
+ throw std::logic_error(
+ "INTERNAL ERROR: parseInternal: in_dict && in_array");
+ }
+
+ QPDFObjectHandle object;
+
+ qpdf_offset_t offset = input->tell(); // position where this call started; reported by the "dictionary key not name" error
+ std::vector<QPDFObjectHandle> olist; // elements collected so far for the current array or dictionary
+ bool done = false;
+ while (! done)
+ {
+ object = QPDFObjectHandle(); // start each iteration with a default-constructed handle; close tokens leave it that way
+
+ QPDFTokenizer::Token token =
+ tokenizer.readToken(input, object_description);
+
+ switch (token.getType())
+ {
+ case QPDFTokenizer::tt_brace_open:
+ case QPDFTokenizer::tt_brace_close:
+ // Don't know what to do with these for now
+ QTC::TC("qpdf", "QPDFObjectHandle bad brace");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected brace token");
+ break; // not reached: the throw above always exits
+
+ case QPDFTokenizer::tt_array_close:
+ if (in_array)
+ {
+ done = true; // end of the array this call is collecting
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle bad array close");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected array close token");
+ }
+ break;
+
+ case QPDFTokenizer::tt_dict_close:
+ if (in_dictionary)
+ {
+ done = true; // end of the dictionary this call is collecting
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unexpected dictionary close token");
+ }
+ break;
+
+ case QPDFTokenizer::tt_array_open:
+ object = parseInternal( // recurse to collect the nested array up to its close token
+ input, object_description, tokenizer, empty,
+ decrypter, context, true, false);
+ break;
+
+ case QPDFTokenizer::tt_dict_open:
+ object = parseInternal( // recurse to collect the nested dictionary up to its close token
+ input, object_description, tokenizer, empty,
+ decrypter, context, false, true);
+ break;
+
+ case QPDFTokenizer::tt_bool:
+ object = newBool((token.getValue() == "true")); // any token value other than "true" yields false
+ break;
+
+ case QPDFTokenizer::tt_null:
+ object = newNull();
+ break;
+
+ case QPDFTokenizer::tt_integer:
+ object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
+ break;
+
+ case QPDFTokenizer::tt_real:
+ object = newReal(token.getValue()); // the token text is passed through as a string
+ break;
+
+ case QPDFTokenizer::tt_name:
+ object = newName(token.getValue());
+ break;
+
+ case QPDFTokenizer::tt_word:
+ {
+ std::string const& value = token.getValue();
+ if ((value == "R") && (in_array || in_dictionary) && // "R" is only treated as a reference inside a container...
+ (olist.size() >= 2) && // ...and only when the two most recent elements are integers
+ (olist[olist.size() - 1].isInteger()) &&
+ (olist[olist.size() - 2].isInteger()))
+ {
+ if (context == 0) // newIndirect below is given the QPDF context, so reject a null one first
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle indirect without context");
+ throw std::logic_error(
+ "QPDFObjectHandle::parse called without context"
+ " on an object with indirect references");
+ }
+ // Try to resolve indirect objects
+ object = newIndirect(
+ context,
+ olist[olist.size() - 2].getIntValue(), // second-to-last integer: object id
+ olist[olist.size() - 1].getIntValue()); // last integer: generation
+ olist.pop_back(); // drop both integers; the reference pushed after the switch replaces them
+ olist.pop_back();
+ }
+ else if ((value == "endobj") &&
+ (! (in_array || in_dictionary)))
+ {
+ // We just saw endobj without having read
+ // anything. Treat this as a null and do not move
+ // the input source's offset.
+ object = newNull();
+ input->seek(input->getLastOffset(), SEEK_SET); // rewind to the offset reported by getLastOffset()
+ empty = true; // tell the caller the null stands for a missing object
+ }
+ else
+ {
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unknown token while reading object (" +
+ value + ")");
+ }
+ }
+ break;
+
+ case QPDFTokenizer::tt_string:
+ {
+ std::string val = token.getValue(); // local copy handed to the decrypter
+ if (decrypter)
+ {
+ decrypter->decryptString(val); // NOTE(review): presumably decrypts val in place; signature not visible here
+ }
+ object = QPDFObjectHandle::newString(val);
+ }
+
+ break;
+
+ default: // every token type without an explicit case above
+ throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ object_description,
+ input->getLastOffset(),
+ "unknown token type while reading object");
+ break; // not reached: the throw above always exits
+ }
+
+ if (in_dictionary || in_array)
+ {
+ if (! done) // the close token itself contributes no element
+ {
+ olist.push_back(object);
+ }
+ }
+ else if (! object.isInitialized()) // defensive: outside a container every non-throwing case above assigns object
+ {
+ throw std::logic_error(
+ "INTERNAL ERROR: uninitialized object (token = " +
+ QUtil::int_to_string(token.getType()) +
+ ", " + token.getValue() + ")");
+ }
+ else
+ {
+ done = true; // outside a container, one object completes the parse
+ }
+ }
+
+ if (in_array)
+ {
+ object = newArray(olist); // the collected elements become the array, in order
+ }
+ else if (in_dictionary)
+ {
+ // Convert list to map. Alternating elements are keys.
+ std::map<std::string, QPDFObjectHandle> dict;
+ if (olist.size() % 2) // every key needs a value
+ {
+ QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements");
+ throw QPDFExc(
+ qpdf_e_damaged_pdf, input->getName(),
+ object_description, input->getLastOffset(), // reported at the last token offset, unlike the key error below
+ "dictionary ending here has an odd number of elements");
+ }
+ for (unsigned int i = 0; i < olist.size(); i += 2) // step over (key, value) pairs
+ {
+ QPDFObjectHandle key_obj = olist[i];
+ QPDFObjectHandle val = olist[i + 1]; // in range: the size was checked to be even above
+ if (! key_obj.isName())
+ {
+ throw QPDFExc(
+ qpdf_e_damaged_pdf,
+ input->getName(), object_description, offset, // reported at the offset where this call started
+ std::string("dictionary key not name (") +
+ key_obj.unparse() + ")");
+ }
+ dict[key_obj.getName()] = val; // a repeated key overwrites the earlier value
+ }
+ object = newDictionary(dict);
+ }
+
+ return object; // outside a container this is the single object produced by the loop
+}
+
+QPDFObjectHandle
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
{
return QPDFObjectHandle(qpdf, objid, generation);