diff options
Diffstat (limited to 'libqpdf')
62 files changed, 2029 insertions, 1760 deletions
diff --git a/libqpdf/BufferInputSource.cc b/libqpdf/BufferInputSource.cc index 5b59c801..6402f639 100644 --- a/libqpdf/BufferInputSource.cc +++ b/libqpdf/BufferInputSource.cc @@ -7,8 +7,8 @@ #include <stdexcept> #include <string.h> -BufferInputSource::Members::Members( - bool own_memory, std::string const& description, Buffer* buf) : +BufferInputSource::BufferInputSource( + std::string const& description, Buffer* buf, bool own_memory) : own_memory(own_memory), description(description), buf(buf), @@ -18,60 +18,54 @@ BufferInputSource::Members::Members( } BufferInputSource::BufferInputSource( - std::string const& description, Buffer* buf, bool own_memory) : - m(new Members(own_memory, description, buf)) -{ -} - -BufferInputSource::BufferInputSource( std::string const& description, std::string const& contents) : - m(new Members(true, description, nullptr)) + own_memory(true), + description(description), + buf(new Buffer(contents.length())), + cur_offset(0), + max_offset(QIntC::to_offset(buf->getSize())) { - this->m->buf = new Buffer(contents.length()); - this->m->max_offset = QIntC::to_offset(this->m->buf->getSize()); - unsigned char* bp = this->m->buf->getBuffer(); - memcpy(bp, contents.c_str(), contents.length()); + memcpy(buf->getBuffer(), contents.c_str(), contents.length()); } BufferInputSource::~BufferInputSource() { - if (this->m->own_memory) { - delete this->m->buf; + if (this->own_memory) { + delete this->buf; } } qpdf_offset_t BufferInputSource::findAndSkipNextEOL() { - if (this->m->cur_offset < 0) { + if (this->cur_offset < 0) { throw std::logic_error("INTERNAL ERROR: BufferInputSource offset < 0"); } - qpdf_offset_t end_pos = this->m->max_offset; - if (this->m->cur_offset >= end_pos) { + qpdf_offset_t end_pos = this->max_offset; + if (this->cur_offset >= end_pos) { this->last_offset = end_pos; - this->m->cur_offset = end_pos; + this->cur_offset = end_pos; return end_pos; } qpdf_offset_t result = 0; - unsigned char const* buffer = this->m->buf->getBuffer(); + unsigned char const* buffer = this->buf->getBuffer(); unsigned char const* end = buffer + end_pos; - unsigned char const* p = buffer + this->m->cur_offset; + unsigned char const* p = buffer + this->cur_offset; while ((p < end) && !((*p == '\r') || (*p == '\n'))) { ++p; } if (p < end) { result = p - buffer; - this->m->cur_offset = result + 1; + this->cur_offset = result + 1; ++p; - while ((this->m->cur_offset < end_pos) && - ((*p == '\r') || (*p == '\n'))) { + while ((this->cur_offset < end_pos) && ((*p == '\r') || (*p == '\n'))) { ++p; - ++this->m->cur_offset; + ++this->cur_offset; } } else { - this->m->cur_offset = end_pos; + this->cur_offset = end_pos; result = end_pos; } return result; @@ -80,13 +74,13 @@ BufferInputSource::findAndSkipNextEOL() std::string const& BufferInputSource::getName() const { - return this->m->description; + return this->description; } qpdf_offset_t BufferInputSource::tell() { - return this->m->cur_offset; + return this->cur_offset; } void @@ -94,17 +88,17 @@ BufferInputSource::seek(qpdf_offset_t offset, int whence) { switch (whence) { case SEEK_SET: - this->m->cur_offset = offset; + this->cur_offset = offset; break; case SEEK_END: - QIntC::range_check(this->m->max_offset, offset); - this->m->cur_offset = this->m->max_offset + offset; + QIntC::range_check(this->max_offset, offset); + this->cur_offset = this->max_offset + offset; break; case SEEK_CUR: - QIntC::range_check(this->m->cur_offset, offset); - this->m->cur_offset += offset; + QIntC::range_check(this->cur_offset, offset); + this->cur_offset += offset; break; default: @@ -113,42 +107,41 @@ BufferInputSource::seek(qpdf_offset_t offset, int whence) break; } - if (this->m->cur_offset < 0) { + if (this->cur_offset < 0) { throw std::runtime_error( - this->m->description + ": seek before beginning of buffer"); + this->description + ": seek before beginning of buffer"); } } void BufferInputSource::rewind() { - this->m->cur_offset = 0; + this->cur_offset = 0; } size_t BufferInputSource::read(char* buffer, size_t length) { - if (this->m->cur_offset < 0) { + if (this->cur_offset < 0) { throw std::logic_error("INTERNAL ERROR: BufferInputSource offset < 0"); } - qpdf_offset_t end_pos = this->m->max_offset; - if (this->m->cur_offset >= end_pos) { + qpdf_offset_t end_pos = this->max_offset; + if (this->cur_offset >= end_pos) { this->last_offset = end_pos; return 0; } - this->last_offset = this->m->cur_offset; - size_t len = - std::min(QIntC::to_size(end_pos - this->m->cur_offset), length); - memcpy(buffer, this->m->buf->getBuffer() + this->m->cur_offset, len); - this->m->cur_offset += QIntC::to_offset(len); + this->last_offset = this->cur_offset; + size_t len = std::min(QIntC::to_size(end_pos - this->cur_offset), length); + memcpy(buffer, this->buf->getBuffer() + this->cur_offset, len); + this->cur_offset += QIntC::to_offset(len); return len; } void BufferInputSource::unreadCh(char ch) { - if (this->m->cur_offset > 0) { - --this->m->cur_offset; + if (this->cur_offset > 0) { + --this->cur_offset; } } diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt index cf807f6d..686ea04a 100644 --- a/libqpdf/CMakeLists.txt +++ b/libqpdf/CMakeLists.txt @@ -80,10 +80,12 @@ set(libqpdf_SOURCES QPDFPageDocumentHelper.cc QPDFPageLabelDocumentHelper.cc QPDFPageObjectHelper.cc + QPDFParser.cc QPDFStreamFilter.cc QPDFSystemError.cc QPDFTokenizer.cc QPDFUsage.cc + QPDFValue.cc QPDFWriter.cc QPDFXRefEntry.cc QPDF_Array.cc @@ -98,6 +100,7 @@ set(libqpdf_SOURCES QPDF_Reserved.cc QPDF_Stream.cc QPDF_String.cc + QPDF_Unresolved.cc QPDF_encryption.cc QPDF_json.cc QPDF_linearization.cc @@ -375,6 +378,29 @@ int main(int argc, char* argv[]) { endif() endfunction() +check_c_source_compiles( +"#include <malloc.h> +#include <stdio.h> +int main(int argc, char* argv[]) { + malloc_info(0, stdout); + return 0; +}" + HAVE_MALLOC_INFO) + +check_c_source_compiles( +"#include <stdio.h> +#include <stdlib.h> +int main(int argc, char* argv[]) { + char* buf; + size_t size; + FILE* f; + f = open_memstream(&buf, &size); + fclose(f); + free(buf); + return 0; +}" + HAVE_OPEN_MEMSTREAM) + qpdf_check_ll_fmt("%lld" fmt_lld) qpdf_check_ll_fmt("%I64d" fmt_i64d) qpdf_check_ll_fmt("%I64lld" fmt_i64lld) diff --git a/libqpdf/ClosedFileInputSource.cc b/libqpdf/ClosedFileInputSource.cc index dfb98c6a..06ebb156 100644 --- a/libqpdf/ClosedFileInputSource.cc +++ b/libqpdf/ClosedFileInputSource.cc @@ -2,18 +2,13 @@ #include <qpdf/FileInputSource.hh> -ClosedFileInputSource::Members::Members(char const* filename) : +ClosedFileInputSource::ClosedFileInputSource(char const* filename) : filename(filename), offset(0), stay_open(false) { } -ClosedFileInputSource::ClosedFileInputSource(char const* filename) : - m(new Members(filename)) -{ -} - ClosedFileInputSource::~ClosedFileInputSource() { // Must be explicit and not inline -- see QPDF_DLL_CLASS in @@ -23,30 +18,29 @@ ClosedFileInputSource::~ClosedFileInputSource() void ClosedFileInputSource::before() { - if (nullptr == this->m->fis.get()) { - this->m->fis = - std::make_shared<FileInputSource>(this->m->filename.c_str()); - this->m->fis->seek(this->m->offset, SEEK_SET); - this->m->fis->setLastOffset(this->last_offset); + if (nullptr == this->fis) { + this->fis = std::make_shared<FileInputSource>(this->filename.c_str()); + this->fis->seek(this->offset, SEEK_SET); + this->fis->setLastOffset(this->last_offset); } } void ClosedFileInputSource::after() { - this->last_offset = this->m->fis->getLastOffset(); - this->m->offset = this->m->fis->tell(); - if (this->m->stay_open) { + this->last_offset = this->fis->getLastOffset(); + this->offset = this->fis->tell(); + if (this->stay_open) { return; } - this->m->fis = nullptr; + this->fis = nullptr; } qpdf_offset_t ClosedFileInputSource::findAndSkipNextEOL() { before(); - qpdf_offset_t r = this->m->fis->findAndSkipNextEOL(); + qpdf_offset_t r = this->fis->findAndSkipNextEOL(); after(); return r; } @@ -54,14 +48,14 @@ ClosedFileInputSource::findAndSkipNextEOL() std::string const& ClosedFileInputSource::getName() const { - return this->m->filename; + return this->filename; } qpdf_offset_t ClosedFileInputSource::tell() { before(); - qpdf_offset_t r = this->m->fis->tell(); + qpdf_offset_t r = this->fis->tell(); after(); return r; } @@ -70,16 +64,16 @@ void ClosedFileInputSource::seek(qpdf_offset_t offset, int whence) { before(); - this->m->fis->seek(offset, whence); + this->fis->seek(offset, whence); after(); } void ClosedFileInputSource::rewind() { - this->m->offset = 0; - if (this->m->fis.get()) { - this->m->fis->rewind(); + this->offset = 0; + if (this->fis.get()) { + this->fis->rewind(); } } @@ -87,7 +81,7 @@ size_t ClosedFileInputSource::read(char* buffer, size_t length) { before(); - size_t r = this->m->fis->read(buffer, length); + size_t r = this->fis->read(buffer, length); after(); return r; } @@ -96,7 +90,7 @@ void ClosedFileInputSource::unreadCh(char ch) { before(); - this->m->fis->unreadCh(ch); + this->fis->unreadCh(ch); // Don't call after -- the file has to stay open after this // operation. } @@ -104,8 +98,8 @@ ClosedFileInputSource::unreadCh(char ch) void ClosedFileInputSource::stayOpen(bool val) { - this->m->stay_open = val; - if ((!val) && this->m->fis.get()) { + this->stay_open = val; + if ((!val) && this->fis.get()) { after(); } } diff --git a/libqpdf/FileInputSource.cc b/libqpdf/FileInputSource.cc index ab88d302..2b1ee1ab 100644 --- a/libqpdf/FileInputSource.cc +++ b/libqpdf/FileInputSource.cc @@ -5,60 +5,52 @@ #include <algorithm> #include <string.h> -FileInputSource::Members::Members(bool close_file) : - close_file(close_file), - file(nullptr) -{ -} - -FileInputSource::Members::~Members() -{ - if (this->file && this->close_file) { - fclose(this->file); - } -} - FileInputSource::FileInputSource() : - m(new Members(false)) + close_file(false), + file(nullptr) { } FileInputSource::FileInputSource(char const* filename) : - m(new Members(false)) + close_file(true), + filename(filename), + file(QUtil::safe_fopen(filename, "rb")) { - setFilename(filename); } FileInputSource::FileInputSource( char const* description, FILE* filep, bool close_file) : - m(new Members(false)) + close_file(close_file), + filename(description), + file(filep) +{ +} + +FileInputSource::~FileInputSource() { - setFile(description, filep, close_file); + // Must be explicit and not inline -- see QPDF_DLL_CLASS in + // README-maintainer + if (this->file && this->close_file) { + fclose(this->file); + } } void FileInputSource::setFilename(char const* filename) { - this->m = std::shared_ptr<Members>(new Members(true)); - this->m->filename = filename; - this->m->file = QUtil::safe_fopen(filename, "rb"); + this->close_file = true; + this->filename = filename; + this->file = QUtil::safe_fopen(filename, "rb"); } void FileInputSource::setFile(char const* description, FILE* filep, bool close_file) { - this->m = std::shared_ptr<Members>(new Members(close_file)); - this->m->filename = description; - this->m->file = filep; + this->filename = description; + this->file = filep; this->seek(0, SEEK_SET); } -FileInputSource::~FileInputSource() -{ - // Must be explicit and not inline -- see QPDF_DLL_CLASS in - // README-maintainer -} - qpdf_offset_t FileInputSource::findAndSkipNextEOL() { @@ -66,7 +58,7 @@ FileInputSource::findAndSkipNextEOL() bool done = false; char buf[10240]; while (!done) { - qpdf_offset_t cur_offset = QUtil::tell(this->m->file); + qpdf_offset_t cur_offset = QUtil::tell(this->file); size_t len = this->read(buf, sizeof(buf)); if (len == 0) { done = true; @@ -98,41 +90,42 @@ FileInputSource::findAndSkipNextEOL() std::string const& FileInputSource::getName() const { - return this->m->filename; + return this->filename; } qpdf_offset_t FileInputSource::tell() { - return QUtil::tell(this->m->file); + return QUtil::tell(this->file); } void FileInputSource::seek(qpdf_offset_t offset, int whence) { - QUtil::os_wrapper( - (std::string("seek to ") + this->m->filename + ", offset " + - QUtil::int_to_string(offset) + " (" + QUtil::int_to_string(whence) + - ")"), - QUtil::seek(this->m->file, offset, whence)); + if (QUtil::seek(this->file, offset, whence) == -1) { + QUtil::throw_system_error( + std::string("seek to ") + this->filename + ", offset " + + QUtil::int_to_string(offset) + " (" + QUtil::int_to_string(whence) + + ")"); + } } void FileInputSource::rewind() { - ::rewind(this->m->file); + ::rewind(this->file); } size_t FileInputSource::read(char* buffer, size_t length) { - this->last_offset = this->tell(); - size_t len = fread(buffer, 1, length, this->m->file); + this->last_offset = QUtil::tell(this->file); + size_t len = fread(buffer, 1, length, this->file); if (len == 0) { - if (ferror(this->m->file)) { + if (ferror(this->file)) { throw QPDFExc( qpdf_e_system, - this->m->filename, + this->filename, "", this->last_offset, (std::string("read ") + QUtil::uint_to_string(length) + @@ -148,7 +141,7 @@ FileInputSource::read(char* buffer, size_t length) void FileInputSource::unreadCh(char ch) { - QUtil::os_wrapper( - this->m->filename + ": unread character", - ungetc(static_cast<unsigned char>(ch), this->m->file)); + if (ungetc(static_cast<unsigned char>(ch), this->file) == -1) { + QUtil::throw_system_error(this->filename + ": unread character"); + } } diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index c825413e..07180cf5 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -204,7 +204,7 @@ JSON::JSON_blob::write(Pipeline* p, size_t) const void JSON::write(Pipeline* p, size_t depth) const { - if (nullptr == this->m->value.get()) { + if (nullptr == this->m->value) { *p << "null"; } else { this->m->value->write(p, depth); @@ -1122,7 +1122,7 @@ JSONParser::handleToken() break; } - if ((item.get() == nullptr) == (delimiter == '\0')) { + if ((item == nullptr) == (delimiter == '\0')) { throw std::logic_error( "JSONParser::handleToken: logic error: exactly one of item" " or delimiter must be set"); diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc index c7e3f923..791656d8 100644 --- a/libqpdf/Pl_Buffer.cc +++ b/libqpdf/Pl_Buffer.cc @@ -26,7 +26,7 @@ Pl_Buffer::~Pl_Buffer() void Pl_Buffer::write(unsigned char const* buf, size_t len) { - if (this->m->data.get() == nullptr) { + if (this->m->data == nullptr) { this->m->data = std::make_shared<Buffer>(len); } size_t cur_size = this->m->data->getSize(); diff --git a/libqpdf/Pl_Flate.cc b/libqpdf/Pl_Flate.cc index 4183b7e4..7f2fa4a6 100644 --- a/libqpdf/Pl_Flate.cc +++ b/libqpdf/Pl_Flate.cc @@ -86,7 +86,7 @@ Pl_Flate::warn(char const* msg, int code) void Pl_Flate::write(unsigned char const* data, size_t len) { - if (this->m->outbuf.get() == nullptr) { + if (this->m->outbuf == nullptr) { throw std::logic_error( this->identifier + ": Pl_Flate: write() called after finish() called"); diff --git a/libqpdf/Pl_RC4.cc b/libqpdf/Pl_RC4.cc index 823b22ea..ba0cb201 100644 --- a/libqpdf/Pl_RC4.cc +++ b/libqpdf/Pl_RC4.cc @@ -18,7 +18,7 @@ Pl_RC4::Pl_RC4( void Pl_RC4::write(unsigned char const* data, size_t len) { - if (this->outbuf.get() == nullptr) { + if (this->outbuf == nullptr) { throw std::logic_error( this->identifier + ": Pl_RC4: write() called after finish() called"); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index c2c764db..9593c44f 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -24,7 +24,9 @@ #include <qpdf/QPDF_Array.hh> #include <qpdf/QPDF_Dictionary.hh> #include <qpdf/QPDF_Null.hh> +#include <qpdf/QPDF_Reserved.hh> #include <qpdf/QPDF_Stream.hh> +#include <qpdf/QPDF_Unresolved.hh> #include <qpdf/QTC.hh> #include <qpdf/QUtil.hh> @@ -222,7 +224,6 @@ QPDF::Members::Members() : immediate_copy_from(false), in_parse(false), parsed(false), - ever_replaced_objects(false), first_xref_item_offset(0), uncompressed_after_compressed(false) { @@ -258,6 +259,7 @@ QPDF::~QPDF() this->m->xref_table.clear(); for (auto const& iter: this->m->obj_cache) { QPDFObject::ObjAccessor::releaseResolved(iter.second.object.get()); + iter.second.object->resetObjGen(); } } @@ -1397,7 +1399,7 @@ QPDF::fixDanglingReferences(bool force) std::list<QPDFObjectHandle> queue; queue.push_back(this->m->trailer); for (auto const& og: to_process) { - QPDFObjectHandle obj = QPDFObjectHandle::Factory::newIndirect(this, og); + auto obj = getObject(og); if (obj.isDictionary() || obj.isArray()) { queue.push_back(obj); } else if (obj.isStream()) { @@ -1419,18 +1421,15 @@ QPDF::fixDanglingReferences(bool force) to_check.push_back(iter.second); } } else if (obj.isArray()) { - QPDF_Array* arr = dynamic_cast<QPDF_Array*>( - QPDFObjectHandle::ObjAccessor::getObject(obj).get()); + auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj); arr->addExplicitElementsToList(to_check); } for (auto sub: to_check) { if (sub.isIndirect()) { - if (sub.getOwningQPDF() == this) { - QPDFObjGen og(sub.getObjGen()); - if (this->m->obj_cache.count(og) == 0) { - QTC::TC("qpdf", "QPDF detected dangling ref"); - queue.push_back(sub); - } + if ((sub.getOwningQPDF() == this) && + isUnresolved(sub.getObjGen())) { + QTC::TC("qpdf", "QPDF detected dangling ref"); + queue.push_back(sub); } } else { queue.push_back(sub); @@ -1462,8 +1461,7 @@ QPDF::getAllObjects() fixDanglingReferences(true); std::vector<QPDFObjectHandle> result; for (auto const& iter: this->m->obj_cache) { - QPDFObjGen const& og = iter.first; - result.push_back(QPDFObjectHandle::Factory::newIndirect(this, og)); + result.push_back(newIndirect(iter.first, iter.second.object)); } return result; } @@ -1888,7 +1886,7 @@ QPDF::readObjectAtOffset( "expected endobj"); } - if (!this->m->obj_cache.count(og)) { + if (isUnresolved(og)) { // Store the object in the cache here so it gets cached // whether we first know the offset or whether we first know // the object ID and generation (in which we case we would get @@ -1919,8 +1917,8 @@ QPDF::readObjectAtOffset( } } qpdf_offset_t end_after_space = this->m->file->tell(); - - this->m->obj_cache[og] = ObjCache( + updateCache( + og, QPDFObjectHandle::ObjAccessor::getObject(oh), end_before_space, end_after_space); @@ -1929,31 +1927,14 @@ QPDF::readObjectAtOffset( return oh; } -bool -QPDF::objectChanged(QPDFObjGen const& og, std::shared_ptr<QPDFObject>& oph) -{ - // See if the object cached at og, if any, is the one passed in. - // QPDFObjectHandle uses this to detect outdated handles to - // replaced or swapped objects. This is a somewhat expensive check - // because it happens with every dereference of a - // QPDFObjectHandle. To reduce the hit somewhat, short-circuit the - // check if we never called a function that replaces an object - // already in cache. It is important for functions that do this to - // set ever_replaced_objects = true. - - if (!this->m->ever_replaced_objects) { - return false; - } - auto c = this->m->obj_cache.find(og); - if (c == this->m->obj_cache.end()) { - return true; - } - return (c->second.object.get() != oph.get()); -} - -std::shared_ptr<QPDFObject> +void QPDF::resolve(QPDFObjGen const& og) { + if (isCached(og) && !isUnresolved(og)) { + // We only need to resolve unresolved objects + return; + } + // Check object cache before checking xref table. This allows us // to insert things into the object cache that don't actually // exist in the file. @@ -1967,11 +1948,12 @@ QPDF::resolve(QPDFObjGen const& og) "", this->m->file->getLastOffset(), ("loop detected resolving object " + og.unparse(' '))); - return QPDF_Null::create(); + updateCache(og, QPDF_Null::create(), -1, -1); + return; } ResolveRecorder rr(this, og); - if ((!this->m->obj_cache.count(og)) && this->m->xref_table.count(og)) { + if (m->xref_table.count(og) != 0) { QPDFXRefEntry const& entry = this->m->xref_table[og]; try { switch (entry.getType()) { @@ -2009,19 +1991,17 @@ QPDF::resolve(QPDFObjGen const& og) ": error reading object: " + e.what())); } } - if (this->m->obj_cache.count(og) == 0) { + + if (isUnresolved(og)) { // PDF spec says unknown objects resolve to the null object. QTC::TC("qpdf", "QPDF resolve failure to null"); - QPDFObjectHandle oh = QPDFObjectHandle::newNull(); - this->m->obj_cache[og] = - ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); + updateCache(og, QPDF_Null::create(), -1, -1); } - std::shared_ptr<QPDFObject> result(this->m->obj_cache[og].object); + auto result(this->m->obj_cache[og].object); if (!result->hasDescription()) { result->setDescription(this, ("object " + og.unparse(' '))); } - return result; } void @@ -2109,15 +2089,15 @@ QPDF::resolveObjectsInStream(int obj_stream_number) // objects appended to the file, so it is necessary to recheck the // xref table and only cache what would actually be resolved here. for (auto const& iter: offsets) { - int obj = iter.first; - QPDFObjGen og(obj, 0); + QPDFObjGen og(iter.first, 0); QPDFXRefEntry const& entry = this->m->xref_table[og]; if ((entry.getType() == 2) && (entry.getObjStreamNumber() == obj_stream_number)) { int offset = iter.second; input->seek(offset, SEEK_SET); QPDFObjectHandle oh = readObject(input, "", og, true); - this->m->obj_cache[og] = ObjCache( + updateCache( + og, QPDFObjectHandle::ObjAccessor::getObject(oh), end_before_space, end_after_space); @@ -2128,6 +2108,47 @@ QPDF::resolveObjectsInStream(int obj_stream_number) } QPDFObjectHandle +QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj) +{ + obj->setObjGen(this, og); + if (!obj->hasDescription()) { + obj->setDescription(this, "object " + og.unparse(' ')); + } + return QPDFObjectHandle::Factory::newIndirect(obj); +} + +void +QPDF::updateCache( + QPDFObjGen const& og, + std::shared_ptr<QPDFObject> const& object, + qpdf_offset_t end_before_space, + qpdf_offset_t end_after_space) +{ + object->setObjGen(this, og); + if (isCached(og)) { + auto& cache = m->obj_cache[og]; + cache.object->resetObjGen(); + cache.object->assign(object); + cache.end_before_space = end_before_space; + cache.end_after_space = end_after_space; + } else { + m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space); + } +} + +bool +QPDF::isCached(QPDFObjGen const& og) +{ + return m->obj_cache.count(og) != 0; +} + +bool +QPDF::isUnresolved(QPDFObjGen const& og) +{ + return !isCached(og) || m->obj_cache[og].object->isUnresolved(); +} + +QPDFObjectHandle QPDF::makeIndirectObject(QPDFObjectHandle oh) { int max_objid = toI(getObjectCount()); @@ -2136,19 +2157,21 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh) "max object id is too high to create new objects"); } QPDFObjGen next(max_objid + 1, 0); - this->m->obj_cache[next] = + m->obj_cache[next] = ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); - return QPDFObjectHandle::Factory::newIndirect(this, next); + return newIndirect(next, m->obj_cache[next].object); } QPDFObjectHandle QPDF::reserveObjectIfNotExists(QPDFObjGen const& og) { - if ((!this->m->obj_cache.count(og)) && (!this->m->xref_table.count(og))) { + if (!isCached(og) && !m->xref_table.count(og)) { resolve(og); - replaceObject(og, QPDFObjectHandle::Factory::makeReserved()); + m->obj_cache[og].object = QPDF_Reserved::create(); + return newIndirect(og, m->obj_cache[og].object); + } else { + return getObject(og); } - return getObjectByObjGen(og); } QPDFObjectHandle @@ -2159,15 +2182,33 @@ QPDF::reserveStream(QPDFObjGen const& og) } QPDFObjectHandle +QPDF::getObject(QPDFObjGen const& og) +{ + if (!og.isIndirect()) { + return QPDFObjectHandle::newNull(); + } + if (!isCached(og)) { + m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); + } + return newIndirect(og, m->obj_cache[og].object); +} + +QPDFObjectHandle +QPDF::getObject(int objid, int generation) +{ + return getObject(QPDFObjGen(objid, generation)); +} + +QPDFObjectHandle QPDF::getObjectByObjGen(QPDFObjGen const& og) { - return QPDFObjectHandle::Factory::newIndirect(this, og); + return getObject(og); } QPDFObjectHandle QPDF::getObjectByID(int objid, int generation) { - return getObjectByObjGen(QPDFObjGen(objid, generation)); + return getObject(QPDFObjGen(objid, generation)); } void @@ -2184,14 +2225,11 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) throw std::logic_error( "QPDF::replaceObject called with indirect object handle"); } - // Force new object to appear in the cache resolve(og); // Replace the object in the object cache - this->m->ever_replaced_objects = true; - this->m->obj_cache[og] = - ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); + updateCache(og, QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); } void @@ -2254,7 +2292,7 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) throw std::logic_error( "QPDF::copyForeign called with direct object handle"); } - QPDF* other = foreign.getOwningQPDF(); + QPDF* other = foreign.getOwningQPDF(false); if (other == this) { QTC::TC("qpdf", "QPDF copyForeign not foreign"); throw std::logic_error( @@ -2444,20 +2482,18 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) QPDFObjGen local_og(result.getObjGen()); // Copy information from the foreign stream so we can pipe its // data later without keeping the original QPDF object around. - QPDF* foreign_stream_qpdf = foreign.getOwningQPDF(); - if (!foreign_stream_qpdf) { - throw std::logic_error("unable to retrieve owning qpdf" - " from foreign stream"); - } - QPDF_Stream* stream = dynamic_cast<QPDF_Stream*>( - QPDFObjectHandle::ObjAccessor::getObject(foreign).get()); - if (!stream) { + + QPDF* foreign_stream_qpdf = foreign.getOwningQPDF( + false, "unable to retrieve owning qpdf from foreign stream"); + + auto stream = QPDFObjectHandle::ObjAccessor::asStream(foreign); + if (stream == nullptr) { throw std::logic_error("unable to retrieve underlying" " stream object from foreign stream"); } std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer(); if ((foreign_stream_qpdf->m->immediate_copy_from) && - (stream_buffer.get() == nullptr)) { + (stream_buffer == nullptr)) { // Pull the stream data into a buffer before attempting // the copy operation. Do it on the source stream so that // if the source stream is copied multiple times, we don't @@ -2515,10 +2551,7 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) // cache. resolve(og1); resolve(og2); - ObjCache t = this->m->obj_cache[og1]; - this->m->ever_replaced_objects = true; - this->m->obj_cache[og1] = this->m->obj_cache[og2]; - this->m->obj_cache[og2] = t; + m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); } unsigned long long diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index 23d021ff..6fec0587 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -183,7 +183,7 @@ QPDFAcroFormDocumentHelper::getFormFields() analyze(); std::vector<QPDFFormFieldObjectHelper> result; for (auto const& iter: this->m->field_to_annotations) { - result.push_back(this->qpdf.getObjectByObjGen(iter.first)); + result.push_back(this->qpdf.getObject(iter.first)); } return result; } diff --git a/libqpdf/QPDFEmbeddedFileDocumentHelper.cc b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc index fd706c27..030f32d3 100644 --- a/libqpdf/QPDFEmbeddedFileDocumentHelper.cc +++ b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc @@ -50,7 +50,7 @@ QPDFEmbeddedFileDocumentHelper::QPDFEmbeddedFileDocumentHelper(QPDF& qpdf) : bool QPDFEmbeddedFileDocumentHelper::hasEmbeddedFiles() const { - return (this->m->embedded_files.get() != nullptr); + return (this->m->embedded_files != nullptr); } void diff --git a/libqpdf/QPDFFormFieldObjectHelper.cc b/libqpdf/QPDFFormFieldObjectHelper.cc index 5ff41edd..e56024af 100644 --- a/libqpdf/QPDFFormFieldObjectHelper.cc +++ b/libqpdf/QPDFFormFieldObjectHelper.cc @@ -362,13 +362,11 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances) setFieldAttribute("/V", value); } if (need_appearances) { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::logic_error( - "QPDFFormFieldObjectHelper::setV called with" - " need_appearances = true on an object that is" - " not associated with an owning QPDF"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFFormFieldObjectHelper::setV called with need_appearances = " + "true on an object that is not associated with an owning QPDF"); + QPDFAcroFormDocumentHelper(*qpdf).setNeedAppearances(true); } } @@ -883,7 +881,7 @@ QPDFFormFieldObjectHelper::generateTextAppearance( if (found_font_in_dr && resources.isDictionary()) { QTC::TC("qpdf", "QPDFFormFieldObjectHelper get font from /DR"); if (resources.isIndirect()) { - resources = resources.getOwningQPDF()->makeIndirectObject( + resources = resources.getOwningQPDF(false)->makeIndirectObject( resources.shallowCopy()); AS.getDict().replaceKey("/Resources", resources); } diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index f1b35f56..7bd563aa 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -223,7 +223,7 @@ ImageOptimizer::evaluate(std::string const& description) Pl_Discard d; Pl_Count c("count", &d); std::shared_ptr<Pipeline> p = makePipeline(description, &c); - if (p.get() == nullptr) { + if (p == nullptr) { // message issued by makePipeline return false; } @@ -252,7 +252,7 @@ void ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline) { std::shared_ptr<Pipeline> p = makePipeline("", pipeline); - if (p.get() == nullptr) { + if (p == nullptr) { // Should not be possible image.warnIfPossible("unable to create pipeline after previous" " success; image data will be lost"); @@ -417,7 +417,8 @@ QPDFJob::Members::Members() : check_is_encrypted(false), check_requires_password(false), json_input(false), - json_output(false) + json_output(false), + report_mem_usage(false) { } @@ -625,6 +626,14 @@ QPDFJob::run() << ": operation succeeded with warnings\n"; } } + if (m->report_mem_usage) { + // Call get_max_memory_usage before generating output. When + // debugging, it's easier if print statements from + // get_max_memory_usage are not interleaved with the output. + auto mem_usage = QUtil::get_max_memory_usage(); + *this->m->log->getWarn() + << "qpdf-max-memory-usage " << mem_usage << "\n"; + } } bool @@ -2161,7 +2170,7 @@ QPDFJob::doUnderOverlayForPage( std::map<unsigned long long, std::shared_ptr<QPDFAcroFormDocumentHelper>> afdh; auto make_afdh = [&](QPDFPageObjectHelper& ph) { - QPDF* q = ph.getObjectHandle().getOwningQPDF(); + QPDF* q = ph.getObjectHandle().getOwningQPDF(false); return get_afdh_for_qpdf(afdh, q); }; auto dest_afdh = make_afdh(dest_page); @@ -2243,8 +2252,7 @@ QPDFJob::handleUnderOverlay(QPDF& pdf) { validateUnderOverlay(pdf, &m->underlay); validateUnderOverlay(pdf, &m->overlay); - if ((nullptr == m->underlay.pdf.get()) && - (nullptr == m->overlay.pdf.get())) { + if ((nullptr == m->underlay.pdf) && (nullptr == m->overlay.pdf)) { return; } std::map<int, std::vector<int>> underlay_pagenos; @@ -2598,7 +2606,7 @@ static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page) { QPDFObjectHandle result = page; - if (page.getOwningQPDF() != &pdf) { + if (page.getOwningQPDF(false) != &pdf) { // Calling copyForeignObject on an object we already copied // will give us the already existing copy. result = pdf.copyForeignObject(page); diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 8a9c1470..3e148fca 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -503,6 +503,13 @@ QPDFJob::Config::removePageLabels() } QPDFJob::Config* +QPDFJob::Config::reportMemUsage() +{ + o.m->report_mem_usage = true; + return this; +} + +QPDFJob::Config* QPDFJob::Config::requiresPassword() { o.m->check_requires_password = true; diff --git a/libqpdf/QPDFNameTreeObjectHelper.cc b/libqpdf/QPDFNameTreeObjectHelper.cc index f1adca0a..f388dccf 100644 --- a/libqpdf/QPDFNameTreeObjectHelper.cc +++ b/libqpdf/QPDFNameTreeObjectHelper.cc @@ -34,6 +34,13 @@ namespace static NameTreeDetails name_tree_details; +QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper() +{ + // Must be explicit and not inline -- see QPDF_DLL_CLASS in + // README-maintainer. For this specific class, see github issue + // #745. +} + QPDFNameTreeObjectHelper::Members::Members( QPDFObjectHandle& oh, QPDF& q, bool auto_repair) : impl(std::make_shared<NNTreeImpl>(name_tree_details, q, oh, auto_repair)) diff --git a/libqpdf/QPDFNumberTreeObjectHelper.cc b/libqpdf/QPDFNumberTreeObjectHelper.cc index 76eec678..6443416d 100644 --- a/libqpdf/QPDFNumberTreeObjectHelper.cc +++ b/libqpdf/QPDFNumberTreeObjectHelper.cc @@ -35,6 +35,13 @@ namespace static NumberTreeDetails number_tree_details; +QPDFNumberTreeObjectHelper::~QPDFNumberTreeObjectHelper() +{ + // Must be explicit and not inline -- see QPDF_DLL_CLASS in + // README-maintainer. For this specific class, see github issue + // #745. +} + QPDFNumberTreeObjectHelper::Members::Members( QPDFObjectHandle& oh, QPDF& q, bool auto_repair) : impl(std::make_shared<NNTreeImpl>(number_tree_details, q, oh, auto_repair)) diff --git a/libqpdf/QPDFObject.cc b/libqpdf/QPDFObject.cc index 382dd6c6..8b538021 100644 --- a/libqpdf/QPDFObject.cc +++ b/libqpdf/QPDFObject.cc @@ -1,47 +1,10 @@ #include <qpdf/QPDFObject.hh> -QPDFObject::QPDFObject() : - owning_qpdf(nullptr), - parsed_offset(-1) -{ -} - -std::shared_ptr<QPDFObject> -QPDFObject::do_create(QPDFObject* object) -{ - std::shared_ptr<QPDFObject> obj(object); - return obj; -} - -void -QPDFObject::setDescription(QPDF* qpdf, std::string const& description) -{ - this->owning_qpdf = qpdf; - this->object_description = description; -} - -bool -QPDFObject::getDescription(QPDF*& qpdf, std::string& description) -{ - qpdf = this->owning_qpdf; - description = this->object_description; - return this->owning_qpdf != nullptr; -} - -bool -QPDFObject::hasDescription() -{ - return this->owning_qpdf != nullptr; -} +#include <qpdf/QPDF.hh> void -QPDFObject::setParsedOffset(qpdf_offset_t offset) -{ - this->parsed_offset = offset; -} - -qpdf_offset_t -QPDFObject::getParsedOffset() +QPDFObject::doResolve() { - return this->parsed_offset; + auto og = value->og; + QPDF::Resolver::resolve(value->qpdf, og); } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 135b7c39..19a85034 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -8,6 +8,7 @@ #include <qpdf/QPDFLogger.hh> #include <qpdf/QPDFMatrix.hh> #include <qpdf/QPDFPageObjectHelper.hh> +#include <qpdf/QPDFParser.hh> #include <qpdf/QPDF_Array.hh> #include <qpdf/QPDF_Bool.hh> #include <qpdf/QPDF_Dictionary.hh> @@ -20,6 +21,7 @@ #include <qpdf/QPDF_Reserved.hh> #include <qpdf/QPDF_Stream.hh> #include <qpdf/QPDF_String.hh> +#include <qpdf/QPDF_Unresolved.hh> #include <qpdf/SparseOHArray.hh> #include <qpdf/QIntC.hh> @@ -233,29 +235,6 @@ LastChar::getLastChar() return this->last_char; } -QPDFObjectHandle::QPDFObjectHandle() : - initialized(false), - qpdf(nullptr), - reserved(false) -{ -} - -QPDFObjectHandle::QPDFObjectHandle(QPDF* qpdf, QPDFObjGen const& og) : - initialized(true), - qpdf(qpdf), - og(og), - reserved(false) -{ -} - -QPDFObjectHandle::QPDFObjectHandle(std::shared_ptr<QPDFObject> const& data) : - initialized(true), - qpdf(nullptr), - obj(data), - reserved(false) -{ -} - void QPDFObjectHandle::releaseResolved() { @@ -272,26 +251,6 @@ QPDFObjectHandle::releaseResolved() } } -void -QPDFObjectHandle::setObjectDescriptionFromInput( - QPDFObjectHandle object, - QPDF* context, - std::string const& description, - std::shared_ptr<InputSource> input, - qpdf_offset_t offset) -{ - object.setObjectDescription( - context, - (input->getName() + ", " + description + " at offset " + - QUtil::int_to_string(offset))); -} - -bool -QPDFObjectHandle::isInitialized() const -{ - return this->initialized; -} - QPDFObject::object_type_e QPDFObjectHandle::getTypeCode() { @@ -305,24 +264,90 @@ QPDFObjectHandle::getTypeName() return dereference() ? this->obj->getTypeName() : "uninitialized"; } -namespace +QPDF_Array* +QPDFObjectHandle::asArray() { - template <class T> - class QPDFObjectTypeAccessor - { - public: - static bool - check(std::shared_ptr<QPDFObject> const& o) - { - return (o && dynamic_cast<T const*>(o.get())); - } - }; -} // namespace + return dereference() ? obj->as<QPDF_Array>() : nullptr; +} + +QPDF_Bool* +QPDFObjectHandle::asBool() +{ + return dereference() ? obj->as<QPDF_Bool>() : nullptr; +} + +QPDF_Dictionary* +QPDFObjectHandle::asDictionary() +{ + return dereference() ? obj->as<QPDF_Dictionary>() : nullptr; +} + +QPDF_InlineImage* +QPDFObjectHandle::asInlineImage() +{ + return dereference() ? obj->as<QPDF_InlineImage>() : nullptr; +} + +QPDF_Integer* +QPDFObjectHandle::asInteger() +{ + return dereference() ? obj->as<QPDF_Integer>() : nullptr; +} + +QPDF_Name* +QPDFObjectHandle::asName() +{ + return dereference() ? obj->as<QPDF_Name>() : nullptr; +} + +QPDF_Null* +QPDFObjectHandle::asNull() +{ + return dereference() ? obj->as<QPDF_Null>() : nullptr; +} + +QPDF_Operator* +QPDFObjectHandle::asOperator() +{ + return dereference() ? obj->as<QPDF_Operator>() : nullptr; +} + +QPDF_Real* +QPDFObjectHandle::asReal() +{ + return dereference() ? obj->as<QPDF_Real>() : nullptr; +} + +QPDF_Reserved* +QPDFObjectHandle::asReserved() +{ + return dereference() ? obj->as<QPDF_Reserved>() : nullptr; +} + +QPDF_Stream* +QPDFObjectHandle::asStream() +{ + return dereference() ? obj->as<QPDF_Stream>() : nullptr; +} + +QPDF_Stream* +QPDFObjectHandle::asStreamWithAssert() +{ + auto stream = asStream(); + assertType("stream", stream); + return stream; +} + +QPDF_String* +QPDFObjectHandle::asString() +{ + return dereference() ? obj->as<QPDF_String>() : nullptr; +} bool QPDFObjectHandle::isBool() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Bool>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_boolean); } bool @@ -331,26 +356,26 @@ QPDFObjectHandle::isDirectNull() const // Don't call dereference() -- this is a const method, and we know // objid == 0, so there's nothing to resolve. return ( - this->initialized && (getObjectID() == 0) && - QPDFObjectTypeAccessor<QPDF_Null>::check(obj)); + isInitialized() && (getObjectID() == 0) && + (obj->getTypeCode() == QPDFObject::ot_null)); } bool QPDFObjectHandle::isNull() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Null>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_null); } bool QPDFObjectHandle::isInteger() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Integer>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_integer); } bool QPDFObjectHandle::isReal() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Real>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_real); } bool @@ -387,57 +412,49 @@ QPDFObjectHandle::getValueAsNumber(double& value) bool QPDFObjectHandle::isName() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Name>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_name); } bool QPDFObjectHandle::isString() { - return dereference() && QPDFObjectTypeAccessor<QPDF_String>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_string); } bool QPDFObjectHandle::isOperator() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Operator>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_operator); } bool QPDFObjectHandle::isInlineImage() { - return dereference() && - QPDFObjectTypeAccessor<QPDF_InlineImage>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_inlineimage); } bool QPDFObjectHandle::isArray() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Array>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_array); } bool QPDFObjectHandle::isDictionary() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Dictionary>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_dictionary); } bool QPDFObjectHandle::isStream() { - return dereference() && QPDFObjectTypeAccessor<QPDF_Stream>::check(obj); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_stream); } bool QPDFObjectHandle::isReserved() { - // dereference will clear reserved if this has been replaced - return dereference() && this->reserved; -} - -bool -QPDFObjectHandle::isIndirect() -{ - return this->initialized && (getObjectID() != 0); + return dereference() && (obj->getTypeCode() == QPDFObject::ot_reserved); } bool @@ -475,8 +492,9 @@ QPDFObjectHandle::isStreamOfType( bool QPDFObjectHandle::getBoolValue() { - if (isBool()) { - return dynamic_cast<QPDF_Bool*>(obj.get())->getVal(); + auto boolean = asBool(); + if (boolean) { + return boolean->getVal(); } else { typeWarning("boolean", "returning false"); QTC::TC("qpdf", "QPDFObjectHandle boolean returning false"); @@ -487,10 +505,11 @@ QPDFObjectHandle::getBoolValue() bool QPDFObjectHandle::getValueAsBool(bool& value) { - if (!isBool()) { + auto boolean = asBool(); + if (boolean == nullptr) { return false; } - value = dynamic_cast<QPDF_Bool*>(obj.get())->getVal(); + value = boolean->getVal(); return true; } @@ -499,8 +518,9 @@ QPDFObjectHandle::getValueAsBool(bool& value) long long QPDFObjectHandle::getIntValue() { - if (isInteger()) { - return dynamic_cast<QPDF_Integer*>(obj.get())->getVal(); + auto integer = asInteger(); + if (integer) { + return integer->getVal(); } else { typeWarning("integer", "returning 0"); QTC::TC("qpdf", "QPDFObjectHandle integer returning 0"); @@ -511,10 +531,11 @@ QPDFObjectHandle::getIntValue() bool QPDFObjectHandle::getValueAsInt(long long& value) { - if (!isInteger()) { + auto integer = asInteger(); + if (integer == nullptr) { return false; } - value = dynamic_cast<QPDF_Integer*>(obj.get())->getVal(); + value = integer->getVal(); return true; } @@ -610,8 +631,9 @@ QPDFObjectHandle::getValueAsUInt(unsigned int& value) std::string QPDFObjectHandle::getRealValue() { - if (isReal()) { - return dynamic_cast<QPDF_Real*>(obj.get())->getVal(); + auto real = asReal(); + if (real) { + return real->getVal(); } else { typeWarning("real", "returning 0.0"); QTC::TC("qpdf", "QPDFObjectHandle real returning 0.0"); @@ -622,10 +644,11 @@ QPDFObjectHandle::getRealValue() bool QPDFObjectHandle::getValueAsReal(std::string& value) { - if (!isReal()) { + auto real = asReal(); + if (real == nullptr) { return false; } - value = dynamic_cast<QPDF_Real*>(obj.get())->getVal(); + value = real->getVal(); return true; } @@ -634,8 +657,9 @@ QPDFObjectHandle::getValueAsReal(std::string& value) std::string QPDFObjectHandle::getName() { - if (isName()) { - return dynamic_cast<QPDF_Name*>(obj.get())->getName(); + auto name = asName(); + if (name) { + return name->getName(); } else { typeWarning("name", "returning dummy name"); QTC::TC("qpdf", "QPDFObjectHandle name returning dummy name"); @@ -646,10 +670,11 @@ QPDFObjectHandle::getName() bool QPDFObjectHandle::getValueAsName(std::string& value) { - if (!isName()) { + auto name = asName(); + if (name == nullptr) { return false; } - value = dynamic_cast<QPDF_Name*>(obj.get())->getName(); + value = name->getName(); return true; } @@ -658,8 +683,9 @@ QPDFObjectHandle::getValueAsName(std::string& value) std::string QPDFObjectHandle::getStringValue() { - if (isString()) { - return dynamic_cast<QPDF_String*>(obj.get())->getVal(); + auto str = asString(); + if (str) { + return str->getVal(); } else { typeWarning("string", "returning empty string"); QTC::TC("qpdf", "QPDFObjectHandle string returning empty string"); @@ -670,18 +696,20 @@ QPDFObjectHandle::getStringValue() bool QPDFObjectHandle::getValueAsString(std::string& value) { - if (!isString()) { + auto str = asString(); + if (str == nullptr) { return false; } - value = dynamic_cast<QPDF_String*>(obj.get())->getVal(); + value = str->getVal(); return true; } std::string QPDFObjectHandle::getUTF8Value() { - if (isString()) { - return dynamic_cast<QPDF_String*>(obj.get())->getUTF8Val(); + auto str = asString(); + if (str) { + return str->getUTF8Val(); } else { typeWarning("string", "returning empty string"); QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8"); @@ -692,10 +720,11 @@ QPDFObjectHandle::getUTF8Value() bool QPDFObjectHandle::getValueAsUTF8(std::string& value) { - if (!isString()) { + auto str = asString(); + if (str == nullptr) { return false; } - value = dynamic_cast<QPDF_String*>(obj.get())->getUTF8Val(); + value = str->getUTF8Val(); return true; } @@ -704,8 +733,9 @@ QPDFObjectHandle::getValueAsUTF8(std::string& value) std::string QPDFObjectHandle::getOperatorValue() { - if (isOperator()) { - return dynamic_cast<QPDF_Operator*>(obj.get())->getVal(); + auto op = asOperator(); + if (op) { + return op->getVal(); } else { typeWarning("operator", "returning fake value"); QTC::TC("qpdf", "QPDFObjectHandle operator returning fake value"); @@ -716,18 +746,20 @@ QPDFObjectHandle::getOperatorValue() bool QPDFObjectHandle::getValueAsOperator(std::string& value) { - if (!isOperator()) { + auto op = asOperator(); + if (op == nullptr) { return false; } - value = dynamic_cast<QPDF_Operator*>(obj.get())->getVal(); + value = op->getVal(); return true; } std::string QPDFObjectHandle::getInlineImageValue() { - if (isInlineImage()) { - return dynamic_cast<QPDF_InlineImage*>(obj.get())->getVal(); + auto image = asInlineImage(); + if (image) { + return image->getVal(); } else { typeWarning("inlineimage", "returning empty data"); QTC::TC("qpdf", "QPDFObjectHandle inlineimage returning empty data"); @@ -738,10 +770,11 @@ QPDFObjectHandle::getInlineImageValue() bool QPDFObjectHandle::getValueAsInlineImage(std::string& value) { - if (!isInlineImage()) { + auto image = asInlineImage(); + if (image == nullptr) { return false; } - value = dynamic_cast<QPDF_InlineImage*>(obj.get())->getVal(); + value = image->getVal(); return true; } @@ -756,8 +789,9 @@ QPDFObjectHandle::aitems() int QPDFObjectHandle::getArrayNItems() { - if (isArray()) { - return dynamic_cast<QPDF_Array*>(obj.get())->getNItems(); + auto array = asArray(); + if (array) { + return array->getNItems(); } else { typeWarning("array", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle array treating as empty"); @@ -769,11 +803,12 @@ QPDFObjectHandle QPDFObjectHandle::getArrayItem(int n) { QPDFObjectHandle result; - if (isArray() && (n < getArrayNItems()) && (n >= 0)) { - result = dynamic_cast<QPDF_Array*>(obj.get())->getItem(n); + auto array = asArray(); + if (array && (n < array->getNItems()) && (n >= 0)) { + result = array->getItem(n); } else { result = newNull(); - if (isArray()) { + if (array) { objectWarning("returning null for out of bounds array access"); QTC::TC("qpdf", "QPDFObjectHandle array bounds"); } else { @@ -782,7 +817,7 @@ QPDFObjectHandle::getArrayItem(int n) } QPDF* context = nullptr; std::string description; - if (this->obj->getDescription(context, description)) { + if (obj->getDescription(context, description)) { result.setObjectDescription( context, description + " -> null returned from invalid array access"); @@ -794,14 +829,12 @@ QPDFObjectHandle::getArrayItem(int n) bool QPDFObjectHandle::isRectangle() { - if (!isArray()) { - return false; - } - if (getArrayNItems() != 4) { + auto array = asArray(); + if ((array == nullptr) || (array->getNItems() != 4)) { return false; } for (int i = 0; i < 4; ++i) { - if (!getArrayItem(i).isNumber()) { + if (!array->getItem(i).isNumber()) { return false; } } @@ -811,14 +844,12 @@ QPDFObjectHandle::isRectangle() bool QPDFObjectHandle::isMatrix() { - if (!isArray()) { - return false; - } - if (getArrayNItems() != 6) { + auto array = asArray(); + if ((array == nullptr) || (array->getNItems() != 6)) { return false; } for (int i = 0; i < 6; ++i) { - if (!getArrayItem(i).isNumber()) { + if (!array->getItem(i).isNumber()) { return false; } } @@ -830,13 +861,14 @@ QPDFObjectHandle::getArrayAsRectangle() { Rectangle result; if (isRectangle()) { + auto array = asArray(); // Rectangle coordinates are always supposed to be llx, lly, // urx, ury, but files have been found in the wild where // llx > urx or lly > ury. - double i0 = getArrayItem(0).getNumericValue(); - double i1 = getArrayItem(1).getNumericValue(); - double i2 = getArrayItem(2).getNumericValue(); - double i3 = getArrayItem(3).getNumericValue(); + double i0 = array->getItem(0).getNumericValue(); + double i1 = array->getItem(1).getNumericValue(); + double i2 = array->getItem(2).getNumericValue(); + double i3 = array->getItem(3).getNumericValue(); result = Rectangle( std::min(i0, i2), std::min(i1, i3), @@ -851,13 +883,14 @@ QPDFObjectHandle::getArrayAsMatrix() { Matrix result; if (isMatrix()) { + auto array = asArray(); result = Matrix( - getArrayItem(0).getNumericValue(), - getArrayItem(1).getNumericValue(), - getArrayItem(2).getNumericValue(), - getArrayItem(3).getNumericValue(), - getArrayItem(4).getNumericValue(), - getArrayItem(5).getNumericValue()); + array->getItem(0).getNumericValue(), + array->getItem(1).getNumericValue(), + array->getItem(2).getNumericValue(), + array->getItem(3).getNumericValue(), + array->getItem(4).getNumericValue(), + array->getItem(5).getNumericValue()); } return result; } @@ -866,8 +899,9 @@ std::vector<QPDFObjectHandle> QPDFObjectHandle::getArrayAsVector() { std::vector<QPDFObjectHandle> result; - if (isArray()) { - dynamic_cast<QPDF_Array*>(obj.get())->getAsVector(result); + auto array = asArray(); + if (array) { + array->getAsVector(result); } else { typeWarning("array", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle array treating as empty vector"); @@ -880,9 +914,10 @@ QPDFObjectHandle::getArrayAsVector() void QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item) { - if (isArray()) { + auto array = asArray(); + if (array) { checkOwnership(item); - dynamic_cast<QPDF_Array*>(obj.get())->setItem(n, item); + array->setItem(n, item); } else { typeWarning("array", "ignoring attempt to set item"); QTC::TC("qpdf", "QPDFObjectHandle array ignoring set item"); @@ -892,11 +927,12 @@ QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item) void QPDFObjectHandle::setArrayFromVector(std::vector<QPDFObjectHandle> const& items) { - if (isArray()) { + auto array = asArray(); + if (array) { for (auto const& item: items) { checkOwnership(item); } - dynamic_cast<QPDF_Array*>(obj.get())->setFromVector(items); + array->setFromVector(items); } else { typeWarning("array", "ignoring attempt to replace items"); QTC::TC("qpdf", "QPDFObjectHandle array ignoring replace items"); @@ -906,8 +942,9 @@ QPDFObjectHandle::setArrayFromVector(std::vector<QPDFObjectHandle> const& items) void QPDFObjectHandle::insertItem(int at, QPDFObjectHandle const& item) { - if (isArray()) { - dynamic_cast<QPDF_Array*>(obj.get())->insertItem(at, item); + auto array = asArray(); + if (array) { + array->insertItem(at, item); } else { typeWarning("array", "ignoring attempt to insert item"); QTC::TC("qpdf", "QPDFObjectHandle array ignoring insert item"); @@ -924,9 +961,10 @@ QPDFObjectHandle::insertItemAndGetNew(int at, QPDFObjectHandle const& item) void QPDFObjectHandle::appendItem(QPDFObjectHandle const& item) { - if (isArray()) { + auto array = asArray(); + if (array) { checkOwnership(item); - dynamic_cast<QPDF_Array*>(obj.get())->appendItem(item); + array->appendItem(item); } else { typeWarning("array", "ignoring attempt to append item"); QTC::TC("qpdf", "QPDFObjectHandle array ignoring append item"); @@ -943,10 +981,11 @@ QPDFObjectHandle::appendItemAndGetNew(QPDFObjectHandle const& item) void QPDFObjectHandle::eraseItem(int at) { - if (isArray() && (at < getArrayNItems()) && (at >= 0)) { - dynamic_cast<QPDF_Array*>(obj.get())->eraseItem(at); + auto array = asArray(); + if (array && (at < array->getNItems()) && (at >= 0)) { + array->eraseItem(at); } else { - if (isArray()) { + if (array) { objectWarning("ignoring attempt to erase out of bounds array item"); QTC::TC("qpdf", "QPDFObjectHandle erase array bounds"); } else { @@ -960,8 +999,9 @@ QPDFObjectHandle QPDFObjectHandle::eraseItemAndGetOld(int at) { auto result = QPDFObjectHandle::newNull(); - if (isArray() && (at < getArrayNItems()) && (at >= 0)) { - result = getArrayItem(at); + auto array = asArray(); + if (array && (at < array->getNItems()) && (at >= 0)) { + result = array->getItem(at); } eraseItem(at); return result; @@ -978,8 +1018,9 @@ QPDFObjectHandle::ditems() bool QPDFObjectHandle::hasKey(std::string const& key) { - if (isDictionary()) { - return dynamic_cast<QPDF_Dictionary*>(obj.get())->hasKey(key); + auto dict = asDictionary(); + if (dict) { + return dict->hasKey(key); } else { typeWarning( "dictionary", "returning false for a key containment request"); @@ -992,15 +1033,16 @@ QPDFObjectHandle QPDFObjectHandle::getKey(std::string const& key) { QPDFObjectHandle result; - if (isDictionary()) { - result = dynamic_cast<QPDF_Dictionary*>(obj.get())->getKey(key); + auto dict = asDictionary(); + if (dict) { + result = dict->getKey(key); } else { typeWarning("dictionary", "returning null for attempted key retrieval"); QTC::TC("qpdf", "QPDFObjectHandle dictionary null for getKey"); result = newNull(); QPDF* qpdf = nullptr; std::string description; - if (this->obj->getDescription(qpdf, description)) { + if (obj->getDescription(qpdf, description)) { result.setObjectDescription( qpdf, (description + " -> null returned from getting key " + key + @@ -1020,8 +1062,9 @@ std::set<std::string> QPDFObjectHandle::getKeys() { std::set<std::string> result; - if (isDictionary()) { - result = dynamic_cast<QPDF_Dictionary*>(obj.get())->getKeys(); + auto dict = asDictionary(); + if (dict) { + result = dict->getKeys(); } else { typeWarning("dictionary", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle dictionary empty set for getKeys"); @@ -1033,8 +1076,9 @@ std::map<std::string, QPDFObjectHandle> QPDFObjectHandle::getDictAsMap() { std::map<std::string, QPDFObjectHandle> result; - if (isDictionary()) { - result = dynamic_cast<QPDF_Dictionary*>(obj.get())->getAsMap(); + auto dict = asDictionary(); + if (dict) { + result = dict->getAsMap(); } else { typeWarning("dictionary", "treating as empty"); QTC::TC("qpdf", "QPDFObjectHandle dictionary empty map for asMap"); @@ -1219,23 +1263,16 @@ QPDFObjectHandle::getUniqueResourceName( " QPDFObjectHandle::getUniqueResourceName"); } -// Indirect object accessors -QPDF* -QPDFObjectHandle::getOwningQPDF() -{ - // Will be null for direct objects - return this->qpdf; -} - // Dictionary mutators void QPDFObjectHandle::replaceKey( std::string const& key, QPDFObjectHandle const& value) { - if (isDictionary()) { + auto dict = asDictionary(); + if (dict) { checkOwnership(value); - dynamic_cast<QPDF_Dictionary*>(obj.get())->replaceKey(key, value); + dict->replaceKey(key, value); } else { typeWarning("dictionary", "ignoring key replacement request"); QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring replaceKey"); @@ -1262,8 +1299,9 @@ QPDFObjectHandle::replaceKeyAndGetOld( void QPDFObjectHandle::removeKey(std::string const& key) { - if (isDictionary()) { - dynamic_cast<QPDF_Dictionary*>(obj.get())->removeKey(key); + auto dict = asDictionary(); + if (dict) { + dict->removeKey(key); } else { typeWarning("dictionary", "ignoring key removal request"); QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring removeKey"); @@ -1274,8 +1312,9 @@ QPDFObjectHandle QPDFObjectHandle::removeKeyAndGetOld(std::string const& key) { auto result = QPDFObjectHandle::newNull(); - if (isDictionary()) { - result = getKey(key); + auto dict = asDictionary(); + if (dict) { + result = dict->getKey(key); } removeKey(key); return result; @@ -1292,50 +1331,43 @@ QPDFObjectHandle::replaceOrRemoveKey( QPDFObjectHandle QPDFObjectHandle::getDict() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getDict(); + return asStreamWithAssert()->getDict(); } void QPDFObjectHandle::setFilterOnWrite(bool val) { - assertStream(); - dynamic_cast<QPDF_Stream*>(obj.get())->setFilterOnWrite(val); + asStreamWithAssert()->setFilterOnWrite(val); } bool QPDFObjectHandle::getFilterOnWrite() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getFilterOnWrite(); + return asStreamWithAssert()->getFilterOnWrite(); } bool QPDFObjectHandle::isDataModified() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->isDataModified(); + return asStreamWithAssert()->isDataModified(); } void QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict) { - assertStream(); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceDict(new_dict); + asStreamWithAssert()->replaceDict(new_dict); } std::shared_ptr<Buffer> QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamData(level); + return asStreamWithAssert()->getStreamData(level); } std::shared_ptr<Buffer> QPDFObjectHandle::getRawStreamData() { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getRawStreamData(); + return asStreamWithAssert()->getRawStreamData(); } bool @@ -1347,8 +1379,7 @@ QPDFObjectHandle::pipeStreamData( bool suppress_warnings, bool will_retry) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->pipeStreamData( + return asStreamWithAssert()->pipeStreamData( p, filtering_attempted, encode_flags, @@ -1365,9 +1396,8 @@ QPDFObjectHandle::pipeStreamData( bool suppress_warnings, bool will_retry) { - assertStream(); bool filtering_attempted; - dynamic_cast<QPDF_Stream*>(obj.get())->pipeStreamData( + asStreamWithAssert()->pipeStreamData( p, &filtering_attempted, encode_flags, @@ -1401,9 +1431,7 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - data, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(data, filter, decode_parms); } void @@ -1412,14 +1440,12 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); auto b = std::make_shared<Buffer>(data.length()); unsigned char* bp = b->getBuffer(); if (bp) { memcpy(bp, data.c_str(), data.length()); } - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - b, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(b, filter, decode_parms); } void @@ -1428,9 +1454,7 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - provider, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(provider, filter, decode_parms); } namespace @@ -1479,11 +1503,9 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - sdp, filter, decode_parms); + asStreamWithAssert()->replaceStreamData(sdp, filter, decode_parms); } void @@ -1492,29 +1514,9 @@ QPDFObjectHandle::replaceStreamData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms) { - assertStream(); auto sdp = std::shared_ptr<StreamDataProvider>(new FunctionProvider(provider)); - dynamic_cast<QPDF_Stream*>(obj.get())->replaceStreamData( - sdp, filter, decode_parms); -} - -QPDFObjGen -QPDFObjectHandle::getObjGen() const -{ - return og; -} - -int -QPDFObjectHandle::getObjectID() const -{ - return og.getObj(); -} - -int -QPDFObjectHandle::getGeneration() const -{ - return og.getGen(); + asStreamWithAssert()->replaceStreamData(sdp, filter, decode_parms); } std::map<std::string, QPDFObjectHandle> @@ -1529,10 +1531,11 @@ QPDFObjectHandle::arrayOrStreamToStreamArray( { all_description = description; std::vector<QPDFObjectHandle> result; - if (isArray()) { - int n_items = getArrayNItems(); + auto array = asArray(); + if (array) { + int n_items = array->getNItems(); for (int i = 0; i < n_items; ++i) { - QPDFObjectHandle item = getArrayItem(i); + QPDFObjectHandle item = array->getItem(i); if (item.isStream()) { result.push_back(item); } else { @@ -1664,16 +1667,15 @@ QPDFObjectHandle::coalesceContentStreams() // files may have pages that are invalid in other ways. return; } - QPDF* qpdf = getOwningQPDF(); - if (qpdf == nullptr) { - // Should not be possible for a page object to not have an - // owning PDF unless it was manually constructed in some - // incorrect way. However, it can happen in a PDF file whose - // page structure is direct, which is against spec but still - // possible to hand construct, as in fuzz issue 27393. - throw std::runtime_error("coalesceContentStreams called on object" - " with no associated PDF file"); - } + // Should not be possible for a page object to not have an + // owning PDF unless it was manually constructed in some + // incorrect way. However, it can happen in a PDF file whose + // page structure is direct, which is against spec but still + // possible to hand construct, as in fuzz issue 27393. + QPDF* qpdf = getOwningQPDF( + false, + "coalesceContentStreams called on object with no associated PDF file"); + QPDFObjectHandle new_contents = newStream(qpdf); this->replaceKey("/Contents", new_contents); @@ -1700,18 +1702,16 @@ QPDFObjectHandle::unparseResolved() if (!dereference()) { throw std::logic_error( "attempted to dereference an uninitialized QPDFObjectHandle"); - } else if (this->reserved) { - throw std::logic_error( - "QPDFObjectHandle: attempting to unparse a reserved object"); } - return this->obj->unparse(); + return obj->unparse(); } std::string QPDFObjectHandle::unparseBinary() { - if (this->isString()) { - return dynamic_cast<QPDF_String*>(this->obj.get())->unparse(true); + auto str = asString(); + if (str) { + return str->unparse(true); } else { return unparse(); } @@ -1727,16 +1727,13 @@ QPDFObjectHandle::getJSON(bool dereference_indirect) JSON QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect) { - if ((!dereference_indirect) && this->isIndirect()) { + if ((!dereference_indirect) && isIndirect()) { return JSON::makeString(unparse()); } else if (!dereference()) { throw std::logic_error( "attempted to dereference an uninitialized QPDFObjectHandle"); - } else if (this->reserved) { - throw std::logic_error( - "QPDFObjectHandle: attempting to unparse a reserved object"); } else { - return this->obj->getJSON(json_version); + return obj->getJSON(json_version); } } @@ -1748,8 +1745,7 @@ QPDFObjectHandle::getStreamJSON( Pipeline* p, std::string const& data_filename) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamJSON( + return asStreamWithAssert()->getStreamJSON( json_version, json_data, decode_level, p, data_filename); } @@ -1918,8 +1914,8 @@ QPDFObjectHandle::parseContentStream_data( tokenizer.readToken(input, "content", true); qpdf_offset_t offset = input->getLastOffset(); input->seek(offset, SEEK_SET); - QPDFObjectHandle obj = parseInternal( - input, "content", tokenizer, empty, nullptr, context, true); + auto obj = QPDFParser(input, "content", tokenizer, nullptr, context) + .parse(empty, true); if (!obj.isInitialized()) { // EOF break; @@ -1969,8 +1965,7 @@ QPDFObjectHandle::addContentTokenFilter(std::shared_ptr<TokenFilter> filter) void QPDFObjectHandle::addTokenFilter(std::shared_ptr<TokenFilter> filter) { - assertStream(); - return dynamic_cast<QPDF_Stream*>(obj.get())->addTokenFilter(filter); + return asStreamWithAssert()->addTokenFilter(filter); } QPDFObjectHandle @@ -1982,497 +1977,8 @@ QPDFObjectHandle::parse( StringDecrypter* decrypter, QPDF* context) { - return parseInternal( - input, object_description, tokenizer, empty, decrypter, context, false); -} - -QPDFObjectHandle -QPDFObjectHandle::parseInternal( - std::shared_ptr<InputSource> input, - std::string const& object_description, - QPDFTokenizer& tokenizer, - bool& empty, - StringDecrypter* decrypter, - QPDF* context, - bool content_stream) -{ - // This method must take care not to resolve any objects. Don't - // check the type of any object without first ensuring that it is - // a direct object. Otherwise, doing so may have the side effect - // of reading the object and changing the file pointer. If you do - // this, it will cause a logic error to be thrown from - // QPDF::inParse(). - - QPDF::ParseGuard pg(context); - - empty = false; - - QPDFObjectHandle object; - bool set_offset = false; - - std::vector<SparseOHArray> olist_stack; - olist_stack.push_back(SparseOHArray()); - std::vector<parser_state_e> state_stack; - state_stack.push_back(st_top); - std::vector<qpdf_offset_t> offset_stack; - qpdf_offset_t offset = input->tell(); - offset_stack.push_back(offset); - bool done = false; - int bad_count = 0; - int good_count = 0; - bool b_contents = false; - std::vector<std::string> contents_string_stack; - contents_string_stack.push_back(""); - std::vector<qpdf_offset_t> contents_offset_stack; - contents_offset_stack.push_back(-1); - while (!done) { - bool bad = false; - SparseOHArray& olist = olist_stack.back(); - parser_state_e state = state_stack.back(); - offset = offset_stack.back(); - std::string& contents_string = contents_string_stack.back(); - qpdf_offset_t& contents_offset = contents_offset_stack.back(); - - object = QPDFObjectHandle(); - set_offset = false; - - QPDFTokenizer::Token token = - tokenizer.readToken(input, object_description, true); - std::string const& token_error_message = token.getErrorMessage(); - if (!token_error_message.empty()) { - // Tokens other than tt_bad can still generate warnings. - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - token_error_message)); - } - - switch (token.getType()) { - case QPDFTokenizer::tt_eof: - if (!content_stream) { - QTC::TC("qpdf", "QPDFObjectHandle eof in parseInternal"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "unexpected EOF")); - } - bad = true; - state = st_eof; - break; - - case QPDFTokenizer::tt_bad: - QTC::TC("qpdf", "QPDFObjectHandle bad token in parse"); - bad = true; - object = newNull(); - break; - - case QPDFTokenizer::tt_brace_open: - case QPDFTokenizer::tt_brace_close: - QTC::TC("qpdf", "QPDFObjectHandle bad brace"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "treating unexpected brace token as null")); - bad = true; - object = newNull(); - break; - - case QPDFTokenizer::tt_array_close: - if (state == st_array) { - state = st_stop; - } else { - QTC::TC("qpdf", "QPDFObjectHandle bad array close"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "treating unexpected array close token as null")); - bad = true; - object = newNull(); - } - break; - - case QPDFTokenizer::tt_dict_close: - if (state == st_dictionary) { - state = st_stop; - } else { - QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "unexpected dictionary close token")); - bad = true; - object = newNull(); - } - break; - - case QPDFTokenizer::tt_array_open: - case QPDFTokenizer::tt_dict_open: - if (olist_stack.size() > 500) { - QTC::TC("qpdf", "QPDFObjectHandle too deep"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "ignoring excessively deeply nested data structure")); - bad = true; - object = newNull(); - state = st_top; - } else { - olist_stack.push_back(SparseOHArray()); - state = st_start; - offset_stack.push_back(input->tell()); - state_stack.push_back( - (token.getType() == QPDFTokenizer::tt_array_open) - ? st_array - : st_dictionary); - b_contents = false; - contents_string_stack.push_back(""); - contents_offset_stack.push_back(-1); - } - break; - - case QPDFTokenizer::tt_bool: - object = newBool((token.getValue() == "true")); - break; - - case QPDFTokenizer::tt_null: - object = newNull(); - break; - - case QPDFTokenizer::tt_integer: - object = newInteger(QUtil::string_to_ll(token.getValue().c_str())); - break; - - case QPDFTokenizer::tt_real: - object = newReal(token.getValue()); - break; - - case QPDFTokenizer::tt_name: - { - std::string name = token.getValue(); - object = newName(name); - - if (name == "/Contents") { - b_contents = true; - } else { - b_contents = false; - } - } - break; - - case QPDFTokenizer::tt_word: - { - std::string const& value = token.getValue(); - if (content_stream) { - object = QPDFObjectHandle::newOperator(value); - } else if ( - (value == "R") && (state != st_top) && - (olist.size() >= 2) && - (!olist.at(olist.size() - 1).isIndirect()) && - (olist.at(olist.size() - 1).isInteger()) && - (!olist.at(olist.size() - 2).isIndirect()) && - (olist.at(olist.size() - 2).isInteger())) { - if (context == nullptr) { - QTC::TC( - "qpdf", - "QPDFObjectHandle indirect without context"); - throw std::logic_error( - "QPDFObjectHandle::parse called without context" - " on an object with indirect references"); - } - // Try to resolve indirect objects - object = newIndirect( - context, - QPDFObjGen( - olist.at(olist.size() - 2).getIntValueAsInt(), - olist.at(olist.size() - 1).getIntValueAsInt())); - olist.remove_last(); - olist.remove_last(); - } else if ((value == "endobj") && (state == st_top)) { - // We just saw endobj without having read - // anything. Treat this as a null and do not move - // the input source's offset. - object = newNull(); - input->seek(input->getLastOffset(), SEEK_SET); - empty = true; - } else { - QTC::TC("qpdf", "QPDFObjectHandle treat word as string"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "unknown token while reading object;" - " treating as string")); - bad = true; - object = newString(value); - } - } - break; - - case QPDFTokenizer::tt_string: - { - std::string val = token.getValue(); - if (decrypter) { - if (b_contents) { - contents_string = val; - contents_offset = input->getLastOffset(); - b_contents = false; - } - decrypter->decryptString(val); - } - object = QPDFObjectHandle::newString(val); - } - - break; - - default: - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "treating unknown token type as null while " - "reading object")); - bad = true; - object = newNull(); - break; - } - - if ((!object.isInitialized()) && - (!((state == st_start) || (state == st_stop) || - (state == st_eof)))) { - throw std::logic_error("QPDFObjectHandle::parseInternal: " - "unexpected uninitialized object"); - object = newNull(); - } - - if (bad) { - ++bad_count; - good_count = 0; - } else { - ++good_count; - if (good_count > 3) { - bad_count = 0; - } - } - if (bad_count > 5) { - // We had too many consecutive errors without enough - // intervening successful objects. Give up. - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "too many errors; giving up on reading object")); - state = st_top; - object = newNull(); - } - - switch (state) { - case st_eof: - if (state_stack.size() > 1) { - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - input->getLastOffset(), - "parse error while reading object")); - } - done = true; - // In content stream mode, leave object uninitialized to - // indicate EOF - if (!content_stream) { - object = newNull(); - } - break; - - case st_dictionary: - case st_array: - setObjectDescriptionFromInput( - object, - context, - object_description, - input, - input->getLastOffset()); - object.setParsedOffset(input->getLastOffset()); - set_offset = true; - olist.append(object); - break; - - case st_top: - done = true; - break; - - case st_start: - break; - - case st_stop: - if ((state_stack.size() < 2) || (olist_stack.size() < 2)) { - throw std::logic_error( - "QPDFObjectHandle::parseInternal: st_stop encountered" - " with insufficient elements in stack"); - } - parser_state_e old_state = state_stack.back(); - state_stack.pop_back(); - if (old_state == st_array) { - // There's no newArray(SparseOHArray) since - // SparseOHArray is not part of the public API. - object = QPDFObjectHandle(QPDF_Array::create(olist)); - setObjectDescriptionFromInput( - object, context, object_description, input, offset); - // The `offset` points to the next of "[". Set the - // rewind offset to point to the beginning of "[". - // This has been explicitly tested with whitespace - // surrounding the array start delimiter. - // getLastOffset points to the array end token and - // therefore can't be used here. - object.setParsedOffset(offset - 1); - set_offset = true; - } else if (old_state == st_dictionary) { - // Convert list to map. Alternating elements are keys. - // Attempt to recover more or less gracefully from - // invalid dictionaries. - std::set<std::string> names; - size_t n_elements = olist.size(); - for (size_t i = 0; i < n_elements; ++i) { - QPDFObjectHandle oh = olist.at(i); - if ((!oh.isIndirect()) && oh.isName()) { - names.insert(oh.getName()); - } - } - - std::map<std::string, QPDFObjectHandle> dict; - int next_fake_key = 1; - for (unsigned int i = 0; i < olist.size(); ++i) { - QPDFObjectHandle key_obj = olist.at(i); - QPDFObjectHandle val; - if (key_obj.isIndirect() || (!key_obj.isName())) { - bool found_fake = false; - std::string candidate; - while (!found_fake) { - candidate = "/QPDFFake" + - QUtil::int_to_string(next_fake_key++); - found_fake = (names.count(candidate) == 0); - QTC::TC( - "qpdf", - "QPDFObjectHandle found fake", - (found_fake ? 0 : 1)); - } - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - offset, - "expected dictionary key but found" - " non-name object; inserting key " + - candidate)); - val = key_obj; - key_obj = newName(candidate); - } else if (i + 1 >= olist.size()) { - QTC::TC("qpdf", "QPDFObjectHandle no val for last key"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - offset, - "dictionary ended prematurely; " - "using null as value for last key")); - val = newNull(); - setObjectDescriptionFromInput( - val, context, object_description, input, offset); - } else { - val = olist.at(++i); - } - std::string key = key_obj.getName(); - if (dict.count(key) > 0) { - QTC::TC("qpdf", "QPDFObjectHandle duplicate dict key"); - warn( - context, - QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - object_description, - offset, - "dictionary has duplicated key " + key + - "; last occurrence overrides earlier " - "ones")); - } - dict[key] = val; - } - if (!contents_string.empty() && dict.count("/Type") && - dict["/Type"].isNameAndEquals("/Sig") && - dict.count("/ByteRange") && dict.count("/Contents") && - dict["/Contents"].isString()) { - dict["/Contents"] = - QPDFObjectHandle::newString(contents_string); - dict["/Contents"].setParsedOffset(contents_offset); - } - object = newDictionary(dict); - setObjectDescriptionFromInput( - object, context, object_description, input, offset); - // The `offset` points to the next of "<<". Set the - // rewind offset to point to the beginning of "<<". - // This has been explicitly tested with whitespace - // surrounding the dictionary start delimiter. - // getLastOffset points to the dictionary end token - // and therefore can't be used here. - object.setParsedOffset(offset - 2); - set_offset = true; - } - olist_stack.pop_back(); - offset_stack.pop_back(); - if (state_stack.back() == st_top) { - done = true; - } else { - olist_stack.back().append(object); - } - contents_string_stack.pop_back(); - contents_offset_stack.pop_back(); - } - } - - if (!set_offset) { - setObjectDescriptionFromInput( - object, context, object_description, input, offset); - object.setParsedOffset(offset); - } - return object; + return QPDFParser(input, object_description, tokenizer, decrypter, context) + .parse(empty, false); } qpdf_offset_t @@ -2485,31 +1991,6 @@ QPDFObjectHandle::getParsedOffset() } } -void -QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset) -{ - // This is called during parsing on newly created direct objects, - // so we can't call dereference() here. - if (this->obj.get()) { - this->obj->setParsedOffset(offset); - } -} - -QPDFObjectHandle -QPDFObjectHandle::newIndirect(QPDF* qpdf, QPDFObjGen const& og) -{ - if (!og.isIndirect()) { - // Special case: QPDF uses objid 0 as a sentinel for direct - // objects, and the PDF specification doesn't allow for object - // 0. Treat indirect references to object 0 as null so that we - // never create an indirect object with objid 0. - QTC::TC("qpdf", "QPDFObjectHandle indirect with 0 objid"); - return newNull(); - } - - return QPDFObjectHandle(qpdf, og); -} - QPDFObjectHandle QPDFObjectHandle::newBool(bool value) { @@ -2679,8 +2160,7 @@ QPDFObjectHandle::newStream(QPDF* qpdf) QPDFObjectHandle stream_dict = newDictionary(); QPDFObjectHandle result = qpdf->makeIndirectObject(QPDFObjectHandle( QPDF_Stream::create(qpdf, QPDFObjGen(), stream_dict, 0, 0))); - result.dereference(); - QPDF_Stream* stream = dynamic_cast<QPDF_Stream*>(result.obj.get()); + auto stream = result.asStream(); stream->setObjGen(result.getObjGen()); return result; } @@ -2706,18 +2186,7 @@ QPDFObjectHandle::newStream(QPDF* qpdf, std::string const& data) QPDFObjectHandle QPDFObjectHandle::newReserved(QPDF* qpdf) { - // Reserve a spot for this object by assigning it an object - // number, but then return an unresolved handle to the object. - QPDFObjectHandle reserved = qpdf->makeIndirectObject(makeReserved()); - QPDFObjectHandle result = newIndirect(qpdf, reserved.getObjGen()); - result.reserved = true; - return result; -} - -QPDFObjectHandle -QPDFObjectHandle::makeReserved() -{ - return QPDFObjectHandle(QPDF_Reserved::create()); + return qpdf->makeIndirectObject(QPDFObjectHandle(QPDF_Reserved::create())); } void @@ -2763,12 +2232,7 @@ QPDFObjectHandle::shallowCopyInternal( QTC::TC("qpdf", "QPDFObjectHandle ERR shallow copy stream"); throw std::runtime_error("attempt to make a shallow copy of a stream"); } - - if (isArray() || isDictionary()) { - new_obj = QPDFObjectHandle(obj->shallowCopy()); - } else { - new_obj = *this; - } + new_obj = QPDFObjectHandle(obj->shallowCopy()); std::set<QPDFObjGen> visited; new_obj.copyObject(visited, false, first_level_only, false); @@ -2809,9 +2273,6 @@ QPDFObjectHandle::copyObject( " reserved object handle direct"); } - qpdf = nullptr; - og = QPDFObjGen(); - std::shared_ptr<QPDFObject> new_obj; if (isBool() || isInteger() || isName() || isNull() || isReal() || @@ -2819,9 +2280,10 @@ QPDFObjectHandle::copyObject( new_obj = obj->shallowCopy(); } else if (isArray()) { std::vector<QPDFObjectHandle> items; - int n = getArrayNItems(); + auto array = asArray(); + int n = array->getNItems(); for (int i = 0; i < n; ++i) { - items.push_back(getArrayItem(i)); + items.push_back(array->getItem(i)); if ((!first_level_only) && (cross_indirect || (!items.back().isIndirect()))) { items.back().copyObject( @@ -2831,8 +2293,9 @@ QPDFObjectHandle::copyObject( new_obj = QPDF_Array::create(items); } else if (isDictionary()) { std::map<std::string, QPDFObjectHandle> items; + auto dict = asDictionary(); for (auto const& key: getKeys()) { - items[key] = getKey(key); + items[key] = dict->getKey(key); if ((!first_level_only) && (cross_indirect || (!items[key].isIndirect()))) { items[key].copyObject( @@ -2880,7 +2343,7 @@ QPDFObjectHandle::makeDirect(bool allow_streams) void QPDFObjectHandle::assertInitialized() const { - if (!this->initialized) { + if (!isInitialized()) { throw std::logic_error("operation attempted on uninitialized " "QPDFObjectHandle"); } @@ -3095,8 +2558,9 @@ QPDFObjectHandle::isImage(bool exclude_imagemask) void QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const { - if ((this->qpdf != nullptr) && (item.qpdf != nullptr) && - (this->qpdf != item.qpdf)) { + auto qpdf = getOwningQPDF(); + auto item_qpdf = item.getOwningQPDF(); + if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) { QTC::TC("qpdf", "QPDFObjectHandle check ownership"); throw std::logic_error( "Attempting to add an object from a different QPDF." @@ -3115,28 +2579,10 @@ QPDFObjectHandle::assertPageObject() bool QPDFObjectHandle::dereference() { - if (!this->initialized) { + if (!isInitialized()) { return false; } - if (this->obj.get() && getObjectID() && - QPDF::Resolver::objectChanged(this->qpdf, getObjGen(), this->obj)) { - this->obj = nullptr; - } - if (this->obj.get() == nullptr) { - std::shared_ptr<QPDFObject> obj = - QPDF::Resolver::resolve(this->qpdf, getObjGen()); - if (obj.get() == nullptr) { - // QPDF::resolve never returns an uninitialized object, but - // check just in case. - this->obj = QPDF_Null::create(); - } else if (dynamic_cast<QPDF_Reserved*>(obj.get())) { - // Do not resolve - this->reserved = true; - } else { - this->reserved = false; - this->obj = obj; - } - } + this->obj->resolve(); return true; } diff --git a/libqpdf/QPDFOutlineDocumentHelper.cc b/libqpdf/QPDFOutlineDocumentHelper.cc index 4149ea1e..5b2f71f6 100644 --- a/libqpdf/QPDFOutlineDocumentHelper.cc +++ b/libqpdf/QPDFOutlineDocumentHelper.cc @@ -82,7 +82,7 @@ QPDFOutlineDocumentHelper::resolveNamedDest(QPDFObjectHandle name) result = this->m->dest_dict.getKey(name.getName()); } } else if (name.isString()) { - if (nullptr == this->m->names_dest.get()) { + if (nullptr == this->m->names_dest) { QPDFObjectHandle names = this->qpdf.getRoot().getKey("/Names"); if (names.isDictionary()) { QPDFObjectHandle dests = names.getKey("/Dests"); diff --git a/libqpdf/QPDFPageLabelDocumentHelper.cc b/libqpdf/QPDFPageLabelDocumentHelper.cc index 805dc40f..08a35097 100644 --- a/libqpdf/QPDFPageLabelDocumentHelper.cc +++ b/libqpdf/QPDFPageLabelDocumentHelper.cc @@ -16,7 +16,7 @@ QPDFPageLabelDocumentHelper::QPDFPageLabelDocumentHelper(QPDF& qpdf) : bool QPDFPageLabelDocumentHelper::hasPageLabels() { - return nullptr != this->m->labels.get(); + return nullptr != this->m->labels; } QPDFObjectHandle diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index 9ad75cf8..23a54231 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -432,7 +432,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow) this->oh.replaceKey( "/Contents", QPDFObjectHandle::newStream( - this->oh.getOwningQPDF(), b.getBufferSharedPointer())); + this->oh.getOwningQPDF(false), + b.getBufferSharedPointer())); } } } else { @@ -683,11 +684,10 @@ QPDFPageObjectHelper::removeUnreferencedResources() QPDFPageObjectHelper QPDFPageObjectHelper::shallowCopyPage() { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::shallowCopyPage" - " called with a direct object"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::shallowCopyPage called with a direct object"); + QPDFObjectHandle new_page = this->oh.shallowCopy(); return QPDFPageObjectHelper(qpdf->makeIndirectObject(new_page)); } @@ -743,11 +743,10 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert) QPDFObjectHandle QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations) { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::getFormXObjectForPage" - " called with a direct object"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::getFormXObjectForPage called with a direct " + "object"); QPDFObjectHandle result = QPDFObjectHandle::newStream(qpdf); QPDFObjectHandle newdict = result.getDict(); newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject")); @@ -917,11 +916,9 @@ QPDFPageObjectHelper::placeFormXObject( void QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) { - QPDF* qpdf = this->oh.getOwningQPDF(); - if (!qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::flattenRotation" - " called with a direct object"); - } + QPDF* qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::flattenRotation called with a direct object"); auto rotate_oh = this->oh.getKey("/Rotate"); int rotate = 0; @@ -1066,16 +1063,12 @@ QPDFPageObjectHelper::copyAnnotations( return; } - QPDF* from_qpdf = from_page.getObjectHandle().getOwningQPDF(); - if (!from_qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::copyAnnotations:" - " from page is a direct object"); - } - QPDF* this_qpdf = this->oh.getOwningQPDF(); - if (!this_qpdf) { - throw std::runtime_error("QPDFPageObjectHelper::copyAnnotations:" - " this page is a direct object"); - } + QPDF* from_qpdf = from_page.getObjectHandle().getOwningQPDF( + false, + "QPDFPageObjectHelper::copyAnnotations: from page is a direct object"); + QPDF* this_qpdf = this->oh.getOwningQPDF( + false, + "QPDFPageObjectHelper::copyAnnotations: this page is a direct object"); std::vector<QPDFObjectHandle> new_annots; std::vector<QPDFObjectHandle> new_fields; diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc new file mode 100644 index 00000000..9aa1f426 --- /dev/null +++ b/libqpdf/QPDFParser.cc @@ -0,0 +1,453 @@ +#include <qpdf/QPDFParser.hh> + +#include <qpdf/QPDF.hh> +#include <qpdf/QPDFObjGen.hh> +#include <qpdf/QPDFObjectHandle.hh> +#include <qpdf/QTC.hh> +#include <qpdf/QUtil.hh> + +namespace +{ + struct StackFrame + { + StackFrame(std::shared_ptr<InputSource> input) : + offset(input->tell()), + contents_string(""), + contents_offset(-1) + { + } + + std::vector<QPDFObjectHandle> olist; + qpdf_offset_t offset; + std::string contents_string; + qpdf_offset_t contents_offset; + }; +} // namespace + +QPDFObjectHandle +QPDFParser::parse(bool& empty, bool content_stream) +{ + // This method must take care not to resolve any objects. Don't + // check the type of any object without first ensuring that it is + // a direct object. Otherwise, doing so may have the side effect + // of reading the object and changing the file pointer. If you do + // this, it will cause a logic error to be thrown from + // QPDF::inParse(). + + QPDF::ParseGuard pg(context); + + empty = false; + + QPDFObjectHandle object; + bool set_offset = false; + + std::vector<StackFrame> stack; + stack.push_back(StackFrame(input)); + std::vector<parser_state_e> state_stack; + state_stack.push_back(st_top); + qpdf_offset_t offset; + bool done = false; + int bad_count = 0; + int good_count = 0; + bool b_contents = false; + bool is_null = false; + auto null_oh = QPDFObjectHandle::newNull(); + + while (!done) { + bool bad = false; + bool indirect_ref = false; + is_null = false; + auto& frame = stack.back(); + auto& olist = frame.olist; + parser_state_e state = state_stack.back(); + offset = frame.offset; + + object = QPDFObjectHandle(); + set_offset = false; + + QPDFTokenizer::Token token = + tokenizer.readToken(input, object_description, true); + std::string const& token_error_message = token.getErrorMessage(); + if (!token_error_message.empty()) { + // Tokens other than tt_bad can still generate warnings. + warn(token_error_message); + } + + switch (token.getType()) { + case QPDFTokenizer::tt_eof: + if (!content_stream) { + QTC::TC("qpdf", "QPDFParser eof in parse"); + warn("unexpected EOF"); + } + bad = true; + state = st_eof; + break; + + case QPDFTokenizer::tt_bad: + QTC::TC("qpdf", "QPDFParser bad token in parse"); + bad = true; + is_null = true; + break; + + case QPDFTokenizer::tt_brace_open: + case QPDFTokenizer::tt_brace_close: + QTC::TC("qpdf", "QPDFParser bad brace"); + warn("treating unexpected brace token as null"); + bad = true; + is_null = true; + break; + + case QPDFTokenizer::tt_array_close: + if (state == st_array) { + state = st_stop; + } else { + QTC::TC("qpdf", "QPDFParser bad array close"); + warn("treating unexpected array close token as null"); + bad = true; + is_null = true; + } + break; + + case QPDFTokenizer::tt_dict_close: + if (state == st_dictionary) { + state = st_stop; + } else { + QTC::TC("qpdf", "QPDFParser bad dictionary close"); + warn("unexpected dictionary close token"); + bad = true; + is_null = true; + } + break; + + case QPDFTokenizer::tt_array_open: + case QPDFTokenizer::tt_dict_open: + if (stack.size() > 500) { + QTC::TC("qpdf", "QPDFParser too deep"); + warn("ignoring excessively deeply nested data structure"); + bad = true; + is_null = true; + state = st_top; + } else { + state = st_start; + state_stack.push_back( + (token.getType() == QPDFTokenizer::tt_array_open) + ? st_array + : st_dictionary); + b_contents = false; + stack.push_back(StackFrame(input)); + } + break; + + case QPDFTokenizer::tt_bool: + object = QPDFObjectHandle::newBool((token.getValue() == "true")); + break; + + case QPDFTokenizer::tt_null: + is_null = true; + break; + + case QPDFTokenizer::tt_integer: + object = QPDFObjectHandle::newInteger( + QUtil::string_to_ll(token.getValue().c_str())); + break; + + case QPDFTokenizer::tt_real: + object = QPDFObjectHandle::newReal(token.getValue()); + break; + + case QPDFTokenizer::tt_name: + { + std::string name = token.getValue(); + object = QPDFObjectHandle::newName(name); + + if (name == "/Contents") { + b_contents = true; + } else { + b_contents = false; + } + } + break; + + case QPDFTokenizer::tt_word: + { + std::string const& value = token.getValue(); + auto size = olist.size(); + if (content_stream) { + object = QPDFObjectHandle::newOperator(value); + } else if ( + (value == "R") && (state != st_top) && (size >= 2) && + (!olist.back().isIndirect()) && + (olist.back().isInteger()) && + (!olist.at(size - 2).isIndirect()) && + (olist.at(size - 2).isInteger())) { + if (context == nullptr) { + QTC::TC("qpdf", "QPDFParser indirect without context"); + throw std::logic_error( + "QPDFObjectHandle::parse called without context" + " on an object with indirect references"); + } + auto ref_og = QPDFObjGen( + olist.at(size - 2).getIntValueAsInt(), + olist.back().getIntValueAsInt()); + if (ref_og.isIndirect()) { + object = context->getObject(ref_og); + indirect_ref = true; + } else { + QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); + is_null = true; + } + olist.pop_back(); + olist.pop_back(); + } else if ((value == "endobj") && (state == st_top)) { + // We just saw endobj without having read + // anything. Treat this as a null and do not move + // the input source's offset. + is_null = true; + input->seek(input->getLastOffset(), SEEK_SET); + empty = true; + } else { + QTC::TC("qpdf", "QPDFParser treat word as string"); + warn("unknown token while reading object;" + " treating as string"); + bad = true; + object = QPDFObjectHandle::newString(value); + } + } + break; + + case QPDFTokenizer::tt_string: + { + std::string val = token.getValue(); + if (decrypter) { + if (b_contents) { + frame.contents_string = val; + frame.contents_offset = input->getLastOffset(); + b_contents = false; + } + decrypter->decryptString(val); + } + object = QPDFObjectHandle::newString(val); + } + + break; + + default: + warn("treating unknown token type as null while " + "reading object"); + bad = true; + is_null = true; + break; + } + + if (!object.isInitialized() && !is_null && + (!((state == st_start) || (state == st_stop) || + (state == st_eof)))) { + throw std::logic_error("QPDFObjectHandle::parseInternal: " + "unexpected uninitialized object"); + is_null = true; + } + + if (bad) { + ++bad_count; + good_count = 0; + } else { + ++good_count; + if (good_count > 3) { + bad_count = 0; + } + } + if (bad_count > 5) { + // We had too many consecutive errors without enough + // intervening successful objects. Give up. + warn("too many errors; giving up on reading object"); + state = st_top; + is_null = true; + } + + switch (state) { + case st_eof: + if (state_stack.size() > 1) { + warn("parse error while reading object"); + } + done = true; + // In content stream mode, leave object uninitialized to + // indicate EOF + if (!content_stream) { + is_null = true; + } + break; + + case st_dictionary: + case st_array: + if (!indirect_ref && !object.isDirectNull()) { + // No need to set description for direct nulls - they will + // become implicit. + setDescriptionFromInput(object, input->getLastOffset()); + object.setParsedOffset(input->getLastOffset()); + } + set_offset = true; + olist.push_back(is_null ? null_oh : object); + break; + + case st_top: + done = true; + break; + + case st_start: + break; + + case st_stop: + if ((state_stack.size() < 2) || (stack.size() < 2)) { + throw std::logic_error( + "QPDFObjectHandle::parseInternal: st_stop encountered" + " with insufficient elements in stack"); + } + parser_state_e old_state = state_stack.back(); + state_stack.pop_back(); + if (old_state == st_array) { + object = QPDFObjectHandle::newArray(olist); + setDescriptionFromInput(object, offset); + // The `offset` points to the next of "[". Set the rewind + // offset to point to the beginning of "[". This has been + // explicitly tested with whitespace surrounding the array start + // delimiter. getLastOffset points to the array end token and + // therefore can't be used here. + object.setParsedOffset(offset - 1); + set_offset = true; + } else if (old_state == st_dictionary) { + // Convert list to map. Alternating elements are keys. Attempt + // to recover more or less gracefully from invalid dictionaries. + std::set<std::string> names; + size_t n_elements = olist.size(); + for (size_t i = 0; i < n_elements; ++i) { + QPDFObjectHandle oh = olist.at(i); + if ((!oh.isIndirect()) && oh.isName()) { + names.insert(oh.getName()); + } + } + + std::map<std::string, QPDFObjectHandle> dict; + int next_fake_key = 1; + for (unsigned int i = 0; i < n_elements; ++i) { + QPDFObjectHandle key_obj = olist.at(i); + QPDFObjectHandle val; + if (key_obj.isIndirect() || (!key_obj.isName())) { + bool found_fake = false; + std::string candidate; + while (!found_fake) { + candidate = "/QPDFFake" + + QUtil::int_to_string(next_fake_key++); + found_fake = (names.count(candidate) == 0); + QTC::TC( + "qpdf", + "QPDFParser found fake", + (found_fake ? 0 : 1)); + } + warn( + offset, + "expected dictionary key but found" + " non-name object; inserting key " + + candidate); + val = key_obj; + key_obj = QPDFObjectHandle::newName(candidate); + } else if (i + 1 >= olist.size()) { + QTC::TC("qpdf", "QPDFParser no val for last key"); + warn( + offset, + "dictionary ended prematurely; " + "using null as value for last key"); + val = QPDFObjectHandle::newNull(); + setDescriptionFromInput(val, offset); + } else { + val = olist.at(++i); + } + std::string key = key_obj.getName(); + if (dict.count(key) > 0) { + QTC::TC("qpdf", "QPDFParser duplicate dict key"); + warn( + offset, + "dictionary has duplicated key " + key + + "; last occurrence overrides earlier " + "ones"); + } + dict[key] = val; + } + if (!frame.contents_string.empty() && dict.count("/Type") && + dict["/Type"].isNameAndEquals("/Sig") && + dict.count("/ByteRange") && dict.count("/Contents") && + dict["/Contents"].isString()) { + dict["/Contents"] = + QPDFObjectHandle::newString(frame.contents_string); + dict["/Contents"].setParsedOffset(frame.contents_offset); + } + object = QPDFObjectHandle::newDictionary(dict); + setDescriptionFromInput(object, offset); + // The `offset` points to the next of "<<". Set the rewind + // offset to point to the beginning of "<<". This has been + // explicitly tested with whitespace surrounding the dictionary + // start delimiter. getLastOffset points to the dictionary end + // token and therefore can't be used here. + object.setParsedOffset(offset - 2); + set_offset = true; + } + stack.pop_back(); + if (state_stack.back() == st_top) { + done = true; + } else { + stack.back().olist.push_back(is_null ? null_oh : object); + } + } + } + + if (is_null) { + object = QPDFObjectHandle::newNull(); + } + if (!set_offset) { + setDescriptionFromInput(object, offset); + object.setParsedOffset(offset); + } + return object; +} + +void +QPDFParser::setDescriptionFromInput( + QPDFObjectHandle oh, qpdf_offset_t offset) const +{ + oh.setObjectDescription( + context, + (input->getName() + ", " + object_description + " at offset " + + QUtil::int_to_string(offset))); +} + +void +QPDFParser::warn(QPDF* qpdf, QPDFExc const& e) +{ + // If parsing on behalf of a QPDF object and want to give a + // warning, we can warn through the object. If parsing for some + // other reason, such as an explicit creation of an object from a + // string, then just throw the exception. + if (qpdf) { + qpdf->warn(e); + } else { + throw e; + } +} + +void +QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const +{ + warn( + context, + QPDFExc( + qpdf_e_damaged_pdf, + input->getName(), + object_description, + offset, + msg)); +} + +void +QPDFParser::warn(std::string const& msg) const +{ + warn(input->getLastOffset(), msg); +} diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 1726e1b9..cd8f932d 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -73,28 +73,20 @@ QPDFWordTokenFinder::check() return true; } -QPDFTokenizer::Members::Members() : - allow_eof(false), - include_ignorable(false) -{ - reset(); -} - void -QPDFTokenizer::Members::reset() +QPDFTokenizer::reset() { - state = st_top; + state = st_before_token; type = tt_bad; - val = ""; - raw_val = ""; + val.clear(); + raw_val.clear(); error_message = ""; - unread_char = false; + before_token = true; + in_token = false; char_to_unread = '\0'; inline_image_bytes = 0; string_depth = 0; - string_ignoring_newline = false; - last_char_was_bs = false; - last_char_was_cr = false; + bad = false; } QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : @@ -110,20 +102,22 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : } QPDFTokenizer::QPDFTokenizer() : - m(new Members()) + allow_eof(false), + include_ignorable(false) { + reset(); } void QPDFTokenizer::allowEOF() { - this->m->allow_eof = true; + this->allow_eof = true; } void QPDFTokenizer::includeIgnorable() { - this->m->include_ignorable = true; + this->include_ignorable = true; } bool @@ -139,376 +133,719 @@ QPDFTokenizer::isDelimiter(char ch) } void -QPDFTokenizer::resolveLiteral() -{ - if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) { - this->m->type = tt_name; - // Deal with # in name token. Note: '/' by itself is a - // valid name, so don't strip leading /. That way we - // don't have to deal with the empty string as a name. - std::string nval = "/"; - size_t len = this->m->val.length(); - for (size_t i = 1; i < len; ++i) { - char ch = this->m->val.at(i); - if (ch == '#') { - if ((i + 2 < len) && - QUtil::is_hex_digit(this->m->val.at(i + 1)) && - QUtil::is_hex_digit(this->m->val.at(i + 2))) { - char num[3]; - num[0] = this->m->val.at(i + 1); - num[1] = this->m->val.at(i + 2); - num[2] = '\0'; - char ch2 = static_cast<char>(strtol(num, nullptr, 16)); - if (ch2 == '\0') { - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer null in name"); - this->m->error_message = - "null character not allowed in name token"; - nval += "#00"; - } else { - nval.append(1, ch2); - } - i += 2; - } else { - QTC::TC("qpdf", "QPDFTokenizer bad name"); - this->m->error_message = - "name with stray # will not work with PDF >= 1.2"; - // Use null to encode a bad # -- this is reversed - // in QPDF_Name::normalizeName. - nval += '\0'; - } - } else { - nval.append(1, ch); - } - } - this->m->val = nval; - } else if (QUtil::is_number(this->m->val.c_str())) { - if (this->m->val.find('.') != std::string::npos) { - this->m->type = tt_real; - } else { - this->m->type = tt_integer; - } - } else if ((this->m->val == "true") || (this->m->val == "false")) { - this->m->type = tt_bool; - } else if (this->m->val == "null") { - this->m->type = tt_null; - } else { - // I don't really know what it is, so leave it as tt_word. - // Lots of cases ($, #, etc.) other than actual words fall - // into this category, but that's okay at least for now. - this->m->type = tt_word; +QPDFTokenizer::presentCharacter(char ch) +{ + handleCharacter(ch); + + if (this->in_token) { + this->raw_val += ch; } } void -QPDFTokenizer::presentCharacter(char ch) +QPDFTokenizer::handleCharacter(char ch) { - if (this->m->state == st_token_ready) { + // State machine is implemented such that the final character may not be + // handled. This happens whenever you have to use a character from the + // next token to detect the end of the current token. + + switch (this->state) { + case st_top: + inTop(ch); + return; + + case st_in_space: + inSpace(ch); + return; + + case st_in_comment: + inComment(ch); + return; + + case st_lt: + inLt(ch); + return; + + case st_gt: + inGt(ch); + return; + + case st_in_string: + inString(ch); + return; + + case st_name: + inName(ch); + return; + + case st_number: + inNumber(ch); + return; + + case st_real: + inReal(ch); + return; + + case st_string_after_cr: + inStringAfterCR(ch); + return; + + case st_string_escape: + inStringEscape(ch); + return; + + case st_char_code: + inCharCode(ch); + return; + + case st_literal: + inLiteral(ch); + return; + + case st_inline_image: + inInlineImage(ch); + return; + + case st_in_hexstring: + inHexstring(ch); + return; + + case st_in_hexstring_2nd: + inHexstring2nd(ch); + return; + + case st_name_hex1: + inNameHex1(ch); + return; + + case st_name_hex2: + inNameHex2(ch); + return; + + case st_sign: + inSign(ch); + return; + + case st_decimal: + inDecimal(ch); + return; + + case (st_before_token): + inBeforeToken(ch); + return; + + case (st_token_ready): + inTokenReady(ch); + return; + + default: throw std::logic_error( - "INTERNAL ERROR: QPDF tokenizer presented character " - "while token is waiting"); + "INTERNAL ERROR: invalid state while reading token"); } +} - char orig_ch = ch; - - // State machine is implemented such that some characters may be - // handled more than once. This happens whenever you have to use - // the character that caused a state change in the new state. +void +QPDFTokenizer::inTokenReady(char ch) +{ + throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character " + "while token is waiting"); +} - bool handled = true; - if (this->m->state == st_top) { - // Note: we specifically do not use ctype here. It is - // locale-dependent. - if (isSpace(ch)) { - if (this->m->include_ignorable) { - this->m->state = st_in_space; - this->m->val += ch; - } - } else if (ch == '%') { - this->m->state = st_in_comment; - if (this->m->include_ignorable) { - this->m->val += ch; - } - } else if (ch == '(') { - this->m->string_depth = 1; - this->m->string_ignoring_newline = false; - memset( - this->m->bs_num_register, - '\0', - sizeof(this->m->bs_num_register)); - this->m->last_char_was_bs = false; - this->m->last_char_was_cr = false; - this->m->state = st_in_string; - } else if (ch == '<') { - this->m->state = st_lt; - } else if (ch == '>') { - this->m->state = st_gt; - } else { - this->m->val += ch; - if (ch == ')') { - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer bad )"); - this->m->error_message = "unexpected )"; - this->m->state = st_token_ready; - } else if (ch == '[') { - this->m->type = tt_array_open; - this->m->state = st_token_ready; - } else if (ch == ']') { - this->m->type = tt_array_close; - this->m->state = st_token_ready; - } else if (ch == '{') { - this->m->type = tt_brace_open; - this->m->state = st_token_ready; - } else if (ch == '}') { - this->m->type = tt_brace_close; - this->m->state = st_token_ready; - } else { - this->m->state = st_literal; - } - } - } else if (this->m->state == st_in_space) { - // We only enter this state if include_ignorable is true. - if (!isSpace(ch)) { - this->m->type = tt_space; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; - } else { - this->m->val += ch; - } - } else if (this->m->state == st_in_comment) { - if ((ch == '\r') || (ch == '\n')) { - if (this->m->include_ignorable) { - this->m->type = tt_comment; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; - } else { - this->m->state = st_top; - } - } else if (this->m->include_ignorable) { - this->m->val += ch; - } - } else if (this->m->state == st_lt) { - if (ch == '<') { - this->m->val = "<<"; - this->m->type = tt_dict_open; - this->m->state = st_token_ready; - } else { - handled = false; - this->m->state = st_in_hexstring; - } - } else if (this->m->state == st_gt) { - if (ch == '>') { - this->m->val = ">>"; - this->m->type = tt_dict_close; - this->m->state = st_token_ready; - } else { - this->m->val = ">"; - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer bad >"); - this->m->error_message = "unexpected >"; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; +void +QPDFTokenizer::inBeforeToken(char ch) +{ + // Note: we specifically do not use ctype here. It is + // locale-dependent. + if (isSpace(ch)) { + this->before_token = !this->include_ignorable; + this->in_token = this->include_ignorable; + if (this->include_ignorable) { + this->state = st_in_space; + this->val += ch; } - } else if (this->m->state == st_in_string) { - if (this->m->string_ignoring_newline && (ch != '\n')) { - this->m->string_ignoring_newline = false; + } else if (ch == '%') { + this->before_token = !this->include_ignorable; + this->in_token = this->include_ignorable; + this->state = st_in_comment; + if (this->include_ignorable) { + this->val += ch; } + } else { + this->before_token = false; + this->in_token = true; + inTop(ch); + } +} - size_t bs_num_count = strlen(this->m->bs_num_register); - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); - if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { - // We've accumulated \ddd. PDF Spec says to ignore - // high-order overflow. - this->m->val += - static_cast<char>(strtol(this->m->bs_num_register, nullptr, 8)); - memset( - this->m->bs_num_register, - '\0', - sizeof(this->m->bs_num_register)); - bs_num_count = 0; - } +void +QPDFTokenizer::inTop(char ch) +{ + switch (ch) { + case '(': + this->string_depth = 1; + this->state = st_in_string; + return; - if (this->m->string_ignoring_newline && (ch == '\n')) { - // ignore - this->m->string_ignoring_newline = false; - } else if ( - ch_is_octal && (this->m->last_char_was_bs || (bs_num_count > 0))) { - this->m->bs_num_register[bs_num_count++] = ch; - } else if (this->m->last_char_was_bs) { - switch (ch) { - case 'n': - this->m->val += '\n'; - break; + case '<': + this->state = st_lt; + return; - case 'r': - this->m->val += '\r'; - break; + case '>': + this->state = st_gt; + return; - case 't': - this->m->val += '\t'; - break; + case (')'): + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad )"); + this->error_message = "unexpected )"; + this->val += ch; + this->state = st_token_ready; + return; - case 'b': - this->m->val += '\b'; - break; + case '[': + this->type = tt_array_open; + this->state = st_token_ready; + this->val += ch; + return; - case 'f': - this->m->val += '\f'; - break; + case ']': + this->type = tt_array_close; + this->val += ch; + this->state = st_token_ready; + return; - case '\n': - break; + case '{': + this->type = tt_brace_open; + this->state = st_token_ready; + this->val += ch; + return; - case '\r': - this->m->string_ignoring_newline = true; - break; + case '}': + this->type = tt_brace_close; + this->state = st_token_ready; + this->val += ch; + return; - default: - // PDF spec says backslash is ignored before anything else - this->m->val += ch; - break; - } - } else if (ch == '\\') { - // last_char_was_bs is set/cleared below as appropriate - if (bs_num_count) { - throw std::logic_error( - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " - "when ch == '\\'"); - } - } else if (ch == '(') { - this->m->val += ch; - ++this->m->string_depth; - } else if ((ch == ')') && (--this->m->string_depth == 0)) { - this->m->type = tt_string; - this->m->state = st_token_ready; - } else if (ch == '\r') { - // CR by itself is converted to LF - this->m->val += '\n'; - } else if (ch == '\n') { - // CR LF is converted to LF - if (!this->m->last_char_was_cr) { - this->m->val += ch; - } - } else { - this->m->val += ch; - } + case '/': + this->state = st_name; + this->val += ch; + return; - this->m->last_char_was_cr = - ((!this->m->string_ignoring_newline) && (ch == '\r')); - this->m->last_char_was_bs = - ((!this->m->last_char_was_bs) && (ch == '\\')); - } else if (this->m->state == st_literal) { - if (isDelimiter(ch)) { - // A C-locale whitespace character or delimiter terminates - // token. It is important to unread the whitespace - // character even though it is ignored since it may be the - // newline after a stream keyword. Removing it here could - // make the stream-reading code break on some files, - // though not on any files in the test suite as of this - // writing. - - this->m->type = tt_word; - this->m->unread_char = true; - this->m->char_to_unread = ch; - this->m->state = st_token_ready; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this->state = st_number; + this->val += ch; + return; + + case '+': + case '-': + this->state = st_sign; + this->val += ch; + return; + + case '.': + this->state = st_decimal; + this->val += ch; + return; + + default: + this->state = st_literal; + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inSpace(char ch) +{ + // We only enter this state if include_ignorable is true. + if (!isSpace(ch)) { + this->type = tt_space; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + return; + } else { + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inComment(char ch) +{ + if ((ch == '\r') || (ch == '\n')) { + if (this->include_ignorable) { + this->type = tt_comment; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; } else { - this->m->val += ch; + this->state = st_before_token; } - } else if (this->m->state == st_inline_image) { - this->m->val += ch; - size_t len = this->m->val.length(); - if (len == this->m->inline_image_bytes) { - QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); - this->m->type = tt_inline_image; - this->m->inline_image_bytes = 0; - this->m->state = st_token_ready; + } else if (this->include_ignorable) { + this->val += ch; + } +} + +void +QPDFTokenizer::inString(char ch) +{ + switch (ch) { + case '\\': + this->state = st_string_escape; + return; + + case '(': + this->val += ch; + ++this->string_depth; + return; + + case ')': + if (--this->string_depth == 0) { + this->type = tt_string; + this->state = st_token_ready; + return; } + + this->val += ch; + return; + + case '\r': + // CR by itself is converted to LF + this->val += '\n'; + this->state = st_string_after_cr; + return; + + case '\n': + this->val += ch; + return; + + default: + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inName(char ch) +{ + if (isDelimiter(ch)) { + // A C-locale whitespace character or delimiter terminates + // token. It is important to unread the whitespace + // character even though it is ignored since it may be the + // newline after a stream keyword. Removing it here could + // make the stream-reading code break on some files, + // though not on any files in the test suite as of this + // writing. + + this->type = this->bad ? tt_bad : tt_name; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + } else if (ch == '#') { + this->char_code = 0; + this->state = st_name_hex1; } else { - handled = false; - } - - if (handled) { - // okay - } else if (this->m->state == st_in_hexstring) { - if (ch == '>') { - this->m->type = tt_string; - this->m->state = st_token_ready; - if (this->m->val.length() % 2) { - // PDF spec says odd hexstrings have implicit - // trailing 0. - this->m->val += '0'; - } - char num[3]; - num[2] = '\0'; - std::string nval; - for (unsigned int i = 0; i < this->m->val.length(); i += 2) { - num[0] = this->m->val.at(i); - num[1] = this->m->val.at(i + 1); - char nch = static_cast<char>(strtol(num, nullptr, 16)); - nval += nch; - } - this->m->val = nval; - } else if (QUtil::is_hex_digit(ch)) { - this->m->val += ch; - } else if (isSpace(ch)) { - // ignore - } else { - this->m->type = tt_bad; - QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); - this->m->error_message = - std::string("invalid character (") + ch + ") in hexstring"; - this->m->state = st_token_ready; - } + this->val += ch; + } +} + +void +QPDFTokenizer::inNameHex1(char ch) +{ + this->hex_char = ch; + + if ('0' <= ch && ch <= '9') { + this->char_code = 16 * (int(ch) - int('0')); + this->state = st_name_hex2; + + } else if ('A' <= ch && ch <= 'F') { + this->char_code = 16 * (10 + int(ch) - int('A')); + this->state = st_name_hex2; + + } else if ('a' <= ch && ch <= 'f') { + this->char_code = 16 * (10 + int(ch) - int('a')); + this->state = st_name_hex2; + } else { - throw std::logic_error( - "INTERNAL ERROR: invalid state while reading token"); + QTC::TC("qpdf", "QPDFTokenizer bad name 1"); + this->error_message = "name with stray # will not work with PDF >= 1.2"; + // Use null to encode a bad # -- this is reversed + // in QPDF_Name::normalizeName. + this->val += '\0'; + this->state = st_name; + inName(ch); + } +} + +void +QPDFTokenizer::inNameHex2(char ch) +{ + if ('0' <= ch && ch <= '9') { + this->char_code += int(ch) - int('0'); + + } else if ('A' <= ch && ch <= 'F') { + this->char_code += 10 + int(ch) - int('A'); + + } else if ('a' <= ch && ch <= 'f') { + this->char_code += 10 + int(ch) - int('a'); + + } else { + QTC::TC("qpdf", "QPDFTokenizer bad name 2"); + this->error_message = "name with stray # will not work with PDF >= 1.2"; + // Use null to encode a bad # -- this is reversed + // in QPDF_Name::normalizeName. + this->val += '\0'; + this->val += this->hex_char; + this->state = st_name; + inName(ch); + return; + } + if (this->char_code == 0) { + QTC::TC("qpdf", "QPDFTokenizer null in name"); + this->error_message = "null character not allowed in name token"; + this->val += "#00"; + this->state = st_name; + this->bad = true; + } else { + this->val += char(this->char_code); + this->state = st_name; + } +} + +void +QPDFTokenizer::inSign(char ch) +{ + if (QUtil::is_digit(ch)) { + this->state = st_number; + this->val += ch; + } else if (ch == '.') { + this->state = st_decimal; + this->val += ch; + } else { + this->state = st_literal; + inLiteral(ch); } +} - if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) { - resolveLiteral(); +void +QPDFTokenizer::inDecimal(char ch) +{ + if (QUtil::is_digit(ch)) { + this->state = st_real; + this->val += ch; + } else { + this->state = st_literal; + inLiteral(ch); } +} + +void +QPDFTokenizer::inNumber(char ch) +{ + if (QUtil::is_digit(ch)) { + this->val += ch; + } else if (ch == '.') { + this->state = st_real; + this->val += ch; + } else if (isDelimiter(ch)) { + this->type = tt_integer; + this->state = st_token_ready; + this->in_token = false; + this->char_to_unread = ch; + } else { + this->state = st_literal; + this->val += ch; + } +} + +void +QPDFTokenizer::inReal(char ch) +{ + if (QUtil::is_digit(ch)) { + this->val += ch; + } else if (isDelimiter(ch)) { + this->type = tt_real; + this->state = st_token_ready; + this->in_token = false; + this->char_to_unread = ch; + } else { + this->state = st_literal; + this->val += ch; + } +} +void +QPDFTokenizer::inStringEscape(char ch) +{ + this->state = st_in_string; + switch (ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + this->state = st_char_code; + this->char_code = 0; + this->digit_count = 0; + inCharCode(ch); + return; + + case 'n': + this->val += '\n'; + return; + + case 'r': + this->val += '\r'; + return; + + case 't': + this->val += '\t'; + return; + + case 'b': + this->val += '\b'; + return; - if (!(betweenTokens() || - ((this->m->state == st_token_ready) && this->m->unread_char))) { - this->m->raw_val += orig_ch; + case 'f': + this->val += '\f'; + return; + + case '\n': + return; + + case '\r': + this->state = st_string_after_cr; + return; + + default: + // PDF spec says backslash is ignored before anything else + this->val += ch; + return; + } +} + +void +QPDFTokenizer::inStringAfterCR(char ch) +{ + this->state = st_in_string; + if (ch != '\n') { + inString(ch); + } +} + +void +QPDFTokenizer::inLt(char ch) +{ + if (ch == '<') { + this->val += "<<"; + this->type = tt_dict_open; + this->state = st_token_ready; + return; + } + + this->state = st_in_hexstring; + inHexstring(ch); +} + +void +QPDFTokenizer::inGt(char ch) +{ + if (ch == '>') { + this->val += ">>"; + this->type = tt_dict_close; + this->state = st_token_ready; + } else { + this->val += ">"; + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad >"); + this->error_message = "unexpected >"; + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + } +} + +void +QPDFTokenizer::inLiteral(char ch) +{ + if (isDelimiter(ch)) { + // A C-locale whitespace character or delimiter terminates + // token. It is important to unread the whitespace + // character even though it is ignored since it may be the + // newline after a stream keyword. Removing it here could + // make the stream-reading code break on some files, + // though not on any files in the test suite as of this + // writing. + + this->in_token = false; + this->char_to_unread = ch; + this->state = st_token_ready; + this->type = (this->val == "true") || (this->val == "false") + ? tt_bool + : (this->val == "null" ? tt_null : tt_word); + } else { + this->val += ch; + } +} + +void +QPDFTokenizer::inHexstring(char ch) +{ + if ('0' <= ch && ch <= '9') { + this->char_code = 16 * (int(ch) - int('0')); + this->state = st_in_hexstring_2nd; + + } else if ('A' <= ch && ch <= 'F') { + this->char_code = 16 * (10 + int(ch) - int('A')); + this->state = st_in_hexstring_2nd; + + } else if ('a' <= ch && ch <= 'f') { + this->char_code = 16 * (10 + int(ch) - int('a')); + this->state = st_in_hexstring_2nd; + + } else if (ch == '>') { + this->type = tt_string; + this->state = st_token_ready; + + } else if (isSpace(ch)) { + // ignore + + } else { + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); + this->error_message = + std::string("invalid character (") + ch + ") in hexstring"; + this->state = st_token_ready; + } +} + +void +QPDFTokenizer::inHexstring2nd(char ch) +{ + if ('0' <= ch && ch <= '9') { + this->val += char(this->char_code + int(ch) - int('0')); + this->state = st_in_hexstring; + + } else if ('A' <= ch && ch <= 'F') { + this->val += char(this->char_code + 10 + int(ch) - int('A')); + this->state = st_in_hexstring; + + } else if ('a' <= ch && ch <= 'f') { + this->val += char(this->char_code + 10 + int(ch) - int('a')); + this->state = st_in_hexstring; + + } else if (ch == '>') { + // PDF spec says odd hexstrings have implicit trailing 0. + this->val += char(this->char_code); + this->type = tt_string; + this->state = st_token_ready; + + } else if (isSpace(ch)) { + // ignore + + } else { + this->type = tt_bad; + QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character"); + this->error_message = + std::string("invalid character (") + ch + ") in hexstring"; + this->state = st_token_ready; + } +} + +void +QPDFTokenizer::inCharCode(char ch) +{ + if (('0' <= ch) && (ch <= '7')) { + this->char_code = 8 * this->char_code + (int(ch) - int('0')); + if (++(this->digit_count) < 3) { + return; + } + // We've accumulated \ddd. PDF Spec says to ignore + // high-order overflow. + } + this->val += char(this->char_code % 256); + this->state = st_in_string; + return; +} + +void +QPDFTokenizer::inInlineImage(char ch) +{ + this->val += ch; + if (this->val.length() == this->inline_image_bytes) { + QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); + this->type = tt_inline_image; + this->inline_image_bytes = 0; + this->state = st_token_ready; } } void QPDFTokenizer::presentEOF() { - if (this->m->state == st_literal) { + switch (this->state) { + case st_name: + case st_name_hex1: + case st_name_hex2: + case st_number: + case st_real: + case st_sign: + case st_decimal: + case st_literal: QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); - resolveLiteral(); - } else if ( - (this->m->include_ignorable) && (this->m->state == st_in_space)) { - this->m->type = tt_space; - } else if ( - (this->m->include_ignorable) && (this->m->state == st_in_comment)) { - this->m->type = tt_comment; - } else if (betweenTokens()) { - this->m->type = tt_eof; - } else if (this->m->state != st_token_ready) { + // Push any delimiter to the state machine to finish off the final + // token. + presentCharacter('\f'); + this->in_token = true; + break; + + case st_top: + case st_before_token: + this->type = tt_eof; + break; + + case st_in_space: + this->type = this->include_ignorable ? tt_space : tt_eof; + break; + + case st_in_comment: + this->type = this->include_ignorable ? tt_comment : tt_bad; + break; + + case st_token_ready: + break; + + default: QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); - this->m->type = tt_bad; - this->m->error_message = "EOF while reading token"; + this->type = tt_bad; + this->error_message = "EOF while reading token"; } - - this->m->state = st_token_ready; + this->state = st_token_ready; } void QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) { - if (this->m->state != st_top) { + if (this->state != st_before_token) { throw std::logic_error("QPDFTokenizer::expectInlineImage called" " when tokenizer is in improper state"); } findEI(input); - this->m->state = st_inline_image; + this->before_token = false; + this->in_token = true; + this->state = st_inline_image; } void @@ -537,7 +874,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) if (!input->findFirst("EI", input->tell(), 0, f)) { break; } - this->m->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); + this->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); QPDFTokenizer check; bool found_bad = false; @@ -610,19 +947,16 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) bool QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) { - bool ready = (this->m->state == st_token_ready); - unread_char = this->m->unread_char; - ch = this->m->char_to_unread; + bool ready = (this->state == st_token_ready); + unread_char = !this->in_token && !this->before_token; + ch = this->char_to_unread; if (ready) { - if (this->m->type == tt_bad) { - this->m->val = this->m->raw_val; - } - token = Token( - this->m->type, - this->m->val, - this->m->raw_val, - this->m->error_message); - this->m->reset(); + token = (this->type == tt_bad) + ? Token( + this->type, this->raw_val, this->raw_val, this->error_message) + : Token(this->type, this->val, this->raw_val, this->error_message); + + this->reset(); } return ready; } @@ -630,11 +964,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) bool QPDFTokenizer::betweenTokens() { - return ( - (this->m->state == st_top) || - ((!this->m->include_ignorable) && - ((this->m->state == st_in_comment) || - (this->m->state == st_in_space)))); + return this->before_token; } QPDFTokenizer::Token @@ -644,49 +974,46 @@ QPDFTokenizer::readToken( bool allow_bad, size_t max_len) { - qpdf_offset_t offset = input->tell(); - Token token; - bool unread_char; - char char_to_unread; - bool presented_eof = false; - while (!getToken(token, unread_char, char_to_unread)) { + qpdf_offset_t offset = input->fastTell(); + + while (this->state != st_token_ready) { char ch; - if (input->read(&ch, 1) == 0) { - if (!presented_eof) { - presentEOF(); - presented_eof = true; - if ((this->m->type == tt_eof) && (!this->m->allow_eof)) { - // Nothing in the qpdf library calls readToken - // without allowEOF anymore, so this case is not - // exercised. - this->m->type = tt_bad; - this->m->error_message = "unexpected EOF"; - offset = input->getLastOffset(); - } - } else { - throw std::logic_error( - "getToken returned false after presenting EOF"); + if (!input->fastRead(ch)) { + presentEOF(); + + if ((this->type == tt_eof) && (!this->allow_eof)) { + // Nothing in the qpdf library calls readToken + // without allowEOF anymore, so this case is not + // exercised. + this->type = tt_bad; + this->error_message = "unexpected EOF"; + offset = input->getLastOffset(); } } else { - presentCharacter(ch); - if (betweenTokens() && (input->getLastOffset() == offset)) { + handleCharacter(ch); + if (this->before_token) { ++offset; } - if (max_len && (this->m->raw_val.length() >= max_len) && - (this->m->state != st_token_ready)) { + if (this->in_token) { + this->raw_val += ch; + } + if (max_len && (this->raw_val.length() >= max_len) && + (this->state != st_token_ready)) { // terminate this token now QTC::TC("qpdf", "QPDFTokenizer block long token"); - this->m->type = tt_bad; - this->m->state = st_token_ready; - this->m->error_message = + this->type = tt_bad; + this->state = st_token_ready; + this->error_message = "exceeded allowable length while reading token"; } } } - if (unread_char) { - input->unreadCh(char_to_unread); - } + Token token; + bool unread_char; + char char_to_unread; + getToken(token, unread_char, char_to_unread); + input->fastUnread(unread_char); if (token.getType() != tt_eof) { input->setLastOffset(offset); diff --git a/libqpdf/QPDFValue.cc b/libqpdf/QPDFValue.cc new file mode 100644 index 00000000..8a6222d2 --- /dev/null +++ b/libqpdf/QPDFValue.cc @@ -0,0 +1,11 @@ +#include <qpdf/QPDFValue.hh> + +#include <qpdf/QPDFObject.hh> + +std::shared_ptr<QPDFObject> +QPDFValue::do_create(QPDFValue* object) +{ + std::shared_ptr<QPDFObject> obj(new QPDFObject()); + obj->value = std::shared_ptr<QPDFValue>(object); + return obj; +} diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index e33d0965..028f73dc 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1897,7 +1897,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) // pass 1. indicateProgress(true, false); } - QPDFObjectHandle obj_to_write = this->m->pdf.getObjectByObjGen(obj); + QPDFObjectHandle obj_to_write = this->m->pdf.getObject(obj); if (obj_to_write.isStream()) { // This condition occurred in a fuzz input. Ideally we // should block it at at parse time, but it's not diff --git a/libqpdf/QPDF_Array.cc b/libqpdf/QPDF_Array.cc index 55e4d20a..63fe98d4 100644 --- a/libqpdf/QPDF_Array.cc +++ b/libqpdf/QPDF_Array.cc @@ -4,12 +4,14 @@ #include <qpdf/QUtil.hh> #include <stdexcept> -QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) +QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) : + QPDFValue(::ot_array, "array") { setFromVector(v); } QPDF_Array::QPDF_Array(SparseOHArray const& items) : + QPDFValue(::ot_array, "array"), elements(items) { } @@ -62,18 +64,6 @@ QPDF_Array::getJSON(int json_version) return j; } -QPDFObject::object_type_e -QPDF_Array::getTypeCode() const -{ - return QPDFObject::ot_array; -} - -char const* -QPDF_Array::getTypeName() const -{ - return "array"; -} - int QPDF_Array::getNItems() const { diff --git a/libqpdf/QPDF_Bool.cc b/libqpdf/QPDF_Bool.cc index f26325c3..efbfd6c9 100644 --- a/libqpdf/QPDF_Bool.cc +++ b/libqpdf/QPDF_Bool.cc @@ -1,6 +1,7 @@ #include <qpdf/QPDF_Bool.hh> QPDF_Bool::QPDF_Bool(bool val) : + QPDFValue(::ot_boolean, "boolean"), val(val) { } @@ -29,18 +30,6 @@ QPDF_Bool::getJSON(int json_version) return JSON::makeBool(this->val); } -QPDFObject::object_type_e -QPDF_Bool::getTypeCode() const -{ - return QPDFObject::ot_boolean; -} - -char const* -QPDF_Bool::getTypeName() const -{ - return "boolean"; -} - bool QPDF_Bool::getVal() const { diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc index 60b2339f..845bcad8 100644 --- a/libqpdf/QPDF_Dictionary.cc +++ b/libqpdf/QPDF_Dictionary.cc @@ -1,10 +1,10 @@ #include <qpdf/QPDF_Dictionary.hh> #include <qpdf/QPDF_Name.hh> -#include <qpdf/QPDF_Null.hh> QPDF_Dictionary::QPDF_Dictionary( std::map<std::string, QPDFObjectHandle> const& items) : + QPDFValue(::ot_dictionary, "dictionary"), items(items) { } @@ -58,18 +58,6 @@ QPDF_Dictionary::getJSON(int json_version) return j; } -QPDFObject::object_type_e -QPDF_Dictionary::getTypeCode() const -{ - return QPDFObject::ot_dictionary; -} - -char const* -QPDF_Dictionary::getTypeName() const -{ - return "dictionary"; -} - bool QPDF_Dictionary::hasKey(std::string const& key) { diff --git a/libqpdf/QPDF_InlineImage.cc b/libqpdf/QPDF_InlineImage.cc index c3c656e0..76318196 100644 --- a/libqpdf/QPDF_InlineImage.cc +++ b/libqpdf/QPDF_InlineImage.cc @@ -1,6 +1,7 @@ #include <qpdf/QPDF_InlineImage.hh> QPDF_InlineImage::QPDF_InlineImage(std::string const& val) : + QPDFValue(::ot_inlineimage, "inline-image"), val(val) { } @@ -29,18 +30,6 @@ QPDF_InlineImage::getJSON(int json_version) return JSON::makeNull(); } -QPDFObject::object_type_e -QPDF_InlineImage::getTypeCode() const -{ - return QPDFObject::ot_inlineimage; -} - -char const* -QPDF_InlineImage::getTypeName() const -{ - return "inline-image"; -} - std::string QPDF_InlineImage::getVal() const { diff --git a/libqpdf/QPDF_Integer.cc b/libqpdf/QPDF_Integer.cc index e8d23e4a..24812573 100644 --- a/libqpdf/QPDF_Integer.cc +++ b/libqpdf/QPDF_Integer.cc @@ -3,6 +3,7 @@ #include <qpdf/QUtil.hh> QPDF_Integer::QPDF_Integer(long long val) : + QPDFValue(::ot_integer, "integer"), val(val) { } @@ -31,18 +32,6 @@ QPDF_Integer::getJSON(int json_version) return JSON::makeInt(this->val); } -QPDFObject::object_type_e -QPDF_Integer::getTypeCode() const -{ - return QPDFObject::ot_integer; -} - -char const* -QPDF_Integer::getTypeName() const -{ - return "integer"; -} - long long QPDF_Integer::getVal() const { diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc index 73990775..c86d34b4 100644 --- a/libqpdf/QPDF_Name.cc +++ b/libqpdf/QPDF_Name.cc @@ -5,6 +5,7 @@ #include <string.h> QPDF_Name::QPDF_Name(std::string const& name) : + QPDFValue(::ot_name, "name"), name(name) { } @@ -61,18 +62,6 @@ QPDF_Name::getJSON(int json_version) } } -QPDFObject::object_type_e -QPDF_Name::getTypeCode() const -{ - return QPDFObject::ot_name; -} - -char const* -QPDF_Name::getTypeName() const -{ - return "name"; -} - std::string QPDF_Name::getName() const { diff --git a/libqpdf/QPDF_Null.cc b/libqpdf/QPDF_Null.cc index b015ed8b..f60dda1f 100644 --- a/libqpdf/QPDF_Null.cc +++ b/libqpdf/QPDF_Null.cc @@ -1,5 +1,10 @@ #include <qpdf/QPDF_Null.hh> +QPDF_Null::QPDF_Null() : + QPDFValue(::ot_null, "null") +{ +} + std::shared_ptr<QPDFObject> QPDF_Null::create() { @@ -23,15 +28,3 @@ QPDF_Null::getJSON(int json_version) { return JSON::makeNull(); } - -QPDFObject::object_type_e -QPDF_Null::getTypeCode() const -{ - return QPDFObject::ot_null; -} - -char const* -QPDF_Null::getTypeName() const -{ - return "null"; -} diff --git a/libqpdf/QPDF_Operator.cc b/libqpdf/QPDF_Operator.cc index cd5009ae..547ff40a 100644 --- a/libqpdf/QPDF_Operator.cc +++ b/libqpdf/QPDF_Operator.cc @@ -1,6 +1,7 @@ #include <qpdf/QPDF_Operator.hh> QPDF_Operator::QPDF_Operator(std::string const& val) : + QPDFValue(::ot_operator, "operator"), val(val) { } @@ -20,7 +21,7 @@ QPDF_Operator::shallowCopy() std::string QPDF_Operator::unparse() { - return this->val; + return val; } JSON @@ -29,18 +30,6 @@ QPDF_Operator::getJSON(int json_version) return JSON::makeNull(); } -QPDFObject::object_type_e -QPDF_Operator::getTypeCode() const -{ - return QPDFObject::ot_operator; -} - -char const* -QPDF_Operator::getTypeName() const -{ - return "operator"; -} - std::string QPDF_Operator::getVal() const { diff --git a/libqpdf/QPDF_Real.cc b/libqpdf/QPDF_Real.cc index 138bbb3c..85c9ceeb 100644 --- a/libqpdf/QPDF_Real.cc +++ b/libqpdf/QPDF_Real.cc @@ -3,12 +3,14 @@ #include <qpdf/QUtil.hh> QPDF_Real::QPDF_Real(std::string const& val) : + QPDFValue(::ot_real, "real"), val(val) { } QPDF_Real::QPDF_Real( double value, int decimal_places, bool trim_trailing_zeroes) : + QPDFValue(::ot_real, "real"), val(QUtil::double_to_string(value, decimal_places, trim_trailing_zeroes)) { } @@ -60,18 +62,6 @@ QPDF_Real::getJSON(int json_version) return JSON::makeNumber(result); } -QPDFObject::object_type_e -QPDF_Real::getTypeCode() const -{ - return QPDFObject::ot_real; -} - -char const* -QPDF_Real::getTypeName() const -{ - return "real"; -} - std::string QPDF_Real::getVal() { diff --git a/libqpdf/QPDF_Reserved.cc b/libqpdf/QPDF_Reserved.cc index 5808a369..f5af4688 100644 --- a/libqpdf/QPDF_Reserved.cc +++ b/libqpdf/QPDF_Reserved.cc @@ -2,6 +2,11 @@ #include <stdexcept> +QPDF_Reserved::QPDF_Reserved() : + QPDFValue(::ot_reserved, "reserved") +{ +} + std::shared_ptr<QPDFObject> QPDF_Reserved::create() { @@ -17,25 +22,15 @@ QPDF_Reserved::shallowCopy() std::string QPDF_Reserved::unparse() { - throw std::logic_error("attempt to unparse QPDF_Reserved"); + throw std::logic_error( + "QPDFObjectHandle: attempting to unparse a reserved object"); return ""; } JSON QPDF_Reserved::getJSON(int json_version) { - throw std::logic_error("attempt to generate JSON from QPDF_Reserved"); + throw std::logic_error( + "QPDFObjectHandle: attempting to unparse a reserved object"); return JSON::makeNull(); } - -QPDFObject::object_type_e -QPDF_Reserved::getTypeCode() const -{ - return QPDFObject::ot_reserved; -} - -char const* -QPDF_Reserved::getTypeName() const -{ - return "reserved"; -} diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 1b7f9461..9932c15d 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -114,6 +114,7 @@ QPDF_Stream::QPDF_Stream( QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) : + QPDFValue(::ot_stream, "stream"), qpdf(qpdf), og(og), filter_on_write(true), @@ -291,22 +292,10 @@ QPDF_Stream::getStreamJSON( return result; } -QPDFObject::object_type_e -QPDF_Stream::getTypeCode() const -{ - return QPDFObject::ot_stream; -} - -char const* -QPDF_Stream::getTypeName() const -{ - return "stream"; -} - void QPDF_Stream::setDescription(QPDF* qpdf, std::string const& description) { - this->QPDFObject::setDescription(qpdf, description); + this->QPDFValue::setDescription(qpdf, description); setDictDescription(); } diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc index b038366b..c6cb6c41 100644 --- a/libqpdf/QPDF_String.cc +++ b/libqpdf/QPDF_String.cc @@ -21,6 +21,7 @@ is_iso_latin1_printable(char ch) } QPDF_String::QPDF_String(std::string const& val) : + QPDFValue(::ot_string, "string"), val(val) { } @@ -84,18 +85,6 @@ QPDF_String::getJSON(int json_version) return JSON::makeString(result); } -QPDFObject::object_type_e -QPDF_String::getTypeCode() const -{ - return QPDFObject::ot_string; -} - -char const* -QPDF_String::getTypeName() const -{ - return "string"; -} - bool QPDF_String::useHexString() const { diff --git a/libqpdf/QPDF_Unresolved.cc b/libqpdf/QPDF_Unresolved.cc new file mode 100644 index 00000000..f824a9a6 --- /dev/null +++ b/libqpdf/QPDF_Unresolved.cc @@ -0,0 +1,36 @@ +#include <qpdf/QPDF_Unresolved.hh> + +#include <stdexcept> + +QPDF_Unresolved::QPDF_Unresolved(QPDF* qpdf, QPDFObjGen const& og) : + QPDFValue(::ot_unresolved, "unresolved", qpdf, og) +{ +} + +std::shared_ptr<QPDFObject> +QPDF_Unresolved::create(QPDF* qpdf, QPDFObjGen const& og) +{ + return do_create(new QPDF_Unresolved(qpdf, og)); +} + +std::shared_ptr<QPDFObject> +QPDF_Unresolved::shallowCopy() +{ + throw std::logic_error( + "attempted to shallow copy unresolved QPDFObjectHandle"); + return create(qpdf, og); +} + +std::string +QPDF_Unresolved::unparse() +{ + throw std::logic_error( + "attempted to unparse an unresolved QPDFObjectHandle"); + return ""; +} + +JSON +QPDF_Unresolved::getJSON(int json_version) +{ + return JSON::makeNull(); +} diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index a3d50cfb..131e7dee 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -14,8 +14,11 @@ // | st_initial // { | -> st_top -// "qpdf-v2": { | -> st_qpdf -// "objects": { | -> st_objects +// "qpdf": [ | -> st_qpdf +// { | -> st_qpdf_meta +// ... | ... +// }, | ... +// { | -> st_objects // "obj:1 0 R": { | -> st_object_top // "value": { | -> st_object // "/Pages": "2 0 R", | ... @@ -41,7 +44,7 @@ // } | <- st_trailer // } | <- st_objects // } | <- st_qpdf -// } | <- st_top +// ] | <- st_top // } | <- st_initial static char const* JSON_PDF = ( @@ -394,7 +397,7 @@ QPDF::JSONReactor::replaceObject( auto og = to_replace.getObjGen(); this->reserved.erase(og); this->pdf.replaceObject(og, replacement); - auto oh = pdf.getObjectByObjGen(og); + auto oh = pdf.getObject(og); setObjectDescription(oh, value); } diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index f89ed188..e09f7b68 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -137,8 +137,7 @@ QPDF::isLinearized() return false; } - QPDFObjectHandle candidate = QPDFObjectHandle::Factory::newIndirect( - this, QPDFObjGen(lindict_obj, 0)); + auto candidate = getObjectByID(lindict_obj, 0); if (!candidate.isDictionary()) { return false; } @@ -706,7 +705,7 @@ QPDF::getUncompressedObject( return obj; } else { int repl = (*(object_stream_data.find(obj.getObjectID()))).second; - return getObjectByObjGen(QPDFObjGen(repl, 0)); + return getObject(repl, 0); } } @@ -1381,9 +1380,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) stopOnError("found other than one root while" " calculating linearization data"); } - this->m->part4.push_back(getObjectByObjGen(*(lc_root.begin()))); + this->m->part4.push_back(getObject(*(lc_root.begin()))); for (auto const& og: lc_open_document) { - this->m->part4.push_back(getObjectByObjGen(og)); + this->m->part4.push_back(getObject(og)); } // Part 6: first page objects. Note: implementation note 124 @@ -1412,11 +1411,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) // hint tables. for (auto const& og: lc_first_page_private) { - this->m->part6.push_back(getObjectByObjGen(og)); + this->m->part6.push_back(getObject(og)); } for (auto const& og: lc_first_page_shared) { - this->m->part6.push_back(getObjectByObjGen(og)); + this->m->part6.push_back(getObject(og)); } // Place the outline dictionary if it goes in the first page section. @@ -1462,7 +1461,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) for (auto const& og: this->m->obj_user_to_objects[ou]) { if (lc_other_page_private.count(og)) { lc_other_page_private.erase(og); - this->m->part7.push_back(getObjectByObjGen(og)); + this->m->part7.push_back(getObject(og)); ++this->m->c_page_offset_data.entries.at(i).nobjects; } } @@ -1479,7 +1478,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) // Order is unimportant. for (auto const& og: lc_other_page_shared) { - this->m->part8.push_back(getObjectByObjGen(og)); + this->m->part8.push_back(getObject(og)); } // Part 9: other objects @@ -1501,7 +1500,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) for (auto const& og: pages_ogs) { if (lc_other.count(og)) { lc_other.erase(og); - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } } @@ -1531,7 +1530,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) for (auto const& og: ogs) { if (lc_thumbnail_private.count(og)) { lc_thumbnail_private.erase(og); - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } } } @@ -1544,7 +1543,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) // Place shared thumbnail objects for (auto const& og: lc_thumbnail_shared) { - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } // Place outlines unless in first page @@ -1554,7 +1553,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) // Place all remaining objects for (auto const& og: lc_other) { - this->m->part9.push_back(getObjectByObjGen(og)); + this->m->part9.push_back(getObject(og)); } // Make sure we got everything exactly once. @@ -1656,7 +1655,7 @@ QPDF::pushOutlinesToPart( lc_outlines.erase(outlines_og); part.push_back(outlines); for (auto const& og: lc_outlines) { - part.push_back(getObjectByObjGen(og)); + part.push_back(getObject(og)); ++this->m->c_outline_data.nobjects; } } diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index a1bede25..80e89b02 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -233,7 +233,7 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) newpage = makeIndirectObject(newpage); } else if (newpage.getOwningQPDF() != this) { QTC::TC("qpdf", "QPDF insert foreign page"); - newpage.getOwningQPDF()->pushInheritedAttributesToPage(); + newpage.getOwningQPDF(false)->pushInheritedAttributesToPage(); newpage = copyForeignObject(newpage); } else { QTC::TC("qpdf", "QPDF insert indirect page"); diff --git a/libqpdf/QTC.cc b/libqpdf/QTC.cc index d27bfa8b..8188846c 100644 --- a/libqpdf/QTC.cc +++ b/libqpdf/QTC.cc @@ -1,6 +1,7 @@ #include <qpdf/QTC.hh> #include <qpdf/QUtil.hh> +#include <map> #include <set> #include <stdio.h> @@ -12,14 +13,21 @@ tc_active(char const* const scope) } void -QTC::TC(char const* const scope, char const* const ccase, int n) +QTC::TC_real(char const* const scope, char const* const ccase, int n) { - static std::set<std::pair<std::string, int>> cache; + static std::map<std::string, bool> active; + auto is_active = active.find(scope); + if (is_active == active.end()) { + active[scope] = tc_active(scope); + is_active = active.find(scope); + } - if (!tc_active(scope)) { + if (!is_active->second) { return; } + static std::set<std::pair<std::string, int>> cache; + std::string filename; #ifdef _WIN32 # define TC_ENV "TC_WIN_FILENAME" diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index 4e58aaf7..98a8f318 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -37,6 +37,9 @@ # include <sys/stat.h> # include <unistd.h> #endif +#ifdef HAVE_MALLOC_INFO +# include <malloc.h> +#endif // First element is 24 static unsigned short pdf_doc_low_to_unicode[] = { @@ -1207,52 +1210,6 @@ QUtil::random() return result; } -bool -QUtil::is_hex_digit(char ch) -{ - return (ch && (strchr("0123456789abcdefABCDEF", ch) != nullptr)); -} - -bool -QUtil::is_space(char ch) -{ - return (ch && (strchr(" \f\n\r\t\v", ch) != nullptr)); -} - -bool -QUtil::is_digit(char ch) -{ - return ((ch >= '0') && (ch <= '9')); -} - -bool -QUtil::is_number(char const* p) -{ - // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$ - if (!*p) { - return false; - } - if ((*p == '-') || (*p == '+')) { - ++p; - } - bool found_dot = false; - bool found_digit = false; - for (; *p; ++p) { - if (*p == '.') { - if (found_dot) { - // only one dot - return false; - } - found_dot = true; - } else if (QUtil::is_digit(*p)) { - found_digit = true; - } else { - return false; - } - } - return found_digit; -} - void QUtil::read_file_into_memory( char const* filename, std::shared_ptr<char>& file_buf, size_t& size) @@ -2014,3 +1971,73 @@ QUtil::call_main_from_wmain( } #endif // QPDF_NO_WCHAR_T + +size_t +QUtil::get_max_memory_usage() +{ +#if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM) + static std::regex tag_re("<(/?\\w+)([^>]*?)>"); + static std::regex attr_re("(\\w+)=\"(.*?)\""); + + char* buf; + size_t size; + FILE* f = open_memstream(&buf, &size); + if (f == nullptr) { + return 0; + } + malloc_info(0, f); + fclose(f); + if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) { + fprintf(stderr, "%s", buf); + } + + // Warning: this code uses regular expression to extract data from + // an XML string. This is generally a bad idea, but we're going to + // do it anyway because QUtil.hh warns against using this function + // for other than development/testing, and if this function fails + // to generate reasonable output during performance testing, it + // will be noticed. + + // This is my best guess at how to interpret malloc_info. Anyway + // it seems to provide useful information for detecting code + // changes that drastically change memory usage. + size_t result = 0; + try { + std::cregex_iterator m_begin(buf, buf + size, tag_re); + std::cregex_iterator cr_end; + std::sregex_iterator sr_end; + + int in_heap = 0; + for (auto m = m_begin; m != cr_end; ++m) { + std::string tag(m->str(1)); + if (tag == "heap") { + ++in_heap; + } else if (tag == "/heap") { + --in_heap; + } else if (in_heap == 0) { + std::string rest = m->str(2); + std::map<std::string, std::string> attrs; + std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re); + for (auto m2 = a_begin; m2 != sr_end; ++m2) { + attrs[m2->str(1)] = m2->str(2); + } + if (tag == "total") { + if (attrs.count("size") > 0) { + result += QIntC::to_size( + QUtil::string_to_ull(attrs["size"].c_str())); + } + } else if (tag == "system" && attrs["type"] == "max") { + result += QIntC::to_size( + QUtil::string_to_ull(attrs["size"].c_str())); + } + } + } + } catch (...) { + // ignore -- just return 0 + } + free(buf); + return result; +#else + return 0; +#endif +} diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh new file mode 100644 index 00000000..b83dbb1c --- /dev/null +++ b/libqpdf/qpdf/QPDFParser.hh @@ -0,0 +1,53 @@ +#ifndef QPDFPARSER_HH +#define QPDFPARSER_HH + +#include <qpdf/QPDFObjectHandle.hh> + +#include <memory> +#include <string> + +class QPDFParser +{ + public: + QPDFParser() = delete; + QPDFParser( + std::shared_ptr<InputSource> input, + std::string const& object_description, + QPDFTokenizer& tokenizer, + QPDFObjectHandle::StringDecrypter* decrypter, + QPDF* context) : + input(input), + object_description(object_description), + tokenizer(tokenizer), + decrypter(decrypter), + context(context) + { + } + virtual ~QPDFParser() = default; + + QPDFObjectHandle parse(bool& empty, bool content_stream); + + private: + enum parser_state_e { + st_top, + st_start, + st_stop, + st_eof, + st_dictionary, + st_array + }; + + void warn(qpdf_offset_t offset, std::string const& msg) const; + void warn(std::string const& msg) const; + static void warn(QPDF*, QPDFExc const&); + void setParsedOffset(qpdf_offset_t offset); + void + setDescriptionFromInput(QPDFObjectHandle oh, qpdf_offset_t offset) const; + std::shared_ptr<InputSource> input; + std::string const& object_description; + QPDFTokenizer& tokenizer; + QPDFObjectHandle::StringDecrypter* decrypter; + QPDF* context; +}; + +#endif // QPDFPARSER_HH diff --git a/libqpdf/qpdf/QPDF_Array.hh b/libqpdf/qpdf/QPDF_Array.hh index 3e095637..426efe36 100644 --- a/libqpdf/qpdf/QPDF_Array.hh +++ b/libqpdf/qpdf/QPDF_Array.hh @@ -1,13 +1,13 @@ #ifndef QPDF_ARRAY_HH #define QPDF_ARRAY_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> #include <qpdf/SparseOHArray.hh> #include <list> #include <vector> -class QPDF_Array: public QPDFObject +class QPDF_Array: public QPDFValue { public: virtual ~QPDF_Array() = default; @@ -17,8 +17,6 @@ class QPDF_Array: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; int getNItems() const; QPDFObjectHandle getItem(int n) const; diff --git a/libqpdf/qpdf/QPDF_Bool.hh b/libqpdf/qpdf/QPDF_Bool.hh index dbedc70a..3e45cd8e 100644 --- a/libqpdf/qpdf/QPDF_Bool.hh +++ b/libqpdf/qpdf/QPDF_Bool.hh @@ -1,9 +1,9 @@ #ifndef QPDF_BOOL_HH #define QPDF_BOOL_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Bool: public QPDFObject +class QPDF_Bool: public QPDFValue { public: virtual ~QPDF_Bool() = default; @@ -11,8 +11,6 @@ class QPDF_Bool: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; bool getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Dictionary.hh b/libqpdf/qpdf/QPDF_Dictionary.hh index cacc8961..19ab8d9b 100644 --- a/libqpdf/qpdf/QPDF_Dictionary.hh +++ b/libqpdf/qpdf/QPDF_Dictionary.hh @@ -1,14 +1,14 @@ #ifndef QPDF_DICTIONARY_HH #define QPDF_DICTIONARY_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> #include <map> #include <set> #include <qpdf/QPDFObjectHandle.hh> -class QPDF_Dictionary: public QPDFObject +class QPDF_Dictionary: public QPDFValue { public: virtual ~QPDF_Dictionary() = default; @@ -17,8 +17,6 @@ class QPDF_Dictionary: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; // hasKey() and getKeys() treat keys with null values as if they // aren't there. getKey() returns null for the value of a diff --git a/libqpdf/qpdf/QPDF_InlineImage.hh b/libqpdf/qpdf/QPDF_InlineImage.hh index caaeaf87..b7bea9c7 100644 --- a/libqpdf/qpdf/QPDF_InlineImage.hh +++ b/libqpdf/qpdf/QPDF_InlineImage.hh @@ -1,9 +1,9 @@ #ifndef QPDF_INLINEIMAGE_HH #define QPDF_INLINEIMAGE_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_InlineImage: public QPDFObject +class QPDF_InlineImage: public QPDFValue { public: virtual ~QPDF_InlineImage() = default; @@ -11,8 +11,6 @@ class QPDF_InlineImage: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Integer.hh b/libqpdf/qpdf/QPDF_Integer.hh index 2c17daf0..7e09673c 100644 --- a/libqpdf/qpdf/QPDF_Integer.hh +++ b/libqpdf/qpdf/QPDF_Integer.hh @@ -1,9 +1,9 @@ #ifndef QPDF_INTEGER_HH #define QPDF_INTEGER_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Integer: public QPDFObject +class QPDF_Integer: public QPDFValue { public: virtual ~QPDF_Integer() = default; @@ -11,8 +11,6 @@ class QPDF_Integer: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; long long getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Name.hh b/libqpdf/qpdf/QPDF_Name.hh index cf653b2e..74fc7e44 100644 --- a/libqpdf/qpdf/QPDF_Name.hh +++ b/libqpdf/qpdf/QPDF_Name.hh @@ -1,9 +1,9 @@ #ifndef QPDF_NAME_HH #define QPDF_NAME_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Name: public QPDFObject +class QPDF_Name: public QPDFValue { public: virtual ~QPDF_Name() = default; @@ -11,8 +11,6 @@ class QPDF_Name: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getName() const; // Put # into strings with characters unsuitable for name token diff --git a/libqpdf/qpdf/QPDF_Null.hh b/libqpdf/qpdf/QPDF_Null.hh index 16833424..68973de9 100644 --- a/libqpdf/qpdf/QPDF_Null.hh +++ b/libqpdf/qpdf/QPDF_Null.hh @@ -1,9 +1,9 @@ #ifndef QPDF_NULL_HH #define QPDF_NULL_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Null: public QPDFObject +class QPDF_Null: public QPDFValue { public: virtual ~QPDF_Null() = default; @@ -11,11 +11,9 @@ class QPDF_Null: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; private: - QPDF_Null() = default; + QPDF_Null(); }; #endif // QPDF_NULL_HH diff --git a/libqpdf/qpdf/QPDF_Operator.hh b/libqpdf/qpdf/QPDF_Operator.hh index 1da43d72..767c0ba0 100644 --- a/libqpdf/qpdf/QPDF_Operator.hh +++ b/libqpdf/qpdf/QPDF_Operator.hh @@ -1,9 +1,9 @@ #ifndef QPDF_OPERATOR_HH #define QPDF_OPERATOR_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Operator: public QPDFObject +class QPDF_Operator: public QPDFValue { public: virtual ~QPDF_Operator() = default; @@ -11,8 +11,6 @@ class QPDF_Operator: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getVal() const; private: diff --git a/libqpdf/qpdf/QPDF_Real.hh b/libqpdf/qpdf/QPDF_Real.hh index f5ab4bd6..dc0f3ff8 100644 --- a/libqpdf/qpdf/QPDF_Real.hh +++ b/libqpdf/qpdf/QPDF_Real.hh @@ -1,9 +1,9 @@ #ifndef QPDF_REAL_HH #define QPDF_REAL_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Real: public QPDFObject +class QPDF_Real: public QPDFValue { public: virtual ~QPDF_Real() = default; @@ -13,8 +13,6 @@ class QPDF_Real: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string getVal(); private: diff --git a/libqpdf/qpdf/QPDF_Reserved.hh b/libqpdf/qpdf/QPDF_Reserved.hh index 243a1728..f90242a9 100644 --- a/libqpdf/qpdf/QPDF_Reserved.hh +++ b/libqpdf/qpdf/QPDF_Reserved.hh @@ -1,9 +1,9 @@ #ifndef QPDF_RESERVED_HH #define QPDF_RESERVED_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> -class QPDF_Reserved: public QPDFObject +class QPDF_Reserved: public QPDFValue { public: virtual ~QPDF_Reserved() = default; @@ -11,11 +11,9 @@ class QPDF_Reserved: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; private: - QPDF_Reserved() = default; + QPDF_Reserved(); }; #endif // QPDF_RESERVED_HH diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index 8980c751..3a16160e 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -3,9 +3,9 @@ #include <qpdf/Types.h> -#include <qpdf/QPDFObject.hh> #include <qpdf/QPDFObjectHandle.hh> #include <qpdf/QPDFStreamFilter.hh> +#include <qpdf/QPDFValue.hh> #include <functional> #include <memory> @@ -13,7 +13,7 @@ class Pipeline; class QPDF; -class QPDF_Stream: public QPDFObject +class QPDF_Stream: public QPDFValue { public: virtual ~QPDF_Stream() = default; @@ -26,8 +26,6 @@ class QPDF_Stream: public QPDFObject virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; virtual void setDescription(QPDF*, std::string const&); QPDFObjectHandle getDict() const; bool isDataModified() const; diff --git a/libqpdf/qpdf/QPDF_String.hh b/libqpdf/qpdf/QPDF_String.hh index b6d77637..a92427e3 100644 --- a/libqpdf/qpdf/QPDF_String.hh +++ b/libqpdf/qpdf/QPDF_String.hh @@ -1,11 +1,11 @@ #ifndef QPDF_STRING_HH #define QPDF_STRING_HH -#include <qpdf/QPDFObject.hh> +#include <qpdf/QPDFValue.hh> // QPDF_Strings may included embedded null characters. -class QPDF_String: public QPDFObject +class QPDF_String: public QPDFValue { friend class QPDFWriter; @@ -16,8 +16,6 @@ class QPDF_String: public QPDFObject create_utf16(std::string const& utf8_val); virtual std::shared_ptr<QPDFObject> shallowCopy(); virtual std::string unparse(); - virtual QPDFObject::object_type_e getTypeCode() const; - virtual char const* getTypeName() const; std::string unparse(bool force_binary); virtual JSON getJSON(int json_version); std::string getVal() const; diff --git a/libqpdf/qpdf/QPDF_Unresolved.hh b/libqpdf/qpdf/QPDF_Unresolved.hh new file mode 100644 index 00000000..efcf4e3d --- /dev/null +++ b/libqpdf/qpdf/QPDF_Unresolved.hh @@ -0,0 +1,19 @@ +#ifndef QPDF_UNRESOLVED_HH +#define QPDF_UNRESOLVED_HH + +#include <qpdf/QPDFValue.hh> + +class QPDF_Unresolved: public QPDFValue +{ + public: + virtual ~QPDF_Unresolved() = default; + static std::shared_ptr<QPDFObject> create(QPDF* qpdf, QPDFObjGen const& og); + virtual std::shared_ptr<QPDFObject> shallowCopy(); + virtual std::string unparse(); + virtual JSON getJSON(int json_version); + + private: + QPDF_Unresolved(QPDF* qpdf, QPDFObjGen const& og); +}; + +#endif // QPDF_UNRESOLVED_HH diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index 7c3bb266..eb272a04 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -883,6 +883,9 @@ for debugging qpdf. ap.addOptionHelp("--test-json-schema", "testing", "test generated json against schema", R"(This is used by qpdf's test suite to check consistency between the output of qpdf --json and the output of qpdf --json-help. )"); +ap.addOptionHelp("--report-mem-usage", "testing", "best effort report of memory usage", R"(This is used by qpdf's performance test suite to report the +maximum amount of memory used in supported environments. +)"); } static void add_help(QPDFArgParser& ap) { diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index b90592e0..ad110d16 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -69,6 +69,7 @@ this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();}); this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); +this->ap.addBare("report-mem-usage", [this](){c_main->reportMemUsage();}); this->ap.addBare("requires-password", [this](){c_main->requiresPassword();}); this->ap.addBare("show-encryption", [this](){c_main->showEncryption();}); this->ap.addBare("show-encryption-key", [this](){c_main->showEncryptionKey();}); diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh index 8f8fb987..1cd69368 100644 --- a/libqpdf/qpdf/auto_job_json_init.hh +++ b/libqpdf/qpdf/auto_job_json_init.hh @@ -409,6 +409,9 @@ popHandler(); // key: pages pushKey("removePageLabels"); addBare([this]() { c_main->removePageLabels(); }); popHandler(); // key: removePageLabels +pushKey("reportMemUsage"); +addBare([this]() { c_main->reportMemUsage(); }); +popHandler(); // key: reportMemUsage pushKey("rotate"); addParameter([this](std::string const& p) { c_main->rotate(p); }); popHandler(); // key: rotate diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh index aa69c192..9272c596 100644 --- a/libqpdf/qpdf/auto_job_schema.hh +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -144,6 +144,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ } ], "removePageLabels": "remove explicit page numbers", + "reportMemUsage": "best effort report of memory usage", "rotate": "rotate pages", "overlay": { "file": "source file for overlay", diff --git a/libqpdf/qpdf/qpdf-config.h.in b/libqpdf/qpdf/qpdf-config.h.in index 8a22b875..500f55cc 100644 --- a/libqpdf/qpdf/qpdf-config.h.in +++ b/libqpdf/qpdf/qpdf-config.h.in @@ -21,6 +21,8 @@ #cmakedefine HAVE_LOCALTIME_R 1 #cmakedefine HAVE_RANDOM 1 #cmakedefine HAVE_TM_GMTOFF 1 +#cmakedefine HAVE_MALLOC_INFO 1 +#cmakedefine HAVE_OPEN_MEMSTREAM 1 /* printf format for long long */ #cmakedefine LL_FMT "${LL_FMT}" |