From c422b918b1402ac9e2052ff426c7e64d4342bc99 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 8 Jul 2023 11:38:23 +0100 Subject: Add new private method QPDF::readStream --- libqpdf/QPDF.cc | 157 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 81 insertions(+), 76 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 48f1e186..9391c987 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) std::shared_ptr decrypter_ph; StringDecrypter* decrypter = nullptr; if (m->encp->encrypted) { - decrypter_ph = std::make_shared(this, og); + decrypter_ph = std::make_unique(this, og); decrypter = decrypter_ph.get(); } auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this) @@ -1309,93 +1309,98 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) // check for stream qpdf_offset_t cur_offset = m->file->tell(); if (readToken(m->file).isWord("stream")) { - // The PDF specification states that the word "stream" should be followed by either a - // carriage return and a newline or by a newline alone. It specifically disallowed - // following it by a carriage return alone since, in that case, there would be no way to - // tell whether the NL in a CR NL sequence was part of the stream data. However, some - // readers, including Adobe reader, accept a carriage return by itself when followed by - // a non-newline character, so that's what we do here. We have also seen files that have - // extraneous whitespace between the stream keyword and the newline. - bool done = false; - while (!done) { - done = true; - char ch; - if (m->file->read(&ch, 1) == 0) { - // A premature EOF here will result in some other problem that will get reported - // at another time. - } else if (ch == '\n') { - // ready to read stream data - QTC::TC("qpdf", "QPDF stream with NL only"); - } else if (ch == '\r') { - // Read another character - if (m->file->read(&ch, 1) != 0) { - if (ch == '\n') { - // Ready to read stream data - QTC::TC("qpdf", "QPDF stream with CRNL"); - } else { - // Treat the \r by itself as the whitespace after endstream and start - // reading stream data in spite of not having seen a newline. - QTC::TC("qpdf", "QPDF stream with CR only"); - m->file->unreadCh(ch); - warn(damagedPDF( - m->file->tell(), - "stream keyword followed by carriage return only")); - } - } - } else if (QUtil::is_space(ch)) { - warn(damagedPDF( - m->file->tell(), "stream keyword followed by extraneous whitespace")); - done = false; + readStream(object, og, offset); + } else { + m->file->seek(cur_offset, SEEK_SET); + } + } + + // Override last_offset so that it points to the beginning of the object we just read + m->file->setLastOffset(offset); + return object; +} + +// After reading stream dictionary and stream keyword, read rest of stream. +void +QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) +{ + // The PDF specification states that the word "stream" should be followed by either a carriage + // return and a newline or by a newline alone. It specifically disallowed following it by a + // carriage return alone since, in that case, there would be no way to tell whether the NL in a + // CR NL sequence was part of the stream data. However, some readers, including Adobe reader, + // accept a carriage return by itself when followed by a non-newline character, so that's what + // we do here. We have also seen files that have extraneous whitespace between the stream + // keyword and the newline. + bool done = false; + while (!done) { + done = true; + char ch; + if (m->file->read(&ch, 1) == 0) { + // A premature EOF here will result in some other problem that will get reported at + // another time. + } else if (ch == '\n') { + // ready to read stream data + QTC::TC("qpdf", "QPDF stream with NL only"); + } else if (ch == '\r') { + // Read another character + if (m->file->read(&ch, 1) != 0) { + if (ch == '\n') { + // Ready to read stream data + QTC::TC("qpdf", "QPDF stream with CRNL"); } else { - QTC::TC("qpdf", "QPDF stream without newline"); + // Treat the \r by itself as the whitespace after endstream and start reading + // stream data in spite of not having seen a newline. + QTC::TC("qpdf", "QPDF stream with CR only"); m->file->unreadCh(ch); warn(damagedPDF( - m->file->tell(), "stream keyword not followed by proper line terminator")); + m->file->tell(), "stream keyword followed by carriage return only")); } } + } else if (QUtil::is_space(ch)) { + warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace")); + done = false; + } else { + QTC::TC("qpdf", "QPDF stream without newline"); + m->file->unreadCh(ch); + warn(damagedPDF( + m->file->tell(), "stream keyword not followed by proper line terminator")); + } + } - // Must get offset before accessing any additional objects since resolving a previously - // unresolved indirect object will change file position. - qpdf_offset_t stream_offset = m->file->tell(); - size_t length = 0; - - try { - auto length_obj = object.getKey("/Length"); + // Must get offset before accessing any additional objects since resolving a previously + // unresolved indirect object will change file position. + qpdf_offset_t stream_offset = m->file->tell(); + size_t length = 0; - if (!length_obj.isInteger()) { - if (length_obj.isNull()) { - QTC::TC("qpdf", "QPDF stream without length"); - throw damagedPDF(offset, "stream dictionary lacks /Length key"); - } - QTC::TC("qpdf", "QPDF stream length not integer"); - throw damagedPDF(offset, "/Length key in stream dictionary is not an integer"); - } + try { + auto length_obj = object.getKey("/Length"); - length = toS(length_obj.getUIntValue()); - // Seek in two steps to avoid potential integer overflow - m->file->seek(stream_offset, SEEK_SET); - m->file->seek(toO(length), SEEK_CUR); - if (!readToken(m->file).isWord("endstream")) { - QTC::TC("qpdf", "QPDF missing endstream"); - throw damagedPDF("expected endstream"); - } - } catch (QPDFExc& e) { - if (m->attempt_recovery) { - warn(e); - length = recoverStreamLength(m->file, og, stream_offset); - } else { - throw; - } + if (!length_obj.isInteger()) { + if (length_obj.isNull()) { + QTC::TC("qpdf", "QPDF stream without length"); + throw damagedPDF(offset, "stream dictionary lacks /Length key"); } - object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length)); + QTC::TC("qpdf", "QPDF stream length not integer"); + throw damagedPDF(offset, "/Length key in stream dictionary is not an integer"); + } + + length = toS(length_obj.getUIntValue()); + // Seek in two steps to avoid potential integer overflow + m->file->seek(stream_offset, SEEK_SET); + m->file->seek(toO(length), SEEK_CUR); + if (!readToken(m->file).isWord("endstream")) { + QTC::TC("qpdf", "QPDF missing endstream"); + throw damagedPDF("expected endstream"); + } + } catch (QPDFExc& e) { + if (m->attempt_recovery) { + warn(e); + length = recoverStreamLength(m->file, og, stream_offset); } else { - m->file->seek(cur_offset, SEEK_SET); + throw; } } - - // Override last_offset so that it points to the beginning of the object we just read - m->file->setLastOffset(offset); - return object; + object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length)); } QPDFObjectHandle -- cgit v1.2.3-54-g00ecf