summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorm-holger <m-holger@kubitscheck.org>2023-07-08 12:38:23 +0200
committerm-holger <m-holger@kubitscheck.org>2023-07-08 17:31:44 +0200
commitc422b918b1402ac9e2052ff426c7e64d4342bc99 (patch)
treee7a2376a624461a94f170b28f465f5b820ce32fd
parent2643ed4dd9bd1a8e9bb3dcd95794273e9cf5de0c (diff)
downloadqpdf-c422b918b1402ac9e2052ff426c7e64d4342bc99.tar.zst
Add new private method QPDF::readStream
-rw-r--r--include/qpdf/QPDF.hh1
-rw-r--r--libqpdf/QPDF.cc157
2 files changed, 82 insertions, 76 deletions
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index c836b5fd..ead09a0a 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -1008,6 +1008,7 @@ class QPDF
void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
QPDFObjectHandle readTrailer();
QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
+ void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og);
size_t recoverStreamLength(
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 48f1e186..9391c987 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
std::shared_ptr<StringDecrypter> decrypter_ph;
StringDecrypter* decrypter = nullptr;
if (m->encp->encrypted) {
- decrypter_ph = std::make_shared<StringDecrypter>(this, og);
+ decrypter_ph = std::make_unique<StringDecrypter>(this, og);
decrypter = decrypter_ph.get();
}
auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this)
@@ -1309,93 +1309,98 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
// check for stream
qpdf_offset_t cur_offset = m->file->tell();
if (readToken(m->file).isWord("stream")) {
- // The PDF specification states that the word "stream" should be followed by either a
- // carriage return and a newline or by a newline alone. It specifically disallowed
- // following it by a carriage return alone since, in that case, there would be no way to
- // tell whether the NL in a CR NL sequence was part of the stream data. However, some
- // readers, including Adobe reader, accept a carriage return by itself when followed by
- // a non-newline character, so that's what we do here. We have also seen files that have
- // extraneous whitespace between the stream keyword and the newline.
- bool done = false;
- while (!done) {
- done = true;
- char ch;
- if (m->file->read(&ch, 1) == 0) {
- // A premature EOF here will result in some other problem that will get reported
- // at another time.
- } else if (ch == '\n') {
- // ready to read stream data
- QTC::TC("qpdf", "QPDF stream with NL only");
- } else if (ch == '\r') {
- // Read another character
- if (m->file->read(&ch, 1) != 0) {
- if (ch == '\n') {
- // Ready to read stream data
- QTC::TC("qpdf", "QPDF stream with CRNL");
- } else {
- // Treat the \r by itself as the whitespace after endstream and start
- // reading stream data in spite of not having seen a newline.
- QTC::TC("qpdf", "QPDF stream with CR only");
- m->file->unreadCh(ch);
- warn(damagedPDF(
- m->file->tell(),
- "stream keyword followed by carriage return only"));
- }
- }
- } else if (QUtil::is_space(ch)) {
- warn(damagedPDF(
- m->file->tell(), "stream keyword followed by extraneous whitespace"));
- done = false;
+ readStream(object, og, offset);
+ } else {
+ m->file->seek(cur_offset, SEEK_SET);
+ }
+ }
+
+ // Override last_offset so that it points to the beginning of the object we just read
+ m->file->setLastOffset(offset);
+ return object;
+}
+
+// After reading stream dictionary and stream keyword, read rest of stream.
+void
+QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
+{
+ // The PDF specification states that the word "stream" should be followed by either a carriage
+ // return and a newline or by a newline alone. It specifically disallowed following it by a
+ // carriage return alone since, in that case, there would be no way to tell whether the NL in a
+ // CR NL sequence was part of the stream data. However, some readers, including Adobe reader,
+ // accept a carriage return by itself when followed by a non-newline character, so that's what
+ // we do here. We have also seen files that have extraneous whitespace between the stream
+ // keyword and the newline.
+ bool done = false;
+ while (!done) {
+ done = true;
+ char ch;
+ if (m->file->read(&ch, 1) == 0) {
+ // A premature EOF here will result in some other problem that will get reported at
+ // another time.
+ } else if (ch == '\n') {
+ // ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with NL only");
+ } else if (ch == '\r') {
+ // Read another character
+ if (m->file->read(&ch, 1) != 0) {
+ if (ch == '\n') {
+ // Ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with CRNL");
} else {
- QTC::TC("qpdf", "QPDF stream without newline");
+ // Treat the \r by itself as the whitespace after endstream and start reading
+ // stream data in spite of not having seen a newline.
+ QTC::TC("qpdf", "QPDF stream with CR only");
m->file->unreadCh(ch);
warn(damagedPDF(
- m->file->tell(), "stream keyword not followed by proper line terminator"));
+ m->file->tell(), "stream keyword followed by carriage return only"));
}
}
+ } else if (QUtil::is_space(ch)) {
+ warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
+ done = false;
+ } else {
+ QTC::TC("qpdf", "QPDF stream without newline");
+ m->file->unreadCh(ch);
+ warn(damagedPDF(
+ m->file->tell(), "stream keyword not followed by proper line terminator"));
+ }
+ }
- // Must get offset before accessing any additional objects since resolving a previously
- // unresolved indirect object will change file position.
- qpdf_offset_t stream_offset = m->file->tell();
- size_t length = 0;
-
- try {
- auto length_obj = object.getKey("/Length");
+ // Must get offset before accessing any additional objects since resolving a previously
+ // unresolved indirect object will change file position.
+ qpdf_offset_t stream_offset = m->file->tell();
+ size_t length = 0;
- if (!length_obj.isInteger()) {
- if (length_obj.isNull()) {
- QTC::TC("qpdf", "QPDF stream without length");
- throw damagedPDF(offset, "stream dictionary lacks /Length key");
- }
- QTC::TC("qpdf", "QPDF stream length not integer");
- throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
- }
+ try {
+ auto length_obj = object.getKey("/Length");
- length = toS(length_obj.getUIntValue());
- // Seek in two steps to avoid potential integer overflow
- m->file->seek(stream_offset, SEEK_SET);
- m->file->seek(toO(length), SEEK_CUR);
- if (!readToken(m->file).isWord("endstream")) {
- QTC::TC("qpdf", "QPDF missing endstream");
- throw damagedPDF("expected endstream");
- }
- } catch (QPDFExc& e) {
- if (m->attempt_recovery) {
- warn(e);
- length = recoverStreamLength(m->file, og, stream_offset);
- } else {
- throw;
- }
+ if (!length_obj.isInteger()) {
+ if (length_obj.isNull()) {
+ QTC::TC("qpdf", "QPDF stream without length");
+ throw damagedPDF(offset, "stream dictionary lacks /Length key");
}
- object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
+ QTC::TC("qpdf", "QPDF stream length not integer");
+ throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
+ }
+
+ length = toS(length_obj.getUIntValue());
+ // Seek in two steps to avoid potential integer overflow
+ m->file->seek(stream_offset, SEEK_SET);
+ m->file->seek(toO(length), SEEK_CUR);
+ if (!readToken(m->file).isWord("endstream")) {
+ QTC::TC("qpdf", "QPDF missing endstream");
+ throw damagedPDF("expected endstream");
+ }
+ } catch (QPDFExc& e) {
+ if (m->attempt_recovery) {
+ warn(e);
+ length = recoverStreamLength(m->file, og, stream_offset);
} else {
- m->file->seek(cur_offset, SEEK_SET);
+ throw;
}
}
-
- // Override last_offset so that it points to the beginning of the object we just read
- m->file->setLastOffset(offset);
- return object;
+ object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
}
QPDFObjectHandle