From fabff0f3ec571b8dc423946e252c060e229c49d7 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 22 Aug 2017 10:24:19 -0400 Subject: Limit token length during xref recovery While scanning the file looking for objects, limit the length of tokens we allow. This prevents us from getting caught up in reading a file character by character while digging through large streams. --- libqpdf/QPDF.cc | 17 +++++++++++------ libqpdf/QPDFTokenizer.cc | 11 ++++++++++- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index caf5ef7b..3cfb9a34 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e) this->m->file->seek(0, SEEK_SET); bool in_obj = false; qpdf_offset_t line_start = 0; + // Don't allow very long tokens here during recovery. + static size_t const MAX_LEN = 100; while (this->m->file->tell() < eof) { this->m->file->findAndSkipNextEOL(); qpdf_offset_t next_line_start = this->m->file->tell(); this->m->file->seek(line_start, SEEK_SET); - QPDFTokenizer::Token t1 = readToken(this->m->file, true); + QPDFTokenizer::Token t1 = readToken(this->m->file, true, MAX_LEN); qpdf_offset_t token_start = this->m->file->tell() - t1.getValue().length(); if (token_start >= next_line_start) @@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e) { if (t1.getType() == QPDFTokenizer::tt_integer) { - QPDFTokenizer::Token t2 = readToken(this->m->file, true); - QPDFTokenizer::Token t3 = readToken(this->m->file, true); + QPDFTokenizer::Token t2 = + readToken(this->m->file, true, MAX_LEN); + QPDFTokenizer::Token t3 = + readToken(this->m->file, true, MAX_LEN); if ((t2.getType() == QPDFTokenizer::tt_integer) && (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) { @@ -1411,7 +1415,7 @@ bool QPDF::findEndstream() { // Find endstream or endobj. Position the input at that token. 
- QPDFTokenizer::Token t = readToken(this->m->file, true); + QPDFTokenizer::Token t = readToken(this->m->file, true, 20); if ((t.getType() == QPDFTokenizer::tt_word) && ((t.getValue() == "endobj") || (t.getValue() == "endstream"))) @@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, } QPDFTokenizer::Token -QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad) +QPDF::readToken(PointerHolder<InputSource> input, + bool allow_bad, size_t max_len) { return this->m->tokenizer.readToken( - input, this->m->last_object_description, allow_bad); + input, this->m->last_object_description, allow_bad, max_len); } QPDFObjectHandle diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 6447b93b..6cababfe 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens() QPDFTokenizer::Token QPDFTokenizer::readToken(PointerHolder<InputSource> input, std::string const& context, - bool allow_bad) + bool allow_bad, + size_t max_len) { qpdf_offset_t offset = input->tell(); Token token; @@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, ++offset; } presentCharacter(ch); + if (max_len && (raw_val.length() >= max_len) && + (this->state != st_token_ready)) + { + // terminate this token now + QTC::TC("qpdf", "QPDFTokenizer block long token"); + this->type = tt_bad; + this->state = st_token_ready; + } } } -- cgit v1.2.3-54-g00ecf