about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org>, 2017-08-22 16:24:19 +0200
committer: Jay Berkenbilt <ejb@ql.org>, 2017-08-22 20:13:10 +0200
commit: fabff0f3ec571b8dc423946e252c060e229c49d7 (patch)
tree: 6aba218b998e0f1bb2928e21d6426352cfdcc072 /libqpdf
parent: caf5e39c2e68553101519fb4ce3664a73032d3a3 (diff)
download: qpdf-fabff0f3ec571b8dc423946e252c060e229c49d7.tar.zst
Limit token length during xref recovery
While scanning the file looking for objects, limit the length of tokens we allow. This prevents us from getting caught up in reading a file character by character while digging through large streams.
Diffstat (limited to 'libqpdf')
-rw-r--r-- libqpdf/QPDF.cc 17
-rw-r--r-- libqpdf/QPDFTokenizer.cc 11
2 files changed, 21 insertions, 7 deletions
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index caf5ef7b..3cfb9a34 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e)
this->m->file->seek(0, SEEK_SET);
bool in_obj = false;
qpdf_offset_t line_start = 0;
+ // Don't allow very long tokens here during recovery.
+ static size_t const MAX_LEN = 100;
while (this->m->file->tell() < eof)
{
this->m->file->findAndSkipNextEOL();
qpdf_offset_t next_line_start = this->m->file->tell();
this->m->file->seek(line_start, SEEK_SET);
- QPDFTokenizer::Token t1 = readToken(this->m->file, true);
+ QPDFTokenizer::Token t1 = readToken(this->m->file, true, MAX_LEN);
qpdf_offset_t token_start =
this->m->file->tell() - t1.getValue().length();
if (token_start >= next_line_start)
@@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
{
if (t1.getType() == QPDFTokenizer::tt_integer)
{
- QPDFTokenizer::Token t2 = readToken(this->m->file, true);
- QPDFTokenizer::Token t3 = readToken(this->m->file, true);
+ QPDFTokenizer::Token t2 =
+ readToken(this->m->file, true, MAX_LEN);
+ QPDFTokenizer::Token t3 =
+ readToken(this->m->file, true, MAX_LEN);
if ((t2.getType() == QPDFTokenizer::tt_integer) &&
(t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
{
@@ -1411,7 +1415,7 @@ bool
QPDF::findEndstream()
{
// Find endstream or endobj. Position the input at that token.
- QPDFTokenizer::Token t = readToken(this->m->file, true);
+ QPDFTokenizer::Token t = readToken(this->m->file, true, 20);
if ((t.getType() == QPDFTokenizer::tt_word) &&
((t.getValue() == "endobj") ||
(t.getValue() == "endstream")))
@@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
}
QPDFTokenizer::Token
-QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad)
+QPDF::readToken(PointerHolder<InputSource> input,
+ bool allow_bad, size_t max_len)
{
return this->m->tokenizer.readToken(
- input, this->m->last_object_description, allow_bad);
+ input, this->m->last_object_description, allow_bad, max_len);
}
QPDFObjectHandle
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 6447b93b..6cababfe 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens()
QPDFTokenizer::Token
QPDFTokenizer::readToken(PointerHolder<InputSource> input,
std::string const& context,
- bool allow_bad)
+ bool allow_bad,
+ size_t max_len)
{
qpdf_offset_t offset = input->tell();
Token token;
@@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
++offset;
}
presentCharacter(ch);
+ if (max_len && (raw_val.length() >= max_len) &&
+ (this->state != st_token_ready))
+ {
+ // terminate this token now
+ QTC::TC("qpdf", "QPDFTokenizer block long token");
+ this->type = tt_bad;
+ this->state = st_token_ready;
+ }
}
}