author     Jay Berkenbilt <ejb@ql.org>    2017-08-22 16:24:19 +0200
committer  Jay Berkenbilt <ejb@ql.org>    2017-08-22 20:13:10 +0200
commit     fabff0f3ec571b8dc423946e252c060e229c49d7 (patch)
tree       6aba218b998e0f1bb2928e21d6426352cfdcc072 /libqpdf/QPDFTokenizer.cc
parent     caf5e39c2e68553101519fb4ce3664a73032d3a3 (diff)
download   qpdf-fabff0f3ec571b8dc423946e252c060e229c49d7.tar.zst
Limit token length during xref recovery
While scanning the file looking for objects, limit the length of tokens we allow. This prevents us from getting caught up in reading a file character by character while digging through large streams.
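For illustration only (not part of this commit): a minimal sketch of how a recovery-time caller might pass a token-length cap to readToken. The 512-byte cap, the context string, and the scan_one_token helper are assumptions for the example, not taken from qpdf's actual recovery code.

// Illustrative sketch only; names and the 512-byte cap are assumptions.
#include <qpdf/QPDFTokenizer.hh>
#include <qpdf/InputSource.hh>
#include <qpdf/PointerHolder.hh>
#include <iostream>

void scan_one_token(PointerHolder<InputSource> input)
{
    QPDFTokenizer tokenizer;
    // Cap token length so a huge stream is not consumed one character
    // at a time; 512 is an arbitrary value for this example.
    size_t const max_len = 512;
    QPDFTokenizer::Token t =
        tokenizer.readToken(input, "recovery example", true, max_len);
    if (t.getType() == QPDFTokenizer::tt_bad)
    {
        // A token that hit the cap comes back as tt_bad with the
        // tokenizer's state forced to "token ready", so the caller
        // can simply skip it and keep scanning.
        std::cout << "token was invalid or too long" << std::endl;
    }
}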
Diffstat (limited to 'libqpdf/QPDFTokenizer.cc')
-rw-r--r--   libqpdf/QPDFTokenizer.cc   11
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 6447b93b..6cababfe 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens()
 QPDFTokenizer::Token
 QPDFTokenizer::readToken(PointerHolder<InputSource> input,
                          std::string const& context,
-                         bool allow_bad)
+                         bool allow_bad,
+                         size_t max_len)
 {
     qpdf_offset_t offset = input->tell();
     Token token;
@@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
                 ++offset;
             }
             presentCharacter(ch);
+            if (max_len && (raw_val.length() >= max_len) &&
+                (this->state != st_token_ready))
+            {
+                // terminate this token now
+                QTC::TC("qpdf", "QPDFTokenizer block long token");
+                this->type = tt_bad;
+                this->state = st_token_ready;
+            }
         }
     }