diff options
-rw-r--r-- | include/qpdf/QPDFTokenizer.hh | 9 | ||||
-rw-r--r-- | libqpdf/QPDF.cc | 40 | ||||
-rw-r--r-- | libqpdf/QPDFTokenizer.cc | 49 |
3 files changed, 59 insertions, 39 deletions
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 122c193f..8a1bd216 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -10,6 +10,8 @@
 
 #include <qpdf/DLL.h>
 
+#include <qpdf/InputSource.hh>
+#include <qpdf/PointerHolder.hh>
 #include <string>
 #include <stdio.h>
 
@@ -122,6 +124,13 @@ class QPDFTokenizer
     QPDF_DLL
     bool betweenTokens();
 
+    // Read a token from an input source.  Context describes the
+    // context in which the token is being read and is used in the
+    // exception thrown if there is an error.
+    QPDF_DLL
+    Token readToken(PointerHolder<InputSource> input,
+                    std::string const& context);
+
   private:
     void reset();
 
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 43757735..e1ce5fc4 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
 QPDFTokenizer::Token
 QPDF::readToken(PointerHolder<InputSource> input)
 {
-    qpdf_offset_t offset = input->tell();
-    QPDFTokenizer::Token token;
-    bool unread_char;
-    char char_to_unread;
-    while (! this->tokenizer.getToken(token, unread_char, char_to_unread))
-    {
-        char ch;
-        if (input->read(&ch, 1) == 0)
-        {
-            throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
-                          this->last_object_description, offset,
-                          "EOF while reading token");
-        }
-        else
-        {
-            if (isspace((unsigned char)ch) &&
-                (input->getLastOffset() == offset))
-            {
-                ++offset;
-            }
-            this->tokenizer.presentCharacter(ch);
-        }
-    }
-
-    if (unread_char)
-    {
-        input->unreadCh(char_to_unread);
-    }
-
-    if (token.getType() == QPDFTokenizer::tt_bad)
-    {
-        throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
-                      this->last_object_description, offset,
-                      token.getErrorMessage());
-    }
-
-    input->setLastOffset(offset);
-
-    return token;
+    return this->tokenizer.readToken(input, this->last_object_description);
 }
 
 QPDFObjectHandle
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index a60e8605..979a79bf 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -6,6 +6,7 @@
 
 #include <qpdf/PCRE.hh>
 #include <qpdf/QTC.hh>
+#include <qpdf/QPDFExc.hh>
 
 #include <stdexcept>
 #include <string.h>
@@ -15,6 +16,10 @@ static bool is_hex_digit(char ch)
 {
     return (strchr("0123456789abcdefABCDEF", ch) != 0);
 }
+static bool is_space(char ch) // '\0' is rejected: strchr matches the terminator
+{
+    return ((ch != '\0') && (strchr(" \f\n\r\t\v", ch) != 0));
+}
 
 QPDFTokenizer::QPDFTokenizer() :
     pound_special_in_name(true)
@@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens()
 {
     return ((state == st_top) || (state == st_in_comment));
 }
+
+QPDFTokenizer::Token
+QPDFTokenizer::readToken(PointerHolder<InputSource> input,
+                         std::string const& context)
+{
+    qpdf_offset_t offset = input->tell();
+    Token token;
+    bool unread_char;
+    char char_to_unread;
+    while (! getToken(token, unread_char, char_to_unread))
+    {
+        char ch;
+        if (input->read(&ch, 1) == 0)
+        {
+            throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+                          context, offset,
+                          "EOF while reading token");
+        }
+        else
+        {
+            if (is_space((unsigned char)ch) &&
+                (input->getLastOffset() == offset))
+            {
+                ++offset;
+            }
+            presentCharacter(ch);
+        }
+    }
+
+    if (unread_char)
+    {
+        input->unreadCh(char_to_unread);
+    }
+
+    if (token.getType() == tt_bad)
+    {
+        throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+                      context, offset, token.getErrorMessage());
+    }
+
+    input->setLastOffset(offset);
+
+    return token;
+}