From 69a5fb70472a2112d2bbb938ee3718250d2364cf Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 26 Aug 2022 23:55:56 +0100 Subject: Add methods InputSource::fastRead, fastUnread and fastTell Provide buffered input for QPDFTokenizer. --- include/qpdf/InputSource.hh | 68 +++++++++++++++++++++++++++++++++++++++++++++ libqpdf/QPDFTokenizer.cc | 8 ++---- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/include/qpdf/InputSource.hh b/include/qpdf/InputSource.hh index 9feb8ec3..e9d99cdb 100644 --- a/include/qpdf/InputSource.hh +++ b/include/qpdf/InputSource.hh @@ -93,6 +93,12 @@ class QPDF_DLL_CLASS InputSource // efficient. virtual void unreadCh(char ch) = 0; + // The following methods are for use by QPDFTokenizer + inline qpdf_offset_t fastTell(); + inline bool fastRead(char&); + inline void fastUnread(bool); + inline void loadBuffer(); + protected: qpdf_offset_t last_offset; @@ -111,6 +117,68 @@ class QPDF_DLL_CLASS InputSource }; std::shared_ptr m; + + // State for fast... methods + static const qpdf_offset_t buf_size = 128; + char buffer[buf_size]; + qpdf_offset_t buf_len = 0; + qpdf_offset_t buf_idx = 0; + qpdf_offset_t buf_start = 0; }; +inline void +InputSource::loadBuffer() +{ + this->buf_idx = 0; + this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size)); + // NB read sets last_offset + this->buf_start = this->last_offset; +} + +inline qpdf_offset_t +InputSource::fastTell() +{ + if (this->buf_len == 0) { + loadBuffer(); + } else { + auto curr = tell(); + if (curr < this->buf_start || + curr >= (this->buf_start + this->buf_len)) { + loadBuffer(); + } else { + this->last_offset = curr; + this->buf_idx = curr - this->buf_start; + } + } + return this->last_offset; +} + +inline bool +InputSource::fastRead(char& ch) +{ + // Before calling fastRead, fastTell must be called to prepare the buffer. + // Once reading is complete, fastUnread must be called to set the correct + // file position. 
+ if (this->buf_idx < this->buf_len) { + ch = this->buffer[this->buf_idx]; + ++(this->buf_idx); + ++(this->last_offset); + return true; + + } else if (this->buf_len == 0) { + return false; + } else { + seek(this->buf_start + this->buf_len, SEEK_SET); + fastTell(); + return fastRead(ch); + } +} + +inline void +InputSource::fastUnread(bool back) +{ + this->last_offset -= back ? 1 : 0; + seek(this->last_offset, SEEK_SET); +} + #endif // QPDF_INPUTSOURCE_HH diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 721ea7ea..56550266 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -974,11 +974,11 @@ QPDFTokenizer::readToken( bool allow_bad, size_t max_len) { - qpdf_offset_t offset = input->tell(); + qpdf_offset_t offset = input->fastTell(); while (this->state != st_token_ready) { char ch; - if (input->read(&ch, 1) == 0) { + if (!input->fastRead(ch)) { presentEOF(); if ((this->type == tt_eof) && (!this->allow_eof)) { @@ -1013,9 +1013,7 @@ QPDFTokenizer::readToken( bool unread_char; char char_to_unread; getToken(token, unread_char, char_to_unread); - if (unread_char) { - input->unreadCh(char_to_unread); - } + input->fastUnread(unread_char); if (token.getType() != tt_eof) { input->setLastOffset(offset); -- cgit v1.2.3-54-g00ecf