15 files changed, 1024 insertions, 650 deletions
diff --git a/ChangeLog b/ChangeLog
index b861ddda..093eeaf5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2022-08-27  Jay Berkenbilt  <ejb@ql.org>
+
+        * From m-holger: major refactoring of QPDFTokenizer to improve
+        readability and to optimize performance. This also included some
+        optimizations to some InputSource classes. Thanks for this
+        excellent contribution. Fixes #749, #442.
+
 2022-08-07  Jay Berkenbilt  <ejb@ql.org>
 
         * Add new build configuration option ENABLE_QTC, which is off by
diff --git a/TODO b/TODO
index 1b452805..a4628397 100644
--- a/TODO
+++ b/TODO
@@ -4,6 +4,7 @@ Next
 
 Before Release:
 
+* Review in order #729, #726, #747
 * Make ./performance_check usable by other people by having published
   files to use for testing.
   * https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf
@@ -26,6 +27,14 @@ Pending changes:
 
 Soon: Break ground on "Document-level work"
 
+Remove raw pointers from the API
+================================
+
+(For qpdf >= 12)
+
+See if we can remove raw pointers from the QPDF API. There's a
+discussion in https://github.com/qpdf/qpdf/pull/747.
+
 Fix Multiple Direct Object Owner Issue
 ======================================
 
diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh
index b965704f..1a93815b 100644
--- a/include/qpdf/BufferInputSource.hh
+++ b/include/qpdf/BufferInputSource.hh
@@ -54,26 +54,11 @@ class QPDF_DLL_CLASS BufferInputSource: public InputSource
     virtual void unreadCh(char ch);
 
   private:
-    class QPDF_DLL_PRIVATE Members
-    {
-        friend class BufferInputSource;
-
-      public:
-        QPDF_DLL
-        ~Members() = default;
-
-      private:
-        Members(bool own_memory, std::string const& description, Buffer* buf);
-        Members(Members const&) = delete;
-
-        bool own_memory;
-        std::string description;
-        Buffer* buf;
-        qpdf_offset_t cur_offset;
-        qpdf_offset_t max_offset;
-    };
-
-    std::shared_ptr<Members> m;
+    bool own_memory;
+    std::string description;
+    Buffer* buf;
+    qpdf_offset_t cur_offset;
+    qpdf_offset_t max_offset;
 };
 
 #endif // QPDF_BUFFERINPUTSOURCE_HH
diff --git a/include/qpdf/ClosedFileInputSource.hh b/include/qpdf/ClosedFileInputSource.hh
index c72a1df8..b23c2767 100644
--- a/include/qpdf/ClosedFileInputSource.hh
+++ b/include/qpdf/ClosedFileInputSource.hh
@@ -73,23 +73,10 @@ class QPDF_DLL_CLASS ClosedFileInputSource: public InputSource
     QPDF_DLL_PRIVATE
     void after();
 
-    class QPDF_DLL_PRIVATE Members
-    {
-        friend class ClosedFileInputSource;
-
-      public:
-        QPDF_DLL
-        ~Members() = default;
-
-      private:
-        Members(char const* filename);
-
-        std::string filename;
-        qpdf_offset_t offset;
-        std::shared_ptr<FileInputSource> fis;
-        bool stay_open;
-    };
-    std::shared_ptr<Members> m;
+    std::string filename;
+    qpdf_offset_t offset;
+    std::shared_ptr<FileInputSource> fis;
+    bool stay_open;
 };
 
 #endif // QPDF_CLOSEDFILEINPUTSOURCE_HH
diff --git a/include/qpdf/FileInputSource.hh b/include/qpdf/FileInputSource.hh
index f1e7edf4..9e0d57fb 100644
--- a/include/qpdf/FileInputSource.hh
+++ b/include/qpdf/FileInputSource.hh
@@ -58,24 +58,9 @@ class QPDF_DLL_CLASS FileInputSource: public InputSource
     FileInputSource(FileInputSource const&) = delete;
     FileInputSource& operator=(FileInputSource const&) = delete;
 
-    class QPDF_DLL_PRIVATE Members
-    {
-        friend class FileInputSource;
-
-      public:
-        QPDF_DLL
-        ~Members();
-
-      private:
-        Members(bool close_file);
-        Members(Members const&) = delete;
-
-        bool close_file;
-        std::string filename;
-        FILE* file;
-    };
-
-    std::shared_ptr<Members> m;
+    bool close_file;
+    std::string filename;
+    FILE* file;
 };
 
 #endif // QPDF_FILEINPUTSOURCE_HH
diff --git a/include/qpdf/InputSource.hh b/include/qpdf/InputSource.hh
index 9feb8ec3..e9d99cdb 100644
--- a/include/qpdf/InputSource.hh
+++ b/include/qpdf/InputSource.hh
@@ -93,6 +93,12 @@ class QPDF_DLL_CLASS InputSource
     // efficient.
     virtual void unreadCh(char ch) = 0;
 
+    // The following methods are for use by QPDFTokenizer
+    inline qpdf_offset_t fastTell();
+    inline bool fastRead(char&);
+    inline void fastUnread(bool);
+    inline void loadBuffer();
+
   protected:
     qpdf_offset_t last_offset;
 
@@ -111,6 +117,68 @@ class QPDF_DLL_CLASS InputSource
     };
 
     std::shared_ptr<Members> m;
+
+    // State for fast... methods: buffer holds buf_len bytes from buf_start
+    static const qpdf_offset_t buf_size = 128;
+    char buffer[buf_size];
+    qpdf_offset_t buf_len = 0;
+    qpdf_offset_t buf_idx = 0;
+    qpdf_offset_t buf_start = 0;
 };
 
+inline void
+InputSource::loadBuffer()
+{
+    this->buf_idx = 0;
+    this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size));
+    // NB read sets last_offset (File/Buffer sources: where the read began)
+    this->buf_start = this->last_offset;
+}
+
+inline qpdf_offset_t
+InputSource::fastTell()
+{
+    if (this->buf_len == 0) {
+        loadBuffer();
+    } else {
+        auto curr = tell();
+        if (curr < this->buf_start ||
+            curr >= (this->buf_start + this->buf_len)) {
+            loadBuffer();
+        } else {
+            this->last_offset = curr;
+            this->buf_idx = curr - this->buf_start;
+        }
+    }
+    return this->last_offset;
+}
+
+inline bool
+InputSource::fastRead(char& ch)
+{
+    // Before calling fastRead, fastTell must be called to prepare the buffer.
+    // false is returned when a (re)load leaves the buffer empty. Once reading
+    // is complete, fastUnread must be called to set the correct file position.
+    if (this->buf_idx < this->buf_len) {
+        ch = this->buffer[this->buf_idx];
+        ++(this->buf_idx);
+        ++(this->last_offset);
+        return true;
+
+    } else if (this->buf_len == 0) {
+        return false;
+    } else {
+        seek(this->buf_start + this->buf_len, SEEK_SET);
+        fastTell();
+        return fastRead(ch);
+    }
+}
+
+inline void
+InputSource::fastUnread(bool back)
+{
+    this->last_offset -= back ? 1 : 0;
+    seek(this->last_offset, SEEK_SET);
+}
+
 #endif // QPDF_INPUTSOURCE_HH
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 2187f21e..33b2e710 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -193,60 +193,82 @@ class QPDFTokenizer
     QPDFTokenizer(QPDFTokenizer const&) = delete;
     QPDFTokenizer& operator=(QPDFTokenizer const&) = delete;
 
-    void resolveLiteral();
     bool isSpace(char);
     bool isDelimiter(char);
     void findEI(std::shared_ptr<InputSource> input);
 
     enum state_e {
         st_top,
+        st_in_hexstring,
+        st_in_string,
+        st_in_hexstring_2nd,
+        st_name,
+        st_literal,
         st_in_space,
         st_in_comment,
-        st_in_string,
+        st_string_escape,
+        st_char_code,
+        st_string_after_cr,
         st_lt,
         st_gt,
-        st_literal,
-        st_in_hexstring,
         st_inline_image,
+        st_sign,
+        st_number,
+        st_real,
+        st_decimal,
+        st_name_hex1,
+        st_name_hex2,
+        st_before_token,
         st_token_ready
     };
 
-    class Members
-    {
-        friend class QPDFTokenizer;
-
-      public:
-        QPDF_DLL
-        ~Members() = default;
+    void handleCharacter(char);
+    void inBeforeToken(char);
+    void inTop(char);
+    void inSpace(char);
+    void inComment(char);
+    void inString(char);
+    void inName(char);
+    void inLt(char);
+    void inGt(char);
+    void inStringAfterCR(char);
+    void inStringEscape(char);
+    void inLiteral(char);
+    void inCharCode(char);
+    void inHexstring(char);
+    void inHexstring2nd(char);
+    void inInlineImage(char);
+    void inTokenReady(char);
+    void inNameHex1(char);
+    void inNameHex2(char);
+    void inSign(char);
+    void inDecimal(char);
+    void inNumber(char);
+    void inReal(char);
+    void reset();
 
-      private:
-        Members();
-        Members(Members const&) = delete;
-        void reset();
+    // Lexer state
+    state_e state;
 
-        // Lexer state
-        state_e state;
+    bool allow_eof;
+    bool include_ignorable;
 
-        bool allow_eof;
-        bool include_ignorable;
+    // Current token accumulation
+    token_type_e type;
+    std::string val;
+    std::string raw_val;
+    std::string error_message;
+    bool before_token;
+    bool in_token;
+    char char_to_unread;
+    size_t inline_image_bytes;
+    bool bad;
 
-        // Current token accumulation
-        token_type_e type;
-        std::string val;
-        std::string raw_val;
-        std::string error_message;
-        bool unread_char;
-        char char_to_unread;
-        size_t inline_image_bytes;
-
-        // State for strings
-        int string_depth;
-        bool string_ignoring_newline;
-        char bs_num_register[4];
-        bool last_char_was_bs;
-        bool last_char_was_cr;
-    };
-    std::shared_ptr<Members> m;
+    // State for strings
+    int string_depth;
+    int char_code;
+    char hex_char;
+    int digit_count;
 };
 
 #endif // QPDFTOKENIZER_HH
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index 32aeae1f..41b89da4 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -25,6 +25,7 @@
 #include <qpdf/DLL.h>
 #include <qpdf/PointerHolder.hh>
 #include <qpdf/Types.h>
+#include <cstring>
 #include <functional>
 #include <list>
 #include <memory>
@@ -489,16 +490,16 @@ namespace QUtil
     // classes without using ctype, which we avoid because of locale
     // considerations.
     QPDF_DLL
-    bool is_hex_digit(char);
+    inline bool is_hex_digit(char);
 
     QPDF_DLL
-    bool is_space(char);
+    inline bool is_space(char);
 
     QPDF_DLL
-    bool is_digit(char);
+    inline bool is_digit(char);
 
     QPDF_DLL
-    bool is_number(char const*);
+    inline bool is_number(char const*);
 
     // This method parses the numeric range syntax used by the qpdf
     // command-line tool. May throw std::runtime_error.
@@ -526,4 +527,54 @@ namespace QUtil
 #endif // QPDF_NO_WCHAR_T
 };     // namespace QUtil
 
+// Locale-independent character classification. The explicit check
+// for a null ch is required because strchr also matches the
+// terminating null of the string being searched.
+inline bool
+QUtil::is_hex_digit(char ch)
+{
+    return (ch && (std::strchr("0123456789abcdefABCDEF", ch) != nullptr));
+}
+
+// See is_hex_digit for why ch is tested before calling strchr.
+inline bool
+QUtil::is_space(char ch)
+{
+    return (ch && (std::strchr(" \f\n\r\t\v", ch) != nullptr));
+}
+
+inline bool
+QUtil::is_digit(char ch)
+{
+    return ((ch >= '0') && (ch <= '9'));
+}
+
+inline bool
+QUtil::is_number(char const* p)
+{
+    // ^[\+\-]?(\.\d+|\d+(\.\d*)?)$
+    if (!*p) {
+        return false;
+    }
+    if ((*p == '-') || (*p == '+')) {
+        ++p;
+    }
+    bool found_dot = false;
+    bool found_digit = false;
+    for (; *p; ++p) {
+        if (*p == '.') {
+            if (found_dot) {
+                // only one dot
+                return false;
+            }
+            found_dot = true;
+        } else if (QUtil::is_digit(*p)) {
+            found_digit = true;
+        } else {
+            return false;
+        }
+    }
+    return found_digit;
+}
+
 #endif // QUTIL_HH
diff --git a/libqpdf/BufferInputSource.cc b/libqpdf/BufferInputSource.cc
index 5b59c801..6402f639 100644
--- a/libqpdf/BufferInputSource.cc
+++ b/libqpdf/BufferInputSource.cc
@@ -7,8 +7,8 @@
 #include <stdexcept>
 #include <string.h>
 
-BufferInputSource::Members::Members(
-    bool own_memory, std::string const& description, Buffer* buf) :
+BufferInputSource::BufferInputSource(
+    std::string const& description, Buffer* buf, bool own_memory) :
     own_memory(own_memory),
     description(description),
     buf(buf),
@@ -18,60 +18,54 @@ BufferInputSource::Members::Members(
 }
 
 BufferInputSource::BufferInputSource(
-    std::string const& description, Buffer* buf, bool own_memory) :
-    m(new Members(own_memory, description, buf))
-{
-}
-
-BufferInputSource::BufferInputSource(
     std::string const& description, std::string const& contents) :
-    m(new Members(true, description, nullptr))
+    own_memory(true),
+    description(description),
+    buf(new Buffer(contents.length())),
+    cur_offset(0),
+    max_offset(QIntC::to_offset(buf->getSize()))
 {
-    this->m->buf = new Buffer(contents.length());
-    this->m->max_offset = QIntC::to_offset(this->m->buf->getSize());
-    unsigned char* bp = this->m->buf->getBuffer();
-    memcpy(bp, contents.c_str(), contents.length());
+    memcpy(buf->getBuffer(), contents.c_str(), contents.length());
 }
 
 BufferInputSource::~BufferInputSource()
 {
-    if (this->m->own_memory) {
-        delete this->m->buf;
+    if (this->own_memory) {
+        delete this->buf;
     }
 }
 
 qpdf_offset_t
 BufferInputSource::findAndSkipNextEOL()
 {
-    if (this->m->cur_offset < 0) {
+    if (this->cur_offset < 0) {
         throw std::logic_error("INTERNAL ERROR: BufferInputSource offset < 0");
     }
-    qpdf_offset_t end_pos = this->m->max_offset;
-    if (this->m->cur_offset >= end_pos) {
+    qpdf_offset_t end_pos = this->max_offset;
+    if (this->cur_offset >= end_pos) {
         this->last_offset = end_pos;
-        this->m->cur_offset = end_pos;
+        this->cur_offset = end_pos;
         return end_pos;
     }
 
     qpdf_offset_t result = 0;
-    unsigned char const* buffer = this->m->buf->getBuffer();
+    unsigned char const* buffer = this->buf->getBuffer();
     unsigned char const* end = buffer + end_pos;
-    unsigned char const* p = buffer + this->m->cur_offset;
+    unsigned char const* p = buffer + this->cur_offset;
 
     while ((p < end) && !((*p == '\r') || (*p == '\n'))) {
         ++p;
     }
     if (p < end) {
         result = p - buffer;
-        this->m->cur_offset = result + 1;
+        this->cur_offset = result + 1;
         ++p;
-        while ((this->m->cur_offset < end_pos) &&
-               ((*p == '\r') || (*p == '\n'))) {
+        while ((this->cur_offset < end_pos) && ((*p == '\r') || (*p == '\n'))) {
             ++p;
-            ++this->m->cur_offset;
+            ++this->cur_offset;
         }
     } else {
-        this->m->cur_offset = end_pos;
+        this->cur_offset = end_pos;
         result = end_pos;
     }
     return result;
@@ -80,13 +74,13 @@ BufferInputSource::findAndSkipNextEOL()
 std::string const&
 BufferInputSource::getName() const
 {
-    return this->m->description;
+    return this->description;
 }
 
 qpdf_offset_t
 BufferInputSource::tell()
 {
-    return this->m->cur_offset;
+    return this->cur_offset;
 }
 
 void
@@ -94,17 +88,17 @@ BufferInputSource::seek(qpdf_offset_t offset, int whence)
 {
     switch (whence) {
     case SEEK_SET:
-        this->m->cur_offset = offset;
+        this->cur_offset = offset;
         break;
 
     case SEEK_END:
-        QIntC::range_check(this->m->max_offset, offset);
-        this->m->cur_offset = this->m->max_offset + offset;
+        QIntC::range_check(this->max_offset, offset);
+        this->cur_offset = this->max_offset + offset;
         break;
 
     case SEEK_CUR:
-        QIntC::range_check(this->m->cur_offset, offset);
-        this->m->cur_offset += offset;
+        QIntC::range_check(this->cur_offset, offset);
+        this->cur_offset += offset;
         break;
 
     default:
@@ -113,42 +107,41 @@ BufferInputSource::seek(qpdf_offset_t offset, int whence)
         break;
     }
 
-    if (this->m->cur_offset < 0) {
+    if (this->cur_offset < 0) {
         throw std::runtime_error(
-            this->m->description + ": seek before beginning of buffer");
+            this->description + ": seek before beginning of buffer");
     }
 }
 
 void
 BufferInputSource::rewind()
 {
-    this->m->cur_offset = 0;
+    this->cur_offset = 0;
 }
 
 size_t
 BufferInputSource::read(char* buffer, size_t length)
 {
-    if (this->m->cur_offset < 0) {
+    if (this->cur_offset < 0) {
         throw std::logic_error("INTERNAL ERROR: BufferInputSource offset < 0");
     }
-    qpdf_offset_t end_pos = this->m->max_offset;
-    if (this->m->cur_offset >= end_pos) {
+    qpdf_offset_t end_pos = this->max_offset;
+    if (this->cur_offset >= end_pos) {
         this->last_offset = end_pos;
         return 0;
     }
 
-    this->last_offset = this->m->cur_offset;
-    size_t len =
-        std::min(QIntC::to_size(end_pos - this->m->cur_offset), length);
-    memcpy(buffer, this->m->buf->getBuffer() + this->m->cur_offset, len);
-    this->m->cur_offset += QIntC::to_offset(len);
+    this->last_offset = this->cur_offset;
+    size_t len = std::min(QIntC::to_size(end_pos - this->cur_offset), length);
+    memcpy(buffer, this->buf->getBuffer() + this->cur_offset, len);
+    this->cur_offset += QIntC::to_offset(len);
     return len;
 }
 
 void
 BufferInputSource::unreadCh(char ch)
 {
-    if (this->m->cur_offset > 0) {
-        --this->m->cur_offset;
+    if (this->cur_offset > 0) {
+        --this->cur_offset;
     }
 }
diff --git a/libqpdf/ClosedFileInputSource.cc b/libqpdf/ClosedFileInputSource.cc
index ec977c69..06ebb156 100644
--- a/libqpdf/ClosedFileInputSource.cc
+++ b/libqpdf/ClosedFileInputSource.cc
@@ -2,18 +2,13 @@
 
 #include <qpdf/FileInputSource.hh>
 
-ClosedFileInputSource::Members::Members(char const* filename) :
+ClosedFileInputSource::ClosedFileInputSource(char const* filename) :
     filename(filename),
     offset(0),
     stay_open(false)
 {
 }
 
-ClosedFileInputSource::ClosedFileInputSource(char const* filename) :
-    m(new Members(filename))
-{
-}
-
 ClosedFileInputSource::~ClosedFileInputSource()
 {
     // Must be explicit and not inline -- see QPDF_DLL_CLASS in
@@ -23,30 +18,29 @@ ClosedFileInputSource::~ClosedFileInputSource()
 void
 ClosedFileInputSource::before()
 {
-    if (nullptr == this->m->fis) {
-        this->m->fis =
-            std::make_shared<FileInputSource>(this->m->filename.c_str());
-        this->m->fis->seek(this->m->offset, SEEK_SET);
-        this->m->fis->setLastOffset(this->last_offset);
+    if (nullptr == this->fis) {
+        this->fis = std::make_shared<FileInputSource>(this->filename.c_str());
+        this->fis->seek(this->offset, SEEK_SET);
+        this->fis->setLastOffset(this->last_offset);
     }
 }
 
 void
 ClosedFileInputSource::after()
 {
-    this->last_offset = this->m->fis->getLastOffset();
-    this->m->offset = this->m->fis->tell();
-    if (this->m->stay_open) {
+    this->last_offset = this->fis->getLastOffset();
+    this->offset = this->fis->tell();
+    if (this->stay_open) {
         return;
     }
-    this->m->fis = nullptr;
+    this->fis = nullptr;
 }
 
 qpdf_offset_t
 ClosedFileInputSource::findAndSkipNextEOL()
 {
     before();
-    qpdf_offset_t r = this->m->fis->findAndSkipNextEOL();
+    qpdf_offset_t r = this->fis->findAndSkipNextEOL();
     after();
     return r;
 }
@@ -54,14 +48,14 @@ ClosedFileInputSource::findAndSkipNextEOL()
 std::string const&
 ClosedFileInputSource::getName() const
 {
-    return this->m->filename;
+    return this->filename;
 }
 
 qpdf_offset_t
 ClosedFileInputSource::tell()
 {
     before();
-    qpdf_offset_t r = this->m->fis->tell();
+    qpdf_offset_t r = this->fis->tell();
     after();
     return r;
 }
@@ -70,16 +64,16 @@ void
 ClosedFileInputSource::seek(qpdf_offset_t offset, int whence)
 {
     before();
-    this->m->fis->seek(offset, whence);
+    this->fis->seek(offset, whence);
     after();
 }
 
 void
 ClosedFileInputSource::rewind()
 {
-    this->m->offset = 0;
-    if (this->m->fis.get()) {
-        this->m->fis->rewind();
+    this->offset = 0;
+    if (this->fis.get()) {
+        this->fis->rewind();
     }
 }
 
@@ -87,7 +81,7 @@ size_t
 ClosedFileInputSource::read(char* buffer, size_t length)
 {
     before();
-    size_t r = this->m->fis->read(buffer, length);
+    size_t r = this->fis->read(buffer, length);
     after();
     return r;
 }
@@ -96,7 +90,7 @@ void
 ClosedFileInputSource::unreadCh(char ch)
 {
     before();
-    this->m->fis->unreadCh(ch);
+    this->fis->unreadCh(ch);
     // Don't call after -- the file has to stay open after this
     // operation.
 }
@@ -104,8 +98,8 @@ ClosedFileInputSource::unreadCh(char ch)
 void
 ClosedFileInputSource::stayOpen(bool val)
 {
-    this->m->stay_open = val;
-    if ((!val) && this->m->fis.get()) {
+    this->stay_open = val;
+    if ((!val) && this->fis.get()) {
         after();
     }
 }
diff --git a/libqpdf/FileInputSource.cc b/libqpdf/FileInputSource.cc
index ab88d302..2b1ee1ab 100644
--- a/libqpdf/FileInputSource.cc
+++ b/libqpdf/FileInputSource.cc
@@ -5,60 +5,65 @@
 #include <algorithm>
 #include <string.h>
 
-FileInputSource::Members::Members(bool close_file) :
-    close_file(close_file),
-    file(nullptr)
-{
-}
-
-FileInputSource::Members::~Members()
-{
-    if (this->file && this->close_file) {
-        fclose(this->file);
-    }
-}
-
 FileInputSource::FileInputSource() :
-    m(new Members(false))
+    close_file(false),
+    file(nullptr)
 {
 }
 
 FileInputSource::FileInputSource(char const* filename) :
-    m(new Members(false))
+    close_file(true),
+    filename(filename),
+    file(QUtil::safe_fopen(filename, "rb"))
 {
-    setFilename(filename);
 }
 
 FileInputSource::FileInputSource(
     char const* description, FILE* filep, bool close_file) :
-    m(new Members(false))
+    close_file(close_file),
+    filename(description),
+    file(filep)
+{
+}
+
+FileInputSource::~FileInputSource()
 {
-    setFile(description, filep, close_file);
+    // Must be explicit and not inline -- see QPDF_DLL_CLASS in
+    // README-maintainer
+    if (this->file && this->close_file) {
+        fclose(this->file);
+    }
 }
 
 void
 FileInputSource::setFilename(char const* filename)
 {
-    this->m = std::shared_ptr<Members>(new Members(true));
-    this->m->filename = filename;
-    this->m->file = QUtil::safe_fopen(filename, "rb");
+    // Release any file we own before replacing it; reassigning m used to
+    // do this through the old Members destructor. Clear the handle first
+    // so a failure in safe_fopen cannot leave a closed FILE* behind.
+    if (this->file && this->close_file) {
+        fclose(this->file);
+    }
+    this->file = nullptr;
+    this->close_file = true;
+    this->filename = filename;
+    this->file = QUtil::safe_fopen(filename, "rb");
 }
 
 void
 FileInputSource::setFile(char const* description, FILE* filep, bool close_file)
 {
-    this->m = std::shared_ptr<Members>(new Members(close_file));
-    this->m->filename = description;
-    this->m->file = filep;
+    // Release any file we own before replacing it, unless the caller is
+    // handing back the same FILE*.
+    if (this->file && this->close_file && (this->file != filep)) {
+        fclose(this->file);
+    }
+    this->close_file = close_file;
+    this->filename = description;
+    this->file = filep;
     this->seek(0, SEEK_SET);
 }
 
-FileInputSource::~FileInputSource()
-{
-    // Must be explicit and not inline -- see QPDF_DLL_CLASS in
-    // README-maintainer
-}
-
 qpdf_offset_t
 FileInputSource::findAndSkipNextEOL()
 {
@@ -66,7 +71,7 @@ FileInputSource::findAndSkipNextEOL()
     bool done = false;
     char buf[10240];
     while (!done) {
-        qpdf_offset_t cur_offset = QUtil::tell(this->m->file);
+        qpdf_offset_t cur_offset = QUtil::tell(this->file);
         size_t len = this->read(buf, sizeof(buf));
         if (len == 0) {
             done = true;
@@ -98,41 +103,42 @@ FileInputSource::findAndSkipNextEOL()
 std::string const&
 FileInputSource::getName() const
 {
-    return this->m->filename;
+    return this->filename;
 }
 
 qpdf_offset_t
 FileInputSource::tell()
 {
-    return QUtil::tell(this->m->file);
+    return QUtil::tell(this->file);
 }
 
 void
 FileInputSource::seek(qpdf_offset_t offset, int whence)
 {
-    QUtil::os_wrapper(
-        (std::string("seek to ") + this->m->filename + ", offset " +
-         QUtil::int_to_string(offset) + " (" + QUtil::int_to_string(whence) +
-         ")"),
-        QUtil::seek(this->m->file, offset, whence));
+    if (QUtil::seek(this->file, offset, whence) == -1) {
+        QUtil::throw_system_error(
+            std::string("seek to ") + this->filename + ", offset " +
+            QUtil::int_to_string(offset) + " (" + QUtil::int_to_string(whence) +
+            ")");
+    }
 }
 
 void
 FileInputSource::rewind()
 {
-    ::rewind(this->m->file);
+    ::rewind(this->file);
 }
 
 size_t
 FileInputSource::read(char* buffer, size_t length)
 {
-    this->last_offset = this->tell();
-    size_t len = fread(buffer, 1, length, this->m->file);
+    this->last_offset = QUtil::tell(this->file);
+    size_t len = fread(buffer, 1, length, this->file);
     if (len == 0) {
-        if (ferror(this->m->file)) {
+        if (ferror(this->file)) {
             throw QPDFExc(
                 qpdf_e_system,
-                this->m->filename,
+                this->filename,
                 "",
                 this->last_offset,
                 (std::string("read ") + QUtil::uint_to_string(length) +
@@ -148,7 +154,7 @@ FileInputSource::read(char* buffer, size_t length)
 void
 FileInputSource::unreadCh(char ch)
 {
-    QUtil::os_wrapper(
-        this->m->filename + ": unread character",
-        ungetc(static_cast<unsigned char>(ch), this->m->file));
+    if (ungetc(static_cast<unsigned char>(ch), this->file) == EOF) {
+        QUtil::throw_system_error(this->filename + ": unread character");
+    }
 }
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 1726e1b9..cd8f932d 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -73,28 +73,20 @@ QPDFWordTokenFinder::check()
     return true;
 }
 
-QPDFTokenizer::Members::Members() :
-    allow_eof(false),
-    include_ignorable(false)
-{
-    reset();
-}
-
 void
-QPDFTokenizer::Members::reset()
+QPDFTokenizer::reset()
 {
-    state = st_top;
+    state = st_before_token;
     type = tt_bad;
-    val = "";
-    raw_val = "";
+    val.clear();
+    raw_val.clear();
     error_message = "";
-    unread_char = false;
+    before_token = true;
+    in_token = false;
     char_to_unread = '\0';
     inline_image_bytes = 0;
     string_depth = 0;
-    string_ignoring_newline = false;
-    last_char_was_bs = false;
-    last_char_was_cr = false;
+    bad = false;
 }
 
 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
@@ -110,20 +102,22 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
 }
 
 QPDFTokenizer::QPDFTokenizer() :
-    m(new Members())
+    allow_eof(false),
+    include_ignorable(false)
 {
+    reset();
 }
 
 void
 QPDFTokenizer::allowEOF()
 {
-    this->m->allow_eof = true;
+    this->allow_eof = true;
 }
 
 void
 QPDFTokenizer::includeIgnorable()
 {
-    this->m->include_ignorable = true;
+    this->include_ignorable = true;
 }
 
 bool
@@ -139,376 +133,719 @@ QPDFTokenizer::isDelimiter(char ch)
 }
 
 void
-QPDFTokenizer::resolveLiteral()
-{
-    if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) {
-        this->m->type = tt_name;
-        // Deal with # in name token.  Note: '/' by itself is a
-        // valid name, so don't strip leading /.  That way we
-        // don't have to deal with the empty string as a name.
-        std::string nval = "/";
-        size_t len = this->m->val.length();
-        for (size_t i = 1; i < len; ++i) {
-            char ch = this->m->val.at(i);
-            if (ch == '#') {
-                if ((i + 2 < len) &&
-                    QUtil::is_hex_digit(this->m->val.at(i + 1)) &&
-                    QUtil::is_hex_digit(this->m->val.at(i + 2))) {
-                    char num[3];
-                    num[0] = this->m->val.at(i + 1);
-                    num[1] = this->m->val.at(i + 2);
-                    num[2] = '\0';
-                    char ch2 = static_cast<char>(strtol(num, nullptr, 16));
-                    if (ch2 == '\0') {
-                        this->m->type = tt_bad;
-                        QTC::TC("qpdf", "QPDFTokenizer null in name");
-                        this->m->error_message =
-                            "null character not allowed in name token";
-                        nval += "#00";
-                    } else {
-                        nval.append(1, ch2);
-                    }
-                    i += 2;
-                } else {
-                    QTC::TC("qpdf", "QPDFTokenizer bad name");
-                    this->m->error_message =
-                        "name with stray # will not work with PDF >= 1.2";
-                    // Use null to encode a bad # -- this is reversed
-                    // in QPDF_Name::normalizeName.
-                    nval += '\0';
-                }
-            } else {
-                nval.append(1, ch);
-            }
-        }
-        this->m->val = nval;
-    } else if (QUtil::is_number(this->m->val.c_str())) {
-        if (this->m->val.find('.') != std::string::npos) {
-            this->m->type = tt_real;
-        } else {
-            this->m->type = tt_integer;
-        }
-    } else if ((this->m->val == "true") || (this->m->val == "false")) {
-        this->m->type = tt_bool;
-    } else if (this->m->val == "null") {
-        this->m->type = tt_null;
-    } else {
-        // I don't really know what it is, so leave it as tt_word.
-        // Lots of cases ($, #, etc.) other than actual words fall
-        // into this category, but that's okay at least for now.
-        this->m->type = tt_word;
+QPDFTokenizer::presentCharacter(char ch)
+{
+    handleCharacter(ch);
+
+    if (this->in_token) {
+        this->raw_val += ch;
     }
 }
 
 void
-QPDFTokenizer::presentCharacter(char ch)
+QPDFTokenizer::handleCharacter(char ch)
 {
-    if (this->m->state == st_token_ready) {
+    // State machine is implemented such that the final character may not be
+    // handled.  This happens whenever you have to use a character from the
+    // next token to detect the end of the current token.
+
+    switch (this->state) {
+    case st_top:
+        inTop(ch);
+        return;
+
+    case st_in_space:
+        inSpace(ch);
+        return;
+
+    case st_in_comment:
+        inComment(ch);
+        return;
+
+    case st_lt:
+        inLt(ch);
+        return;
+
+    case st_gt:
+        inGt(ch);
+        return;
+
+    case st_in_string:
+        inString(ch);
+        return;
+
+    case st_name:
+        inName(ch);
+        return;
+
+    case st_number:
+        inNumber(ch);
+        return;
+
+    case st_real:
+        inReal(ch);
+        return;
+
+    case st_string_after_cr:
+        inStringAfterCR(ch);
+        return;
+
+    case st_string_escape:
+        inStringEscape(ch);
+        return;
+
+    case st_char_code:
+        inCharCode(ch);
+        return;
+
+    case st_literal:
+        inLiteral(ch);
+        return;
+
+    case st_inline_image:
+        inInlineImage(ch);
+        return;
+
+    case st_in_hexstring:
+        inHexstring(ch);
+        return;
+
+    case st_in_hexstring_2nd:
+        inHexstring2nd(ch);
+        return;
+
+    case st_name_hex1:
+        inNameHex1(ch);
+        return;
+
+    case st_name_hex2:
+        inNameHex2(ch);
+        return;
+
+    case st_sign:
+        inSign(ch);
+        return;
+
+    case st_decimal:
+        inDecimal(ch);
+        return;
+
+    case (st_before_token):
+        inBeforeToken(ch);
+        return;
+
+    case (st_token_ready):
+        inTokenReady(ch);
+        return;
+
+    default:
         throw std::logic_error(
-            "INTERNAL ERROR: QPDF tokenizer presented character "
-            "while token is waiting");
+            "INTERNAL ERROR: invalid state while reading token");
     }
+}
 
-    char orig_ch = ch;
-
-    // State machine is implemented such that some characters may be
-    // handled more than once.  This happens whenever you have to use
-    // the character that caused a state change in the new state.
+void
+QPDFTokenizer::inTokenReady(char ch)
+{
+    throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character "
+                           "while token is waiting");
+}
 
-    bool handled = true;
-    if (this->m->state == st_top) {
-        // Note: we specifically do not use ctype here.  It is
-        // locale-dependent.
-        if (isSpace(ch)) {
-            if (this->m->include_ignorable) {
-                this->m->state = st_in_space;
-                this->m->val += ch;
-            }
-        } else if (ch == '%') {
-            this->m->state = st_in_comment;
-            if (this->m->include_ignorable) {
-                this->m->val += ch;
-            }
-        } else if (ch == '(') {
-            this->m->string_depth = 1;
-            this->m->string_ignoring_newline = false;
-            memset(
-                this->m->bs_num_register,
-                '\0',
-                sizeof(this->m->bs_num_register));
-            this->m->last_char_was_bs = false;
-            this->m->last_char_was_cr = false;
-            this->m->state = st_in_string;
-        } else if (ch == '<') {
-            this->m->state = st_lt;
-        } else if (ch == '>') {
-            this->m->state = st_gt;
-        } else {
-            this->m->val += ch;
-            if (ch == ')') {
-                this->m->type = tt_bad;
-                QTC::TC("qpdf", "QPDFTokenizer bad )");
-                this->m->error_message = "unexpected )";
-                this->m->state = st_token_ready;
-            } else if (ch == '[') {
-                this->m->type = tt_array_open;
-                this->m->state = st_token_ready;
-            } else if (ch == ']') {
-                this->m->type = tt_array_close;
-                this->m->state = st_token_ready;
-            } else if (ch == '{') {
-                this->m->type = tt_brace_open;
-                this->m->state = st_token_ready;
-            } else if (ch == '}') {
-                this->m->type = tt_brace_close;
-                this->m->state = st_token_ready;
-            } else {
-                this->m->state = st_literal;
-            }
-        }
-    } else if (this->m->state == st_in_space) {
-        // We only enter this state if include_ignorable is true.
-        if (!isSpace(ch)) {
-            this->m->type = tt_space;
-            this->m->unread_char = true;
-            this->m->char_to_unread = ch;
-            this->m->state = st_token_ready;
-        } else {
-            this->m->val += ch;
-        }
-    } else if (this->m->state == st_in_comment) {
-        if ((ch == '\r') || (ch == '\n')) {
-            if (this->m->include_ignorable) {
-                this->m->type = tt_comment;
-                this->m->unread_char = true;
-                this->m->char_to_unread = ch;
-                this->m->state = st_token_ready;
-            } else {
-                this->m->state = st_top;
-            }
-        } else if (this->m->include_ignorable) {
-            this->m->val += ch;
-        }
-    } else if (this->m->state == st_lt) {
-        if (ch == '<') {
-            this->m->val = "<<";
-            this->m->type = tt_dict_open;
-            this->m->state = st_token_ready;
-        } else {
-            handled = false;
-            this->m->state = st_in_hexstring;
-        }
-    } else if (this->m->state == st_gt) {
-        if (ch == '>') {
-            this->m->val = ">>";
-            this->m->type = tt_dict_close;
-            this->m->state = st_token_ready;
-        } else {
-            this->m->val = ">";
-            this->m->type = tt_bad;
-            QTC::TC("qpdf", "QPDFTokenizer bad >");
-            this->m->error_message = "unexpected >";
-            this->m->unread_char = true;
-            this->m->char_to_unread = ch;
-            this->m->state = st_token_ready;
+void
+QPDFTokenizer::inBeforeToken(char ch)
+{
+    // Note: we specifically do not use ctype here.  It is
+    // locale-dependent.
+    if (isSpace(ch)) {
+        this->before_token = !this->include_ignorable;
+        this->in_token = this->include_ignorable;
+        if (this->include_ignorable) {
+            this->state = st_in_space;
+            this->val += ch;
         }
-    } else if (this->m->state == st_in_string) {
-        if (this->m->string_ignoring_newline && (ch != '\n')) {
-            this->m->string_ignoring_newline = false;
+    } else if (ch == '%') {
+        this->before_token = !this->include_ignorable;
+        this->in_token = this->include_ignorable;
+        this->state = st_in_comment;
+        if (this->include_ignorable) {
+            this->val += ch;
         }
+    } else {
+        this->before_token = false;
+        this->in_token = true;
+        inTop(ch);
+    }
+}
 
-        size_t bs_num_count = strlen(this->m->bs_num_register);
-        bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
-        if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) {
-            // We've accumulated \ddd.  PDF Spec says to ignore
-            // high-order overflow.
-            this->m->val +=
-                static_cast<char>(strtol(this->m->bs_num_register, nullptr, 8));
-            memset(
-                this->m->bs_num_register,
-                '\0',
-                sizeof(this->m->bs_num_register));
-            bs_num_count = 0;
-        }
+void
+QPDFTokenizer::inTop(char ch)
+{
+    switch (ch) {
+    case '(':
+        this->string_depth = 1;
+        this->state = st_in_string;
+        return;
 
-        if (this->m->string_ignoring_newline && (ch == '\n')) {
-            // ignore
-            this->m->string_ignoring_newline = false;
-        } else if (
-            ch_is_octal && (this->m->last_char_was_bs || (bs_num_count > 0))) {
-            this->m->bs_num_register[bs_num_count++] = ch;
-        } else if (this->m->last_char_was_bs) {
-            switch (ch) {
-            case 'n':
-                this->m->val += '\n';
-                break;
+    case '<':
+        this->state = st_lt;
+        return;
 
-            case 'r':
-                this->m->val += '\r';
-                break;
+    case '>':
+        this->state = st_gt;
+        return;
 
-            case 't':
-                this->m->val += '\t';
-                break;
+    case (')'):
+        this->type = tt_bad;
+        QTC::TC("qpdf", "QPDFTokenizer bad )");
+        this->error_message = "unexpected )";
+        this->val += ch;
+        this->state = st_token_ready;
+        return;
 
-            case 'b':
-                this->m->val += '\b';
-                break;
+    case '[':
+        this->type = tt_array_open;
+        this->state = st_token_ready;
+        this->val += ch;
+        return;
 
-            case 'f':
-                this->m->val += '\f';
-                break;
+    case ']':
+        this->type = tt_array_close;
+        this->val += ch;
+        this->state = st_token_ready;
+        return;
 
-            case '\n':
-                break;
+    case '{':
+        this->type = tt_brace_open;
+        this->state = st_token_ready;
+        this->val += ch;
+        return;
 
-            case '\r':
-                this->m->string_ignoring_newline = true;
-                break;
+    case '}':
+        this->type = tt_brace_close;
+        this->state = st_token_ready;
+        this->val += ch;
+        return;
 
-            default:
-                // PDF spec says backslash is ignored before anything else
-                this->m->val += ch;
-                break;
-            }
-        } else if (ch == '\\') {
-            // last_char_was_bs is set/cleared below as appropriate
-            if (bs_num_count) {
-                throw std::logic_error(
-                    "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
-                    "when ch == '\\'");
-            }
-        } else if (ch == '(') {
-            this->m->val += ch;
-            ++this->m->string_depth;
-        } else if ((ch == ')') && (--this->m->string_depth == 0)) {
-            this->m->type = tt_string;
-            this->m->state = st_token_ready;
-        } else if (ch == '\r') {
-            // CR by itself is converted to LF
-            this->m->val += '\n';
-        } else if (ch == '\n') {
-            // CR LF is converted to LF
-            if (!this->m->last_char_was_cr) {
-                this->m->val += ch;
-            }
-        } else {
-            this->m->val += ch;
-        }
+    case '/':
+        this->state = st_name;
+        this->val += ch;
+        return;
 
-        this->m->last_char_was_cr =
-            ((!this->m->string_ignoring_newline) && (ch == '\r'));
-        this->m->last_char_was_bs =
-            ((!this->m->last_char_was_bs) && (ch == '\\'));
-    } else if (this->m->state == st_literal) {
-        if (isDelimiter(ch)) {
-            // A C-locale whitespace character or delimiter terminates
-            // token.  It is important to unread the whitespace
-            // character even though it is ignored since it may be the
-            // newline after a stream keyword.  Removing it here could
-            // make the stream-reading code break on some files,
-            // though not on any files in the test suite as of this
-            // writing.
-
-            this->m->type = tt_word;
-            this->m->unread_char = true;
-            this->m->char_to_unread = ch;
-            this->m->state = st_token_ready;
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+        this->state = st_number;
+        this->val += ch;
+        return;
+
+    case '+':
+    case '-':
+        this->state = st_sign;
+        this->val += ch;
+        return;
+
+    case '.':
+        this->state = st_decimal;
+        this->val += ch;
+        return;
+
+    default:
+        this->state = st_literal;
+        this->val += ch;
+        return;
+    }
+}
+
+void
+QPDFTokenizer::inSpace(char ch)
+{
+    // We only enter this state if include_ignorable is true.
+    if (!isSpace(ch)) {
+        this->type = tt_space;
+        this->in_token = false;
+        this->char_to_unread = ch;
+        this->state = st_token_ready;
+        return;
+    } else {
+        this->val += ch;
+        return;
+    }
+}
+
+void
+QPDFTokenizer::inComment(char ch)
+{
+    if ((ch == '\r') || (ch == '\n')) {
+        if (this->include_ignorable) {
+            this->type = tt_comment;
+            this->in_token = false;
+            this->char_to_unread = ch;
+            this->state = st_token_ready;
         } else {
-            this->m->val += ch;
+            this->state = st_before_token;
         }
-    } else if (this->m->state == st_inline_image) {
-        this->m->val += ch;
-        size_t len = this->m->val.length();
-        if (len == this->m->inline_image_bytes) {
-            QTC::TC("qpdf", "QPDFTokenizer found EI by byte count");
-            this->m->type = tt_inline_image;
-            this->m->inline_image_bytes = 0;
-            this->m->state = st_token_ready;
+    } else if (this->include_ignorable) {
+        this->val += ch;
+    }
+}
+
+void
+QPDFTokenizer::inString(char ch)
+{
+    switch (ch) {
+    case '\\':
+        this->state = st_string_escape;
+        return;
+
+    case '(':
+        this->val += ch;
+        ++this->string_depth;
+        return;
+
+    case ')':
+        if (--this->string_depth == 0) {
+            this->type = tt_string;
+            this->state = st_token_ready;
+            return;
         }
+
+        this->val += ch;
+        return;
+
+    case '\r':
+        // CR by itself is converted to LF
+        this->val += '\n';
+        this->state = st_string_after_cr;
+        return;
+
+    case '\n':
+        this->val += ch;
+        return;
+
+    default:
+        this->val += ch;
+        return;
+    }
+}
+
+void
+QPDFTokenizer::inName(char ch)
+{
+    if (isDelimiter(ch)) {
+        // A C-locale whitespace character or delimiter terminates
+        // token.  It is important to unread the whitespace
+        // character even though it is ignored since it may be the
+        // newline after a stream keyword.  Removing it here could
+        // make the stream-reading code break on some files,
+        // though not on any files in the test suite as of this
+        // writing.
+
+        this->type = this->bad ? tt_bad : tt_name;
+        this->in_token = false;
+        this->char_to_unread = ch;
+        this->state = st_token_ready;
+    } else if (ch == '#') {
+        this->char_code = 0;
+        this->state = st_name_hex1;
     } else {
-        handled = false;
-    }
-
-    if (handled) {
-        // okay
-    } else if (this->m->state == st_in_hexstring) {
-        if (ch == '>') {
-            this->m->type = tt_string;
-            this->m->state = st_token_ready;
-            if (this->m->val.length() % 2) {
-                // PDF spec says odd hexstrings have implicit
-                // trailing 0.
-                this->m->val += '0';
-            }
-            char num[3];
-            num[2] = '\0';
-            std::string nval;
-            for (unsigned int i = 0; i < this->m->val.length(); i += 2) {
-                num[0] = this->m->val.at(i);
-                num[1] = this->m->val.at(i + 1);
-                char nch = static_cast<char>(strtol(num, nullptr, 16));
-                nval += nch;
-            }
-            this->m->val = nval;
-        } else if (QUtil::is_hex_digit(ch)) {
-            this->m->val += ch;
-        } else if (isSpace(ch)) {
-            // ignore
-        } else {
-            this->m->type = tt_bad;
-            QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
-            this->m->error_message =
-                std::string("invalid character (") + ch + ") in hexstring";
-            this->m->state = st_token_ready;
-        }
+        this->val += ch;
+    }
+}
+
+void
+QPDFTokenizer::inNameHex1(char ch)
+{
+    this->hex_char = ch;
+
+    if ('0' <= ch && ch <= '9') {
+        this->char_code = 16 * (int(ch) - int('0'));
+        this->state = st_name_hex2;
+
+    } else if ('A' <= ch && ch <= 'F') {
+        this->char_code = 16 * (10 + int(ch) - int('A'));
+        this->state = st_name_hex2;
+
+    } else if ('a' <= ch && ch <= 'f') {
+        this->char_code = 16 * (10 + int(ch) - int('a'));
+        this->state = st_name_hex2;
+
     } else {
-        throw std::logic_error(
-            "INTERNAL ERROR: invalid state while reading token");
+        QTC::TC("qpdf", "QPDFTokenizer bad name 1");
+        this->error_message = "name with stray # will not work with PDF >= 1.2";
+        // Use null to encode a bad # -- this is reversed
+        // in QPDF_Name::normalizeName.
+        this->val += '\0';
+        this->state = st_name;
+        inName(ch);
+    }
+}
+
+void
+QPDFTokenizer::inNameHex2(char ch)
+{
+    if ('0' <= ch && ch <= '9') {
+        this->char_code += int(ch) - int('0');
+
+    } else if ('A' <= ch && ch <= 'F') {
+        this->char_code += 10 + int(ch) - int('A');
+
+    } else if ('a' <= ch && ch <= 'f') {
+        this->char_code += 10 + int(ch) - int('a');
+
+    } else {
+        QTC::TC("qpdf", "QPDFTokenizer bad name 2");
+        this->error_message = "name with stray # will not work with PDF >= 1.2";
+        // Use null to encode a bad # -- this is reversed
+        // in QPDF_Name::normalizeName.
+        this->val += '\0';
+        this->val += this->hex_char;
+        this->state = st_name;
+        inName(ch);
+        return;
+    }
+    if (this->char_code == 0) {
+        QTC::TC("qpdf", "QPDFTokenizer null in name");
+        this->error_message = "null character not allowed in name token";
+        this->val += "#00";
+        this->state = st_name;
+        this->bad = true;
+    } else {
+        this->val += char(this->char_code);
+        this->state = st_name;
+    }
+}
+
+void
+QPDFTokenizer::inSign(char ch)
+{
+    if (QUtil::is_digit(ch)) {
+        this->state = st_number;
+        this->val += ch;
+    } else if (ch == '.') {
+        this->state = st_decimal;
+        this->val += ch;
+    } else {
+        this->state = st_literal;
+        inLiteral(ch);
     }
+}
 
-    if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) {
-        resolveLiteral();
+void
+QPDFTokenizer::inDecimal(char ch)
+{
+    if (QUtil::is_digit(ch)) {
+        this->state = st_real;
+        this->val += ch;
+    } else {
+        this->state = st_literal;
+        inLiteral(ch);
     }
+}
+
+void
+QPDFTokenizer::inNumber(char ch)
+{
+    if (QUtil::is_digit(ch)) {
+        this->val += ch;
+    } else if (ch == '.') {
+        this->state = st_real;
+        this->val += ch;
+    } else if (isDelimiter(ch)) {
+        this->type = tt_integer;
+        this->state = st_token_ready;
+        this->in_token = false;
+        this->char_to_unread = ch;
+    } else {
+        this->state = st_literal;
+        this->val += ch;
+    }
+}
+
+void
+QPDFTokenizer::inReal(char ch)
+{
+    if (QUtil::is_digit(ch)) {
+        this->val += ch;
+    } else if (isDelimiter(ch)) {
+        this->type = tt_real;
+        this->state = st_token_ready;
+        this->in_token = false;
+        this->char_to_unread = ch;
+    } else {
+        this->state = st_literal;
+        this->val += ch;
+    }
+}
+void
+QPDFTokenizer::inStringEscape(char ch)
+{
+    this->state = st_in_string;
+    switch (ch) {
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+        this->state = st_char_code;
+        this->char_code = 0;
+        this->digit_count = 0;
+        inCharCode(ch);
+        return;
+
+    case 'n':
+        this->val += '\n';
+        return;
+
+    case 'r':
+        this->val += '\r';
+        return;
+
+    case 't':
+        this->val += '\t';
+        return;
+
+    case 'b':
+        this->val += '\b';
+        return;
 
-    if (!(betweenTokens() ||
-          ((this->m->state == st_token_ready) && this->m->unread_char))) {
-        this->m->raw_val += orig_ch;
+    case 'f':
+        this->val += '\f';
+        return;
+
+    case '\n':
+        return;
+
+    case '\r':
+        this->state = st_string_after_cr;
+        return;
+
+    default:
+        // PDF spec says backslash is ignored before anything else
+        this->val += ch;
+        return;
+    }
+}
+
+void
+QPDFTokenizer::inStringAfterCR(char ch)
+{
+    this->state = st_in_string;
+    if (ch != '\n') {
+        inString(ch);
+    }
+}
+
+void
+QPDFTokenizer::inLt(char ch)
+{
+    if (ch == '<') {
+        this->val += "<<";
+        this->type = tt_dict_open;
+        this->state = st_token_ready;
+        return;
+    }
+
+    this->state = st_in_hexstring;
+    inHexstring(ch);
+}
+
+void
+QPDFTokenizer::inGt(char ch)
+{
+    if (ch == '>') {
+        this->val += ">>";
+        this->type = tt_dict_close;
+        this->state = st_token_ready;
+    } else {
+        this->val += ">";
+        this->type = tt_bad;
+        QTC::TC("qpdf", "QPDFTokenizer bad >");
+        this->error_message = "unexpected >";
+        this->in_token = false;
+        this->char_to_unread = ch;
+        this->state = st_token_ready;
+    }
+}
+
+void
+QPDFTokenizer::inLiteral(char ch)
+{
+    if (isDelimiter(ch)) {
+        // A C-locale whitespace character or delimiter terminates
+        // token.  It is important to unread the whitespace
+        // character even though it is ignored since it may be the
+        // newline after a stream keyword.  Removing it here could
+        // make the stream-reading code break on some files,
+        // though not on any files in the test suite as of this
+        // writing.
+
+        this->in_token = false;
+        this->char_to_unread = ch;
+        this->state = st_token_ready;
+        this->type = (this->val == "true") || (this->val == "false")
+            ? tt_bool
+            : (this->val == "null" ? tt_null : tt_word);
+    } else {
+        this->val += ch;
+    }
+}
+
+void
+QPDFTokenizer::inHexstring(char ch)
+{
+    if ('0' <= ch && ch <= '9') {
+        this->char_code = 16 * (int(ch) - int('0'));
+        this->state = st_in_hexstring_2nd;
+
+    } else if ('A' <= ch && ch <= 'F') {
+        this->char_code = 16 * (10 + int(ch) - int('A'));
+        this->state = st_in_hexstring_2nd;
+
+    } else if ('a' <= ch && ch <= 'f') {
+        this->char_code = 16 * (10 + int(ch) - int('a'));
+        this->state = st_in_hexstring_2nd;
+
+    } else if (ch == '>') {
+        this->type = tt_string;
+        this->state = st_token_ready;
+
+    } else if (isSpace(ch)) {
+        // ignore
+
+    } else {
+        this->type = tt_bad;
+        QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
+        this->error_message =
+            std::string("invalid character (") + ch + ") in hexstring";
+        this->state = st_token_ready;
+    }
+}
+
+void
+QPDFTokenizer::inHexstring2nd(char ch)
+{
+    if ('0' <= ch && ch <= '9') {
+        this->val += char(this->char_code + int(ch) - int('0'));
+        this->state = st_in_hexstring;
+
+    } else if ('A' <= ch && ch <= 'F') {
+        this->val += char(this->char_code + 10 + int(ch) - int('A'));
+        this->state = st_in_hexstring;
+
+    } else if ('a' <= ch && ch <= 'f') {
+        this->val += char(this->char_code + 10 + int(ch) - int('a'));
+        this->state = st_in_hexstring;
+
+    } else if (ch == '>') {
+        // PDF spec says odd hexstrings have implicit trailing 0.
+        this->val += char(this->char_code);
+        this->type = tt_string;
+        this->state = st_token_ready;
+
+    } else if (isSpace(ch)) {
+        // ignore
+
+    } else {
+        this->type = tt_bad;
+        QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character");
+        this->error_message =
+            std::string("invalid character (") + ch + ") in hexstring";
+        this->state = st_token_ready;
+    }
+}
+
+void
+QPDFTokenizer::inCharCode(char ch)
+{
+    bool const octal = ('0' <= ch) && (ch <= '7'); // another \ddd digit?
+    if (octal) {
+        this->char_code = 8 * this->char_code + (int(ch) - int('0'));
+    }
+    if (!octal || (++(this->digit_count) == 3)) {
+        this->val += char(this->char_code % 256); // ignore high-order overflow
+        this->state = st_in_string;
+    }
+    if (!octal) {
+        inString(ch); // ch only ended a short escape; it is string data
+    }
+}
+
+void
+QPDFTokenizer::inInlineImage(char ch)
+{
+    this->val += ch;
+    if (this->val.length() == this->inline_image_bytes) {
+        QTC::TC("qpdf", "QPDFTokenizer found EI by byte count");
+        this->type = tt_inline_image;
+        this->inline_image_bytes = 0;
+        this->state = st_token_ready;
     }
 }
 
 void
 QPDFTokenizer::presentEOF()
 {
-    if (this->m->state == st_literal) {
+    switch (this->state) {
+    case st_name:
+    case st_name_hex1:
+    case st_name_hex2:
+    case st_number:
+    case st_real:
+    case st_sign:
+    case st_decimal:
+    case st_literal:
         QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
-        resolveLiteral();
-    } else if (
-        (this->m->include_ignorable) && (this->m->state == st_in_space)) {
-        this->m->type = tt_space;
-    } else if (
-        (this->m->include_ignorable) && (this->m->state == st_in_comment)) {
-        this->m->type = tt_comment;
-    } else if (betweenTokens()) {
-        this->m->type = tt_eof;
-    } else if (this->m->state != st_token_ready) {
+        // Push any delimiter to the state machine to finish off the final
+        // token.
+        presentCharacter('\f');
+        this->in_token = true;
+        break;
+
+    case st_top:
+    case st_before_token:
+        this->type = tt_eof;
+        break;
+
+    case st_in_space:
+        this->type = this->include_ignorable ? tt_space : tt_eof;
+        break;
+
+    case st_in_comment:
+        this->type = this->include_ignorable ? tt_comment : tt_bad;
+        break;
+
+    case st_token_ready:
+        break;
+
+    default:
         QTC::TC("qpdf", "QPDFTokenizer EOF reading token");
-        this->m->type = tt_bad;
-        this->m->error_message = "EOF while reading token";
+        this->type = tt_bad;
+        this->error_message = "EOF while reading token";
     }
-
-    this->m->state = st_token_ready;
+    this->state = st_token_ready;
 }
 
 void
 QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input)
 {
-    if (this->m->state != st_top) {
+    if (this->state != st_before_token) {
         throw std::logic_error("QPDFTokenizer::expectInlineImage called"
                                " when tokenizer is in improper state");
     }
     findEI(input);
-    this->m->state = st_inline_image;
+    this->before_token = false;
+    this->in_token = true;
+    this->state = st_inline_image;
 }
 
 void
@@ -537,7 +874,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
         if (!input->findFirst("EI", input->tell(), 0, f)) {
             break;
         }
-        this->m->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2);
+        this->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2);
 
         QPDFTokenizer check;
         bool found_bad = false;
@@ -610,19 +947,16 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
 bool
 QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
 {
-    bool ready = (this->m->state == st_token_ready);
-    unread_char = this->m->unread_char;
-    ch = this->m->char_to_unread;
+    bool ready = (this->state == st_token_ready);
+    unread_char = !this->in_token && !this->before_token;
+    ch = this->char_to_unread;
     if (ready) {
-        if (this->m->type == tt_bad) {
-            this->m->val = this->m->raw_val;
-        }
-        token = Token(
-            this->m->type,
-            this->m->val,
-            this->m->raw_val,
-            this->m->error_message);
-        this->m->reset();
+        token = (this->type == tt_bad)
+            ? Token(
+                  this->type, this->raw_val, this->raw_val, this->error_message)
+            : Token(this->type, this->val, this->raw_val, this->error_message);
+
+        this->reset();
     }
     return ready;
 }
@@ -630,11 +964,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
 bool
 QPDFTokenizer::betweenTokens()
 {
-    return (
-        (this->m->state == st_top) ||
-        ((!this->m->include_ignorable) &&
-         ((this->m->state == st_in_comment) ||
-          (this->m->state == st_in_space))));
+    return this->before_token;
 }
 
 QPDFTokenizer::Token
@@ -644,49 +974,46 @@ QPDFTokenizer::readToken(
     bool allow_bad,
     size_t max_len)
 {
-    qpdf_offset_t offset = input->tell();
-    Token token;
-    bool unread_char;
-    char char_to_unread;
-    bool presented_eof = false;
-    while (!getToken(token, unread_char, char_to_unread)) {
+    qpdf_offset_t offset = input->fastTell();
+
+    while (this->state != st_token_ready) {
         char ch;
-        if (input->read(&ch, 1) == 0) {
-            if (!presented_eof) {
-                presentEOF();
-                presented_eof = true;
-                if ((this->m->type == tt_eof) && (!this->m->allow_eof)) {
-                    // Nothing in the qpdf library calls readToken
-                    // without allowEOF anymore, so this case is not
-                    // exercised.
-                    this->m->type = tt_bad;
-                    this->m->error_message = "unexpected EOF";
-                    offset = input->getLastOffset();
-                }
-            } else {
-                throw std::logic_error(
-                    "getToken returned false after presenting EOF");
+        if (!input->fastRead(ch)) {
+            presentEOF();
+
+            if ((this->type == tt_eof) && (!this->allow_eof)) {
+                // Nothing in the qpdf library calls readToken
+                // without allowEOF anymore, so this case is not
+                // exercised.
+                this->type = tt_bad;
+                this->error_message = "unexpected EOF";
+                offset = input->getLastOffset();
             }
         } else {
-            presentCharacter(ch);
-            if (betweenTokens() && (input->getLastOffset() == offset)) {
+            handleCharacter(ch);
+            if (this->before_token) {
                 ++offset;
             }
-            if (max_len && (this->m->raw_val.length() >= max_len) &&
-                (this->m->state != st_token_ready)) {
+            if (this->in_token) {
+                this->raw_val += ch;
+            }
+            if (max_len && (this->raw_val.length() >= max_len) &&
+                (this->state != st_token_ready)) {
                 // terminate this token now
                 QTC::TC("qpdf", "QPDFTokenizer block long token");
-                this->m->type = tt_bad;
-                this->m->state = st_token_ready;
-                this->m->error_message =
+                this->type = tt_bad;
+                this->state = st_token_ready;
+                this->error_message =
                     "exceeded allowable length while reading token";
             }
         }
     }
 
-    if (unread_char) {
-        input->unreadCh(char_to_unread);
-    }
+    Token token;
+    bool unread_char;
+    char char_to_unread;
+    getToken(token, unread_char, char_to_unread);
+    input->fastUnread(unread_char);
 
     if (token.getType() != tt_eof) {
         input->setLastOffset(offset);
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 4e58aaf7..d565ece0 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -1207,52 +1207,6 @@ QUtil::random()
     return result;
 }
 
-bool
-QUtil::is_hex_digit(char ch)
-{
-    return (ch && (strchr("0123456789abcdefABCDEF", ch) != nullptr));
-}
-
-bool
-QUtil::is_space(char ch)
-{
-    return (ch && (strchr(" \f\n\r\t\v", ch) != nullptr));
-}
-
-bool
-QUtil::is_digit(char ch)
-{
-    return ((ch >= '0') && (ch <= '9'));
-}
-
-bool
-QUtil::is_number(char const* p)
-{
-    // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$
-    if (!*p) {
-        return false;
-    }
-    if ((*p == '-') || (*p == '+')) {
-        ++p;
-    }
-    bool found_dot = false;
-    bool found_digit = false;
-    for (; *p; ++p) {
-        if (*p == '.') {
-            if (found_dot) {
-                // only one dot
-                return false;
-            }
-            found_dot = true;
-        } else if (QUtil::is_digit(*p)) {
-            found_digit = true;
-        } else {
-            return false;
-        }
-    }
-    return found_digit;
-}
-
 void
 QUtil::read_file_into_memory(
     char const* filename, std::shared_ptr<char>& file_buf, size_t& size)
diff --git a/manual/release-notes.rst b/manual/release-notes.rst
index ab2c1d8e..01a19249 100644
--- a/manual/release-notes.rst
+++ b/manual/release-notes.rst
@@ -11,7 +11,8 @@ For a detailed list of changes, please see the file
 
     - Many performance enhancements have been added. In developer
       performance benchmarks, gains on the order of 20% have been
-      observed.
+      observed. Most of that work, including major optimization of
+      qpdf's lexical layer, was done by M. Holger.
 
   - Replacement of ``PointerHolder`` with ``std::shared_ptr``
 
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index f535b9ee..9e106902 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -66,8 +66,10 @@ QPDF can't find xref 0
 QPDFTokenizer bad ) 0
 QPDFTokenizer bad > 0
 QPDFTokenizer bad hexstring character 0
+QPDFTokenizer bad hexstring 2nd character 0
 QPDFTokenizer null in name 0
-QPDFTokenizer bad name 0
+QPDFTokenizer bad name 1 0
+QPDFTokenizer bad name 2 0
 QPDF_Stream invalid filter 0
 QPDF UseOutlines but no Outlines 0
 QPDFObjectHandle makeDirect loop 0