From 2699ecf13e8559b136ded1986bf18e1a0a51011f Mon Sep 17 00:00:00 2001
From: Jay Berkenbilt <ejb@ql.org>
Date: Mon, 29 Jan 2018 20:00:06 -0500
Subject: Push QPDFTokenizer members into a nested structure

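This protects against future ABI-breaking changes: the tokenizer's data
members move into a nested Members class that QPDFTokenizer holds through
a pointer, so members can be added or changed later without altering the
size or layout of QPDFTokenizer itself.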
---
 include/qpdf/QPDFTokenizer.hh |  66 +++++---
 libqpdf/QPDFTokenizer.cc      | 357 ++++++++++++++++++++++--------------------
 2 files changed, 231 insertions(+), 192 deletions(-)
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index cd727613..7d7f6132 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -165,31 +165,53 @@ class QPDFTokenizer
                     size_t max_len = 0);
 
   private:
-    void reset();
+    // Copying and assignment are declared private and intentionally left unimplemented
+    QPDFTokenizer(QPDFTokenizer const&);
+    QPDFTokenizer& operator=(QPDFTokenizer const&);
+
     void resolveLiteral();
     bool isSpace(char);
 
-    // Lexer state
-    enum { st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt,
-	   st_literal, st_in_hexstring, st_token_ready } state;
-
-    bool pound_special_in_name;
-    bool allow_eof;
-    bool include_ignorable;
-
-    // Current token accumulation
-    token_type_e type;
-    std::string val;
-    std::string raw_val;
-    std::string error_message;
-    bool unread_char;
-    char char_to_unread;
-
-    // State for strings
-    int string_depth;
-    bool string_ignoring_newline;
-    char bs_num_register[4];
-    bool last_char_was_bs;
+    enum state_e {
+        st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt,
+        st_literal, st_in_hexstring, st_token_ready
+    };
+
+    class Members
+    {
+        friend class QPDFTokenizer;
+
+      public:
+        QPDF_DLL
+        ~Members();
+
+      private:
+        Members();
+        Members(Members const&);
+        void reset();
+
+        // Lexer state
+        state_e state;
+
+        bool pound_special_in_name;
+        bool allow_eof;
+        bool include_ignorable;
+
+        // Current token accumulation
+        token_type_e type;
+        std::string val;
+        std::string raw_val;
+        std::string error_message;
+        bool unread_char;
+        char char_to_unread;
+
+        // State for strings
+        int string_depth;
+        bool string_ignoring_newline;
+        char bs_num_register[4];
+        bool last_char_was_bs;
+    };
+    PointerHolder<Members> m;
 };
 
 #endif // __QPDFTOKENIZER_HH__
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 2a45a0b5..776019c8 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -12,7 +12,7 @@
 #include <string.h>
 #include <cstdlib>
 
-QPDFTokenizer::QPDFTokenizer() :
+QPDFTokenizer::Members::Members() :
     pound_special_in_name(true),
     allow_eof(false),
     include_ignorable(false)
@@ -20,23 +20,47 @@ QPDFTokenizer::QPDFTokenizer() :
     reset();
 }
 
+void
+QPDFTokenizer::Members::reset()
+{
+    state = st_top;
+    type = tt_bad;
+    val = "";
+    raw_val = "";
+    error_message = "";
+    unread_char = false;
+    char_to_unread = '\0';
+    string_depth = 0;
+    string_ignoring_newline = false;
+    last_char_was_bs = false;
+}
+
+QPDFTokenizer::Members::~Members()
+{
+}
+
+QPDFTokenizer::QPDFTokenizer() :
+    m(new Members())
+{
+}
+
 void
 QPDFTokenizer::allowPoundAnywhereInName()
 {
     QTC::TC("qpdf", "QPDFTokenizer allow pound anywhere in name");
-    this->pound_special_in_name = false;
+    this->m->pound_special_in_name = false;
 }
 
 void
 QPDFTokenizer::allowEOF()
 {
-    this->allow_eof = true;
+    this->m->allow_eof = true;
 }
 
 void
 QPDFTokenizer::includeIgnorable()
 {
-    this->include_ignorable = true;
+    this->m->include_ignorable = true;
 }
 
 bool
@@ -45,35 +69,20 @@ QPDFTokenizer::isSpace(char ch)
     return ((ch == '\0') || QUtil::is_space(ch));
 }
 
-void
-QPDFTokenizer::reset()
-{
-    state = st_top;
-    type = tt_bad;
-    val = "";
-    raw_val = "";
-    error_message = "";
-    unread_char = false;
-    char_to_unread = '\0';
-    string_depth = 0;
-    string_ignoring_newline = false;
-    last_char_was_bs = false;
-}
-
 void
 QPDFTokenizer::resolveLiteral()
 {
-    if ((val.length() > 0) && (val.at(0) == '/'))
+    if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/'))
     {
-        type = tt_name;
+        this->m->type = tt_name;
         // Deal with # in name token.  Note: '/' by itself is a
         // valid name, so don't strip leading /.  That way we
         // don't have to deal with the empty string as a name.
         std::string nval = "/";
-        char const* valstr = val.c_str() + 1;
+        char const* valstr = this->m->val.c_str() + 1;
         for (char const* p = valstr; *p; ++p)
         {
-            if ((*p == '#') && this->pound_special_in_name)
+            if ((*p == '#') && this->m->pound_special_in_name)
             {
                 if (p[1] && p[2] &&
                     QUtil::is_hex_digit(p[1]) && QUtil::is_hex_digit(p[2]))
@@ -85,9 +94,9 @@ QPDFTokenizer::resolveLiteral()
                     char ch = static_cast<char>(strtol(num, 0, 16));
                     if (ch == '\0')
                     {
-                        type = tt_bad;
+                        this->m->type = tt_bad;
                         QTC::TC("qpdf", "QPDF_Tokenizer null in name");
-                        error_message =
+                        this->m->error_message =
                             "null character not allowed in name token";
                         nval += "#00";
                     }
@@ -100,8 +109,8 @@ QPDFTokenizer::resolveLiteral()
                 else
                 {
                     QTC::TC("qpdf", "QPDF_Tokenizer bad name");
-                    type = tt_bad;
-                    error_message = "invalid name token";
+                    this->m->type = tt_bad;
+                    this->m->error_message = "invalid name token";
                     nval += *p;
                 }
             }
@@ -110,40 +119,40 @@ QPDFTokenizer::resolveLiteral()
                 nval += *p;
             }
         }
-        val = nval;
+        this->m->val = nval;
     }
-    else if (QUtil::is_number(val.c_str()))
+    else if (QUtil::is_number(this->m->val.c_str()))
     {
-        if (val.find('.') != std::string::npos)
+        if (this->m->val.find('.') != std::string::npos)
         {
-            type = tt_real;
+            this->m->type = tt_real;
         }
         else
         {
-            type = tt_integer;
+            this->m->type = tt_integer;
         }
     }
-    else if ((val == "true") || (val == "false"))
+    else if ((this->m->val == "true") || (this->m->val == "false"))
     {
-        type = tt_bool;
+        this->m->type = tt_bool;
     }
-    else if (val == "null")
+    else if (this->m->val == "null")
     {
-        type = tt_null;
+        this->m->type = tt_null;
     }
     else
     {
         // I don't really know what it is, so leave it as tt_word.
         // Lots of cases ($, #, etc.) other than actual words fall
         // into this category, but that's okay at least for now.
-        type = tt_word;
+        this->m->type = tt_word;
     }
 }
 
 void
 QPDFTokenizer::presentCharacter(char ch)
 {
-    if (state == st_token_ready)
+    if (this->m->state == st_token_ready)
     {
 	throw std::logic_error(
 	    "INTERNAL ERROR: QPDF tokenizer presented character "
@@ -157,205 +166,210 @@ QPDFTokenizer::presentCharacter(char ch)
     // the character that caused a state change in the new state.
 
     bool handled = true;
-    if (state == st_top)
+    if (this->m->state == st_top)
     {
 	// Note: we specifically do not use ctype here.  It is
 	// locale-dependent.
 	if (isSpace(ch))
 	{
-            if (this->include_ignorable)
+            if (this->m->include_ignorable)
             {
-                state = st_in_space;
-                val += ch;
+                this->m->state = st_in_space;
+                this->m->val += ch;
             }
 	}
 	else if (ch == '%')
 	{
-	    state = st_in_comment;
-            if (this->include_ignorable)
+	    this->m->state = st_in_comment;
+            if (this->m->include_ignorable)
             {
-                val += ch;
+                this->m->val += ch;
             }
 	}
 	else if (ch == '(')
 	{
-	    string_depth = 1;
-	    string_ignoring_newline = false;
-	    memset(bs_num_register, '\0', sizeof(bs_num_register));
-	    last_char_was_bs = false;
-	    state = st_in_string;
+	    this->m->string_depth = 1;
+	    this->m->string_ignoring_newline = false;
+	    memset(this->m->bs_num_register, '\0',
+                   sizeof(this->m->bs_num_register));
+	    this->m->last_char_was_bs = false;
+	    this->m->state = st_in_string;
 	}
 	else if (ch == '<')
 	{
-	    state = st_lt;
+	    this->m->state = st_lt;
 	}
 	else if (ch == '>')
 	{
-	    state = st_gt;
+	    this->m->state = st_gt;
 	}
 	else
 	{
-	    val += ch;
+	    this->m->val += ch;
 	    if (ch == ')')
 	    {
-		type = tt_bad;
+		this->m->type = tt_bad;
 		QTC::TC("qpdf", "QPDF_Tokenizer bad )");
-		error_message = "unexpected )";
-		state = st_token_ready;
+		this->m->error_message = "unexpected )";
+		this->m->state = st_token_ready;
 	    }
 	    else if (ch == '[')
 	    {
-		type = tt_array_open;
-		state = st_token_ready;
+		this->m->type = tt_array_open;
+		this->m->state = st_token_ready;
 	    }
 	    else if (ch == ']')
 	    {
-		type = tt_array_close;
-		state = st_token_ready;
+		this->m->type = tt_array_close;
+		this->m->state = st_token_ready;
 	    }
 	    else if (ch == '{')
 	    {
-		type = tt_brace_open;
-		state = st_token_ready;
+		this->m->type = tt_brace_open;
+		this->m->state = st_token_ready;
 	    }
 	    else if (ch == '}')
 	    {
-		type = tt_brace_close;
-		state = st_token_ready;
+		this->m->type = tt_brace_close;
+		this->m->state = st_token_ready;
 	    }
 	    else
 	    {
-		state = st_literal;
+		this->m->state = st_literal;
 	    }
 	}
     }
-    else if (state == st_in_space)
+    else if (this->m->state == st_in_space)
     {
         // We only enter this state if include_ignorable is true.
         if (! isSpace(ch))
         {
-	    type = tt_space;
-	    unread_char = true;
-	    char_to_unread = ch;
-	    state = st_token_ready;
+	    this->m->type = tt_space;
+	    this->m->unread_char = true;
+	    this->m->char_to_unread = ch;
+	    this->m->state = st_token_ready;
         }
         else
         {
-            val += ch;
+            this->m->val += ch;
         }
     }
-    else if (state == st_in_comment)
+    else if (this->m->state == st_in_comment)
     {
 	if ((ch == '\r') || (ch == '\n'))
         {
-            if (this->include_ignorable)
+            if (this->m->include_ignorable)
             {
-                type = tt_comment;
-                unread_char = true;
-                char_to_unread = ch;
-                state = st_token_ready;
+                this->m->type = tt_comment;
+                this->m->unread_char = true;
+                this->m->char_to_unread = ch;
+                this->m->state = st_token_ready;
             }
             else
             {
-                state = st_top;
+                this->m->state = st_top;
             }
         }
-        else if (this->include_ignorable)
+        else if (this->m->include_ignorable)
         {
-            val += ch;
+            this->m->val += ch;
         }
     }
-    else if (state == st_lt)
+    else if (this->m->state == st_lt)
     {
 	if (ch == '<')
 	{
-	    val = "<<";
-	    type = tt_dict_open;
-	    state = st_token_ready;
+	    this->m->val = "<<";
+	    this->m->type = tt_dict_open;
+	    this->m->state = st_token_ready;
 	}
 	else
 	{
 	    handled = false;
-	    state = st_in_hexstring;
+	    this->m->state = st_in_hexstring;
 	}
     }
-    else if (state == st_gt)
+    else if (this->m->state == st_gt)
     {
 	if (ch == '>')
 	{
-	    val = ">>";
-	    type = tt_dict_close;
-	    state = st_token_ready;
+	    this->m->val = ">>";
+	    this->m->type = tt_dict_close;
+	    this->m->state = st_token_ready;
 	}
 	else
 	{
-	    val = ">";
-	    type = tt_bad;
+	    this->m->val = ">";
+	    this->m->type = tt_bad;
 	    QTC::TC("qpdf", "QPDF_Tokenizer bad >");
-	    error_message = "unexpected >";
-	    unread_char = true;
-	    char_to_unread = ch;
-	    state = st_token_ready;
+	    this->m->error_message = "unexpected >";
+	    this->m->unread_char = true;
+	    this->m->char_to_unread = ch;
+	    this->m->state = st_token_ready;
 	}
     }
-    else if (state == st_in_string)
+    else if (this->m->state == st_in_string)
     {
-	if (string_ignoring_newline && (! ((ch == '\r') || (ch == '\n'))))
+	if (this->m->string_ignoring_newline &&
+            (! ((ch == '\r') || (ch == '\n'))))
 	{
-	    string_ignoring_newline = false;
+	    this->m->string_ignoring_newline = false;
 	}
 
-	size_t bs_num_count = strlen(bs_num_register);
+	size_t bs_num_count = strlen(this->m->bs_num_register);
 	bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
 	if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal)))
 	{
 	    // We've accumulated \ddd.  PDF Spec says to ignore
 	    // high-order overflow.
-	    val += static_cast<char>(strtol(bs_num_register, 0, 8));
-	    memset(bs_num_register, '\0', sizeof(bs_num_register));
+	    this->m->val += static_cast<char>(
+                strtol(this->m->bs_num_register, 0, 8));
+	    memset(this->m->bs_num_register, '\0',
+                   sizeof(this->m->bs_num_register));
 	    bs_num_count = 0;
 	}
 
-	if (string_ignoring_newline && ((ch == '\r') || (ch == '\n')))
+	if (this->m->string_ignoring_newline && ((ch == '\r') || (ch == '\n')))
 	{
 	    // ignore
 	}
-	else if (ch_is_octal && (last_char_was_bs || (bs_num_count > 0)))
+	else if (ch_is_octal &&
+                 (this->m->last_char_was_bs || (bs_num_count > 0)))
 	{
-	    bs_num_register[bs_num_count++] = ch;
+	    this->m->bs_num_register[bs_num_count++] = ch;
 	}
-	else if (last_char_was_bs)
+	else if (this->m->last_char_was_bs)
 	{
 	    switch (ch)
 	    {
 	      case 'n':
-		val += '\n';
+		this->m->val += '\n';
 		break;
 
 	      case 'r':
-		val += '\r';
+		this->m->val += '\r';
 		break;
 
 	      case 't':
-		val += '\t';
+		this->m->val += '\t';
 		break;
 
 	      case 'b':
-		val += '\b';
+		this->m->val += '\b';
 		break;
 
 	      case 'f':
-		val += '\f';
+		this->m->val += '\f';
 		break;
 
 	      case '\r':
 	      case '\n':
-		string_ignoring_newline = true;
+		this->m->string_ignoring_newline = true;
 		break;
 
 	      default:
 		// PDF spec says backslash is ignored before anything else
-		val += ch;
+		this->m->val += ch;
 		break;
 	    }
 	}
@@ -371,22 +385,23 @@ QPDFTokenizer::presentCharacter(char ch)
 	}
 	else if (ch == '(')
 	{
-	    val += ch;
-	    ++string_depth;
+	    this->m->val += ch;
+	    ++this->m->string_depth;
 	}
-	else if ((ch == ')') && (--string_depth == 0))
+	else if ((ch == ')') && (--this->m->string_depth == 0))
 	{
-	    type = tt_string;
-	    state = st_token_ready;
+	    this->m->type = tt_string;
+	    this->m->state = st_token_ready;
 	}
 	else
 	{
-	    val += ch;
+	    this->m->val += ch;
 	}
 
-	last_char_was_bs = ((! last_char_was_bs) && (ch == '\\'));
+	this->m->last_char_was_bs =
+            ((! this->m->last_char_was_bs) && (ch == '\\'));
     }
-    else if (state == st_literal)
+    else if (this->m->state == st_literal)
     {
 	if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0)
 	{
@@ -398,14 +413,14 @@ QPDFTokenizer::presentCharacter(char ch)
 	    // though not on any files in the test suite as of this
 	    // writing.
 
-	    type = tt_word;
-	    unread_char = true;
-	    char_to_unread = ch;
-	    state = st_token_ready;
+	    this->m->type = tt_word;
+	    this->m->unread_char = true;
+	    this->m->char_to_unread = ch;
+	    this->m->state = st_token_ready;
 	}
 	else
 	{
-	    val += ch;
+	    this->m->val += ch;
 	}
     }
     else
@@ -418,33 +433,33 @@ QPDFTokenizer::presentCharacter(char ch)
     {
 	// okay
     }
-    else if (state == st_in_hexstring)
+    else if (this->m->state == st_in_hexstring)
     {
 	if (ch == '>')
 	{
-	    type = tt_string;
-	    state = st_token_ready;
-	    if (val.length() % 2)
+	    this->m->type = tt_string;
+	    this->m->state = st_token_ready;
+	    if (this->m->val.length() % 2)
 	    {
 		// PDF spec says odd hexstrings have implicit
 		// trailing 0.
-		val += '0';
+		this->m->val += '0';
 	    }
 	    char num[3];
 	    num[2] = '\0';
 	    std::string nval;
-	    for (unsigned int i = 0; i < val.length(); i += 2)
+	    for (unsigned int i = 0; i < this->m->val.length(); i += 2)
 	    {
-		num[0] = val.at(i);
-		num[1] = val.at(i+1);
+		num[0] = this->m->val.at(i);
+		num[1] = this->m->val.at(i+1);
 		char nch = static_cast<char>(strtol(num, 0, 16));
 		nval += nch;
 	    }
-	    val = nval;
+	    this->m->val = nval;
 	}
 	else if (QUtil::is_hex_digit(ch))
 	{
-	    val += ch;
+	    this->m->val += ch;
 	}
 	else if (isSpace(ch))
 	{
@@ -452,11 +467,11 @@ QPDFTokenizer::presentCharacter(char ch)
 	}
 	else
 	{
-	    type = tt_bad;
+	    this->m->type = tt_bad;
 	    QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character");
-	    error_message = std::string("invalid character (") +
+	    this->m->error_message = std::string("invalid character (") +
 		ch + ") in hexstring";
-	    state = st_token_ready;
+	    this->m->state = st_token_ready;
 	}
     }
     else
@@ -465,61 +480,63 @@ QPDFTokenizer::presentCharacter(char ch)
 	    "INTERNAL ERROR: invalid state while reading token");
     }
 
-    if ((state == st_token_ready) && (type == tt_word))
+    if ((this->m->state == st_token_ready) && (this->m->type == tt_word))
     {
         resolveLiteral();
     }
 
-    if (! (betweenTokens() || ((state == st_token_ready) && unread_char)))
+    if (! (betweenTokens() ||
+           ((this->m->state == st_token_ready) && this->m->unread_char)))
     {
-	this->raw_val += orig_ch;
+	this->m->raw_val += orig_ch;
     }
 }
 
 void
 QPDFTokenizer::presentEOF()
 {
-    if (state == st_literal)
+    if (this->m->state == st_literal)
     {
         QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token");
         resolveLiteral();
     }
-    else if ((this->include_ignorable) && (state == st_in_space))
+    else if ((this->m->include_ignorable) && (this->m->state == st_in_space))
     {
-        type = tt_space;
+        this->m->type = tt_space;
     }
-    else if ((this->include_ignorable) && (state == st_in_comment))
+    else if ((this->m->include_ignorable) && (this->m->state == st_in_comment))
     {
-        type = tt_comment;
+        this->m->type = tt_comment;
     }
     else if (betweenTokens())
     {
-        type = tt_eof;
+        this->m->type = tt_eof;
     }
-    else if (state != st_token_ready)
+    else if (this->m->state != st_token_ready)
     {
         QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token");
-        type = tt_bad;
-        error_message = "EOF while reading token";
+        this->m->type = tt_bad;
+        this->m->error_message = "EOF while reading token";
     }
 
-    state = st_token_ready;
+    this->m->state = st_token_ready;
 }
 
 bool
 QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
 {
-    bool ready = (this->state == st_token_ready);
-    unread_char = this->unread_char;
-    ch = this->char_to_unread;
+    bool ready = (this->m->state == st_token_ready);
+    unread_char = this->m->unread_char;
+    ch = this->m->char_to_unread;
     if (ready)
     {
-        if (type == tt_bad)
+        if (this->m->type == tt_bad)
         {
-            val = raw_val;
+            this->m->val = this->m->raw_val;
         }
-	token = Token(type, val, raw_val, error_message);
-	reset();
+	token = Token(this->m->type, this->m->val,
+                      this->m->raw_val, this->m->error_message);
+	this->m->reset();
     }
     return ready;
 }
@@ -527,10 +544,10 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
 bool
 QPDFTokenizer::betweenTokens()
 {
-    return ((state == st_top) ||
-            ((! this->include_ignorable) &&
-             ((state == st_in_comment) ||
-              (state == st_in_space))));
+    return ((this->m->state == st_top) ||
+            ((! this->m->include_ignorable) &&
+             ((this->m->state == st_in_comment) ||
+              (this->m->state == st_in_space))));
 }
 
 QPDFTokenizer::Token
@@ -553,11 +570,11 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
             {
                 presentEOF();
                 presented_eof = true;
-                if ((type == tt_eof) && (! this->allow_eof))
+                if ((this->m->type == tt_eof) && (! this->m->allow_eof))
                 {
                     QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed");
-                    type = tt_bad;
-                    error_message = "unexpected EOF";
+                    this->m->type = tt_bad;
+                    this->m->error_message = "unexpected EOF";
                     offset = input->getLastOffset();
                 }
             }
@@ -574,14 +591,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
 	    {
 		++offset;
 	    }
-            if (max_len && (raw_val.length() >= max_len) &&
-                (this->state != st_token_ready))
+            if (max_len && (this->m->raw_val.length() >= max_len) &&
+                (this->m->state != st_token_ready))
             {
                 // terminate this token now
                 QTC::TC("qpdf", "QPDFTokenizer block long token");
-                this->type = tt_bad;
-                this->state = st_token_ready;
-                error_message =
+                this->m->type = tt_bad;
+                this->m->state = st_token_ready;
+                this->m->error_message =
                     "exceeded allowable length while reading token";
             }
 	}
-- 
cgit v1.2.3-54-g00ecf