aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/QPDFTokenizer.cc
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2018-01-30 02:00:06 +0100
committer Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:46 +0100
commit 2699ecf13e8559b136ded1986bf18e1a0a51011f (patch)
tree 60d158b2f9cdf551548c6887e24e6af3423fb315 /libqpdf/QPDFTokenizer.cc
parent d97474868d7fa6a94bab49d89af5dd82fd5e3a41 (diff)
download qpdf-2699ecf13e8559b136ded1986bf18e1a0a51011f.tar.zst
Push QPDFTokenizer members into a nested structure
This protects against future ABI-breaking changes.
Diffstat (limited to 'libqpdf/QPDFTokenizer.cc')
-rw-r--r-- libqpdf/QPDFTokenizer.cc 357
1 files changed, 187 insertions, 170 deletions
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 2a45a0b5..776019c8 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -12,7 +12,7 @@
#include <string.h>
#include <cstdlib>
-QPDFTokenizer::QPDFTokenizer() :
+QPDFTokenizer::Members::Members() :
pound_special_in_name(true),
allow_eof(false),
include_ignorable(false)
@@ -21,22 +21,46 @@ QPDFTokenizer::QPDFTokenizer() :
}
void
+QPDFTokenizer::Members::reset()
+{
+ state = st_top;
+ type = tt_bad;
+ val = "";
+ raw_val = "";
+ error_message = "";
+ unread_char = false;
+ char_to_unread = '\0';
+ string_depth = 0;
+ string_ignoring_newline = false;
+ last_char_was_bs = false;
+}
+
+QPDFTokenizer::Members::~Members()
+{
+}
+
+QPDFTokenizer::QPDFTokenizer() :
+ m(new Members())
+{
+}
+
+void
QPDFTokenizer::allowPoundAnywhereInName()
{
QTC::TC("qpdf", "QPDFTokenizer allow pound anywhere in name");
- this->pound_special_in_name = false;
+ this->m->pound_special_in_name = false;
}
void
QPDFTokenizer::allowEOF()
{
- this->allow_eof = true;
+ this->m->allow_eof = true;
}
void
QPDFTokenizer::includeIgnorable()
{
- this->include_ignorable = true;
+ this->m->include_ignorable = true;
}
bool
@@ -46,34 +70,19 @@ QPDFTokenizer::isSpace(char ch)
}
void
-QPDFTokenizer::reset()
-{
- state = st_top;
- type = tt_bad;
- val = "";
- raw_val = "";
- error_message = "";
- unread_char = false;
- char_to_unread = '\0';
- string_depth = 0;
- string_ignoring_newline = false;
- last_char_was_bs = false;
-}
-
-void
QPDFTokenizer::resolveLiteral()
{
- if ((val.length() > 0) && (val.at(0) == '/'))
+ if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/'))
{
- type = tt_name;
+ this->m->type = tt_name;
// Deal with # in name token. Note: '/' by itself is a
// valid name, so don't strip leading /. That way we
// don't have to deal with the empty string as a name.
std::string nval = "/";
- char const* valstr = val.c_str() + 1;
+ char const* valstr = this->m->val.c_str() + 1;
for (char const* p = valstr; *p; ++p)
{
- if ((*p == '#') && this->pound_special_in_name)
+ if ((*p == '#') && this->m->pound_special_in_name)
{
if (p[1] && p[2] &&
QUtil::is_hex_digit(p[1]) && QUtil::is_hex_digit(p[2]))
@@ -85,9 +94,9 @@ QPDFTokenizer::resolveLiteral()
char ch = static_cast<char>(strtol(num, 0, 16));
if (ch == '\0')
{
- type = tt_bad;
+ this->m->type = tt_bad;
QTC::TC("qpdf", "QPDF_Tokenizer null in name");
- error_message =
+ this->m->error_message =
"null character not allowed in name token";
nval += "#00";
}
@@ -100,8 +109,8 @@ QPDFTokenizer::resolveLiteral()
else
{
QTC::TC("qpdf", "QPDF_Tokenizer bad name");
- type = tt_bad;
- error_message = "invalid name token";
+ this->m->type = tt_bad;
+ this->m->error_message = "invalid name token";
nval += *p;
}
}
@@ -110,40 +119,40 @@ QPDFTokenizer::resolveLiteral()
nval += *p;
}
}
- val = nval;
+ this->m->val = nval;
}
- else if (QUtil::is_number(val.c_str()))
+ else if (QUtil::is_number(this->m->val.c_str()))
{
- if (val.find('.') != std::string::npos)
+ if (this->m->val.find('.') != std::string::npos)
{
- type = tt_real;
+ this->m->type = tt_real;
}
else
{
- type = tt_integer;
+ this->m->type = tt_integer;
}
}
- else if ((val == "true") || (val == "false"))
+ else if ((this->m->val == "true") || (this->m->val == "false"))
{
- type = tt_bool;
+ this->m->type = tt_bool;
}
- else if (val == "null")
+ else if (this->m->val == "null")
{
- type = tt_null;
+ this->m->type = tt_null;
}
else
{
// I don't really know what it is, so leave it as tt_word.
// Lots of cases ($, #, etc.) other than actual words fall
// into this category, but that's okay at least for now.
- type = tt_word;
+ this->m->type = tt_word;
}
}
void
QPDFTokenizer::presentCharacter(char ch)
{
- if (state == st_token_ready)
+ if (this->m->state == st_token_ready)
{
throw std::logic_error(
"INTERNAL ERROR: QPDF tokenizer presented character "
@@ -157,205 +166,210 @@ QPDFTokenizer::presentCharacter(char ch)
// the character that caused a state change in the new state.
bool handled = true;
- if (state == st_top)
+ if (this->m->state == st_top)
{
// Note: we specifically do not use ctype here. It is
// locale-dependent.
if (isSpace(ch))
{
- if (this->include_ignorable)
+ if (this->m->include_ignorable)
{
- state = st_in_space;
- val += ch;
+ this->m->state = st_in_space;
+ this->m->val += ch;
}
}
else if (ch == '%')
{
- state = st_in_comment;
- if (this->include_ignorable)
+ this->m->state = st_in_comment;
+ if (this->m->include_ignorable)
{
- val += ch;
+ this->m->val += ch;
}
}
else if (ch == '(')
{
- string_depth = 1;
- string_ignoring_newline = false;
- memset(bs_num_register, '\0', sizeof(bs_num_register));
- last_char_was_bs = false;
- state = st_in_string;
+ this->m->string_depth = 1;
+ this->m->string_ignoring_newline = false;
+ memset(this->m->bs_num_register, '\0',
+ sizeof(this->m->bs_num_register));
+ this->m->last_char_was_bs = false;
+ this->m->state = st_in_string;
}
else if (ch == '<')
{
- state = st_lt;
+ this->m->state = st_lt;
}
else if (ch == '>')
{
- state = st_gt;
+ this->m->state = st_gt;
}
else
{
- val += ch;
+ this->m->val += ch;
if (ch == ')')
{
- type = tt_bad;
+ this->m->type = tt_bad;
QTC::TC("qpdf", "QPDF_Tokenizer bad )");
- error_message = "unexpected )";
- state = st_token_ready;
+ this->m->error_message = "unexpected )";
+ this->m->state = st_token_ready;
}
else if (ch == '[')
{
- type = tt_array_open;
- state = st_token_ready;
+ this->m->type = tt_array_open;
+ this->m->state = st_token_ready;
}
else if (ch == ']')
{
- type = tt_array_close;
- state = st_token_ready;
+ this->m->type = tt_array_close;
+ this->m->state = st_token_ready;
}
else if (ch == '{')
{
- type = tt_brace_open;
- state = st_token_ready;
+ this->m->type = tt_brace_open;
+ this->m->state = st_token_ready;
}
else if (ch == '}')
{
- type = tt_brace_close;
- state = st_token_ready;
+ this->m->type = tt_brace_close;
+ this->m->state = st_token_ready;
}
else
{
- state = st_literal;
+ this->m->state = st_literal;
}
}
}
- else if (state == st_in_space)
+ else if (this->m->state == st_in_space)
{
// We only enter this state if include_ignorable is true.
if (! isSpace(ch))
{
- type = tt_space;
- unread_char = true;
- char_to_unread = ch;
- state = st_token_ready;
+ this->m->type = tt_space;
+ this->m->unread_char = true;
+ this->m->char_to_unread = ch;
+ this->m->state = st_token_ready;
}
else
{
- val += ch;
+ this->m->val += ch;
}
}
- else if (state == st_in_comment)
+ else if (this->m->state == st_in_comment)
{
if ((ch == '\r') || (ch == '\n'))
{
- if (this->include_ignorable)
+ if (this->m->include_ignorable)
{
- type = tt_comment;
- unread_char = true;
- char_to_unread = ch;
- state = st_token_ready;
+ this->m->type = tt_comment;
+ this->m->unread_char = true;
+ this->m->char_to_unread = ch;
+ this->m->state = st_token_ready;
}
else
{
- state = st_top;
+ this->m->state = st_top;
}
}
- else if (this->include_ignorable)
+ else if (this->m->include_ignorable)
{
- val += ch;
+ this->m->val += ch;
}
}
- else if (state == st_lt)
+ else if (this->m->state == st_lt)
{
if (ch == '<')
{
- val = "<<";
- type = tt_dict_open;
- state = st_token_ready;
+ this->m->val = "<<";
+ this->m->type = tt_dict_open;
+ this->m->state = st_token_ready;
}
else
{
handled = false;
- state = st_in_hexstring;
+ this->m->state = st_in_hexstring;
}
}
- else if (state == st_gt)
+ else if (this->m->state == st_gt)
{
if (ch == '>')
{
- val = ">>";
- type = tt_dict_close;
- state = st_token_ready;
+ this->m->val = ">>";
+ this->m->type = tt_dict_close;
+ this->m->state = st_token_ready;
}
else
{
- val = ">";
- type = tt_bad;
+ this->m->val = ">";
+ this->m->type = tt_bad;
QTC::TC("qpdf", "QPDF_Tokenizer bad >");
- error_message = "unexpected >";
- unread_char = true;
- char_to_unread = ch;
- state = st_token_ready;
+ this->m->error_message = "unexpected >";
+ this->m->unread_char = true;
+ this->m->char_to_unread = ch;
+ this->m->state = st_token_ready;
}
}
- else if (state == st_in_string)
+ else if (this->m->state == st_in_string)
{
- if (string_ignoring_newline && (! ((ch == '\r') || (ch == '\n'))))
+ if (this->m->string_ignoring_newline &&
+ (! ((ch == '\r') || (ch == '\n'))))
{
- string_ignoring_newline = false;
+ this->m->string_ignoring_newline = false;
}
- size_t bs_num_count = strlen(bs_num_register);
+ size_t bs_num_count = strlen(this->m->bs_num_register);
bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal)))
{
// We've accumulated \ddd. PDF Spec says to ignore
// high-order overflow.
- val += static_cast<char>(strtol(bs_num_register, 0, 8));
- memset(bs_num_register, '\0', sizeof(bs_num_register));
+ this->m->val += static_cast<char>(
+ strtol(this->m->bs_num_register, 0, 8));
+ memset(this->m->bs_num_register, '\0',
+ sizeof(this->m->bs_num_register));
bs_num_count = 0;
}
- if (string_ignoring_newline && ((ch == '\r') || (ch == '\n')))
+ if (this->m->string_ignoring_newline && ((ch == '\r') || (ch == '\n')))
{
// ignore
}
- else if (ch_is_octal && (last_char_was_bs || (bs_num_count > 0)))
+ else if (ch_is_octal &&
+ (this->m->last_char_was_bs || (bs_num_count > 0)))
{
- bs_num_register[bs_num_count++] = ch;
+ this->m->bs_num_register[bs_num_count++] = ch;
}
- else if (last_char_was_bs)
+ else if (this->m->last_char_was_bs)
{
switch (ch)
{
case 'n':
- val += '\n';
+ this->m->val += '\n';
break;
case 'r':
- val += '\r';
+ this->m->val += '\r';
break;
case 't':
- val += '\t';
+ this->m->val += '\t';
break;
case 'b':
- val += '\b';
+ this->m->val += '\b';
break;
case 'f':
- val += '\f';
+ this->m->val += '\f';
break;
case '\r':
case '\n':
- string_ignoring_newline = true;
+ this->m->string_ignoring_newline = true;
break;
default:
// PDF spec says backslash is ignored before anything else
- val += ch;
+ this->m->val += ch;
break;
}
}
@@ -371,22 +385,23 @@ QPDFTokenizer::presentCharacter(char ch)
}
else if (ch == '(')
{
- val += ch;
- ++string_depth;
+ this->m->val += ch;
+ ++this->m->string_depth;
}
- else if ((ch == ')') && (--string_depth == 0))
+ else if ((ch == ')') && (--this->m->string_depth == 0))
{
- type = tt_string;
- state = st_token_ready;
+ this->m->type = tt_string;
+ this->m->state = st_token_ready;
}
else
{
- val += ch;
+ this->m->val += ch;
}
- last_char_was_bs = ((! last_char_was_bs) && (ch == '\\'));
+ this->m->last_char_was_bs =
+ ((! this->m->last_char_was_bs) && (ch == '\\'));
}
- else if (state == st_literal)
+ else if (this->m->state == st_literal)
{
if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0)
{
@@ -398,14 +413,14 @@ QPDFTokenizer::presentCharacter(char ch)
// though not on any files in the test suite as of this
// writing.
- type = tt_word;
- unread_char = true;
- char_to_unread = ch;
- state = st_token_ready;
+ this->m->type = tt_word;
+ this->m->unread_char = true;
+ this->m->char_to_unread = ch;
+ this->m->state = st_token_ready;
}
else
{
- val += ch;
+ this->m->val += ch;
}
}
else
@@ -418,33 +433,33 @@ QPDFTokenizer::presentCharacter(char ch)
{
// okay
}
- else if (state == st_in_hexstring)
+ else if (this->m->state == st_in_hexstring)
{
if (ch == '>')
{
- type = tt_string;
- state = st_token_ready;
- if (val.length() % 2)
+ this->m->type = tt_string;
+ this->m->state = st_token_ready;
+ if (this->m->val.length() % 2)
{
// PDF spec says odd hexstrings have implicit
// trailing 0.
- val += '0';
+ this->m->val += '0';
}
char num[3];
num[2] = '\0';
std::string nval;
- for (unsigned int i = 0; i < val.length(); i += 2)
+ for (unsigned int i = 0; i < this->m->val.length(); i += 2)
{
- num[0] = val.at(i);
- num[1] = val.at(i+1);
+ num[0] = this->m->val.at(i);
+ num[1] = this->m->val.at(i+1);
char nch = static_cast<char>(strtol(num, 0, 16));
nval += nch;
}
- val = nval;
+ this->m->val = nval;
}
else if (QUtil::is_hex_digit(ch))
{
- val += ch;
+ this->m->val += ch;
}
else if (isSpace(ch))
{
@@ -452,11 +467,11 @@ QPDFTokenizer::presentCharacter(char ch)
}
else
{
- type = tt_bad;
+ this->m->type = tt_bad;
QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character");
- error_message = std::string("invalid character (") +
+ this->m->error_message = std::string("invalid character (") +
ch + ") in hexstring";
- state = st_token_ready;
+ this->m->state = st_token_ready;
}
}
else
@@ -465,61 +480,63 @@ QPDFTokenizer::presentCharacter(char ch)
"INTERNAL ERROR: invalid state while reading token");
}
- if ((state == st_token_ready) && (type == tt_word))
+ if ((this->m->state == st_token_ready) && (this->m->type == tt_word))
{
resolveLiteral();
}
- if (! (betweenTokens() || ((state == st_token_ready) && unread_char)))
+ if (! (betweenTokens() ||
+ ((this->m->state == st_token_ready) && this->m->unread_char)))
{
- this->raw_val += orig_ch;
+ this->m->raw_val += orig_ch;
}
}
void
QPDFTokenizer::presentEOF()
{
- if (state == st_literal)
+ if (this->m->state == st_literal)
{
QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token");
resolveLiteral();
}
- else if ((this->include_ignorable) && (state == st_in_space))
+ else if ((this->m->include_ignorable) && (this->m->state == st_in_space))
{
- type = tt_space;
+ this->m->type = tt_space;
}
- else if ((this->include_ignorable) && (state == st_in_comment))
+ else if ((this->m->include_ignorable) && (this->m->state == st_in_comment))
{
- type = tt_comment;
+ this->m->type = tt_comment;
}
else if (betweenTokens())
{
- type = tt_eof;
+ this->m->type = tt_eof;
}
- else if (state != st_token_ready)
+ else if (this->m->state != st_token_ready)
{
QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token");
- type = tt_bad;
- error_message = "EOF while reading token";
+ this->m->type = tt_bad;
+ this->m->error_message = "EOF while reading token";
}
- state = st_token_ready;
+ this->m->state = st_token_ready;
}
bool
QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
{
- bool ready = (this->state == st_token_ready);
- unread_char = this->unread_char;
- ch = this->char_to_unread;
+ bool ready = (this->m->state == st_token_ready);
+ unread_char = this->m->unread_char;
+ ch = this->m->char_to_unread;
if (ready)
{
- if (type == tt_bad)
+ if (this->m->type == tt_bad)
{
- val = raw_val;
+ this->m->val = this->m->raw_val;
}
- token = Token(type, val, raw_val, error_message);
- reset();
+ token = Token(this->m->type, this->m->val,
+ this->m->raw_val, this->m->error_message);
+ this->m->reset();
}
return ready;
}
@@ -527,10 +544,10 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
bool
QPDFTokenizer::betweenTokens()
{
- return ((state == st_top) ||
- ((! this->include_ignorable) &&
- ((state == st_in_comment) ||
- (state == st_in_space))));
+ return ((this->m->state == st_top) ||
+ ((! this->m->include_ignorable) &&
+ ((this->m->state == st_in_comment) ||
+ (this->m->state == st_in_space))));
}
QPDFTokenizer::Token
@@ -553,11 +570,11 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
{
presentEOF();
presented_eof = true;
- if ((type == tt_eof) && (! this->allow_eof))
+ if ((this->m->type == tt_eof) && (! this->m->allow_eof))
{
QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed");
- type = tt_bad;
- error_message = "unexpected EOF";
+ this->m->type = tt_bad;
+ this->m->error_message = "unexpected EOF";
offset = input->getLastOffset();
}
}
@@ -574,14 +591,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
{
++offset;
}
- if (max_len && (raw_val.length() >= max_len) &&
- (this->state != st_token_ready))
+ if (max_len && (this->m->raw_val.length() >= max_len) &&
+ (this->m->state != st_token_ready))
{
// terminate this token now
QTC::TC("qpdf", "QPDFTokenizer block long token");
- this->type = tt_bad;
- this->state = st_token_ready;
- error_message =
+ this->m->type = tt_bad;
+ this->m->state = st_token_ready;
+ this->m->error_message =
"exceeded allowable length while reading token";
}
}