From 12f1eb15ca3fed6310402847559a7c99d3c77847 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 2 Apr 2022 17:14:10 -0400 Subject: Programmatically apply new formatting to code Run this: for i in **/*.cc **/*.c **/*.h **/*.hh; do clang-format < $i >| $i.new && mv $i.new $i done --- libqpdf/QPDFTokenizer.cc | 526 ++++++++++++++++------------------------------- 1 file changed, 180 insertions(+), 346 deletions(-) (limited to 'libqpdf/QPDFTokenizer.cc') diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index decbc511..ade64cb2 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -4,17 +4,18 @@ // it's not worth the risk of including it in case it may accidentally // be used. -#include +#include #include -#include #include -#include +#include +#include #include #include #include -static bool is_delimiter(char ch) +static bool +is_delimiter(char ch) { return (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0); } @@ -22,8 +23,7 @@ static bool is_delimiter(char ch) class QPDFWordTokenFinder: public InputSource::Finder { public: - QPDFWordTokenFinder(PointerHolder is, - std::string const& str) : + QPDFWordTokenFinder(PointerHolder is, std::string const& str) : is(is), str(str) { @@ -46,30 +46,24 @@ QPDFWordTokenFinder::check() QPDFTokenizer tokenizer; QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); qpdf_offset_t pos = is->tell(); - if (! (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str))) - { + if (!(t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str))) { QTC::TC("qpdf", "QPDFTokenizer finder found wrong word"); return false; } qpdf_offset_t token_start = is->getLastOffset(); char next; bool next_okay = false; - if (is->read(&next, 1) == 0) - { + if (is->read(&next, 1) == 0) { QTC::TC("qpdf", "QPDFTokenizer inline image at EOF"); next_okay = true; - } - else - { + } else { next_okay = is_delimiter(next); } is->seek(pos, SEEK_SET); - if (! next_okay) - { + if (!next_okay) { return false; } - if (token_start == 0) - { + if (token_start == 0) { // Can't actually happen...we never start the search at the // beginning of the input. return false; @@ -110,18 +104,13 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : value(value), raw_value(value) { - if (type == tt_string) - { + if (type == tt_string) { raw_value = QPDFObjectHandle::newString(value).unparse(); - } - else if (type == tt_name) - { + } else if (type == tt_name) { raw_value = QPDFObjectHandle::newName(value).unparse(); } } - - QPDFTokenizer::QPDFTokenizer() : m(new Members()) { @@ -154,44 +143,35 @@ QPDFTokenizer::isDelimiter(char ch) void QPDFTokenizer::resolveLiteral() { - if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) - { + if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) { this->m->type = tt_name; // Deal with # in name token. Note: '/' by itself is a // valid name, so don't strip leading /. That way we // don't have to deal with the empty string as a name. std::string nval = "/"; size_t len = this->m->val.length(); - for (size_t i = 1; i < len; ++i) - { + for (size_t i = 1; i < len; ++i) { char ch = this->m->val.at(i); - if (ch == '#') - { + if (ch == '#') { if ((i + 2 < len) && - QUtil::is_hex_digit(this->m->val.at(i+1)) && - QUtil::is_hex_digit(this->m->val.at(i+2))) - { + QUtil::is_hex_digit(this->m->val.at(i + 1)) && + QUtil::is_hex_digit(this->m->val.at(i + 2))) { char num[3]; - num[0] = this->m->val.at(i+1); - num[1] = this->m->val.at(i+2); + num[0] = this->m->val.at(i + 1); + num[1] = this->m->val.at(i + 2); num[2] = '\0'; char ch2 = static_cast(strtol(num, 0, 16)); - if (ch2 == '\0') - { + if (ch2 == '\0') { this->m->type = tt_bad; QTC::TC("qpdf", "QPDFTokenizer null in name"); this->m->error_message = "null character not allowed in name token"; nval += "#00"; - } - else - { + } else { nval.append(1, ch2); } i += 2; - } - else - { + } else { QTC::TC("qpdf", "QPDFTokenizer bad name"); this->m->error_message = "name with stray # will not work with PDF >= 1.2"; @@ -199,35 +179,22 @@ QPDFTokenizer::resolveLiteral() // in QPDF_Name::normalizeName. nval += '\0'; } - } - else - { + } else { nval.append(1, ch); } } this->m->val = nval; - } - else if (QUtil::is_number(this->m->val.c_str())) - { - if (this->m->val.find('.') != std::string::npos) - { + } else if (QUtil::is_number(this->m->val.c_str())) { + if (this->m->val.find('.') != std::string::npos) { this->m->type = tt_real; - } - else - { + } else { this->m->type = tt_integer; } - } - else if ((this->m->val == "true") || (this->m->val == "false")) - { + } else if ((this->m->val == "true") || (this->m->val == "false")) { this->m->type = tt_bool; - } - else if (this->m->val == "null") - { + } else if (this->m->val == "null") { this->m->type = tt_null; - } - else - { + } else { // I don't really know what it is, so leave it as tt_word. // Lots of cases ($, #, etc.) other than actual words fall // into this category, but that's okay at least for now. @@ -238,8 +205,7 @@ QPDFTokenizer::resolveLiteral() void QPDFTokenizer::presentCharacter(char ch) { - if (this->m->state == st_token_ready) - { + if (this->m->state == st_token_ready) { throw std::logic_error( "INTERNAL ERROR: QPDF tokenizer presented character " "while token is waiting"); @@ -252,140 +218,94 @@ QPDFTokenizer::presentCharacter(char ch) // the character that caused a state change in the new state. bool handled = true; - if (this->m->state == st_top) - { + if (this->m->state == st_top) { // Note: we specifically do not use ctype here. It is // locale-dependent. - if (isSpace(ch)) - { - if (this->m->include_ignorable) - { + if (isSpace(ch)) { + if (this->m->include_ignorable) { this->m->state = st_in_space; this->m->val += ch; } - } - else if (ch == '%') - { + } else if (ch == '%') { this->m->state = st_in_comment; - if (this->m->include_ignorable) - { + if (this->m->include_ignorable) { this->m->val += ch; } - } - else if (ch == '(') - { + } else if (ch == '(') { this->m->string_depth = 1; this->m->string_ignoring_newline = false; - memset(this->m->bs_num_register, '\0', - sizeof(this->m->bs_num_register)); + memset( + this->m->bs_num_register, + '\0', + sizeof(this->m->bs_num_register)); this->m->last_char_was_bs = false; this->m->last_char_was_cr = false; this->m->state = st_in_string; - } - else if (ch == '<') - { + } else if (ch == '<') { this->m->state = st_lt; - } - else if (ch == '>') - { + } else if (ch == '>') { this->m->state = st_gt; - } - else - { + } else { this->m->val += ch; - if (ch == ')') - { + if (ch == ')') { this->m->type = tt_bad; QTC::TC("qpdf", "QPDFTokenizer bad )"); this->m->error_message = "unexpected )"; this->m->state = st_token_ready; - } - else if (ch == '[') - { + } else if (ch == '[') { this->m->type = tt_array_open; this->m->state = st_token_ready; - } - else if (ch == ']') - { + } else if (ch == ']') { this->m->type = tt_array_close; this->m->state = st_token_ready; - } - else if (ch == '{') - { + } else if (ch == '{') { this->m->type = tt_brace_open; this->m->state = st_token_ready; - } - else if (ch == '}') - { + } else if (ch == '}') { this->m->type = tt_brace_close; this->m->state = st_token_ready; - } - else - { + } else { this->m->state = st_literal; } } - } - else if (this->m->state == st_in_space) - { + } else if (this->m->state == st_in_space) { // We only enter this state if include_ignorable is true. - if (! isSpace(ch)) - { + if (!isSpace(ch)) { this->m->type = tt_space; this->m->unread_char = true; this->m->char_to_unread = ch; this->m->state = st_token_ready; - } - else - { + } else { this->m->val += ch; } - } - else if (this->m->state == st_in_comment) - { - if ((ch == '\r') || (ch == '\n')) - { - if (this->m->include_ignorable) - { + } else if (this->m->state == st_in_comment) { + if ((ch == '\r') || (ch == '\n')) { + if (this->m->include_ignorable) { this->m->type = tt_comment; this->m->unread_char = true; this->m->char_to_unread = ch; this->m->state = st_token_ready; - } - else - { + } else { this->m->state = st_top; } - } - else if (this->m->include_ignorable) - { + } else if (this->m->include_ignorable) { this->m->val += ch; } - } - else if (this->m->state == st_lt) - { - if (ch == '<') - { + } else if (this->m->state == st_lt) { + if (ch == '<') { this->m->val = "<<"; this->m->type = tt_dict_open; this->m->state = st_token_ready; - } - else - { + } else { handled = false; this->m->state = st_in_hexstring; } - } - else if (this->m->state == st_gt) - { - if (ch == '>') - { + } else if (this->m->state == st_gt) { + if (ch == '>') { this->m->val = ">>"; this->m->type = tt_dict_close; this->m->state = st_token_ready; - } - else - { + } else { this->m->val = ">"; this->m->type = tt_bad; QTC::TC("qpdf", "QPDFTokenizer bad >"); @@ -394,121 +314,96 @@ QPDFTokenizer::presentCharacter(char ch) this->m->char_to_unread = ch; this->m->state = st_token_ready; } - } - else if (this->m->state == st_in_string) - { - if (this->m->string_ignoring_newline && (ch != '\n')) - { + } else if (this->m->state == st_in_string) { + if (this->m->string_ignoring_newline && (ch != '\n')) { this->m->string_ignoring_newline = false; } size_t bs_num_count = strlen(this->m->bs_num_register); bool ch_is_octal = ((ch >= '0') && (ch <= '7')); - if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal))) - { + if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { // We've accumulated \ddd. PDF Spec says to ignore // high-order overflow. - this->m->val += static_cast( - strtol(this->m->bs_num_register, 0, 8)); - memset(this->m->bs_num_register, '\0', - sizeof(this->m->bs_num_register)); + this->m->val += + static_cast(strtol(this->m->bs_num_register, 0, 8)); + memset( + this->m->bs_num_register, + '\0', + sizeof(this->m->bs_num_register)); bs_num_count = 0; } - if (this->m->string_ignoring_newline && (ch == '\n')) - { + if (this->m->string_ignoring_newline && (ch == '\n')) { // ignore this->m->string_ignoring_newline = false; - } - else if (ch_is_octal && - (this->m->last_char_was_bs || (bs_num_count > 0))) - { + } else if ( + ch_is_octal && (this->m->last_char_was_bs || (bs_num_count > 0))) { this->m->bs_num_register[bs_num_count++] = ch; - } - else if (this->m->last_char_was_bs) - { - switch (ch) - { - case 'n': + } else if (this->m->last_char_was_bs) { + switch (ch) { + case 'n': this->m->val += '\n'; break; - case 'r': + case 'r': this->m->val += '\r'; break; - case 't': + case 't': this->m->val += '\t'; break; - case 'b': + case 'b': this->m->val += '\b'; break; - case 'f': + case 'f': this->m->val += '\f'; break; - case '\n': + case '\n': break; - case '\r': + case '\r': this->m->string_ignoring_newline = true; break; - default: + default: // PDF spec says backslash is ignored before anything else this->m->val += ch; break; } - } - else if (ch == '\\') - { + } else if (ch == '\\') { // last_char_was_bs is set/cleared below as appropriate - if (bs_num_count) - { + if (bs_num_count) { throw std::logic_error( "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " "when ch == '\\'"); } - } - else if (ch == '(') - { + } else if (ch == '(') { this->m->val += ch; ++this->m->string_depth; - } - else if ((ch == ')') && (--this->m->string_depth == 0)) - { + } else if ((ch == ')') && (--this->m->string_depth == 0)) { this->m->type = tt_string; this->m->state = st_token_ready; - } - else if (ch == '\r') - { + } else if (ch == '\r') { // CR by itself is converted to LF this->m->val += '\n'; - } - else if (ch == '\n') - { + } else if (ch == '\n') { // CR LF is converted to LF - if (! this->m->last_char_was_cr) - { + if (!this->m->last_char_was_cr) { this->m->val += ch; } - } - else - { + } else { this->m->val += ch; } this->m->last_char_was_cr = - ((! this->m->string_ignoring_newline) && (ch == '\r')); + ((!this->m->string_ignoring_newline) && (ch == '\r')); this->m->last_char_was_bs = - ((! this->m->last_char_was_bs) && (ch == '\\')); - } - else if (this->m->state == st_literal) - { - if (isDelimiter(ch)) - { + ((!this->m->last_char_was_bs) && (ch == '\\')); + } else if (this->m->state == st_literal) { + if (isDelimiter(ch)) { // A C-locale whitespace character or delimiter terminates // token. It is important to unread the whitespace // character even though it is ignored since it may be the @@ -521,41 +416,29 @@ QPDFTokenizer::presentCharacter(char ch) this->m->unread_char = true; this->m->char_to_unread = ch; this->m->state = st_token_ready; - } - else - { + } else { this->m->val += ch; } - } - else if (this->m->state == st_inline_image) - { + } else if (this->m->state == st_inline_image) { this->m->val += ch; size_t len = this->m->val.length(); - if (len == this->m->inline_image_bytes) - { + if (len == this->m->inline_image_bytes) { QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); this->m->type = tt_inline_image; this->m->inline_image_bytes = 0; this->m->state = st_token_ready; } - } - else - { + } else { handled = false; } - if (handled) - { + if (handled) { // okay - } - else if (this->m->state == st_in_hexstring) - { - if (ch == '>') - { + } else if (this->m->state == st_in_hexstring) { + if (ch == '>') { this->m->type = tt_string; this->m->state = st_token_ready; - if (this->m->val.length() % 2) - { + if (this->m->val.length() % 2) { // PDF spec says odd hexstrings have implicit // trailing 0. this->m->val += '0'; @@ -563,46 +446,35 @@ QPDFTokenizer::presentCharacter(char ch) char num[3]; num[2] = '\0'; std::string nval; - for (unsigned int i = 0; i < this->m->val.length(); i += 2) - { + for (unsigned int i = 0; i < this->m->val.length(); i += 2) { num[0] = this->m->val.at(i); - num[1] = this->m->val.at(i+1); + num[1] = this->m->val.at(i + 1); char nch = static_cast(strtol(num, 0, 16)); nval += nch; } this->m->val = nval; - } - else if (QUtil::is_hex_digit(ch)) - { + } else if (QUtil::is_hex_digit(ch)) { this->m->val += ch; - } - else if (isSpace(ch)) - { + } else if (isSpace(ch)) { // ignore - } - else - { + } else { this->m->type = tt_bad; QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); - this->m->error_message = std::string("invalid character (") + - ch + ") in hexstring"; + this->m->error_message = + std::string("invalid character (") + ch + ") in hexstring"; this->m->state = st_token_ready; } - } - else - { + } else { throw std::logic_error( "INTERNAL ERROR: invalid state while reading token"); } - if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) - { + if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) { resolveLiteral(); } - if (! (betweenTokens() || - ((this->m->state == st_token_ready) && this->m->unread_char))) - { + if (!(betweenTokens() || + ((this->m->state == st_token_ready) && this->m->unread_char))) { this->m->raw_val += orig_ch; } } @@ -610,25 +482,18 @@ QPDFTokenizer::presentCharacter(char ch) void QPDFTokenizer::presentEOF() { - if (this->m->state == st_literal) - { + if (this->m->state == st_literal) { QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); resolveLiteral(); - } - else if ((this->m->include_ignorable) && (this->m->state == st_in_space)) - { + } else if ( + (this->m->include_ignorable) && (this->m->state == st_in_space)) { this->m->type = tt_space; - } - else if ((this->m->include_ignorable) && (this->m->state == st_in_comment)) - { + } else if ( + (this->m->include_ignorable) && (this->m->state == st_in_comment)) { this->m->type = tt_comment; - } - else if (betweenTokens()) - { + } else if (betweenTokens()) { this->m->type = tt_eof; - } - else if (this->m->state != st_token_ready) - { + } else if (this->m->state != st_token_ready) { QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); this->m->type = tt_bad; this->m->error_message = "EOF while reading token"; @@ -640,8 +505,7 @@ QPDFTokenizer::presentEOF() void QPDFTokenizer::expectInlineImage(PointerHolder input) { - if (this->m->state != st_top) - { + if (this->m->state != st_top) { throw std::logic_error("QPDFTokenizer::expectInlineImage called" " when tokenizer is in improper state"); } @@ -652,8 +516,7 @@ QPDFTokenizer::expectInlineImage(PointerHolder input) void QPDFTokenizer::findEI(PointerHolder input) { - if (! input.get()) - { + if (!input.get()) { return; } @@ -671,11 +534,9 @@ QPDFTokenizer::findEI(PointerHolder input) bool okay = false; bool first_try = true; - while (! okay) - { + while (!okay) { QPDFWordTokenFinder f(input, "EI"); - if (! input->findFirst("EI", input->tell(), 0, f)) - { + if (!input->findFirst("EI", input->tell(), 0, f)) { break; } this->m->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); @@ -689,21 +550,14 @@ QPDFTokenizer::findEI(PointerHolder input) // bits per pixel, and color space are all required as well as // a BI and ID. If we get 10 good tokens in a row or hit EOF, // we can be pretty sure we've found the actual EI. - for (int i = 0; i < 10; ++i) - { - QPDFTokenizer::Token t = - check.readToken(input, "checker", true); + for (int i = 0; i < 10; ++i) { + QPDFTokenizer::Token t = check.readToken(input, "checker", true); token_type_e type = t.getType(); - if (type == tt_eof) - { + if (type == tt_eof) { okay = true; - } - else if (type == tt_bad) - { + } else if (type == tt_bad) { found_bad = true; - } - else if (type == tt_word) - { + } else if (type == tt_word) { // The qpdf tokenizer lumps alphabetic and otherwise // uncategorized characters into "words". We recognize // strings of alphabetic characters as potential valid @@ -717,52 +571,41 @@ QPDFTokenizer::findEI(PointerHolder input) bool found_other = false; std::string value = t.getValue(); for (std::string::iterator iter = value.begin(); - iter != value.end(); ++iter) - { + iter != value.end(); + ++iter) { char ch = *iter; if (((ch >= 'a') && (ch <= 'z')) || - ((ch >= 'A') && (ch <= 'Z')) || - (ch == '*')) - { + ((ch >= 'A') && (ch <= 'Z')) || (ch == '*')) { // Treat '*' as alpha since there are valid // PDF operators that contain * along with // alphabetic characters. found_alpha = true; - } - else if ((static_cast(ch) < 32) && - (! isSpace(ch))) - { + } else if ( + (static_cast(ch) < 32) && (!isSpace(ch))) { // Compare ch as a signed char so characters // outside of 7-bit will be < 0. found_non_printable = true; break; - } - else - { + } else { found_other = true; } } - if (found_non_printable || (found_alpha && found_other)) - { + if (found_non_printable || (found_alpha && found_other)) { found_bad = true; } } - if (okay || found_bad) - { + if (okay || found_bad) { break; } } - if (! found_bad) - { + if (!found_bad) { okay = true; } - if (! okay) - { + if (!okay) { first_try = false; } } - if (okay && (! first_try)) - { + if (okay && (!first_try)) { QTC::TC("qpdf", "QPDFTokenizer found EI after more than one try"); } @@ -776,14 +619,15 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) bool ready = (this->m->state == st_token_ready); unread_char = this->m->unread_char; ch = this->m->char_to_unread; - if (ready) - { - if (this->m->type == tt_bad) - { + if (ready) { + if (this->m->type == tt_bad) { this->m->val = this->m->raw_val; } - token = Token(this->m->type, this->m->val, - this->m->raw_val, this->m->error_message); + token = Token( + this->m->type, + this->m->val, + this->m->raw_val, + this->m->error_message); this->m->reset(); } return ready; @@ -792,34 +636,32 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) bool QPDFTokenizer::betweenTokens() { - return ((this->m->state == st_top) || - ((! this->m->include_ignorable) && - ((this->m->state == st_in_comment) || - (this->m->state == st_in_space)))); + return ( + (this->m->state == st_top) || + ((!this->m->include_ignorable) && + ((this->m->state == st_in_comment) || + (this->m->state == st_in_space)))); } QPDFTokenizer::Token -QPDFTokenizer::readToken(PointerHolder input, - std::string const& context, - bool allow_bad, - size_t max_len) +QPDFTokenizer::readToken( + PointerHolder input, + std::string const& context, + bool allow_bad, + size_t max_len) { qpdf_offset_t offset = input->tell(); Token token; bool unread_char; char char_to_unread; bool presented_eof = false; - while (! getToken(token, unread_char, char_to_unread)) - { + while (!getToken(token, unread_char, char_to_unread)) { char ch; - if (input->read(&ch, 1) == 0) - { - if (! presented_eof) - { + if (input->read(&ch, 1) == 0) { + if (!presented_eof) { presentEOF(); presented_eof = true; - if ((this->m->type == tt_eof) && (! this->m->allow_eof)) - { + if ((this->m->type == tt_eof) && (!this->m->allow_eof)) { // Nothing in the qpdf library calls readToken // without allowEOF anymore, so this case is not // exercised. @@ -827,23 +669,17 @@ QPDFTokenizer::readToken(PointerHolder input, this->m->error_message = "unexpected EOF"; offset = input->getLastOffset(); } - } - else - { + } else { throw std::logic_error( "getToken returned false after presenting EOF"); } - } - else - { + } else { presentCharacter(ch); - if (betweenTokens() && (input->getLastOffset() == offset)) - { + if (betweenTokens() && (input->getLastOffset() == offset)) { ++offset; } if (max_len && (this->m->raw_val.length() >= max_len) && - (this->m->state != st_token_ready)) - { + (this->m->state != st_token_ready)) { // terminate this token now QTC::TC("qpdf", "QPDFTokenizer block long token"); this->m->type = tt_bad; @@ -854,26 +690,24 @@ QPDFTokenizer::readToken(PointerHolder input, } } - if (unread_char) - { + if (unread_char) { input->unreadCh(char_to_unread); } - if (token.getType() != tt_eof) - { + if (token.getType() != tt_eof) { input->setLastOffset(offset); } - if (token.getType() == tt_bad) - { - if (allow_bad) - { + if (token.getType() == tt_bad) { + if (allow_bad) { QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); - } - else - { - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), - context, offset, token.getErrorMessage()); + } else { + throw QPDFExc( + qpdf_e_damaged_pdf, + input->getName(), + context, + offset, + token.getErrorMessage()); } } -- cgit v1.2.3-70-g09d2