diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/qpdf/BufferInputSource.hh | 25 | ||||
-rw-r--r-- | include/qpdf/ClosedFileInputSource.hh | 21 | ||||
-rw-r--r-- | include/qpdf/FileInputSource.hh | 21 | ||||
-rw-r--r-- | include/qpdf/InputSource.hh | 68 | ||||
-rw-r--r-- | include/qpdf/QPDFTokenizer.hh | 94 | ||||
-rw-r--r-- | include/qpdf/QUtil.hh | 55 |
6 files changed, 189 insertions, 95 deletions
diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh index b965704f..1a93815b 100644 --- a/include/qpdf/BufferInputSource.hh +++ b/include/qpdf/BufferInputSource.hh @@ -54,26 +54,11 @@ class QPDF_DLL_CLASS BufferInputSource: public InputSource virtual void unreadCh(char ch); private: - class QPDF_DLL_PRIVATE Members - { - friend class BufferInputSource; - - public: - QPDF_DLL - ~Members() = default; - - private: - Members(bool own_memory, std::string const& description, Buffer* buf); - Members(Members const&) = delete; - - bool own_memory; - std::string description; - Buffer* buf; - qpdf_offset_t cur_offset; - qpdf_offset_t max_offset; - }; - - std::shared_ptr<Members> m; + bool own_memory; + std::string description; + Buffer* buf; + qpdf_offset_t cur_offset; + qpdf_offset_t max_offset; }; #endif // QPDF_BUFFERINPUTSOURCE_HH diff --git a/include/qpdf/ClosedFileInputSource.hh b/include/qpdf/ClosedFileInputSource.hh index c72a1df8..b23c2767 100644 --- a/include/qpdf/ClosedFileInputSource.hh +++ b/include/qpdf/ClosedFileInputSource.hh @@ -73,23 +73,10 @@ class QPDF_DLL_CLASS ClosedFileInputSource: public InputSource QPDF_DLL_PRIVATE void after(); - class QPDF_DLL_PRIVATE Members - { - friend class ClosedFileInputSource; - - public: - QPDF_DLL - ~Members() = default; - - private: - Members(char const* filename); - - std::string filename; - qpdf_offset_t offset; - std::shared_ptr<FileInputSource> fis; - bool stay_open; - }; - std::shared_ptr<Members> m; + std::string filename; + qpdf_offset_t offset; + std::shared_ptr<FileInputSource> fis; + bool stay_open; }; #endif // QPDF_CLOSEDFILEINPUTSOURCE_HH diff --git a/include/qpdf/FileInputSource.hh b/include/qpdf/FileInputSource.hh index f1e7edf4..9e0d57fb 100644 --- a/include/qpdf/FileInputSource.hh +++ b/include/qpdf/FileInputSource.hh @@ -58,24 +58,9 @@ class QPDF_DLL_CLASS FileInputSource: public InputSource FileInputSource(FileInputSource const&) = delete; FileInputSource& operator=(FileInputSource const&) = delete; - class QPDF_DLL_PRIVATE Members - { - friend class FileInputSource; - - public: - QPDF_DLL - ~Members(); - - private: - Members(bool close_file); - Members(Members const&) = delete; - - bool close_file; - std::string filename; - FILE* file; - }; - - std::shared_ptr<Members> m; + bool close_file; + std::string filename; + FILE* file; }; #endif // QPDF_FILEINPUTSOURCE_HH diff --git a/include/qpdf/InputSource.hh b/include/qpdf/InputSource.hh index 9feb8ec3..e9d99cdb 100644 --- a/include/qpdf/InputSource.hh +++ b/include/qpdf/InputSource.hh @@ -93,6 +93,12 @@ class QPDF_DLL_CLASS InputSource // efficient. virtual void unreadCh(char ch) = 0; + // The following methods are for use by QPDFTokenizer + inline qpdf_offset_t fastTell(); + inline bool fastRead(char&); + inline void fastUnread(bool); + inline void loadBuffer(); + protected: qpdf_offset_t last_offset; @@ -111,6 +117,68 @@ class QPDF_DLL_CLASS InputSource }; std::shared_ptr<Members> m; + + // State for fast... methods + static const qpdf_offset_t buf_size = 128; + char buffer[buf_size]; + qpdf_offset_t buf_len = 0; + qpdf_offset_t buf_idx = 0; + qpdf_offset_t buf_start = 0; }; +inline void +InputSource::loadBuffer() +{ + this->buf_idx = 0; + this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size)); + // NB read sets last_offset + this->buf_start = this->last_offset; +} + +inline qpdf_offset_t +InputSource::fastTell() +{ + if (this->buf_len == 0) { + loadBuffer(); + } else { + auto curr = tell(); + if (curr < this->buf_start || + curr >= (this->buf_start + this->buf_len)) { + loadBuffer(); + } else { + this->last_offset = curr; + this->buf_idx = curr - this->buf_start; + } + } + return this->last_offset; +} + +inline bool +InputSource::fastRead(char& ch) +{ + // Before calling fastRead, fastTell must be called to prepare the buffer. + // Once reading is complete, fastUnread must be called to set the correct + // file position. + if (this->buf_idx < this->buf_len) { + ch = this->buffer[this->buf_idx]; + ++(this->buf_idx); + ++(this->last_offset); + return true; + + } else if (this->buf_len == 0) { + return false; + } else { + seek(this->buf_start + this->buf_len, SEEK_SET); + fastTell(); + return fastRead(ch); + } +} + +inline void +InputSource::fastUnread(bool back) +{ + this->last_offset -= back ? 1 : 0; + seek(this->last_offset, SEEK_SET); +} + #endif // QPDF_INPUTSOURCE_HH diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 2187f21e..33b2e710 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -193,60 +193,82 @@ class QPDFTokenizer QPDFTokenizer(QPDFTokenizer const&) = delete; QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; - void resolveLiteral(); bool isSpace(char); bool isDelimiter(char); void findEI(std::shared_ptr<InputSource> input); enum state_e { st_top, + st_in_hexstring, + st_in_string, + st_in_hexstring_2nd, + st_name, + st_literal, st_in_space, st_in_comment, - st_in_string, + st_string_escape, + st_char_code, + st_string_after_cr, st_lt, st_gt, - st_literal, - st_in_hexstring, st_inline_image, + st_sign, + st_number, + st_real, + st_decimal, + st_name_hex1, + st_name_hex2, + st_before_token, st_token_ready }; - class Members - { - friend class QPDFTokenizer; - - public: - QPDF_DLL - ~Members() = default; + void handleCharacter(char); + void inBeforeToken(char); + void inTop(char); + void inSpace(char); + void inComment(char); + void inString(char); + void inName(char); + void inLt(char); + void inGt(char); + void inStringAfterCR(char); + void inStringEscape(char); + void inLiteral(char); + void inCharCode(char); + void inHexstring(char); + void inHexstring2nd(char); + void inInlineImage(char); + void inTokenReady(char); + void inNameHex1(char); + void inNameHex2(char); + void inSign(char); + void inDecimal(char); + void inNumber(char); + void inReal(char); + void reset(); - private: - Members(); - Members(Members const&) = delete; - void reset(); + // Lexer state + state_e state; - // Lexer state - state_e state; + bool allow_eof; + bool include_ignorable; - bool allow_eof; - bool include_ignorable; + // Current token accumulation + token_type_e type; + std::string val; + std::string raw_val; + std::string error_message; + bool before_token; + bool in_token; + char char_to_unread; + size_t inline_image_bytes; + bool bad; - // Current token accumulation - token_type_e type; - std::string val; - std::string raw_val; - std::string error_message; - bool unread_char; - char char_to_unread; - size_t inline_image_bytes; - - // State for strings - int string_depth; - bool string_ignoring_newline; - char bs_num_register[4]; - bool last_char_was_bs; - bool last_char_was_cr; - }; - std::shared_ptr<Members> m; + // State for strings + int string_depth; + int char_code; + char hex_char; + int digit_count; }; #endif // QPDFTOKENIZER_HH diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index 32aeae1f..41b89da4 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -25,6 +25,7 @@ #include <qpdf/DLL.h> #include <qpdf/PointerHolder.hh> #include <qpdf/Types.h> +#include <cstring> #include <functional> #include <list> #include <memory> @@ -489,16 +490,16 @@ namespace QUtil // classes without using ctype, which we avoid because of locale // considerations. QPDF_DLL - bool is_hex_digit(char); + inline bool is_hex_digit(char); QPDF_DLL - bool is_space(char); + inline bool is_space(char); QPDF_DLL - bool is_digit(char); + inline bool is_digit(char); QPDF_DLL - bool is_number(char const*); + inline bool is_number(char const*); // This method parses the numeric range syntax used by the qpdf // command-line tool. May throw std::runtime_error. @@ -526,4 +527,50 @@ namespace QUtil #endif // QPDF_NO_WCHAR_T }; // namespace QUtil +inline bool +QUtil::is_hex_digit(char ch) +{ + return (ch && (strchr("0123456789abcdefABCDEF", ch) != nullptr)); +} + +inline bool +QUtil::is_space(char ch) +{ + return (ch && (strchr(" \f\n\r\t\v", ch) != nullptr)); +} + +inline bool +QUtil::is_digit(char ch) +{ + return ((ch >= '0') && (ch <= '9')); +} + +inline bool +QUtil::is_number(char const* p) +{ + // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$ + if (!*p) { + return false; + } + if ((*p == '-') || (*p == '+')) { + ++p; + } + bool found_dot = false; + bool found_digit = false; + for (; *p; ++p) { + if (*p == '.') { + if (found_dot) { + // only one dot + return false; + } + found_dot = true; + } else if (QUtil::is_digit(*p)) { + found_digit = true; + } else { + return false; + } + } + return found_digit; +} + #endif // QUTIL_HH |