about summary refs log tree commit diff stats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/qpdf/BufferInputSource.hh 25
-rw-r--r-- include/qpdf/ClosedFileInputSource.hh 21
-rw-r--r-- include/qpdf/FileInputSource.hh 21
-rw-r--r-- include/qpdf/InputSource.hh 68
-rw-r--r-- include/qpdf/QPDFTokenizer.hh 94
-rw-r--r-- include/qpdf/QUtil.hh 55
6 files changed, 189 insertions, 95 deletions
diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh
index b965704f..1a93815b 100644
--- a/include/qpdf/BufferInputSource.hh
+++ b/include/qpdf/BufferInputSource.hh
@@ -54,26 +54,11 @@ class QPDF_DLL_CLASS BufferInputSource: public InputSource
virtual void unreadCh(char ch);
private:
- class QPDF_DLL_PRIVATE Members
- {
- friend class BufferInputSource;
-
- public:
- QPDF_DLL
- ~Members() = default;
-
- private:
- Members(bool own_memory, std::string const& description, Buffer* buf);
- Members(Members const&) = delete;
-
- bool own_memory;
- std::string description;
- Buffer* buf;
- qpdf_offset_t cur_offset;
- qpdf_offset_t max_offset;
- };
-
- std::shared_ptr<Members> m;
+ bool own_memory;
+ std::string description;
+ Buffer* buf;
+ qpdf_offset_t cur_offset;
+ qpdf_offset_t max_offset;
};
#endif // QPDF_BUFFERINPUTSOURCE_HH
diff --git a/include/qpdf/ClosedFileInputSource.hh b/include/qpdf/ClosedFileInputSource.hh
index c72a1df8..b23c2767 100644
--- a/include/qpdf/ClosedFileInputSource.hh
+++ b/include/qpdf/ClosedFileInputSource.hh
@@ -73,23 +73,10 @@ class QPDF_DLL_CLASS ClosedFileInputSource: public InputSource
QPDF_DLL_PRIVATE
void after();
- class QPDF_DLL_PRIVATE Members
- {
- friend class ClosedFileInputSource;
-
- public:
- QPDF_DLL
- ~Members() = default;
-
- private:
- Members(char const* filename);
-
- std::string filename;
- qpdf_offset_t offset;
- std::shared_ptr<FileInputSource> fis;
- bool stay_open;
- };
- std::shared_ptr<Members> m;
+ std::string filename;
+ qpdf_offset_t offset;
+ std::shared_ptr<FileInputSource> fis;
+ bool stay_open;
};
#endif // QPDF_CLOSEDFILEINPUTSOURCE_HH
diff --git a/include/qpdf/FileInputSource.hh b/include/qpdf/FileInputSource.hh
index f1e7edf4..9e0d57fb 100644
--- a/include/qpdf/FileInputSource.hh
+++ b/include/qpdf/FileInputSource.hh
@@ -58,24 +58,9 @@ class QPDF_DLL_CLASS FileInputSource: public InputSource
FileInputSource(FileInputSource const&) = delete;
FileInputSource& operator=(FileInputSource const&) = delete;
- class QPDF_DLL_PRIVATE Members
- {
- friend class FileInputSource;
-
- public:
- QPDF_DLL
- ~Members();
-
- private:
- Members(bool close_file);
- Members(Members const&) = delete;
-
- bool close_file;
- std::string filename;
- FILE* file;
- };
-
- std::shared_ptr<Members> m;
+ bool close_file;
+ std::string filename;
+ FILE* file;
};
#endif // QPDF_FILEINPUTSOURCE_HH
diff --git a/include/qpdf/InputSource.hh b/include/qpdf/InputSource.hh
index 9feb8ec3..e9d99cdb 100644
--- a/include/qpdf/InputSource.hh
+++ b/include/qpdf/InputSource.hh
@@ -93,6 +93,12 @@ class QPDF_DLL_CLASS InputSource
// efficient.
virtual void unreadCh(char ch) = 0;
+ // The following methods are for use by QPDFTokenizer
+ inline qpdf_offset_t fastTell();
+ inline bool fastRead(char&);
+ inline void fastUnread(bool);
+ inline void loadBuffer();
+
protected:
qpdf_offset_t last_offset;
@@ -111,6 +117,68 @@ class QPDF_DLL_CLASS InputSource
};
std::shared_ptr<Members> m;
+
+ // State for fast... methods
+ static const qpdf_offset_t buf_size = 128;
+ char buffer[buf_size];
+ qpdf_offset_t buf_len = 0;
+ qpdf_offset_t buf_idx = 0;
+ qpdf_offset_t buf_start = 0;
};
+inline void
+InputSource::loadBuffer()
+{ // Refill the fast-read buffer with a single read() call and restart consumption at index 0.
+ this->buf_idx = 0;
+ this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size)); // read()'s return value is kept as the count of valid bytes in buffer
+ // NB: read() sets last_offset (read() itself is declared outside this view)
+ this->buf_start = this->last_offset; // remembered as the file offset that corresponds to buffer[0]
+}
+
+inline qpdf_offset_t
+InputSource::fastTell()
+{ // Make the buffer cover the position reported by tell(), then return last_offset.
+ if (this->buf_len == 0) { // nothing buffered yet, or the previous load returned 0
+ loadBuffer();
+ } else {
+ auto curr = tell();
+ if (curr < this->buf_start ||
+ curr >= (this->buf_start + this->buf_len)) { // tell() lies outside [buf_start, buf_start + buf_len)
+ loadBuffer();
+ } else { // still inside the buffered range: reuse it without reading again
+ this->last_offset = curr;
+ this->buf_idx = curr - this->buf_start;
+ }
+ }
+ return this->last_offset; // either assigned above or left as read() set it during loadBuffer()
+}
+
+inline bool
+InputSource::fastRead(char& ch)
+{ // Store the next byte in ch and return true, or return false when the last load produced no bytes.
+ // Before calling fastRead, fastTell must be called to prepare the buffer.
+ // Once reading is complete, fastUnread must be called to set the correct
+ // file position.
+ if (this->buf_idx < this->buf_len) { // fast path: hand out the next buffered byte
+ ch = this->buffer[this->buf_idx];
+ ++(this->buf_idx);
+ ++(this->last_offset); // keep last_offset in step with the bytes consumed from the buffer
+ return true;
+
+ } else if (this->buf_len == 0) { // empty buffer: nothing to deliver, ch is left untouched
+ return false;
+ } else { // buffer exhausted: seek to the offset just past it, let fastTell() reload, then retry
+ seek(this->buf_start + this->buf_len, SEEK_SET);
+ fastTell();
+ return fastRead(ch);
+ }
+}
+
+inline void
+InputSource::fastUnread(bool back)
+{ // Finish a fastRead sequence by seeking the source to last_offset.
+ this->last_offset -= back ? 1 : 0; // when back is true, step last_offset back by one before seeking
+ seek(this->last_offset, SEEK_SET);
+}
+
#endif // QPDF_INPUTSOURCE_HH
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 2187f21e..33b2e710 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -193,60 +193,82 @@ class QPDFTokenizer
QPDFTokenizer(QPDFTokenizer const&) = delete;
QPDFTokenizer& operator=(QPDFTokenizer const&) = delete;
- void resolveLiteral();
bool isSpace(char);
bool isDelimiter(char);
void findEI(std::shared_ptr<InputSource> input);
enum state_e {
st_top,
+ st_in_hexstring,
+ st_in_string,
+ st_in_hexstring_2nd,
+ st_name,
+ st_literal,
st_in_space,
st_in_comment,
- st_in_string,
+ st_string_escape,
+ st_char_code,
+ st_string_after_cr,
st_lt,
st_gt,
- st_literal,
- st_in_hexstring,
st_inline_image,
+ st_sign,
+ st_number,
+ st_real,
+ st_decimal,
+ st_name_hex1,
+ st_name_hex2,
+ st_before_token,
st_token_ready
};
- class Members
- {
- friend class QPDFTokenizer;
-
- public:
- QPDF_DLL
- ~Members() = default;
+ void handleCharacter(char);
+ void inBeforeToken(char);
+ void inTop(char);
+ void inSpace(char);
+ void inComment(char);
+ void inString(char);
+ void inName(char);
+ void inLt(char);
+ void inGt(char);
+ void inStringAfterCR(char);
+ void inStringEscape(char);
+ void inLiteral(char);
+ void inCharCode(char);
+ void inHexstring(char);
+ void inHexstring2nd(char);
+ void inInlineImage(char);
+ void inTokenReady(char);
+ void inNameHex1(char);
+ void inNameHex2(char);
+ void inSign(char);
+ void inDecimal(char);
+ void inNumber(char);
+ void inReal(char);
+ void reset();
- private:
- Members();
- Members(Members const&) = delete;
- void reset();
+ // Lexer state
+ state_e state;
- // Lexer state
- state_e state;
+ bool allow_eof;
+ bool include_ignorable;
- bool allow_eof;
- bool include_ignorable;
+ // Current token accumulation
+ token_type_e type;
+ std::string val;
+ std::string raw_val;
+ std::string error_message;
+ bool before_token;
+ bool in_token;
+ char char_to_unread;
+ size_t inline_image_bytes;
+ bool bad;
- // Current token accumulation
- token_type_e type;
- std::string val;
- std::string raw_val;
- std::string error_message;
- bool unread_char;
- char char_to_unread;
- size_t inline_image_bytes;
-
- // State for strings
- int string_depth;
- bool string_ignoring_newline;
- char bs_num_register[4];
- bool last_char_was_bs;
- bool last_char_was_cr;
- };
- std::shared_ptr<Members> m;
+ // State for strings
+ int string_depth;
+ int char_code;
+ char hex_char;
+ int digit_count;
};
#endif // QPDFTOKENIZER_HH
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index 32aeae1f..41b89da4 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -25,6 +25,7 @@
#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh>
#include <qpdf/Types.h>
+#include <cstring>
#include <functional>
#include <list>
#include <memory>
@@ -489,16 +490,16 @@ namespace QUtil
// classes without using ctype, which we avoid because of locale
// considerations.
QPDF_DLL
- bool is_hex_digit(char);
+ inline bool is_hex_digit(char);
QPDF_DLL
- bool is_space(char);
+ inline bool is_space(char);
QPDF_DLL
- bool is_digit(char);
+ inline bool is_digit(char);
QPDF_DLL
- bool is_number(char const*);
+ inline bool is_number(char const*);
// This method parses the numeric range syntax used by the qpdf
// command-line tool. May throw std::runtime_error.
@@ -526,4 +527,50 @@ namespace QUtil
#endif // QPDF_NO_WCHAR_T
}; // namespace QUtil
+inline bool
+QUtil::is_hex_digit(char ch)
+{ // True when ch is one of 0-9, a-f or A-F.
+ return (ch && (strchr("0123456789abcdefABCDEF", ch) != nullptr)); // the ch && guard is needed because strchr matches the terminating NUL when ch is 0
+}
+
+inline bool
+QUtil::is_space(char ch)
+{ // True when ch is space, form feed, line feed, carriage return, horizontal tab or vertical tab.
+ return (ch && (strchr(" \f\n\r\t\v", ch) != nullptr)); // the ch && guard is needed because strchr matches the terminating NUL when ch is 0
+}
+
+inline bool
+QUtil::is_digit(char ch)
+{ // True when ch is one of the characters '0' through '9'.
+ return ((ch >= '0') && (ch <= '9')); // plain range comparison, no ctype call involved
+}
+
+inline bool
+QUtil::is_number(char const* p)
+{ // True when the NUL-terminated string p is an optionally signed decimal number: at most one '.', at least one digit.
+ // ^[\+\-]?(\.\d+|\d+(\.\d*)?)$
+ if (!*p) { // empty string is not a number
+ return false;
+ }
+ if ((*p == '-') || (*p == '+')) { // skip a single leading sign
+ ++p;
+ }
+ bool found_dot = false;
+ bool found_digit = false;
+ for (; *p; ++p) { // scan the rest up to the terminating NUL
+ if (*p == '.') {
+ if (found_dot) {
+ // only one dot
+ return false;
+ }
+ found_dot = true;
+ } else if (QUtil::is_digit(*p)) {
+ found_digit = true;
+ } else { // anything other than '.' or a digit (including a second sign) disqualifies the string
+ return false;
+ }
+ }
+ return found_digit; // rejects digit-less inputs such as "+", "." and "-."
+}
+
#endif // QUTIL_HH