aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2018-01-30 03:05:15 +0100
committer Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:46 +0100
commit ec538792fac039daa9636f9c94000b7bc1f3a669 (patch)
tree c8a755d6d3f66bed71327d8470cc857e02012cb5
parent 5b5f45e9dde77df4e8da50ac2a102db6bfc2aa27 (diff)
download qpdf-ec538792fac039daa9636f9c94000b7bc1f3a669.tar.zst
Use inline image token type in tokenizer filter
-rw-r--r-- libqpdf/Pl_QPDFTokenizer.cc 69
-rw-r--r-- libqpdf/qpdf/Pl_QPDFTokenizer.hh 3
-rw-r--r-- qpdf/qpdf.testcov 2
3 files changed, 15 insertions, 59 deletions
diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc
index 8a54a640..3c737eec 100644
--- a/libqpdf/Pl_QPDFTokenizer.cc
+++ b/libqpdf/Pl_QPDFTokenizer.cc
@@ -12,10 +12,8 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
just_wrote_nl(false),
last_char_was_cr(false),
unread_char(false),
- char_to_unread('\0'),
- in_inline_image(false)
+ char_to_unread('\0')
{
- memset(this->image_buf, 0, IMAGE_BUF_SIZE);
}
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
@@ -56,37 +54,6 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
void
Pl_QPDFTokenizer::processChar(char ch)
{
- if (this->in_inline_image)
- {
- // Scan through the input looking for EI surrounded by
- // whitespace. If that pattern appears in the inline image's
- // representation, we're hosed, but this situation seems
- // excessively unlikely, and this code path is only followed
- // during content stream normalization, which is pretty much
- // used for debugging and human inspection of PDF files.
- memmove(this->image_buf,
- this->image_buf + 1,
- IMAGE_BUF_SIZE - 1);
- this->image_buf[IMAGE_BUF_SIZE - 1] = ch;
- if (strchr(" \t\n\v\f\r", this->image_buf[0]) &&
- (this->image_buf[1] == 'E') &&
- (this->image_buf[2] == 'I') &&
- strchr(" \t\n\v\f\r", this->image_buf[3]))
- {
- // We've found an EI operator. We've already written the
- // EI operator to output; terminate with a newline
- // character and resume normal processing.
- writeNext("\n", 1);
- this->in_inline_image = false;
- QTC::TC("qpdf", "Pl_QPDFTokenizer found EI");
- }
- else
- {
- writeNext(&ch, 1);
- }
- return;
- }
-
tokenizer.presentCharacter(ch);
QPDFTokenizer::Token token;
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
@@ -100,13 +67,8 @@ Pl_QPDFTokenizer::processChar(char ch)
if ((token.getType() == QPDFTokenizer::tt_word) &&
(token.getValue() == "ID"))
{
- // Suspend normal scanning until we find an EI token.
- this->in_inline_image = true;
- if (this->unread_char)
- {
- writeNext(&this->char_to_unread, 1);
- this->unread_char = false;
- }
+ QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
+ tokenizer.expectInlineImage();
}
}
else
@@ -171,21 +133,18 @@ void
Pl_QPDFTokenizer::finish()
{
this->tokenizer.presentEOF();
- if (! this->in_inline_image)
+ QPDFTokenizer::Token token;
+ if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
{
- QPDFTokenizer::Token token;
- if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
- {
- writeToken(token);
- if (unread_char)
- {
- if (this->char_to_unread == '\r')
- {
- this->char_to_unread = '\n';
- }
- writeNext(&this->char_to_unread, 1);
- }
- }
+ writeToken(token);
+ if (unread_char)
+ {
+ if (this->char_to_unread == '\r')
+ {
+ this->char_to_unread = '\n';
+ }
+ writeNext(&this->char_to_unread, 1);
+ }
}
if (! this->just_wrote_nl)
{
diff --git a/libqpdf/qpdf/Pl_QPDFTokenizer.hh b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
index 0d041577..e47064eb 100644
--- a/libqpdf/qpdf/Pl_QPDFTokenizer.hh
+++ b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
@@ -33,9 +33,6 @@ class Pl_QPDFTokenizer: public Pipeline
bool last_char_was_cr;
bool unread_char;
char char_to_unread;
- bool in_inline_image;
- static int const IMAGE_BUF_SIZE = 4; // must be >= 4
- char image_buf[IMAGE_BUF_SIZE];
};
#endif // __PL_QPDFTOKENIZER_HH__
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 11ab767c..57fd4fd4 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -182,7 +182,6 @@ QPDF_Stream getRawStreamData 0
QPDF_Stream getStreamData 0
QPDF_Stream expand filter abbreviation 0
qpdf-c called qpdf_read_memory 0
-Pl_QPDFTokenizer found EI 0
QPDF stream without newline 0
QPDF stream with CR only 0
QPDF stream with CRNL 0
@@ -304,3 +303,4 @@ qpdf-c called qpdf_set_newline_before_endstream 0
QPDF_Stream TIFF predictor 0
QPDFTokenizer EOF when not allowed 0
QPDFTokenizer inline image at EOF 0
+Pl_QPDFTokenizer found ID 0