about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf/Pl_QPDFTokenizer.cc
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2018-01-30 03:16:56 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2018-02-19 03:05:46 +0100
commit: ba453ba4fff442dc03ea04a3328aaa58bb8e6923 (patch)
tree: 76055d9f429c7aedf395c63ea23561e8e2729696 /libqpdf/Pl_QPDFTokenizer.cc
parent: ec538792fac039daa9636f9c94000b7bc1f3a669 (diff)
download: qpdf-ba453ba4fff442dc03ea04a3328aaa58bb8e6923.tar.zst
Use space tokens in tokenizer filter
Diffstat (limited to 'libqpdf/Pl_QPDFTokenizer.cc')
-rw-r--r-- libqpdf/Pl_QPDFTokenizer.cc 69
1 files changed, 36 insertions, 33 deletions
diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc
index 3c737eec..690d7bc9 100644
--- a/libqpdf/Pl_QPDFTokenizer.cc
+++ b/libqpdf/Pl_QPDFTokenizer.cc
@@ -8,12 +8,13 @@
Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
Pipeline(identifier, next),
- newline_after_next_token(false),
just_wrote_nl(false),
last_char_was_cr(false),
unread_char(false),
char_to_unread('\0')
{
+ tokenizer.allowEOF();
+ tokenizer.includeIgnorable();
}
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
@@ -37,8 +38,35 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
switch (token.getType())
{
+ case QPDFTokenizer::tt_space:
+ {
+ size_t len = value.length();
+ for (size_t i = 0; i < len; ++i)
+ {
+ char ch = value.at(i);
+ if (ch == '\r')
+ {
+ if ((i + 1 < len) && (value.at(i + 1) == '\n'))
+ {
+ // ignore
+ }
+ else
+ {
+ writeNext("\n", 1);
+ }
+ }
+ else
+ {
+ writeNext(&ch, 1);
+ }
+ }
+ }
+ value.clear();
+ break;
+
case QPDFTokenizer::tt_string:
value = QPDF_String(token.getValue()).unparse();
+
break;
case QPDFTokenizer::tt_name:
@@ -59,10 +87,14 @@ Pl_QPDFTokenizer::processChar(char ch)
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
{
writeToken(token);
- if (this->newline_after_next_token)
- {
+ std::string value = token.getRawValue();
+ QPDFTokenizer::token_type_e token_type = token.getType();
+ if (((token_type == QPDFTokenizer::tt_string) ||
+ (token_type == QPDFTokenizer::tt_name)) &&
+ ((value.find('\r') != std::string::npos) ||
+ (value.find('\n') != std::string::npos)))
+ {
writeNext("\n", 1);
- this->newline_after_next_token = false;
}
if ((token.getType() == QPDFTokenizer::tt_word) &&
(token.getValue() == "ID"))
@@ -71,35 +103,6 @@ Pl_QPDFTokenizer::processChar(char ch)
tokenizer.expectInlineImage();
}
}
- else
- {
- bool suppress = false;
- if ((ch == '\n') && (this->last_char_was_cr))
- {
- // Always ignore \n following \r
- suppress = true;
- }
-
- if ((this->last_char_was_cr = (ch == '\r')))
- {
- ch = '\n';
- }
-
- if (this->tokenizer.betweenTokens())
- {
- if (! suppress)
- {
- writeNext(&ch, 1);
- }
- }
- else
- {
- if (ch == '\n')
- {
- this->newline_after_next_token = true;
- }
- }
- }
}