author     Jay Berkenbilt <ejb@ql.org>  2018-01-30 02:57:04 +0100
committer  Jay Berkenbilt <ejb@ql.org>  2018-02-19 03:05:46 +0100
commit     fefe25030eaffdaf06a9e957b3255304682c71cf (patch)
tree       fa404200db521e085a711fa13f6952469665ae8e
parent     2699ecf13e8559b136ded1986bf18e1a0a51011f (diff)
download   qpdf-fefe25030eaffdaf06a9e957b3255304682c71cf.tar.zst
Inline image token type
-rw-r--r--  ChangeLog                                 71
-rw-r--r--  include/qpdf/QPDFTokenizer.hh             40
-rw-r--r--  libqpdf/QPDFTokenizer.cc                  68
-rw-r--r--  qpdf/qpdf.testcov                         17
-rw-r--r--  qpdf/qtest/qpdf/tokens-maxlen.out        650
-rw-r--r--  qpdf/qtest/qpdf/tokens-no-ignorable.out  319
-rw-r--r--  qpdf/qtest/qpdf/tokens.out               650
-rw-r--r--  qpdf/qtest/qpdf/tokens.pdf               bin 9120 -> 9438 bytes
-rw-r--r--  qpdf/test_tokenizer.cc                    18
9 files changed, 982 insertions, 851 deletions
diff --git a/ChangeLog b/ChangeLog
index e95e2370..e9dea347 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,34 +1,49 @@
2018-02-04 Jay Berkenbilt <ejb@ql.org>
* Significant lexer (tokenizer) enhancements. These are changes to
- the QPDFTokenizer class. These changes are of concern only to
- people who are operating with PDF files at the lexical layer
- using qpdf. They have little or no impact on most high-level
- interfaces or the command-line tool.
- * New token types tt_space and tt_comment to recognize
- whitespace and comments. this makes it possible to tokenize a
- PDF file or stream and preserve everything about it.
- * For backward compatibility, space and comment tokens are not
- returned by the tokenizer unless
- QPDFTokenizer.includeIgnorable() is called.
- * Better handling of null bytes. These are now included in space
- tokens rather than being their own "tt_word" tokens. This
- should have no impact on any correct PDF file and has no
- impact on output, but it may change offsets in some error
- messages when trying to parse contents of bad files. Under
- default operation, qpdf does not attempt to parse content
- streams, so this change is mostly invisible.
- * Bug fix to handling of bad tokens at ends of streams. Now,
- when allowEOF() has been called, these are treated as bad tokens
- (tt_bad or an exception, depending on invocation), and a
- separate tt_eof token is returned. Before the bad token
- contents were returned as the value of a tt_eof token. tt_eof
- tokens are always empty now.
- * Fix a bug that would, on rare occasions, report the offset in an
- error message in the wrong space because of spaces or comments
- adjacent to a bad token.
- * Clarify in comments exactly where the input source is
- positioned surrounding calls to readToken and getToken.
+ the QPDFTokenizer class. These changes are of concern only to
+ people who are operating with PDF files at the lexical layer using
+ qpdf. They have little or no impact on most high-level interfaces
+ or the command-line tool.
+
+ New token types tt_space and tt_comment to recognize whitespace
+ and comments. This makes it possible to tokenize a PDF file or
+ stream and preserve everything about it.
+
+ For backward compatibility, space and comment tokens are not
+ returned by the tokenizer unless QPDFTokenizer.includeIgnorable()
+ is called.
+
+ Better handling of null bytes. These are now included in space
+ tokens rather than being their own "tt_word" tokens. This should
+ have no impact on any correct PDF file and has no impact on
+ output, but it may change offsets in some error messages when
+ trying to parse contents of bad files. Under default operation,
+ qpdf does not attempt to parse content streams, so this change is
+ mostly invisible.
+
+ Bug fix to handling of bad tokens at ends of streams. Now, when
+ allowEOF() has been called, these are treated as bad tokens
+ (tt_bad or an exception, depending on invocation), and a
+ separate tt_eof token is returned. Before the bad token
+ contents were returned as the value of a tt_eof token. tt_eof
+ tokens are always empty now.
+
+ Fix a bug that would, on rare occasions, report the offset in an
+ error message in the wrong space because of spaces or comments
+ adjacent to a bad token.
+
+ Clarify in comments exactly where the input source is positioned
+ surrounding calls to readToken and getToken.
+
+ * Add a new token type for inline images. This token type is only
+ returned by QPDFTokenizer immediately following a call to
+ expectInlineImage(). This change includes internal refactoring of
+ a handful of places that all separately handled inline images. The
+ logic of detecting inline images in content streams is now handled
+ in one place in the code. Also, we are more flexible about what
+ characters may surround the EI operator that marks the end of an
+ inline image.
2018-02-04 Jay Berkenbilt <ejb@ql.org>
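
The ChangeLog entry above describes the pull-mode tokenizer API that the rest of this commit exercises. As a rough illustration only (not part of the commit), the sketch below reads every token from an already-opened input source, including the new tt_space and tt_comment tokens. The helper name dump_all_tokens and the way the InputSource is obtained are assumptions; the calls used (allowEOF, includeIgnorable, readToken, getType, getLastOffset) all appear in this diff.

#include <qpdf/QPDFTokenizer.hh>
#include <qpdf/InputSource.hh>
#include <qpdf/PointerHolder.hh>
#include <iostream>

// Hypothetical helper: print the offset and numeric type of every token,
// including whitespace and comments, until end of input.
static void dump_all_tokens(PointerHolder<InputSource> is)
{
    QPDFTokenizer tokenizer;
    tokenizer.allowEOF();          // end of input becomes a tt_eof token
    tokenizer.includeIgnorable();  // also return tt_space and tt_comment
    while (true)
    {
        QPDFTokenizer::Token token = tokenizer.readToken(is, "example", true);
        // is->getLastOffset() points at the start of the token just read;
        // is->tell() now points just past it.
        std::cout << is->getLastOffset() << ": "
                  << static_cast<int>(token.getType()) << std::endl;
        if (token.getType() == QPDFTokenizer::tt_eof)
        {
            break;
        }
    }
}
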
diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh
index 7d7f6132..fe2e95f7 100644
--- a/include/qpdf/QPDFTokenizer.hh
+++ b/include/qpdf/QPDFTokenizer.hh
@@ -34,7 +34,8 @@ class QPDFTokenizer
public:
// Token type tt_eof is only returned if allowEOF() is called on
// the tokenizer. tt_eof was introduced in QPDF version 4.1.
- // tt_space and tt_comment were added in QPDF version 8.
+ // tt_space, tt_comment, and tt_inline_image were added in QPDF
+ // version 8.
enum token_type_e
{
tt_bad,
@@ -54,6 +55,7 @@ class QPDFTokenizer
tt_eof,
tt_space,
tt_comment,
+ tt_inline_image,
};
class Token
@@ -128,11 +130,17 @@ class QPDFTokenizer
QPDF_DLL
void includeIgnorable();
- // Mode of operation:
+ // There are two modes of operation: push and pull. The pull
+ // method is easier but requires an input source. The push method
+ // is more complicated but can be used to tokenize a stream of
+ // incoming characters in a pipeline.
- // Keep presenting characters and calling getToken() until
- // getToken() returns true. When it does, be sure to check
- // unread_ch and to unread ch if it is true.
+ // Push mode:
+
+ // Keep presenting characters with presentCharacter() and
+ // presentEOF() and calling getToken() until getToken() returns
+ // true. When it does, be sure to check unread_ch and to unread ch
+ // if it is true.
// If these are called when a token is available, an exception
// will be thrown.
@@ -155,15 +163,30 @@ class QPDFTokenizer
QPDF_DLL
bool betweenTokens();
- // Read a token from an input source. Context describes the
+ // Pull mode:
+
+ // Read a token from an input source. Context describes the
// context in which the token is being read and is used in the
- // exception thrown if there is an error.
+ // exception thrown if there is an error. After a token is read,
+ // the position of the input source returned by input->tell()
+ // points to just after the token, and the input source's "last
+ // offset" as returned by input->getLastOffset() points to the
+ // beginning of the token.
QPDF_DLL
Token readToken(PointerHolder<InputSource> input,
std::string const& context,
bool allow_bad = false,
size_t max_len = 0);
+ // Calling this method puts the tokenizer in a state for reading
+ // inline images. In that state, it will return all data up to and
+ // including the next EI token. After you call this method, the
+ // next call to readToken (or the token created next time getToken
+ // returns true) will either be tt_inline_image or tt_bad. This is
+ // the only way readToken returns a tt_inline_image token.
+ QPDF_DLL
+ void expectInlineImage();
+
private:
// Do not implement copy or assignment
QPDFTokenizer(QPDFTokenizer const&);
@@ -171,10 +194,11 @@ class QPDFTokenizer
void resolveLiteral();
bool isSpace(char);
+ bool isDelimiter(char);
enum state_e {
st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt,
- st_literal, st_in_hexstring, st_token_ready
+ st_literal, st_in_hexstring, st_inline_image, st_token_ready
};
class Members
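
The comments above split the tokenizer into push and pull modes. As a minimal sketch of the push-mode contract only (this code is not in the commit), the function below feeds an in-memory string one character at a time, retrieves each token as it becomes available, and re-presents the unread delimiter as the comments require. The name tokenize_buffer is invented, and the default-constructed Token used as an out parameter is an assumption.

#include <qpdf/QPDFTokenizer.hh>
#include <string>
#include <vector>

// Hypothetical helper: tokenize a buffer by driving the tokenizer in push mode.
static std::vector<QPDFTokenizer::Token> tokenize_buffer(std::string const& data)
{
    QPDFTokenizer tokenizer;
    tokenizer.allowEOF();
    std::vector<QPDFTokenizer::Token> result;
    QPDFTokenizer::Token token;
    bool unread_ch = false;
    char ch = '\0';
    for (size_t i = 0; i < data.length(); ++i)
    {
        tokenizer.presentCharacter(data.at(i));
        if (tokenizer.getToken(token, unread_ch, ch))
        {
            result.push_back(token);
            if (unread_ch)
            {
                // The delimiter that ended the token was not consumed;
                // present it again so it can start the next token.
                tokenizer.presentCharacter(ch);
            }
        }
    }
    tokenizer.presentEOF();
    if (tokenizer.getToken(token, unread_ch, ch))
    {
        result.push_back(token);
    }
    return result;
}
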
diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc
index 776019c8..078b1af0 100644
--- a/libqpdf/QPDFTokenizer.cc
+++ b/libqpdf/QPDFTokenizer.cc
@@ -69,6 +69,12 @@ QPDFTokenizer::isSpace(char ch)
return ((ch == '\0') || QUtil::is_space(ch));
}
+bool
+QPDFTokenizer::isDelimiter(char ch)
+{
+ return (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0);
+}
+
void
QPDFTokenizer::resolveLiteral()
{
@@ -95,7 +101,7 @@ QPDFTokenizer::resolveLiteral()
if (ch == '\0')
{
this->m->type = tt_bad;
- QTC::TC("qpdf", "QPDF_Tokenizer null in name");
+ QTC::TC("qpdf", "QPDFTokenizer null in name");
this->m->error_message =
"null character not allowed in name token";
nval += "#00";
@@ -108,7 +114,7 @@ QPDFTokenizer::resolveLiteral()
}
else
{
- QTC::TC("qpdf", "QPDF_Tokenizer bad name");
+ QTC::TC("qpdf", "QPDFTokenizer bad name");
this->m->type = tt_bad;
this->m->error_message = "invalid name token";
nval += *p;
@@ -209,7 +215,7 @@ QPDFTokenizer::presentCharacter(char ch)
if (ch == ')')
{
this->m->type = tt_bad;
- QTC::TC("qpdf", "QPDF_Tokenizer bad )");
+ QTC::TC("qpdf", "QPDFTokenizer bad )");
this->m->error_message = "unexpected )";
this->m->state = st_token_ready;
}
@@ -301,7 +307,7 @@ QPDFTokenizer::presentCharacter(char ch)
{
this->m->val = ">";
this->m->type = tt_bad;
- QTC::TC("qpdf", "QPDF_Tokenizer bad >");
+ QTC::TC("qpdf", "QPDFTokenizer bad >");
this->m->error_message = "unexpected >";
this->m->unread_char = true;
this->m->char_to_unread = ch;
@@ -403,7 +409,7 @@ QPDFTokenizer::presentCharacter(char ch)
}
else if (this->m->state == st_literal)
{
- if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0)
+ if (isDelimiter(ch))
{
// A C-locale whitespace character or delimiter terminates
// token. It is important to unread the whitespace
@@ -423,6 +429,25 @@ QPDFTokenizer::presentCharacter(char ch)
this->m->val += ch;
}
}
+ else if (this->m->state == st_inline_image)
+ {
+ size_t len = this->m->val.length();
+ if ((len >= 4) &&
+ isDelimiter(this->m->val.at(len-4)) &&
+ (this->m->val.at(len-3) == 'E') &&
+ (this->m->val.at(len-2) == 'I') &&
+ isDelimiter(this->m->val.at(len-1)))
+ {
+ this->m->type = tt_inline_image;
+ this->m->unread_char = true;
+ this->m->char_to_unread = ch;
+ this->m->state = st_token_ready;
+ }
+ else
+ {
+ this->m->val += ch;
+ }
+ }
else
{
handled = false;
@@ -468,7 +493,7 @@ QPDFTokenizer::presentCharacter(char ch)
else
{
this->m->type = tt_bad;
- QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character");
+ QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
this->m->error_message = std::string("invalid character (") +
ch + ") in hexstring";
this->m->state = st_token_ready;
@@ -495,9 +520,23 @@ QPDFTokenizer::presentCharacter(char ch)
void
QPDFTokenizer::presentEOF()
{
+ if (this->m->state == st_inline_image)
+ {
+ size_t len = this->m->val.length();
+ if ((len >= 3) &&
+ isDelimiter(this->m->val.at(len-3)) &&
+ (this->m->val.at(len-2) == 'E') &&
+ (this->m->val.at(len-1) == 'I'))
+ {
+ QTC::TC("qpdf", "QPDFTokenizer inline image at EOF");
+ this->m->type = tt_inline_image;
+ this->m->state = st_token_ready;
+ }
+ }
+
if (this->m->state == st_literal)
{
- QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token");
+ QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
resolveLiteral();
}
else if ((this->m->include_ignorable) && (this->m->state == st_in_space))
@@ -514,7 +553,7 @@ QPDFTokenizer::presentEOF()
}
else if (this->m->state != st_token_ready)
{
- QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token");
+ QTC::TC("qpdf", "QPDFTokenizer EOF reading token");
this->m->type = tt_bad;
this->m->error_message = "EOF while reading token";
}
@@ -522,6 +561,17 @@ QPDFTokenizer::presentEOF()
this->m->state = st_token_ready;
}
+void
+QPDFTokenizer::expectInlineImage()
+{
+ if (this->m->state != st_top)
+ {
+ throw std::logic_error("QPDFTokenizer::expectInlineImage called"
+ " when tokenizer is in improper state");
+ }
+ this->m->state = st_inline_image;
+}
+
bool
QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
{
@@ -572,7 +622,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
presented_eof = true;
if ((this->m->type == tt_eof) && (! this->m->allow_eof))
{
- QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed");
+ QTC::TC("qpdf", "QPDFTokenizer EOF when not allowed");
this->m->type = tt_bad;
this->m->error_message = "unexpected EOF";
offset = input->getLastOffset();
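
The new st_inline_image state above only ends the token once the accumulated data ends with "EI" surrounded by delimiters (or with a delimiter plus "EI" at end of input). To show how a caller is meant to drive it, here is a minimal sketch that is not part of the commit: after the tokenizer has returned the ID operator from a content stream, switch to inline-image mode and read the image data as a single tt_inline_image token. The helper name read_inline_image_data is invented, and Token::getValue() is assumed to return the token's value as it does for other token types.

#include <qpdf/QPDFTokenizer.hh>
#include <qpdf/InputSource.hh>
#include <qpdf/PointerHolder.hh>
#include <string>

// Hypothetical helper: call immediately after reading the "ID" word token
// from a content stream.
static std::string read_inline_image_data(
    QPDFTokenizer& tokenizer, PointerHolder<InputSource> is)
{
    tokenizer.expectInlineImage();
    QPDFTokenizer::Token token = tokenizer.readToken(is, "inline image", true);
    if (token.getType() != QPDFTokenizer::tt_inline_image)
    {
        // tt_bad here means no EI marker was found before end of input.
        return std::string();
    }
    // The value contains everything up to and including the EI operator.
    return token.getValue();
}
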
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 914f6887..11ab767c 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -64,11 +64,11 @@ QPDF stream length not integer 0
QPDF missing endstream 0
QPDFObjectHandle bad dictionary close 0
QPDF can't find xref 0
-QPDF_Tokenizer bad ) 0
-QPDF_Tokenizer bad > 0
-QPDF_Tokenizer bad hexstring character 0
-QPDF_Tokenizer null in name 0
-QPDF_Tokenizer bad name 0
+QPDFTokenizer bad ) 0
+QPDFTokenizer bad > 0
+QPDFTokenizer bad hexstring character 0
+QPDFTokenizer null in name 0
+QPDFTokenizer bad name 0
QPDF_Stream invalid filter 0
QPDF UseOutlines but no Outlines 0
QPDFObjectHandle clone bool 0
@@ -233,8 +233,8 @@ QPDFWriter copy use_aes 1
QPDFObjectHandle indirect without context 0
QPDFObjectHandle trailing data in parse 0
qpdf pages encryption password 0
-QPDF_Tokenizer EOF reading token 0
-QPDF_Tokenizer EOF reading appendable token 0
+QPDFTokenizer EOF reading token 0
+QPDFTokenizer EOF reading appendable token 0
QPDFWriter extra header text no newline 0
QPDFWriter extra header text add newline 0
QPDF bogus 0 offset 0
@@ -302,4 +302,5 @@ qpdf-c called qpdf_set_compress_streams 0
qpdf-c called qpdf_set_preserve_unreferenced_objects 0
qpdf-c called qpdf_set_newline_before_endstream 0
QPDF_Stream TIFF predictor 0
-QPDF_Tokenizer EOF when not allowed 0
+QPDFTokenizer EOF when not allowed 0
+QPDFTokenizer inline image at EOF 0
diff --git a/qpdf/qtest/qpdf/tokens-maxlen.out b/qpdf/qtest/qpdf/tokens-maxlen.out
index 01283fc8..6eb8072b 100644
--- a/qpdf/qtest/qpdf/tokens-maxlen.out
+++ b/qpdf/qtest/qpdf/tokens-maxlen.out
@@ -222,307 +222,311 @@ skipping to endstream
7601: word: endstream
7610: space: \x0a
7611: word: endobj
-7617: space: \x0a\x0a
-7619: integer: 46
-7621: space:
-7622: integer: 0
-7623: space:
-7624: word: obj
-7627: space: \x0a
-7628: integer: 68
-7630: space: \x0a
-7631: word: endobj
-7637: space: \x0a\x0a
-7639: comment: %% Contents for page 6
-7661: space: \x0a
-7662: comment: %% Original object ID: 42 0
-7689: space: \x0a
-7690: integer: 47
-7692: space:
-7693: integer: 0
-7694: space:
-7695: word: obj
-7698: space: \x0a
-7699: dict_open: <<
-7701: space: \x0a
-7704: name: /Length
-7711: space:
-7712: integer: 48
-7714: space:
-7715: integer: 0
-7716: space:
-7717: word: R
-7718: space: \x0a
-7719: dict_close: >>
-7721: space: \x0a
-7722: word: stream
+7617: space: \x0a
+7618: comment: %QDF: ignore_newline
+7638: space: \x0a\x0a
+7640: integer: 46
+7642: space:
+7643: integer: 0
+7644: space:
+7645: word: obj
+7648: space: \x0a
+7649: integer: 67
+7651: space: \x0a
+7652: word: endobj
+7658: space: \x0a\x0a
+7660: comment: %% Contents for page 6
+7682: space: \x0a
+7683: comment: %% Original object ID: 42 0
+7710: space: \x0a
+7711: integer: 47
+7713: space:
+7714: integer: 0
+7715: space:
+7716: word: obj
+7719: space: \x0a
+7720: dict_open: <<
+7722: space: \x0a
+7725: name: /Length
+7732: space:
+7733: integer: 48
+7735: space:
+7736: integer: 0
+7737: space:
+7738: word: R
+7739: space: \x0a
+7740: dict_close: >>
+7742: space: \x0a
+7743: word: stream
skipping to endstream
-7773: word: endstream
-7782: space: \x0a
-7783: word: endobj
-7789: space: \x0a\x0a
-7791: integer: 48
-7793: space:
-7794: integer: 0
-7795: space:
-7796: word: obj
-7799: space: \x0a
-7800: integer: 44
-7802: space: \x0a
-7803: word: endobj
-7809: space: \x0a\x0a
-7811: comment: %% Contents for page 7
-7833: space: \x0a
-7834: comment: %% Original object ID: 43 0
-7861: space: \x0a
-7862: integer: 49
-7864: space:
-7865: integer: 0
-7866: space:
-7867: word: obj
-7870: space: \x0a
-7871: dict_open: <<
-7873: space: \x0a
-7876: name: /Length
-7883: space:
-7884: integer: 50
-7886: space:
-7887: integer: 0
-7888: space:
-7889: word: R
-7890: space: \x0a
-7891: dict_close: >>
-7893: space: \x0a
-7894: word: stream
+7794: word: endstream
+7803: space: \x0a
+7804: word: endobj
+7810: space: \x0a\x0a
+7812: integer: 48
+7814: space:
+7815: integer: 0
+7816: space:
+7817: word: obj
+7820: space: \x0a
+7821: integer: 44
+7823: space: \x0a
+7824: word: endobj
+7830: space: \x0a\x0a
+7832: comment: %% Contents for page 7
+7854: space: \x0a
+7855: comment: %% Original object ID: 43 0
+7882: space: \x0a
+7883: integer: 49
+7885: space:
+7886: integer: 0
+7887: space:
+7888: word: obj
+7891: space: \x0a
+7892: dict_open: <<
+7894: space: \x0a
+7897: name: /Length
+7904: space:
+7905: integer: 50
+7907: space:
+7908: integer: 0
+7909: space:
+7910: word: R
+7911: space: \x0a
+7912: dict_close: >>
+7914: space: \x0a
+7915: word: stream
skipping to endstream
-7945: word: endstream
-7954: space: \x0a
-7955: word: endobj
-7961: space: \x0a\x0a
-7963: integer: 50
-7965: space:
-7966: integer: 0
-7967: space:
-7968: word: obj
-7971: space: \x0a
-7972: integer: 44
-7974: space: \x0a
-7975: word: endobj
-7981: space: \x0a\x0a
-7983: comment: %% Contents for page 8
-8005: space: \x0a
-8006: comment: %% Original object ID: 44 0
-8033: space: \x0a
-8034: integer: 51
-8036: space:
-8037: integer: 0
-8038: space:
-8039: word: obj
-8042: space: \x0a
-8043: dict_open: <<
-8045: space: \x0a
-8048: name: /Length
-8055: space:
-8056: integer: 52
-8058: space:
-8059: integer: 0
-8060: space:
-8061: word: R
-8062: space: \x0a
-8063: dict_close: >>
-8065: space: \x0a
-8066: word: stream
+8241: word: endstream
+8250: space: \x0a
+8251: word: endobj
+8257: space: \x0a
+8258: comment: %QDF: ignore_newline
+8278: space: \x0a\x0a
+8280: integer: 50
+8282: space:
+8283: integer: 0
+8284: space:
+8285: word: obj
+8288: space: \x0a
+8289: integer: 318
+8292: space: \x0a
+8293: word: endobj
+8299: space: \x0a\x0a
+8301: comment: %% Contents for page 8
+8323: space: \x0a
+8324: comment: %% Original object ID: 44 0
+8351: space: \x0a
+8352: integer: 51
+8354: space:
+8355: integer: 0
+8356: space:
+8357: word: obj
+8360: space: \x0a
+8361: dict_open: <<
+8363: space: \x0a
+8366: name: /Length
+8373: space:
+8374: integer: 52
+8376: space:
+8377: integer: 0
+8378: space:
+8379: word: R
+8380: space: \x0a
+8381: dict_close: >>
+8383: space: \x0a
+8384: word: stream
skipping to endstream
-8117: word: endstream
-8126: space: \x0a
-8127: word: endobj
-8133: space: \x0a\x0a
-8135: integer: 52
-8137: space:
-8138: integer: 0
-8139: space:
-8140: word: obj
-8143: space: \x0a
-8144: integer: 44
-8146: space: \x0a
-8147: word: endobj
-8153: space: \x0a\x0a
-8155: comment: %% Contents for page 9
-8177: space: \x0a
-8178: comment: %% Original object ID: 45 0
-8205: space: \x0a
-8206: integer: 53
-8208: space:
-8209: integer: 0
-8210: space:
-8211: word: obj
-8214: space: \x0a
-8215: dict_open: <<
-8217: space: \x0a
-8220: name: /Length
-8227: space:
-8228: integer: 54
-8230: space:
-8231: integer: 0
-8232: space:
-8233: word: R
-8234: space: \x0a
-8235: dict_close: >>
-8237: space: \x0a
-8238: word: stream
-skipping to endstream
-8289: word: endstream
-8298: space: \x0a
-8299: word: endobj
-8305: space: \x0a\x0a
-8307: integer: 54
-8309: space:
-8310: integer: 0
-8311: space:
-8312: word: obj
-8315: space: \x0a
-8316: integer: 44
-8318: space: \x0a
-8319: word: endobj
-8325: space: \x0a\x0a
-8327: comment: %% Contents for page 10
-8350: space: \x0a
-8351: comment: %% Original object ID: 46 0
-8378: space: \x0a
-8379: integer: 55
-8381: space:
-8382: integer: 0
-8383: space:
-8384: word: obj
-8387: space: \x0a
-8388: dict_open: <<
-8390: space: \x0a
-8393: name: /Length
-8400: space:
-8401: integer: 56
-8403: space:
-8404: integer: 0
-8405: space:
-8406: word: R
-8407: space: \x0a
-8408: dict_close: >>
-8410: space: \x0a
-8411: word: stream
-skipping to endstream
-8462: word: endstream
-8471: space: \x0a
-8472: word: endobj
-8478: space: \x0a\x0a
-8480: integer: 56
-8482: space:
-8483: integer: 0
-8484: space:
-8485: word: obj
-8488: space: \x0a
-8489: integer: 44
-8491: space: \x0a
-8492: word: endobj
-8498: space: \x0a\x0a
-8500: comment: %% Contents for page 11
+8435: word: endstream
+8444: space: \x0a
+8445: word: endobj
+8451: space: \x0a\x0a
+8453: integer: 52
+8455: space:
+8456: integer: 0
+8457: space:
+8458: word: obj
+8461: space: \x0a
+8462: integer: 44
+8464: space: \x0a
+8465: word: endobj
+8471: space: \x0a\x0a
+8473: comment: %% Contents for page 9
+8495: space: \x0a
+8496: comment: %% Original object ID: 45 0
8523: space: \x0a
-8524: comment: %% Original object ID: 47 0
-8551: space: \x0a
-8552: integer: 57
-8554: space:
-8555: integer: 0
-8556: space:
-8557: word: obj
-8560: space: \x0a
-8561: dict_open: <<
-8563: space: \x0a
-8566: name: /Length
-8573: space:
-8574: integer: 58
-8576: space:
-8577: integer: 0
-8578: space:
-8579: word: R
-8580: space: \x0a
-8581: dict_close: >>
-8583: space: \x0a
-8584: word: stream
+8524: integer: 53
+8526: space:
+8527: integer: 0
+8528: space:
+8529: word: obj
+8532: space: \x0a
+8533: dict_open: <<
+8535: space: \x0a
+8538: name: /Length
+8545: space:
+8546: integer: 54
+8548: space:
+8549: integer: 0
+8550: space:
+8551: word: R
+8552: space: \x0a
+8553: dict_close: >>
+8555: space: \x0a
+8556: word: stream
skipping to endstream
-8635: word: endstream
-8644: space: \x0a
-8645: word: endobj
-8651: space: \x0a\x0a
-8653: integer: 58
-8655: space:
-8656: integer: 0
-8657: space:
-8658: word: obj
-8661: space: \x0a
-8662: integer: 44
-8664: space: \x0a
-8665: word: endobj
-8671: space: \x0a\x0a
-8673: integer: 59
-8675: space:
-8676: integer: 0
-8677: space:
-8678: word: obj
-8681: space: \x0a
-8682: dict_open: <<
-8684: space: \x0a
-8687: name: /Type
-8692: space:
-8693: name: /XRef
-8698: space: \x0a
-8701: name: /Length
-8708: space:
-8709: integer: 240
-8712: space: \x0a
-8715: name: /W
-8717: space:
-8718: array_open: [
-8719: space:
-8720: integer: 1
+8607: word: endstream
+8616: space: \x0a
+8617: word: endobj
+8623: space: \x0a\x0a
+8625: integer: 54
+8627: space:
+8628: integer: 0
+8629: space:
+8630: word: obj
+8633: space: \x0a
+8634: integer: 44
+8636: space: \x0a
+8637: word: endobj
+8643: space: \x0a\x0a
+8645: comment: %% Contents for page 10
+8668: space: \x0a
+8669: comment: %% Original object ID: 46 0
+8696: space: \x0a
+8697: integer: 55
+8699: space:
+8700: integer: 0
+8701: space:
+8702: word: obj
+8705: space: \x0a
+8706: dict_open: <<
+8708: space: \x0a
+8711: name: /Length
+8718: space:
+8719: integer: 56
8721: space:
-8722: integer: 2
+8722: integer: 0
8723: space:
-8724: integer: 1
-8725: space:
-8726: array_close: ]
-8727: space: \x0a
-8730: name: /Root
-8735: space:
-8736: integer: 2
-8737: space:
-8738: integer: 0
-8739: space:
-8740: word: R
-8741: space: \x0a
-8744: name: /Size
-8749: space:
-8750: integer: 60
-8752: space: \x0a
-8755: name: /ID
-8758: space:
-8759: array_open: [
-8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
-8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
-8828: array_close: ]
-8829: space: \x0a
-8830: dict_close: >>
-8832: space: \x0a
-8833: word: stream
+8724: word: R
+8725: space: \x0a
+8726: dict_close: >>
+8728: space: \x0a
+8729: word: stream
+skipping to endstream
+8780: word: endstream
+8789: space: \x0a
+8790: word: endobj
+8796: space: \x0a\x0a
+8798: integer: 56
+8800: space:
+8801: integer: 0
+8802: space:
+8803: word: obj
+8806: space: \x0a
+8807: integer: 44
+8809: space: \x0a
+8810: word: endobj
+8816: space: \x0a\x0a
+8818: comment: %% Contents for page 11
+8841: space: \x0a
+8842: comment: %% Original object ID: 47 0
+8869: space: \x0a
+8870: integer: 57
+8872: space:
+8873: integer: 0
+8874: space:
+8875: word: obj
+8878: space: \x0a
+8879: dict_open: <<
+8881: space: \x0a
+8884: name: /Length
+8891: space:
+8892: integer: 58
+8894: space:
+8895: integer: 0
+8896: space:
+8897: word: R
+8898: space: \x0a
+8899: dict_close: >>
+8901: space: \x0a
+8902: word: stream
+skipping to endstream
+8953: word: endstream
+8962: space: \x0a
+8963: word: endobj
+8969: space: \x0a\x0a
+8971: integer: 58
+8973: space:
+8974: integer: 0
+8975: space:
+8976: word: obj
+8979: space: \x0a
+8980: integer: 44
+8982: space: \x0a
+8983: word: endobj
+8989: space: \x0a\x0a
+8991: integer: 59
+8993: space:
+8994: integer: 0
+8995: space:
+8996: word: obj
+8999: space: \x0a
+9000: dict_open: <<
+9002: space: \x0a
+9005: name: /Type
+9010: space:
+9011: name: /XRef
+9016: space: \x0a
+9019: name: /Length
+9026: space:
+9027: integer: 240
+9030: space: \x0a
+9033: name: /W
+9035: space:
+9036: array_open: [
+9037: space:
+9038: integer: 1
+9039: space:
+9040: integer: 2
+9041: space:
+9042: integer: 1
+9043: space:
+9044: array_close: ]
+9045: space: \x0a
+9048: name: /Root
+9053: space:
+9054: integer: 2
+9055: space:
+9056: integer: 0
+9057: space:
+9058: word: R
+9059: space: \x0a
+9062: name: /Size
+9067: space:
+9068: integer: 60
+9070: space: \x0a
+9073: name: /ID
+9076: space:
+9077: array_open: [
+9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
+9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
+9146: array_close: ]
+9147: space: \x0a
+9148: dict_close: >>
+9150: space: \x0a
+9151: word: stream
skipping to endstream
-9081: word: endstream
-9090: space: \x0a
-9091: word: endobj
-9097: space: \x0a\x0a
-9099: word: startxref
-9108: space: \x0a
-9109: integer: 8673
-9113: space: \x0a
-9114: comment: %%EOF
-9119: space: \x0a
-9120: eof
+9399: word: endstream
+9408: space: \x0a
+9409: word: endobj
+9415: space: \x0a\x0a
+9417: word: startxref
+9426: space: \x0a
+9427: integer: 8991
+9431: space: \x0a
+9432: comment: %%EOF
+9437: space: \x0a
+9438: eof
--- END FILE ---
--- BEGIN PAGE 1 ---
0: word: BT
@@ -595,9 +599,7 @@ skipping to endstream
103: dict_close: >>
105: space: \x0a
106: word: ID
-skipping to EI
-352: word: EI
-354: space: \x0a
+108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
355: word: BT
357: space: \x0a
360: name: /F1
@@ -743,13 +745,11 @@ skipping to EI
47: word: ET
49: space: \x0a\x00\x0a
52: name: /ThisMustBeLast
-67: space: \x0a
-68: eof
+67: eof
--- END PAGE 5 ---
--- BEGIN PAGE 6 ---
0: word: ID
-skipping to EI
-EI not found
+EI not found; resuming normal scanning
2: space: \x0a
5: name: /F1
8: space:
@@ -772,27 +772,37 @@ EI not found
44: eof
--- END PAGE 6 ---
--- BEGIN PAGE 7 ---
-0: word: BT
-2: space: \x0a
-5: name: /F1
-8: space:
-9: integer: 24
-11: space:
-12: word: Tf
-14: space: \x0a
-17: integer: 72
+0: name: /potato
+7: space: \x0a
+8: word: BI
+10: space: \x0a
+11: name: /CS
+14: space:
+15: name: /G
+17: name: /W
19: space:
-20: integer: 720
-23: space:
-24: word: Td
-26: space: \x0a
-29: string: Potato (raw: (Potato))
-37: space:
-38: word: Tj
-40: space: \x0a
-41: word: ET
-43: space: \x0a
-44: eof
+20: integer: 66
+22: name: /H
+24: space:
+25: integer: 47
+27: name: /BPC
+31: space:
+32: integer: 8
+33: name: /F
+35: name: /Fl
+38: name: /DP
+41: dict_open: <<
+43: name: /Predictor
+53: space:
+54: integer: 15
+56: name: /Columns
+64: space:
+65: integer: 66
+67: dict_close: >>
+69: space: \x0a
+70: word: ID
+72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
+318: eof
--- END PAGE 7 ---
--- BEGIN PAGE 8 ---
0: word: BT
diff --git a/qpdf/qtest/qpdf/tokens-no-ignorable.out b/qpdf/qtest/qpdf/tokens-no-ignorable.out
index 2ad1e3a6..3bbef579 100644
--- a/qpdf/qtest/qpdf/tokens-no-ignorable.out
+++ b/qpdf/qtest/qpdf/tokens-no-ignorable.out
@@ -101,152 +101,152 @@ skipping to endstream
skipping to endstream
7601: word: endstream
7611: word: endobj
-7619: integer: 46
-7622: integer: 0
-7624: word: obj
-7628: integer: 68
-7631: word: endobj
-7690: integer: 47
-7693: integer: 0
-7695: word: obj
-7699: dict_open: <<
-7704: name: /Length
-7712: integer: 48
-7715: integer: 0
-7717: word: R
-7719: dict_close: >>
-7722: word: stream
+7640: integer: 46
+7643: integer: 0
+7645: word: obj
+7649: integer: 67
+7652: word: endobj
+7711: integer: 47
+7714: integer: 0
+7716: word: obj
+7720: dict_open: <<
+7725: name: /Length
+7733: integer: 48
+7736: integer: 0
+7738: word: R
+7740: dict_close: >>
+7743: word: stream
skipping to endstream
-7773: word: endstream
-7783: word: endobj
-7791: integer: 48
-7794: integer: 0
-7796: word: obj
-7800: integer: 44
-7803: word: endobj
-7862: integer: 49
-7865: integer: 0
-7867: word: obj
-7871: dict_open: <<
-7876: name: /Length
-7884: integer: 50
-7887: integer: 0
-7889: word: R
-7891: dict_close: >>
-7894: word: stream
+7794: word: endstream
+7804: word: endobj
+7812: integer: 48
+7815: integer: 0
+7817: word: obj
+7821: integer: 44
+7824: word: endobj
+7883: integer: 49
+7886: integer: 0
+7888: word: obj
+7892: dict_open: <<
+7897: name: /Length
+7905: integer: 50
+7908: integer: 0
+7910: word: R
+7912: dict_close: >>
+7915: word: stream
skipping to endstream
-7945: word: endstream
-7955: word: endobj
-7963: integer: 50
-7966: integer: 0
-7968: word: obj
-7972: integer: 44
-7975: word: endobj
-8034: integer: 51
-8037: integer: 0
-8039: word: obj
-8043: dict_open: <<
-8048: name: /Length
-8056: integer: 52
-8059: integer: 0
-8061: word: R
-8063: dict_close: >>
-8066: word: stream
+8241: word: endstream
+8251: word: endobj
+8280: integer: 50
+8283: integer: 0
+8285: word: obj
+8289: integer: 318
+8293: word: endobj
+8352: integer: 51
+8355: integer: 0
+8357: word: obj
+8361: dict_open: <<
+8366: name: /Length
+8374: integer: 52
+8377: integer: 0
+8379: word: R
+8381: dict_close: >>
+8384: word: stream
skipping to endstream
-8117: word: endstream
-8127: word: endobj
-8135: integer: 52
-8138: integer: 0
-8140: word: obj
-8144: integer: 44
-8147: word: endobj
-8206: integer: 53
-8209: integer: 0
-8211: word: obj
-8215: dict_open: <<
-8220: name: /Length
-8228: integer: 54
-8231: integer: 0
-8233: word: R
-8235: dict_close: >>
-8238: word: stream
+8435: word: endstream
+8445: word: endobj
+8453: integer: 52
+8456: integer: 0
+8458: word: obj
+8462: integer: 44
+8465: word: endobj
+8524: integer: 53
+8527: integer: 0
+8529: word: obj
+8533: dict_open: <<
+8538: name: /Length
+8546: integer: 54
+8549: integer: 0
+8551: word: R
+8553: dict_close: >>
+8556: word: stream
skipping to endstream
-8289: word: endstream
-8299: word: endobj
-8307: integer: 54
-8310: integer: 0
-8312: word: obj
-8316: integer: 44
-8319: word: endobj
-8379: integer: 55
-8382: integer: 0
-8384: word: obj
-8388: dict_open: <<
-8393: name: /Length
-8401: integer: 56
-8404: integer: 0
-8406: word: R
-8408: dict_close: >>
-8411: word: stream
+8607: word: endstream
+8617: word: endobj
+8625: integer: 54
+8628: integer: 0
+8630: word: obj
+8634: integer: 44
+8637: word: endobj
+8697: integer: 55
+8700: integer: 0
+8702: word: obj
+8706: dict_open: <<
+8711: name: /Length
+8719: integer: 56
+8722: integer: 0
+8724: word: R
+8726: dict_close: >>
+8729: word: stream
skipping to endstream
-8462: word: endstream
-8472: word: endobj
-8480: integer: 56
-8483: integer: 0
-8485: word: obj
-8489: integer: 44
-8492: word: endobj
-8552: integer: 57
-8555: integer: 0
-8557: word: obj
-8561: dict_open: <<
-8566: name: /Length
-8574: integer: 58
-8577: integer: 0
-8579: word: R
-8581: dict_close: >>
-8584: word: stream
+8780: word: endstream
+8790: word: endobj
+8798: integer: 56
+8801: integer: 0
+8803: word: obj
+8807: integer: 44
+8810: word: endobj
+8870: integer: 57
+8873: integer: 0
+8875: word: obj
+8879: dict_open: <<
+8884: name: /Length
+8892: integer: 58
+8895: integer: 0
+8897: word: R
+8899: dict_close: >>
+8902: word: stream
skipping to endstream
-8635: word: endstream
-8645: word: endobj
-8653: integer: 58
-8656: integer: 0
-8658: word: obj
-8662: integer: 44
-8665: word: endobj
-8673: integer: 59
-8676: integer: 0
-8678: word: obj
-8682: dict_open: <<
-8687: name: /Type
-8693: name: /XRef
-8701: name: /Length
-8709: integer: 240
-8715: name: /W
-8718: array_open: [
-8720: integer: 1
-8722: integer: 2
-8724: integer: 1
-8726: array_close: ]
-8730: name: /Root
-8736: integer: 2
-8738: integer: 0
-8740: word: R
-8744: name: /Size
-8750: integer: 60
-8755: name: /ID
-8759: array_open: [
-8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
-8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
-8828: array_close: ]
-8830: dict_close: >>
-8833: word: stream
+8953: word: endstream
+8963: word: endobj
+8971: integer: 58
+8974: integer: 0
+8976: word: obj
+8980: integer: 44
+8983: word: endobj
+8991: integer: 59
+8994: integer: 0
+8996: word: obj
+9000: dict_open: <<
+9005: name: /Type
+9011: name: /XRef
+9019: name: /Length
+9027: integer: 240
+9033: name: /W
+9036: array_open: [
+9038: integer: 1
+9040: integer: 2
+9042: integer: 1
+9044: array_close: ]
+9048: name: /Root
+9054: integer: 2
+9056: integer: 0
+9058: word: R
+9062: name: /Size
+9068: integer: 60
+9073: name: /ID
+9077: array_open: [
+9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
+9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
+9146: array_close: ]
+9148: dict_close: >>
+9151: word: stream
skipping to endstream
-9081: word: endstream
-9091: word: endobj
-9099: word: startxref
-9109: integer: 8673
-9120: eof
+9399: word: endstream
+9409: word: endobj
+9417: word: startxref
+9427: integer: 8991
+9438: eof
--- END FILE ---
--- BEGIN PAGE 1 ---
0: word: BT
@@ -291,8 +291,7 @@ skipping to endstream
101: integer: 66
103: dict_close: >>
106: word: ID
-skipping to EI
-352: word: EI
+108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
355: word: BT
360: name: /F1
364: integer: 24
@@ -374,12 +373,11 @@ skipping to EI
44: word: Tj
47: word: ET
52: name: /ThisMustBeLast
-68: eof
+67: eof
--- END PAGE 5 ---
--- BEGIN PAGE 6 ---
0: word: ID
-skipping to EI
-EI not found
+EI not found; resuming normal scanning
5: name: /F1
9: integer: 24
12: word: Tf
@@ -392,17 +390,28 @@ EI not found
44: eof
--- END PAGE 6 ---
--- BEGIN PAGE 7 ---
-0: word: BT
-5: name: /F1
-9: integer: 24
-12: word: Tf
-17: integer: 72
-20: integer: 720
-24: word: Td
-29: string: Potato (raw: (Potato))
-38: word: Tj
-41: word: ET
-44: eof
+0: name: /potato
+8: word: BI
+11: name: /CS
+15: name: /G
+17: name: /W
+20: integer: 66
+22: name: /H
+25: integer: 47
+27: name: /BPC
+32: integer: 8
+33: name: /F
+35: name: /Fl
+38: name: /DP
+41: dict_open: <<
+43: name: /Predictor
+54: integer: 15
+56: name: /Columns
+65: integer: 66
+67: dict_close: >>
+70: word: ID
+72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
+318: eof
--- END PAGE 7 ---
--- BEGIN PAGE 8 ---
0: word: BT
diff --git a/qpdf/qtest/qpdf/tokens.out b/qpdf/qtest/qpdf/tokens.out
index 2e08f3e0..ee1f6d3e 100644
--- a/qpdf/qtest/qpdf/tokens.out
+++ b/qpdf/qtest/qpdf/tokens.out
@@ -222,307 +222,311 @@ skipping to endstream
7601: word: endstream
7610: space: \x0a
7611: word: endobj
-7617: space: \x0a\x0a
-7619: integer: 46
-7621: space:
-7622: integer: 0
-7623: space:
-7624: word: obj
-7627: space: \x0a
-7628: integer: 68
-7630: space: \x0a
-7631: word: endobj
-7637: space: \x0a\x0a
-7639: comment: %% Contents for page 6
-7661: space: \x0a
-7662: comment: %% Original object ID: 42 0
-7689: space: \x0a
-7690: integer: 47
-7692: space:
-7693: integer: 0
-7694: space:
-7695: word: obj
-7698: space: \x0a
-7699: dict_open: <<
-7701: space: \x0a
-7704: name: /Length
-7711: space:
-7712: integer: 48
-7714: space:
-7715: integer: 0
-7716: space:
-7717: word: R
-7718: space: \x0a
-7719: dict_close: >>
-7721: space: \x0a
-7722: word: stream
+7617: space: \x0a
+7618: comment: %QDF: ignore_newline
+7638: space: \x0a\x0a
+7640: integer: 46
+7642: space:
+7643: integer: 0
+7644: space:
+7645: word: obj
+7648: space: \x0a
+7649: integer: 67
+7651: space: \x0a
+7652: word: endobj
+7658: space: \x0a\x0a
+7660: comment: %% Contents for page 6
+7682: space: \x0a
+7683: comment: %% Original object ID: 42 0
+7710: space: \x0a
+7711: integer: 47
+7713: space:
+7714: integer: 0
+7715: space:
+7716: word: obj
+7719: space: \x0a
+7720: dict_open: <<
+7722: space: \x0a
+7725: name: /Length
+7732: space:
+7733: integer: 48
+7735: space:
+7736: integer: 0
+7737: space:
+7738: word: R
+7739: space: \x0a
+7740: dict_close: >>
+7742: space: \x0a
+7743: word: stream
skipping to endstream
-7773: word: endstream
-7782: space: \x0a
-7783: word: endobj
-7789: space: \x0a\x0a
-7791: integer: 48
-7793: space:
-7794: integer: 0
-7795: space:
-7796: word: obj
-7799: space: \x0a
-7800: integer: 44
-7802: space: \x0a
-7803: word: endobj
-7809: space: \x0a\x0a
-7811: comment: %% Contents for page 7
-7833: space: \x0a
-7834: comment: %% Original object ID: 43 0
-7861: space: \x0a
-7862: integer: 49
-7864: space:
-7865: integer: 0
-7866: space:
-7867: word: obj
-7870: space: \x0a
-7871: dict_open: <<
-7873: space: \x0a
-7876: name: /Length
-7883: space:
-7884: integer: 50
-7886: space:
-7887: integer: 0
-7888: space:
-7889: word: R
-7890: space: \x0a
-7891: dict_close: >>
-7893: space: \x0a
-7894: word: stream
+7794: word: endstream
+7803: space: \x0a
+7804: word: endobj
+7810: space: \x0a\x0a
+7812: integer: 48
+7814: space:
+7815: integer: 0
+7816: space:
+7817: word: obj
+7820: space: \x0a
+7821: integer: 44
+7823: space: \x0a
+7824: word: endobj
+7830: space: \x0a\x0a
+7832: comment: %% Contents for page 7
+7854: space: \x0a
+7855: comment: %% Original object ID: 43 0
+7882: space: \x0a
+7883: integer: 49
+7885: space:
+7886: integer: 0
+7887: space:
+7888: word: obj
+7891: space: \x0a
+7892: dict_open: <<
+7894: space: \x0a
+7897: name: /Length
+7904: space:
+7905: integer: 50
+7907: space:
+7908: integer: 0
+7909: space:
+7910: word: R
+7911: space: \x0a
+7912: dict_close: >>
+7914: space: \x0a
+7915: word: stream
skipping to endstream
-7945: word: endstream
-7954: space: \x0a
-7955: word: endobj
-7961: space: \x0a\x0a
-7963: integer: 50
-7965: space:
-7966: integer: 0
-7967: space:
-7968: word: obj
-7971: space: \x0a
-7972: integer: 44
-7974: space: \x0a
-7975: word: endobj
-7981: space: \x0a\x0a
-7983: comment: %% Contents for page 8
-8005: space: \x0a
-8006: comment: %% Original object ID: 44 0
-8033: space: \x0a
-8034: integer: 51
-8036: space:
-8037: integer: 0
-8038: space:
-8039: word: obj
-8042: space: \x0a
-8043: dict_open: <<
-8045: space: \x0a
-8048: name: /Length
-8055: space:
-8056: integer: 52
-8058: space:
-8059: integer: 0
-8060: space:
-8061: word: R
-8062: space: \x0a
-8063: dict_close: >>
-8065: space: \x0a
-8066: word: stream
+8241: word: endstream
+8250: space: \x0a
+8251: word: endobj
+8257: space: \x0a
+8258: comment: %QDF: ignore_newline
+8278: space: \x0a\x0a
+8280: integer: 50
+8282: space:
+8283: integer: 0
+8284: space:
+8285: word: obj
+8288: space: \x0a
+8289: integer: 318
+8292: space: \x0a
+8293: word: endobj
+8299: space: \x0a\x0a
+8301: comment: %% Contents for page 8
+8323: space: \x0a
+8324: comment: %% Original object ID: 44 0
+8351: space: \x0a
+8352: integer: 51
+8354: space:
+8355: integer: 0
+8356: space:
+8357: word: obj
+8360: space: \x0a
+8361: dict_open: <<
+8363: space: \x0a
+8366: name: /Length
+8373: space:
+8374: integer: 52
+8376: space:
+8377: integer: 0
+8378: space:
+8379: word: R
+8380: space: \x0a
+8381: dict_close: >>
+8383: space: \x0a
+8384: word: stream
skipping to endstream
-8117: word: endstream
-8126: space: \x0a
-8127: word: endobj
-8133: space: \x0a\x0a
-8135: integer: 52
-8137: space:
-8138: integer: 0
-8139: space:
-8140: word: obj
-8143: space: \x0a
-8144: integer: 44
-8146: space: \x0a
-8147: word: endobj
-8153: space: \x0a\x0a
-8155: comment: %% Contents for page 9
-8177: space: \x0a
-8178: comment: %% Original object ID: 45 0
-8205: space: \x0a
-8206: integer: 53
-8208: space:
-8209: integer: 0
-8210: space:
-8211: word: obj
-8214: space: \x0a
-8215: dict_open: <<
-8217: space: \x0a
-8220: name: /Length
-8227: space:
-8228: integer: 54
-8230: space:
-8231: integer: 0
-8232: space:
-8233: word: R
-8234: space: \x0a
-8235: dict_close: >>
-8237: space: \x0a
-8238: word: stream
-skipping to endstream
-8289: word: endstream
-8298: space: \x0a
-8299: word: endobj
-8305: space: \x0a\x0a
-8307: integer: 54
-8309: space:
-8310: integer: 0
-8311: space:
-8312: word: obj
-8315: space: \x0a
-8316: integer: 44
-8318: space: \x0a
-8319: word: endobj
-8325: space: \x0a\x0a
-8327: comment: %% Contents for page 10
-8350: space: \x0a
-8351: comment: %% Original object ID: 46 0
-8378: space: \x0a
-8379: integer: 55
-8381: space:
-8382: integer: 0
-8383: space:
-8384: word: obj
-8387: space: \x0a
-8388: dict_open: <<
-8390: space: \x0a
-8393: name: /Length
-8400: space:
-8401: integer: 56
-8403: space:
-8404: integer: 0
-8405: space:
-8406: word: R
-8407: space: \x0a
-8408: dict_close: >>
-8410: space: \x0a
-8411: word: stream
-skipping to endstream
-8462: word: endstream
-8471: space: \x0a
-8472: word: endobj
-8478: space: \x0a\x0a
-8480: integer: 56
-8482: space:
-8483: integer: 0
-8484: space:
-8485: word: obj
-8488: space: \x0a
-8489: integer: 44
-8491: space: \x0a
-8492: word: endobj
-8498: space: \x0a\x0a
-8500: comment: %% Contents for page 11
+8435: word: endstream
+8444: space: \x0a
+8445: word: endobj
+8451: space: \x0a\x0a
+8453: integer: 52
+8455: space:
+8456: integer: 0
+8457: space:
+8458: word: obj
+8461: space: \x0a
+8462: integer: 44
+8464: space: \x0a
+8465: word: endobj
+8471: space: \x0a\x0a
+8473: comment: %% Contents for page 9
+8495: space: \x0a
+8496: comment: %% Original object ID: 45 0
8523: space: \x0a
-8524: comment: %% Original object ID: 47 0
-8551: space: \x0a
-8552: integer: 57
-8554: space:
-8555: integer: 0
-8556: space:
-8557: word: obj
-8560: space: \x0a
-8561: dict_open: <<
-8563: space: \x0a
-8566: name: /Length
-8573: space:
-8574: integer: 58
-8576: space:
-8577: integer: 0
-8578: space:
-8579: word: R
-8580: space: \x0a
-8581: dict_close: >>
-8583: space: \x0a
-8584: word: stream
+8524: integer: 53
+8526: space:
+8527: integer: 0
+8528: space:
+8529: word: obj
+8532: space: \x0a
+8533: dict_open: <<
+8535: space: \x0a
+8538: name: /Length
+8545: space:
+8546: integer: 54
+8548: space:
+8549: integer: 0
+8550: space:
+8551: word: R
+8552: space: \x0a
+8553: dict_close: >>
+8555: space: \x0a
+8556: word: stream
skipping to endstream
-8635: word: endstream
-8644: space: \x0a
-8645: word: endobj
-8651: space: \x0a\x0a
-8653: integer: 58
-8655: space:
-8656: integer: 0
-8657: space:
-8658: word: obj
-8661: space: \x0a
-8662: integer: 44
-8664: space: \x0a
-8665: word: endobj
-8671: space: \x0a\x0a
-8673: integer: 59
-8675: space:
-8676: integer: 0
-8677: space:
-8678: word: obj
-8681: space: \x0a
-8682: dict_open: <<
-8684: space: \x0a
-8687: name: /Type
-8692: space:
-8693: name: /XRef
-8698: space: \x0a
-8701: name: /Length
-8708: space:
-8709: integer: 240
-8712: space: \x0a
-8715: name: /W
-8717: space:
-8718: array_open: [
-8719: space:
-8720: integer: 1
+8607: word: endstream
+8616: space: \x0a
+8617: word: endobj
+8623: space: \x0a\x0a
+8625: integer: 54
+8627: space:
+8628: integer: 0
+8629: space:
+8630: word: obj
+8633: space: \x0a
+8634: integer: 44
+8636: space: \x0a
+8637: word: endobj
+8643: space: \x0a\x0a
+8645: comment: %% Contents for page 10
+8668: space: \x0a
+8669: comment: %% Original object ID: 46 0
+8696: space: \x0a
+8697: integer: 55
+8699: space:
+8700: integer: 0
+8701: space:
+8702: word: obj
+8705: space: \x0a
+8706: dict_open: <<
+8708: space: \x0a
+8711: name: /Length
+8718: space:
+8719: integer: 56
8721: space:
-8722: integer: 2
+8722: integer: 0
8723: space:
-8724: integer: 1
-8725: space:
-8726: array_close: ]
-8727: space: \x0a
-8730: name: /Root
-8735: space:
-8736: integer: 2
-8737: space:
-8738: integer: 0
-8739: space:
-8740: word: R
-8741: space: \x0a
-8744: name: /Size
-8749: space:
-8750: integer: 60
-8752: space: \x0a
-8755: name: /ID
-8758: space:
-8759: array_open: [
-8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
-8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
-8828: array_close: ]
-8829: space: \x0a
-8830: dict_close: >>
-8832: space: \x0a
-8833: word: stream
+8724: word: R
+8725: space: \x0a
+8726: dict_close: >>
+8728: space: \x0a
+8729: word: stream
+skipping to endstream
+8780: word: endstream
+8789: space: \x0a
+8790: word: endobj
+8796: space: \x0a\x0a
+8798: integer: 56
+8800: space:
+8801: integer: 0
+8802: space:
+8803: word: obj
+8806: space: \x0a
+8807: integer: 44
+8809: space: \x0a
+8810: word: endobj
+8816: space: \x0a\x0a
+8818: comment: %% Contents for page 11
+8841: space: \x0a
+8842: comment: %% Original object ID: 47 0
+8869: space: \x0a
+8870: integer: 57
+8872: space:
+8873: integer: 0
+8874: space:
+8875: word: obj
+8878: space: \x0a
+8879: dict_open: <<
+8881: space: \x0a
+8884: name: /Length
+8891: space:
+8892: integer: 58
+8894: space:
+8895: integer: 0
+8896: space:
+8897: word: R
+8898: space: \x0a
+8899: dict_close: >>
+8901: space: \x0a
+8902: word: stream
+skipping to endstream
+8953: word: endstream
+8962: space: \x0a
+8963: word: endobj
+8969: space: \x0a\x0a
+8971: integer: 58
+8973: space:
+8974: integer: 0
+8975: space:
+8976: word: obj
+8979: space: \x0a
+8980: integer: 44
+8982: space: \x0a
+8983: word: endobj
+8989: space: \x0a\x0a
+8991: integer: 59
+8993: space:
+8994: integer: 0
+8995: space:
+8996: word: obj
+8999: space: \x0a
+9000: dict_open: <<
+9002: space: \x0a
+9005: name: /Type
+9010: space:
+9011: name: /XRef
+9016: space: \x0a
+9019: name: /Length
+9026: space:
+9027: integer: 240
+9030: space: \x0a
+9033: name: /W
+9035: space:
+9036: array_open: [
+9037: space:
+9038: integer: 1
+9039: space:
+9040: integer: 2
+9041: space:
+9042: integer: 1
+9043: space:
+9044: array_close: ]
+9045: space: \x0a
+9048: name: /Root
+9053: space:
+9054: integer: 2
+9055: space:
+9056: integer: 0
+9057: space:
+9058: word: R
+9059: space: \x0a
+9062: name: /Size
+9067: space:
+9068: integer: 60
+9070: space: \x0a
+9073: name: /ID
+9076: space:
+9077: array_open: [
+9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
+9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
+9146: array_close: ]
+9147: space: \x0a
+9148: dict_close: >>
+9150: space: \x0a
+9151: word: stream
skipping to endstream
-9081: word: endstream
-9090: space: \x0a
-9091: word: endobj
-9097: space: \x0a\x0a
-9099: word: startxref
-9108: space: \x0a
-9109: integer: 8673
-9113: space: \x0a
-9114: comment: %%EOF
-9119: space: \x0a
-9120: eof
+9399: word: endstream
+9408: space: \x0a
+9409: word: endobj
+9415: space: \x0a\x0a
+9417: word: startxref
+9426: space: \x0a
+9427: integer: 8991
+9431: space: \x0a
+9432: comment: %%EOF
+9437: space: \x0a
+9438: eof
--- END FILE ---
--- BEGIN PAGE 1 ---
0: word: BT
@@ -595,9 +599,7 @@ skipping to endstream
103: dict_close: >>
105: space: \x0a
106: word: ID
-skipping to EI
-352: word: EI
-354: space: \x0a
+108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
355: word: BT
357: space: \x0a
360: name: /F1
@@ -743,13 +745,11 @@ skipping to EI
47: word: ET
49: space: \x0a\x00\x0a
52: name: /ThisMustBeLast
-67: space: \x0a
-68: eof
+67: eof
--- END PAGE 5 ---
--- BEGIN PAGE 6 ---
0: word: ID
-skipping to EI
-EI not found
+EI not found; resuming normal scanning
2: space: \x0a
5: name: /F1
8: space:
@@ -772,27 +772,37 @@ EI not found
44: eof
--- END PAGE 6 ---
--- BEGIN PAGE 7 ---
-0: word: BT
-2: space: \x0a
-5: name: /F1
-8: space:
-9: integer: 24
-11: space:
-12: word: Tf
-14: space: \x0a
-17: integer: 72
+0: name: /potato
+7: space: \x0a
+8: word: BI
+10: space: \x0a
+11: name: /CS
+14: space:
+15: name: /G
+17: name: /W
19: space:
-20: integer: 720
-23: space:
-24: word: Td
-26: space: \x0a
-29: string: Potato (raw: (Potato))
-37: space:
-38: word: Tj
-40: space: \x0a
-41: word: ET
-43: space: \x0a
-44: eof
+20: integer: 66
+22: name: /H
+24: space:
+25: integer: 47
+27: name: /BPC
+31: space:
+32: integer: 8
+33: name: /F
+35: name: /Fl
+38: name: /DP
+41: dict_open: <<
+43: name: /Predictor
+53: space:
+54: integer: 15
+56: name: /Columns
+64: space:
+65: integer: 66
+67: dict_close: >>
+69: space: \x0a
+70: word: ID
+72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
+318: eof
--- END PAGE 7 ---
--- BEGIN PAGE 8 ---
0: word: BT
diff --git a/qpdf/qtest/qpdf/tokens.pdf b/qpdf/qtest/qpdf/tokens.pdf
index b444db5f..a7157eef 100644
--- a/qpdf/qtest/qpdf/tokens.pdf
+++ b/qpdf/qtest/qpdf/tokens.pdf
Binary files differ
diff --git a/qpdf/test_tokenizer.cc b/qpdf/test_tokenizer.cc
index 6e694bfb..aa6c8acc 100644
--- a/qpdf/test_tokenizer.cc
+++ b/qpdf/test_tokenizer.cc
@@ -88,6 +88,8 @@ static char const* tokenTypeName(QPDFTokenizer::token_type_e ttype)
return "space";
case QPDFTokenizer::tt_comment:
return "comment";
+ case QPDFTokenizer::tt_inline_image:
+ return "inline-image";
}
return 0;
}
@@ -131,7 +133,6 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
bool skip_streams, bool skip_inline_images)
{
Finder f1(is, "endstream");
- Finder f2(is, "EI");
std::cout << "--- BEGIN " << label << " ---" << std::endl;
bool done = false;
QPDFTokenizer tokenizer;
@@ -140,10 +141,20 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
{
tokenizer.includeIgnorable();
}
+ qpdf_offset_t inline_image_offset = 0;
while (! done)
{
QPDFTokenizer::Token token =
- tokenizer.readToken(is, "test", true, max_len);
+ tokenizer.readToken(is, "test", true,
+ inline_image_offset ? 0 : max_len);
+ if (inline_image_offset && (token.getType() == QPDFTokenizer::tt_bad))
+ {
+ std::cout << "EI not found; resuming normal scanning" << std::endl;
+ is->seek(inline_image_offset, SEEK_SET);
+ inline_image_offset = 0;
+ continue;
+ }
+ inline_image_offset = 0;
qpdf_offset_t offset = is->getLastOffset();
std::cout << offset << ": "
@@ -170,7 +181,8 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
else if (skip_inline_images &&
(token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")))
{
- try_skipping(tokenizer, is, max_len, "EI", f2);
+ tokenizer.expectInlineImage();
+ inline_image_offset = is->tell();
}
else if (token.getType() == QPDFTokenizer::tt_eof)
{