Refactor QPDFTokenizer's inline image handling

Add an overload of expectInlineImage that takes an input source and searches it for EI. This is in preparation for improving the way EI is found. This commit only refactors the code, without changing functionality, and adds tests to make sure the old and new code behave identically.
author: Jay Berkenbilt <ejb@ql.org> 2019-01-30 20:20:56 +0100
committer: Jay Berkenbilt <ejb@ql.org> 2019-01-31 15:26:37 +0100
commit: ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b (patch)
tree: 970f4526f39909838f837eb1de5ac672881e9d58 /qpdf
parent: 31372edce0b60211c7af98340b3afa054f414ca4 (diff)
download: qpdf-ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b.tar.zst
3 files changed, 29 insertions, 9 deletions
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 5150e567..6dcebd6e 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -430,3 +430,6 @@ QPDFPageObjectHelper copy shared attribute 0
 qpdf from_nr from repeat_nr 0
 QPDF resolve duplicated page object 0
 QPDF handle direct page object 0
+QPDFTokenizer found EI the old way 0
+QPDFTokenizer found EI by byte count 0
+QPDFTokenizer inline image at EOF the old way 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index ca7ea12b..6abc7edb 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -694,7 +694,7 @@ $td->runtest("check pass1 file",
 show_ntests();
 # ----------
 $td->notify("--- Tokenizer ---");
-$n_tests += 4;
+$n_tests += 5;
 
 $td->runtest("tokenizer with no ignorable",
              {$td->COMMAND => "test_tokenizer -no-ignorable tokens.pdf"},
@@ -706,6 +706,11 @@ $td->runtest("tokenizer",
              {$td->FILE => "tokens.out", $td->EXIT_STATUS => 0},
              $td->NORMALIZE_NEWLINES);
 
+$td->runtest("tokenizer with old inline image code",
+             {$td->COMMAND => "test_tokenizer -old-ei tokens.pdf"},
+             {$td->FILE => "tokens.out", $td->EXIT_STATUS => 0},
+             $td->NORMALIZE_NEWLINES);
+
 $td->runtest("tokenizer with max_len",
              {$td->COMMAND => "test_tokenizer -maxlen 50 tokens.pdf"},
              {$td->FILE => "tokens-maxlen.out", $td->EXIT_STATUS => 0},
diff --git a/qpdf/test_tokenizer.cc b/qpdf/test_tokenizer.cc
index 9f65281b..ecbb3552 100644
--- a/qpdf/test_tokenizer.cc
+++ b/qpdf/test_tokenizer.cc
@@ -16,7 +16,7 @@ static char const* whoami = 0;
 void usage()
 {
     std::cerr << "Usage: " << whoami
-              << " [-maxlen len | -no-ignorable] filename"
+              << " [-maxlen len | -no-ignorable | -old-ei] filename"
               << std::endl;
     exit(2);
 }
@@ -132,7 +132,7 @@ try_skipping(QPDFTokenizer& tokenizer, PointerHolder<InputSource> is,
 static void
 dump_tokens(PointerHolder<InputSource> is, std::string const& label,
             size_t max_len, bool include_ignorable,
-            bool skip_streams, bool skip_inline_images)
+            bool skip_streams, bool skip_inline_images, bool old_ei)
 {
     Finder f1(is, "endstream");
     std::cout << "--- BEGIN " << label << " ---" << std::endl;
@@ -183,7 +183,14 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
         else if (skip_inline_images &&
                  (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")))
         {
-            tokenizer.expectInlineImage();
+            if (old_ei)
+            {
+                tokenizer.expectInlineImage();
+            }
+            else
+            {
+                tokenizer.expectInlineImage(is);
+            }
             inline_image_offset = is->tell();
         }
         else if (token.getType() == QPDFTokenizer::tt_eof)
@@ -195,7 +202,7 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
 }
 
 static void process(char const* filename, bool include_ignorable,
-                    size_t max_len)
+                    size_t max_len, bool old_ei)
 {
     PointerHolder<InputSource> is;
 
@@ -203,7 +210,7 @@ static void process(char const* filename, bool include_ignorable,
     FileInputSource* fis = new FileInputSource();
     fis->setFilename(filename);
     is = fis;
-    dump_tokens(is, "FILE", max_len, include_ignorable, true, false);
+    dump_tokens(is, "FILE", max_len, include_ignorable, true, false, false);
 
     // Tokenize content streams, skipping inline images
     QPDF qpdf;
@@ -222,7 +229,7 @@ static void process(char const* filename, bool include_ignorable,
             "content data", content_data.getPointer());
         is = bis;
         dump_tokens(is, "PAGE " + QUtil::int_to_string(pageno),
-                    max_len, include_ignorable, false, true);
+                    max_len, include_ignorable, false, true, old_ei);
     }
 
     // Tokenize object streams
@@ -241,7 +248,7 @@ static void process(char const* filename, bool include_ignorable,
             is = bis;
             dump_tokens(is, "OBJECT STREAM " +
                         QUtil::int_to_string((*iter).getObjectID()),
-                        max_len, include_ignorable, false, false);
+                        max_len, include_ignorable, false, false, false);
         }
     }
 }
@@ -266,6 +273,7 @@ int main(int argc, char* argv[])
     char const* filename = 0;
     size_t max_len = 0;
     bool include_ignorable = true;
+    bool old_ei = false;
     for (int i = 1; i < argc; ++i)
     {
         if (argv[i][0] == '-')
@@ -282,6 +290,10 @@ int main(int argc, char* argv[])
             {
                 include_ignorable = false;
             }
+            else if (strcmp(argv[i], "-old-ei") == 0)
+            {
+                old_ei = true;
+            }
             else
             {
                 usage();
@@ -303,7 +315,7 @@ int main(int argc, char* argv[])
 
     try
     {
-        process(filename, include_ignorable, max_len);
+        process(filename, include_ignorable, max_len, old_ei);
     }
     catch (std::exception& e)
     {
author	Jay Berkenbilt <ejb@ql.org>	2019-01-30 20:20:56 +0100
committer	Jay Berkenbilt <ejb@ql.org>	2019-01-31 15:26:37 +0100
commit	ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b (patch)
tree	970f4526f39909838f837eb1de5ac672881e9d58 /qpdf
parent	31372edce0b60211c7af98340b3afa054f414ca4 (diff)
download	qpdf-ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b.tar.zst