aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--TODO12
-rw-r--r--libqpdf/Pl_QPDFTokenizer.cc51
-rw-r--r--libqpdf/qpdf/Pl_QPDFTokenizer.hh4
-rw-r--r--qpdf/qpdf.testcov1
-rw-r--r--qpdf/qtest/qpdf.test12
-rw-r--r--qpdf/qtest/qpdf/inline-images-cr.pdfbin0 -> 1519163 bytes
7 files changed, 67 insertions, 20 deletions
diff --git a/ChangeLog b/ChangeLog
index bcf8e9f8..46afbfed 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2011-04-30 Jay Berkenbilt <ejb@ql.org>
+
+ * libqpdf/Pl_QPDFTokenizer.cc (processChar): When an inline image
+ is detected, suspend normalization only up to the end of the
+ inline image rather than for the remainder of the content stream.
+ (Fixes qpdf-Bugs 3152169.)
+
2011-01-31 Jay Berkenbilt <ejb@ql.org>
* libqpdf/QPDF.cc (readObjectAtOffset): use -1 rather than 0 when
diff --git a/TODO b/TODO
index 41aee45d..37d8aa75 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,11 @@
+Next
+====
+
+ * Look for %PDF header somewhere within the first 1024 bytes of the
+ file. Also accept headers of the form "%!PS-Adobe-N.n PDF-M.m".
+ See Implementation notes 13 and 14 in appendix H of the PDF 1.7
+ specification. This is bug 3267974.
+
General
=======
@@ -174,6 +182,10 @@ Index: QPDFWriter.cc
providing some mechanism to recover earlier versions of a file
embedded prior to appended sections.
+ * From a suggestion in bug 3152169, consider having an option to
+ re-encode inline images with an ASCII encoding.
+
+
Splitting by Pages
==================
diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc
index 28ea4faa..3bd3fb6c 100644
--- a/libqpdf/Pl_QPDFTokenizer.cc
+++ b/libqpdf/Pl_QPDFTokenizer.cc
@@ -1,6 +1,7 @@
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QPDF_String.hh>
#include <qpdf/QPDF_Name.hh>
+#include <qpdf/QTC.hh>
#include <stdexcept>
#include <string.h>
@@ -11,8 +12,9 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
last_char_was_cr(false),
unread_char(false),
char_to_unread('\0'),
- pass_through(false)
+ in_inline_image(false)
{
+ memset(this->image_buf, 0, IMAGE_BUF_SIZE);
}
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
@@ -56,11 +58,34 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
void
Pl_QPDFTokenizer::processChar(char ch)
{
- if (this->pass_through)
+ if (this->in_inline_image)
{
- // We're not normalizing anymore -- just write this without
- // looking at it.
- writeNext(&ch, 1);
+ // Scan through the input looking for EI surrounded by
+ // whitespace. If that pattern appears in the inline image's
+ // representation, we're hosed, but this situation seems
+ // excessively unlikely, and this code path is only followed
+ // during content stream normalization, which is pretty much
+ // used for debugging and human inspection of PDF files.
+ memmove(this->image_buf,
+ this->image_buf + 1,
+ IMAGE_BUF_SIZE - 1);
+ this->image_buf[IMAGE_BUF_SIZE - 1] = ch;
+ if (strchr(" \t\n\v\f\r", this->image_buf[0]) &&
+ (this->image_buf[1] == 'E') &&
+ (this->image_buf[2] == 'I') &&
+ strchr(" \t\n\v\f\r", this->image_buf[3]))
+ {
+ // We've found an EI operator. We've already written the
+ // EI operator to output; terminate with a newline
+ // character and resume normal processing.
+ writeNext("\n", 1);
+ this->in_inline_image = false;
+ QTC::TC("qpdf", "Pl_QPDFTokenizer found EI");
+ }
+ else
+ {
+ writeNext(&ch, 1);
+ }
return;
}
@@ -75,18 +100,10 @@ Pl_QPDFTokenizer::processChar(char ch)
this->newline_after_next_token = false;
}
if ((token.getType() == QPDFTokenizer::tt_word) &&
- (token.getValue() == "BI"))
+ (token.getValue() == "ID"))
{
- // Uh oh.... we're not sophisticated enough to handle
- // inline images safely. We'd have to to set up all the
- // filters and pipe the image data through it until the
- // filtered output was the right size for an image of the
- // specified dimensions. Then we'd either have to write
- // out raw image data or continue to write filtered data,
- // resuming normalization when we get to the end.
- // Instead, for now, we'll just turn off normalization for
- // the remainder of this stream.
- this->pass_through = true;
+ // Suspend normal scanning until we find an EI token.
+ this->in_inline_image = true;
if (this->unread_char)
{
writeNext(&this->char_to_unread, 1);
@@ -156,7 +173,7 @@ void
Pl_QPDFTokenizer::finish()
{
this->tokenizer.presentEOF();
- if (! this->pass_through)
+ if (! this->in_inline_image)
{
QPDFTokenizer::Token token;
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
diff --git a/libqpdf/qpdf/Pl_QPDFTokenizer.hh b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
index d300d7cd..3f816f5d 100644
--- a/libqpdf/qpdf/Pl_QPDFTokenizer.hh
+++ b/libqpdf/qpdf/Pl_QPDFTokenizer.hh
@@ -33,7 +33,9 @@ class Pl_QPDFTokenizer: public Pipeline
bool last_char_was_cr;
bool unread_char;
char char_to_unread;
- bool pass_through;
+ bool in_inline_image;
+ static int const IMAGE_BUF_SIZE = 4; // must be >= 4
+ char image_buf[IMAGE_BUF_SIZE];
};
#endif // __PL_QPDFTOKENIZER_HH__
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index f0b3e06a..51a6f0e8 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -187,3 +187,4 @@ QPDF_Stream getRawStreamData 0
QPDF_Stream getStreamData 0
QPDF_Stream expand filter abbreviation 0
qpdf-c called qpdf_read_memory 0
+Pl_QPDFTokenizer found EI 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 2562f7c3..62eed000 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -1257,8 +1257,8 @@ my @flags = (["-qdf", # 1
"no arguments"],
);
-$n_tests += (@files * @flags * 2 * 3);
-$n_compare_pdfs += (@files * @flags * 2);
+$n_tests += 1 + (@files * @flags * 2 * 3);
+$n_compare_pdfs += 1 + (@files * @flags * 2);
$n_acroread += (@files * @flags * 2);
foreach my $file (@files)
@@ -1311,6 +1311,14 @@ foreach my $file (@files)
}
}
+# inline-images-cr.pdf is xbkm938-dies.pdf from PDF collection
+$td->runtest("convert inline-images-cr to qdf",
+ {$td->COMMAND => "qpdf --static-id --no-original-object-ids" .
+ " --qdf inline-images-cr.pdf a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+
+compare_pdfs("inline-images-cr.pdf", "a.pdf");
+
show_ntests();
# ----------
$td->notify("--- fix-qdf Tests ---");
diff --git a/qpdf/qtest/qpdf/inline-images-cr.pdf b/qpdf/qtest/qpdf/inline-images-cr.pdf
new file mode 100644
index 00000000..48861a73
--- /dev/null
+++ b/qpdf/qtest/qpdf/inline-images-cr.pdf
Binary files differ