summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org> 2011-04-30 23:46:09 +0200
committer: Jay Berkenbilt <ejb@ql.org> 2011-04-30 23:46:09 +0200
commit: aeb892f99bad9f6c24aef94a2d93d573c6de0382 (patch)
tree: c8f7afc1e0421233efe2d9b55d09e2ce3cf36d2a
parent: c551b972f6b9d365cb7f808a801dbfac409cc8b7 (diff)
download: qpdf-release-qpdf-2.2.3.tar.zst
accept stream keyword with CR only (tag: release-qpdf-2.2.3)
git-svn-id: svn+q:///qpdf/trunk@1052 71b93d88-0707-0410-a8cf-f5a4172ac649
-rw-r--r--ChangeLog5
-rw-r--r--libqpdf/QPDF.cc68
-rw-r--r--manual/qpdf-manual.xml6
-rw-r--r--qpdf/qpdf.testcov4
-rw-r--r--qpdf/qtest/qpdf.test13
-rw-r--r--qpdf/qtest/qpdf/stream-line-enders.out3
-rw-r--r--qpdf/qtest/qpdf/stream-line-enders.pdf50
-rw-r--r--qpdf/qtest/qpdf/stream-line-enders.qdf137
8 files changed, 272 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 01e43879..a81124bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,11 @@
* 2.2.3: release
+ * libqpdf/QPDF.cc (readObjectInternal): Accept the case of the
+ stream keyword being followed by carriage return by itself. While
+ this is not permitted by the specification, there are PDF files
+ that do this, and other readers can read them.
+
* libqpdf/Pl_QPDFTokenizer.cc (processChar): When an inline image
is detected, suspend normalization only up to the end of the
inline image rather than for the remainder of the content stream.
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index c8146eff..f6157287 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -1331,24 +1331,66 @@ QPDF::readObjectInternal(PointerHolder<InputSource> input,
if (readToken(input) ==
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
{
- // Kill to next actual newline. Do not use readLine()
- // here -- streams are a special case. The next
- // single newline character marks the end of the
- // stream token. It is incorrect to strip subsequent
- // carriage returns or newlines as they may be part of
- // the stream.
+ // The PDF specification states that the word "stream"
+ // should be followed by either a carriage return and
+ // a newline or by a newline alone. It specifically
+ // disallows following it by a carriage return alone
+ // since, in that case, there would be no way to tell
+ // whether the NL in a CR NL sequence was part of the
+ // stream data. However, some readers, including
+ // Adobe Reader, accept a carriage return by itself
+ // when followed by a non-newline character, so that's
+ // what we do here.
{
char ch;
- do
+ if (input->read(&ch, 1) == 0)
{
- if (input->read(&ch, 1) == 0)
+ // A premature EOF here will result in some
+ // other problem that will get reported at
+ // another time.
+ }
+ else if (ch == '\n')
+ {
+ // ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with NL only");
+ }
+ else if (ch == '\r')
+ {
+ // Read another character
+ if (input->read(&ch, 1) != 0)
{
- // A premature EOF here will result in
- // some other problem that will get
- // reported at another time.
- ch = '\n';
+ if (ch == '\n')
+ {
+ // Ready to read stream data
+ QTC::TC("qpdf", "QPDF stream with CRNL");
+ }
+ else
+ {
+ // Treat the \r by itself as the
+ // whitespace after the stream keyword
+ // and start reading stream data in
+ // spite of not having seen a newline.
+ QTC::TC("qpdf", "QPDF stream with CR only");
+ input->unreadCh(ch);
+ warn(QPDFExc(
+ qpdf_e_damaged_pdf,
+ input->getName(),
+ this->last_object_description,
+ input->tell(),
+ "stream keyword followed"
+ " by carriage return only"));
+ }
}
- } while (ch != '\n');
+ }
+ else
+ {
+ QTC::TC("qpdf", "QPDF stream without newline");
+ warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+ this->last_object_description,
+ input->tell(),
+ "stream keyword not followed"
+ " by proper line terminator"));
+ }
}
// Must get offset before accessing any additional
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index 9bdb95b3..02b69a1d 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -2080,6 +2080,12 @@ print "\n";
<itemizedlist>
<listitem>
<para>
+ Handle some damaged streams with incorrect characters
+ following the stream keyword.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
Improve handling of inline images when normalizing content
streams.
</para>
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov
index 51a6f0e8..4e7e292a 100644
--- a/qpdf/qpdf.testcov
+++ b/qpdf/qpdf.testcov
@@ -188,3 +188,7 @@ QPDF_Stream getStreamData 0
QPDF_Stream expand filter abbreviation 0
qpdf-c called qpdf_read_memory 0
Pl_QPDFTokenizer found EI 0
+QPDF stream without newline 0
+QPDF stream with CR only 0
+QPDF stream with CRNL 0
+QPDF stream with NL only 0
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 62eed000..75a92aa4 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -111,7 +111,7 @@ $td->runtest("new stream",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 29;
+$n_tests += 31;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -265,6 +265,17 @@ $td->runtest("error/output redirection to strings",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
+$td->runtest("odd terminators for stream keyword",
+ {$td->COMMAND =>
+ "qpdf --qdf --static-id" .
+ " stream-line-enders.pdf a.qdf"},
+ {$td->FILE => "stream-line-enders.out",
+ $td->EXIT_STATUS => 3},
+ $td->NORMALIZE_NEWLINES);
+$td->runtest("check output",
+ {$td->FILE => "a.qdf"},
+ {$td->FILE => "stream-line-enders.qdf"});
+
show_ntests();
# ----------
$td->notify("--- Error Condition Tests ---");
diff --git a/qpdf/qtest/qpdf/stream-line-enders.out b/qpdf/qtest/qpdf/stream-line-enders.out
new file mode 100644
index 00000000..b7a7513c
--- /dev/null
+++ b/qpdf/qtest/qpdf/stream-line-enders.out
@@ -0,0 +1,3 @@
+WARNING: stream-line-enders.pdf (object 5 0, file position 378): stream keyword followed by carriage return only
+WARNING: stream-line-enders.pdf (object 6 0, file position 437): stream keyword not followed by proper line terminator
+qpdf: operation succeeded with warnings; resulting file may have some problems
diff --git a/qpdf/qtest/qpdf/stream-line-enders.pdf b/qpdf/qtest/qpdf/stream-line-enders.pdf
new file mode 100644
index 00000000..e6238889
--- /dev/null
+++ b/qpdf/qtest/qpdf/stream-line-enders.pdf
@@ -0,0 +1,50 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents [ 4 0 R 5 0 R 6 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >>
+endobj
+4 0 obj
+<< /Length 14 >>
+stream
+BT
+ /F1 24 Tf
+endstream
+endobj
+5 0 obj
+<< /Length 10 >>
+stream 72 720 Td
+endstream
+endobj
+6 0 obj
+<< /Length 15 >>
+stream (Potato) Tj
+ET
+endstream
+endobj
+7 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+8 0 obj
+[ /PDF /Text ]
+endobj
+xref
+0 9
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000123 00000 n
+0000000282 00000 n
+0000000346 00000 n
+0000000405 00000 n
+0000000469 00000 n
+0000000576 00000 n
+trailer << /Root 1 0 R /Size 9 /ID [<08aa98c73f8a7262d77c8328772c3989><7b1f32865e2165debe277f27ee790092>] >>
+startxref
+606
+%%EOF
diff --git a/qpdf/qtest/qpdf/stream-line-enders.qdf b/qpdf/qtest/qpdf/stream-line-enders.qdf
new file mode 100644
index 00000000..33536028
--- /dev/null
+++ b/qpdf/qtest/qpdf/stream-line-enders.qdf
@@ -0,0 +1,137 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 1
+ /Kids [
+ 3 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents [
+ 4 0 R
+ 6 0 R
+ 8 0 R
+ ]
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 10 0 R
+ >>
+ /ProcSet 11 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 4 0
+4 0 obj
+<<
+ /Length 5 0 R
+>>
+stream
+BT
+ /F1 24 Tf
+endstream
+endobj
+
+5 0 obj
+14
+endobj
+
+%% Contents for page 1
+%% Original object ID: 5 0
+6 0 obj
+<<
+ /Length 7 0 R
+>>
+stream
+72 720 Td
+endstream
+endobj
+
+7 0 obj
+10
+endobj
+
+%% Contents for page 1
+%% Original object ID: 6 0
+8 0 obj
+<<
+ /Length 9 0 R
+>>
+stream
+(Potato) Tj
+ET
+endstream
+endobj
+
+9 0 obj
+15
+endobj
+
+%% Original object ID: 7 0
+10 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 8 0
+11 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 12
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000242 00000 n
+0000000516 00000 n
+0000000585 00000 n
+0000000654 00000 n
+0000000719 00000 n
+0000000788 00000 n
+0000000858 00000 n
+0000000904 00000 n
+0000001050 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 12
+ /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>]
+>>
+startxref
+1086
+%%EOF