about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jay Berkenbilt <ejb@ql.org>, 2017-08-12 02:57:05 +0200
committer: Jay Berkenbilt <ejb@ql.org>, 2017-08-12 02:57:05 +0200
commit: 36b3fe5af761b7032a506808cf6ce6d37c297bfc (patch)
tree: dfe58edc693033677cc16988c59151315fbda27b
parent: 46611f0710fa08f1a90134a84cfccec3a1e49f94 (diff)
download: qpdf-36b3fe5af761b7032a506808cf6ce6d37c297bfc.tar.zst
Fix --newline-before-endstream option (fixes #133)
Add a newline unconditionally before endstream even if a newline was already written as part of the stream data.
-rw-r--r-- ChangeLog 4
-rw-r--r-- libqpdf/QPDFWriter.cc 22
-rw-r--r-- manual/qpdf-manual.xml 15
-rw-r--r-- qpdf/qtest/qpdf.test 38
-rw-r--r-- qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf 244
-rw-r--r-- qpdf/qtest/qpdf/newline-before-endstream-nl.pdf 92
-rw-r--r-- qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf 238
-rw-r--r-- qpdf/qtest/qpdf/newline-before-endstream.pdf bin 800 -> 0 bytes
-rw-r--r-- qpdf/qtest/qpdf/streams-with-newlines.pdf 90
9 files changed, 720 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index 34cf93ef..3d1b6fd2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2017-08-11 Jay Berkenbilt <ejb@ql.org>
+ * Fix --newline-before-endstream to always add a newline before
+ endstream even if the last character was already a newline. This
+ is actually what's required by PDF/A. Fixes #133.
+
* Handle encrypted files whose encryption parameters are too
short. Fixes #96.
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index 8d5f0682..de4fa349 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -1587,18 +1587,16 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
char last_char = this->pipeline->getLastChar();
popPipelineStack();
- if (this->qdf_mode || this->newline_before_endstream)
- {
- if (last_char != '\n')
- {
- writeString("\n");
- this->added_newline = true;
- }
- else
- {
- this->added_newline = false;
- }
- }
+ if (this->newline_before_endstream ||
+ (this->qdf_mode && (last_char != '\n')))
+ {
+ writeString("\n");
+ this->added_newline = true;
+ }
+ else
+ {
+ this->added_newline = false;
+ }
writeString("endstream");
}
else if (object.isString())
diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml
index a944f38a..06504d33 100644
--- a/manual/qpdf-manual.xml
+++ b/manual/qpdf-manual.xml
@@ -932,6 +932,21 @@ outfile.pdf</option>
</listitem>
</varlistentry>
<varlistentry>
+ <term><option>--newline-before-endstream</option></term>
+ <listitem>
+ <para>
+ Tells qpdf to insert a newline before the
+ <literal>endstream</literal> keyword, not counted in the
+ length, after any stream content even if the last character of
+ the stream was a newline. This may result in two newlines in
+ some cases. This is a requirement of PDF/A. While qpdf doesn't
+ specifically know how to generate PDF/A-compliant PDFs, this
+ at least prevents it from removing compliance on already
+ compliant files.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term><option>--qdf</option></term>
<listitem>
<para>
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 89e41b36..06f298e0 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -234,7 +234,7 @@ foreach my $d (@bug_tests)
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
-$n_tests += 88;
+$n_tests += 86;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@@ -623,16 +623,6 @@ $td->runtest("split content stream errors",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
-$td->runtest("newline before endstream",
- {$td->COMMAND =>
- "qpdf --static-id --newline-before-endstream" .
- " minimal.pdf a.pdf"},
- {$td->STRING => "", $td->EXIT_STATUS => 0},
- $td->NORMALIZE_NEWLINES);
-$td->runtest("check output",
- {$td->FILE => "a.pdf"},
- {$td->FILE => "newline-before-endstream.pdf"});
-
# Demonstrate show-xref after check and not after check to illustrate
# that it can dump the real xref or the recovered xref.
$td->runtest("dump bad xref",
@@ -690,6 +680,32 @@ $td->runtest("short /O or /U",
show_ntests();
# ----------
+$td->notify("--- Newline before endstream ---");
+$n_tests += 8;
+foreach my $d (
+ ['--qdf', 'qdf', 'qdf'],
+ ['--newline-before-endstream', 'newline', 'nl'],
+ ['--qdf --newline-before-endstream', 'newline and qdf', 'nl-qdf'],
+ )
+{
+ my ($flags, $description, $suffix) = @$d;
+ $td->runtest("newline before endstream: $description",
+ {$td->COMMAND => "qpdf --static-id --stream-data=preserve" .
+ " $flags streams-with-newlines.pdf a.pdf"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+ $td->runtest("check output",
+ {$td->FILE => "a.pdf"},
+ {$td->FILE => "newline-before-endstream-$suffix.pdf"});
+ if ($flags =~ /qdf/)
+ {
+ $td->runtest("fix-qdf",
+ {$td->COMMAND => "fix-qdf a.pdf"},
+ {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});
+ }
+}
+show_ntests();
+# ----------
$td->notify("--- Single Page ---");
# sp = single-pages
my @sp_cases = (
diff --git a/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf b/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf
new file mode 100644
index 00000000..631743dc
--- /dev/null
+++ b/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf
@@ -0,0 +1,244 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 3
+ /Kids [
+ 3 0 R
+ 4 0 R
+ 5 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents 6 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 8 0 R
+ >>
+ /ProcSet 9 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Page 2
+%% Original object ID: 4 0
+4 0 obj
+<<
+ /Contents 10 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 12 0 R
+ >>
+ /ProcSet 13 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Page 3
+%% Original object ID: 5 0
+5 0 obj
+<<
+ /Contents 14 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 16 0 R
+ >>
+ /ProcSet 17 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 6 0
+6 0 obj
+<<
+ /Length 7 0 R
+>>
+stream
+% Stream contains a newline as part of its length
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+
+endstream
+endobj
+
+%QDF: ignore_newline
+7 0 obj
+94
+endobj
+
+%% Original object ID: 7 0
+8 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 8 0
+9 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+%% Contents for page 2
+%% Original object ID: 9 0
+10 0 obj
+<<
+ /Length 11 0 R
+>>
+stream
+% Stream data does not end with a newline but endstream is preceded by
+% a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+
+endstream
+endobj
+
+%QDF: ignore_newline
+11 0 obj
+128
+endobj
+
+%% Original object ID: 10 0
+12 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 11 0
+13 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+%% Contents for page 3
+%% Original object ID: 12 0
+14 0 obj
+<<
+ /Length 15 0 R
+>>
+stream
+% Stream data does not end with a newline and endstream is not
+% preceded by a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+
+endstream
+endobj
+
+%QDF: ignore_newline
+15 0 obj
+132
+endobj
+
+%% Original object ID: 13 0
+16 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 14 0
+17 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 18
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000262 00000 n
+0000000491 00000 n
+0000000723 00000 n
+0000000968 00000 n
+0000001139 00000 n
+0000001185 00000 n
+0000001330 00000 n
+0000001415 00000 n
+0000001622 00000 n
+0000001671 00000 n
+0000001818 00000 n
+0000001905 00000 n
+0000002116 00000 n
+0000002165 00000 n
+0000002312 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 18
+ /ID [<ff82013f9cede898ae8db2f2f177aa1d><31415926535897932384626433832795>]
+>>
+startxref
+2348
+%%EOF
diff --git a/qpdf/qtest/qpdf/newline-before-endstream-nl.pdf b/qpdf/qtest/qpdf/newline-before-endstream-nl.pdf
new file mode 100644
index 00000000..436b3060
--- /dev/null
+++ b/qpdf/qtest/qpdf/newline-before-endstream-nl.pdf
@@ -0,0 +1,92 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 3 /Kids [ 3 0 R 4 0 R 5 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents 6 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >>
+endobj
+4 0 obj
+<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
+endobj
+5 0 obj
+<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >>
+endobj
+6 0 obj
+<< /Length 94 >>
+stream
+% Stream contains a newline as part of its length
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+
+endstream
+endobj
+7 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+8 0 obj
+[ /PDF /Text ]
+endobj
+9 0 obj
+<< /Length 127 >>
+stream
+% Stream data does not end with a newline but endstream is preceded by
+% a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+10 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+11 0 obj
+[ /PDF /Text ]
+endobj
+12 0 obj
+<< /Length 131 >>
+stream
+% Stream data does not end with a newline and endstream is not
+% preceded by a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+13 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+14 0 obj
+[ /PDF /Text ]
+endobj
+xref
+0 15
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000135 00000 n
+0000000278 00000 n
+0000000423 00000 n
+0000000569 00000 n
+0000000713 00000 n
+0000000820 00000 n
+0000000850 00000 n
+0000001028 00000 n
+0000001136 00000 n
+0000001167 00000 n
+0000001350 00000 n
+0000001458 00000 n
+trailer << /Root 1 0 R /Size 15 /ID [<ff82013f9cede898ae8db2f2f177aa1d><31415926535897932384626433832795>] >>
+startxref
+1489
+%%EOF
diff --git a/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf b/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf
new file mode 100644
index 00000000..a1fab14a
--- /dev/null
+++ b/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf
@@ -0,0 +1,238 @@
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+ /Pages 2 0 R
+ /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+ /Count 3
+ /Kids [
+ 3 0 R
+ 4 0 R
+ 5 0 R
+ ]
+ /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+ /Contents 6 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 8 0 R
+ >>
+ /ProcSet 9 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Page 2
+%% Original object ID: 4 0
+4 0 obj
+<<
+ /Contents 10 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 12 0 R
+ >>
+ /ProcSet 13 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Page 3
+%% Original object ID: 5 0
+5 0 obj
+<<
+ /Contents 14 0 R
+ /MediaBox [
+ 0
+ 0
+ 612
+ 792
+ ]
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 16 0 R
+ >>
+ /ProcSet 17 0 R
+ >>
+ /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 6 0
+6 0 obj
+<<
+ /Length 7 0 R
+>>
+stream
+% Stream contains a newline as part of its length
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+7 0 obj
+94
+endobj
+
+%% Original object ID: 7 0
+8 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 8 0
+9 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+%% Contents for page 2
+%% Original object ID: 9 0
+10 0 obj
+<<
+ /Length 11 0 R
+>>
+stream
+% Stream data does not end with a newline but endstream is preceded by
+% a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+11 0 obj
+128
+endobj
+
+%% Original object ID: 10 0
+12 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 11 0
+13 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+%% Contents for page 3
+%% Original object ID: 12 0
+14 0 obj
+<<
+ /Length 15 0 R
+>>
+stream
+% Stream data does not end with a newline and endstream is not
+% preceded by a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+
+15 0 obj
+132
+endobj
+
+%% Original object ID: 13 0
+16 0 obj
+<<
+ /BaseFont /Helvetica
+ /Encoding /WinAnsiEncoding
+ /Name /F1
+ /Subtype /Type1
+ /Type /Font
+>>
+endobj
+
+%% Original object ID: 14 0
+17 0 obj
+[
+ /PDF
+ /Text
+]
+endobj
+
+xref
+0 18
+0000000000 65535 f
+0000000052 00000 n
+0000000133 00000 n
+0000000262 00000 n
+0000000491 00000 n
+0000000723 00000 n
+0000000968 00000 n
+0000001117 00000 n
+0000001163 00000 n
+0000001308 00000 n
+0000001393 00000 n
+0000001578 00000 n
+0000001627 00000 n
+0000001774 00000 n
+0000001861 00000 n
+0000002050 00000 n
+0000002099 00000 n
+0000002246 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 18
+ /ID [<ff82013f9cede898ae8db2f2f177aa1d><31415926535897932384626433832795>]
+>>
+startxref
+2282
+%%EOF
diff --git a/qpdf/qtest/qpdf/newline-before-endstream.pdf b/qpdf/qtest/qpdf/newline-before-endstream.pdf
deleted file mode 100644
index cde4693b..00000000
--- a/qpdf/qtest/qpdf/newline-before-endstream.pdf
+++ /dev/null
Binary files differ
diff --git a/qpdf/qtest/qpdf/streams-with-newlines.pdf b/qpdf/qtest/qpdf/streams-with-newlines.pdf
new file mode 100644
index 00000000..05271222
--- /dev/null
+++ b/qpdf/qtest/qpdf/streams-with-newlines.pdf
@@ -0,0 +1,90 @@
+%PDF-1.3
+%¿÷¢þ
+1 0 obj
+<< /Pages 2 0 R /Type /Catalog >>
+endobj
+2 0 obj
+<< /Count 3 /Kids [ 3 0 R 4 0 R 5 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<< /Contents 6 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >>
+endobj
+4 0 obj
+<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
+endobj
+5 0 obj
+<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >>
+endobj
+6 0 obj
+<< /Length 94 >>
+stream
+% Stream contains a newline as part of its length
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+7 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+8 0 obj
+[ /PDF /Text ]
+endobj
+9 0 obj
+<< /Length 127 >>
+stream
+% Stream data does not end with a newline but endstream is preceded by
+% a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ET
+endstream
+endobj
+10 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+11 0 obj
+[ /PDF /Text ]
+endobj
+12 0 obj
+<< /Length 131 >>
+stream
+% Stream data does not end with a newline and endstream is not
+% preceded by a newline.
+BT
+ /F1 24 Tf
+ 72 720 Td
+ (Potato) Tj
+ETendstream
+endobj
+13 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+14 0 obj
+[ /PDF /Text ]
+endobj
+xref
+0 15
+0000000000 65535 f
+0000000015 00000 n
+0000000064 00000 n
+0000000135 00000 n
+0000000278 00000 n
+0000000423 00000 n
+0000000569 00000 n
+0000000712 00000 n
+0000000819 00000 n
+0000000849 00000 n
+0000001027 00000 n
+0000001135 00000 n
+0000001166 00000 n
+0000001348 00000 n
+0000001456 00000 n
+trailer << /Root 1 0 R /Size 15 /ID [<ff82013f9cede898ae8db2f2f177aa1d><7eb8172a38e90a48184c5bf01c8020b0>] >>
+startxref
+1487
+%%EOF