aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2017-08-16 12:26:31 +0200
committerJay Berkenbilt <ejb@ql.org>2017-08-19 20:50:55 +0200
commit2d2f61966525cb948bcb6307cccbc3493b1825b5 (patch)
tree862dbdd156d2af563d0d20f755f7abeb2c200889
parente0d1cd1f4b2de30967f9c70460c2d0765f003676 (diff)
downloadqpdf-2d2f61966525cb948bcb6307cccbc3493b1825b5.tar.zst
Implement Pl_RunLength pipeline
-rw-r--r--TODO6
-rw-r--r--include/qpdf/Pl_RunLength.hh42
-rw-r--r--libqpdf/Pl_RunLength.cc171
-rw-r--r--libqpdf/build.mk1
-rw-r--r--libtests/build.mk1
-rw-r--r--libtests/libtests.testcov3
-rw-r--r--libtests/qtest/runlength.test75
-rw-r--r--libtests/qtest/runlength/011
-rw-r--r--libtests/qtest/runlength/01.encoded1
-rw-r--r--libtests/qtest/runlength/021
-rw-r--r--libtests/qtest/runlength/02.encoded1
-rw-r--r--libtests/qtest/runlength/03bin0 -> 516 bytes
-rw-r--r--libtests/qtest/runlength/03.encodedbin0 -> 275 bytes
-rw-r--r--libtests/qtest/runlength/041
-rw-r--r--libtests/qtest/runlength/04.encoded1
-rw-r--r--libtests/qtest/runlength/051
-rw-r--r--libtests/qtest/runlength/05.encoded1
-rw-r--r--libtests/qtest/runlength/empty.encoded1
-rw-r--r--libtests/runlength.cc47
19 files changed, 352 insertions, 3 deletions
diff --git a/TODO b/TODO
index 6061c44e..d77f159f 100644
--- a/TODO
+++ b/TODO
@@ -33,9 +33,9 @@ Soon
prioritized so that we can poll all registered filters to see
whether they are capable of filtering a particular stream.
- * If possible, consider adding RLE, CCITT3, CCITT4, or any other easy
- filters. For some reference code that we probably can't use but
- may be handy anyway, see
+ * If possible, consider adding CCITT3, CCITT4, or any other easy
+ filters. For some reference code that we probably can't use but may
+ be handy anyway, see
http://partners.adobe.com/public/developer/ps/sdk/index_archive.html
* If possible, support the following types of broken files:
diff --git a/include/qpdf/Pl_RunLength.hh b/include/qpdf/Pl_RunLength.hh
new file mode 100644
index 00000000..86855382
--- /dev/null
+++ b/include/qpdf/Pl_RunLength.hh
@@ -0,0 +1,42 @@
+// Copyright (c) 2005-2015 Jay Berkenbilt
+//
+// This file is part of qpdf. This software may be distributed under
+// the terms of version 2 of the Artistic License which may be found
+// in the source distribution. It is provided "as is" without express
+// or implied warranty.
+
+#ifndef __PL_RUNLENGTH_HH__
+#define __PL_RUNLENGTH_HH__
+
+#include <qpdf/Pipeline.hh>
+
+// Pipeline stage that run-length encodes or decodes the bytes written
+// to it, depending on the action given at construction, and passes the
+// result to the next pipeline.
+class Pl_RunLength: public Pipeline
+{
+ public:
+ // Selects whether write() encodes or decodes its input.
+ enum action_e { a_encode, a_decode };
+
+ QPDF_DLL
+ Pl_RunLength(char const* identifier, Pipeline* next,
+ action_e action);
+ QPDF_DLL
+ virtual ~Pl_RunLength();
+
+ // Process len bytes starting at data. Input may be supplied in
+ // pieces of any size; pending state is carried between calls.
+ QPDF_DLL
+ virtual void write(unsigned char* data, size_t len);
+ // When encoding, writes any buffered bytes followed by the
+ // end-of-data byte (128). In both modes, finishes the next pipeline.
+ QPDF_DLL
+ virtual void finish();
+
+ private:
+ void encode(unsigned char* data, size_t len);
+ void decode(unsigned char* data, size_t len);
+ void flush_encode();
+
+ // Encoding: st_top means at most one byte is buffered, st_copying
+ // means buf holds literal bytes, st_run means buf holds repetitions
+ // of a single byte.
+ // Decoding: st_top means the next byte is a length byte, st_copying
+ // means `length` more bytes are to be copied through, st_run means
+ // the next byte is to be written `length` times.
+ enum state_e { st_top, st_copying, st_run };
+
+ action_e action;
+ state_e state;
+ // Bytes collected by the encoder and not yet written; unused by the
+ // decoder.
+ unsigned char buf[128];
+ // Encoding: number of bytes in buf. Decoding: remaining copy count
+ // or repeat count, depending on state.
+ unsigned int length;
+};
+
+#endif // __PL_RUNLENGTH_HH__
diff --git a/libqpdf/Pl_RunLength.cc b/libqpdf/Pl_RunLength.cc
new file mode 100644
index 00000000..1e8c56ca
--- /dev/null
+++ b/libqpdf/Pl_RunLength.cc
@@ -0,0 +1,171 @@
+#include <qpdf/Pl_RunLength.hh>
+
+#include <qpdf/QUtil.hh>
+#include <qpdf/QTC.hh>
+
+// Construct a run-length pipeline that writes to next. identifier and
+// next are forwarded to the Pipeline base class; action chooses between
+// encoding and decoding. The state machine starts in st_top with a
+// length of 0; buf is not initialized here.
+Pl_RunLength::Pl_RunLength(char const* identifier, Pipeline* next,
+ action_e action) :
+ Pipeline(identifier, next),
+ action(action),
+ state(st_top),
+ length(0)
+{
+}
+
+// The destructor body is empty; it does not flush buffered data, which
+// is only written by finish().
+Pl_RunLength::~Pl_RunLength()
+{
+}
+
+// Hand len bytes at data to the encoder if this pipeline was created
+// with a_encode; otherwise hand them to the decoder.
+void
+Pl_RunLength::write(unsigned char* data, size_t len)
+{
+    if (this->action != a_encode)
+    {
+        decode(data, len);
+        return;
+    }
+    encode(data, len);
+}
+
+// Run-length encode len bytes from data. Bytes are collected in buf
+// (at most 128 of them) either as literal data (st_copying) or as
+// repetitions of one byte (st_run); flush_encode() writes a collected
+// group to the next pipeline. All pending state is kept in members, so
+// the input may be split across calls at any position.
+// Throws std::logic_error if state and length are inconsistent at the
+// start of a loop iteration.
+void
+Pl_RunLength::encode(unsigned char* data, size_t len)
+{
+ for (size_t i = 0; i < len; ++i)
+ {
+ // Invariant: the state is st_top exactly when at most one byte
+ // is buffered.
+ if ((this->state == st_top) != (this->length <= 1))
+ {
+ throw std::logic_error(
+ "Pl_RunLength::encode: state/length inconsistency");
+ }
+ unsigned char ch = data[i];
+ // ch repeats the most recently buffered byte: start a run or
+ // extend the current one. A buffer of 128 bytes that is not
+ // literal data (a full run) fails the middle test and is handled
+ // by the else branch, which flushes it.
+ if ((this->length > 0) &&
+ ((this->state == st_copying) || (this->length < 128)) &&
+ (ch == this->buf[this->length-1]))
+ {
+ QTC::TC("libtests", "Pl_RunLength: switch to run",
+ (this->length == 128) ? 0 : 1);
+ if (this->state == st_copying)
+ {
+ // The last literal byte is really the first byte of the
+ // new run: drop it from the literal data, flush what
+ // remains, and restart the buffer with that byte.
+ --this->length;
+ flush_encode();
+ this->buf[0] = ch;
+ this->length = 1;
+ }
+ this->state = st_run;
+ this->buf[this->length] = ch;
+ ++this->length;
+ }
+ else
+ {
+ // ch differs from the previous byte, or the buffer is a
+ // full run. A full buffer or any run must be written out
+ // before ch can be stored; flush_encode() leaves the state
+ // at st_top with length 0.
+ if ((this->length == 128) || (this->state == st_run))
+ {
+ flush_encode();
+ }
+ else if (this->length > 0)
+ {
+ // A second, different byte makes the buffer literal data.
+ this->state = st_copying;
+ }
+ this->buf[this->length] = ch;
+ ++this->length;
+ }
+ }
+}
+
+// Decode len bytes of run-length data from data, writing the expanded
+// bytes to the next pipeline one byte at a time. A length byte below
+// 128 announces (byte + 1) literal bytes, a length byte above 128
+// announces (257 - byte) copies of the following byte, and 128 marks
+// end of data. Decoding state is kept in members, so a group may
+// straddle two calls.
+void
+Pl_RunLength::decode(unsigned char* data, size_t len)
+{
+    for (size_t pos = 0; pos < len; ++pos)
+    {
+        unsigned char byte = data[pos];
+        if (this->state == st_top)
+        {
+            // byte is a length byte. The end-of-data marker (128)
+            // changes nothing, so any data after it is decoded as
+            // well.
+            if (byte > 128)
+            {
+                // Number of copies of the next byte to emit.
+                this->length = 257 - byte;
+                this->state = st_run;
+            }
+            else if (byte < 128)
+            {
+                // Number of bytes still to be copied through.
+                this->length = 1 + byte;
+                this->state = st_copying;
+            }
+        }
+        else if (this->state == st_copying)
+        {
+            this->getNext()->write(&byte, 1);
+            if (--this->length == 0)
+            {
+                this->state = st_top;
+            }
+        }
+        else if (this->state == st_run)
+        {
+            unsigned int copies = this->length;
+            for (unsigned int done = 0; done < copies; ++done)
+            {
+                this->getNext()->write(&byte, 1);
+            }
+            this->state = st_top;
+        }
+    }
+}
+
+// Write whatever the encoder has buffered to the next pipeline and
+// return to st_top with an empty buffer. A run is written as the byte
+// (257 - length) followed by the repeated byte; literal data is written
+// as the byte (length - 1) followed by the buffered bytes. An empty
+// buffer outside st_run writes nothing.
+// Throws std::logic_error if a run has fewer than 2 or more than 128
+// bytes.
+void
+Pl_RunLength::flush_encode()
+{
+    // Coverage bookkeeping only; the two conditions exclude each other.
+    if (this->length == 0)
+    {
+        QTC::TC("libtests", "Pl_RunLength flush empty buffer");
+    }
+    else if (this->length == 128)
+    {
+        QTC::TC("libtests", "Pl_RunLength flush full buffer",
+                (this->state == st_copying ? 0 :
+                 this->state == st_run ? 1 :
+                 -1));
+    }
+
+    if (this->state != st_run)
+    {
+        if (this->length > 0)
+        {
+            unsigned char count =
+                static_cast<unsigned char>(this->length - 1);
+            this->getNext()->write(&count, 1);
+            this->getNext()->write(this->buf, this->length);
+        }
+    }
+    else
+    {
+        bool const valid_run =
+            (this->length >= 2) && (this->length <= 128);
+        if (! valid_run)
+        {
+            throw std::logic_error(
+                "Pl_RunLength: invalid length in flush_encode for run");
+        }
+        unsigned char count =
+            static_cast<unsigned char>(257 - this->length);
+        this->getNext()->write(&count, 1);
+        this->getNext()->write(&this->buf[0], 1);
+    }
+
+    this->length = 0;
+    this->state = st_top;
+}
+
+// Complete the stream. An encoder writes its buffered bytes and then
+// the end-of-data byte (128). A decoder has nothing to add: if the
+// input stopped after a length byte with its data missing, the stream
+// was cut short, and ignoring that is the only sensible response. The
+// next pipeline is finished in either mode.
+void
+Pl_RunLength::finish()
+{
+    bool const encoding = (this->action == a_encode);
+    if (encoding)
+    {
+        flush_encode();
+        unsigned char eod = 128;
+        this->getNext()->write(&eod, 1);
+    }
+    this->getNext()->finish();
+}
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index b8cf1dbc..9a8652a6 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -28,6 +28,7 @@ SRCS_libqpdf = \
libqpdf/Pl_PNGFilter.cc \
libqpdf/Pl_QPDFTokenizer.cc \
libqpdf/Pl_RC4.cc \
+ libqpdf/Pl_RunLength.cc \
libqpdf/Pl_SHA2.cc \
libqpdf/Pl_StdioFile.cc \
libqpdf/QPDF.cc \
diff --git a/libtests/build.mk b/libtests/build.mk
index f779d211..7977c8c5 100644
--- a/libtests/build.mk
+++ b/libtests/build.mk
@@ -14,6 +14,7 @@ BINS_libtests = \
qutil \
random \
rc4 \
+ runlength \
sha2
TARGETS_libtests = $(foreach B,$(BINS_libtests),libtests/$(OUTPUT_DIR)/$(call binname,$(B)))
diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov
index a5fe625f..01ca9efe 100644
--- a/libtests/libtests.testcov
+++ b/libtests/libtests.testcov
@@ -24,3 +24,6 @@ InputSource start_chars matched but not check 0
InputSource not enough bytes 0
InputSource findLast found more than one 0
InputSource found match at buf[0] 0
+Pl_RunLength: switch to run 1
+Pl_RunLength flush full buffer 1
+Pl_RunLength flush empty buffer 0
diff --git a/libtests/qtest/runlength.test b/libtests/qtest/runlength.test
new file mode 100644
index 00000000..26b6155d
--- /dev/null
+++ b/libtests/qtest/runlength.test
@@ -0,0 +1,75 @@
+#!/usr/bin/env perl
+# Round-trip tests for the runlength test program: every input file is
+# encoded and compared with its .encoded counterpart, then the .encoded
+# file is decoded and compared with the original input.
+require 5.008;
+use warnings;
+use strict;
+
+chdir("runlength") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = new TestDriver('runlength');
+
+cleanup();
+
+my @files = (
+    "01",              # basic case, ends with copy
+    "02",              # basic case, ends with run
+    "03",              # long run run
+    "04",              # ends with copy, length % 128 == 0
+    "05",              # run starts at byte 128
+    "empty",           # empty file
+    );
+
+# Create this rather than committing an empty file, which always looks
+# like an error. Fail immediately if it cannot be created; otherwise
+# the problem would only show up later as failures in the "empty" tests.
+open(my $empty, '>', "empty") or die "can't create empty: $!\n";
+close($empty) or die "can't close empty: $!\n";
+
+foreach my $f (@files)
+{
+    $td->runtest("encode $f",
+                 {$td->COMMAND => "runlength -encode $f a"},
+                 {$td->STRING => "", $td->EXIT_STATUS => 0});
+    $td->runtest("check encoded output",
+                 {$td->FILE => "a"},
+                 {$td->FILE => "$f.encoded"});
+    $td->runtest("decode $f.encoded",
+                 {$td->COMMAND => "runlength -decode $f.encoded a"},
+                 {$td->STRING => "", $td->EXIT_STATUS => 0});
+    $td->runtest("check decoded output",
+                 {$td->FILE => "a"},
+                 {$td->FILE => "$f"});
+}
+
+# Two encoded streams back to back put an end-of-data byte in the
+# middle of the input; decoding must yield both plain files in order.
+concatenate("01.encoded", "02.encoded", "concat.encoded");
+concatenate("01", "02", "concat");
+
+$td->runtest("decode with embedded EOD",
+             {$td->COMMAND => "runlength -decode concat.encoded a"},
+             {$td->STRING => "", $td->EXIT_STATUS => 0});
+$td->runtest("check decoded output",
+             {$td->FILE => "a"},
+             {$td->FILE => "concat"});
+
+cleanup();
+
+# Two tests for the concatenated case plus four per input file.
+$td->report(2 + (4 * scalar(@files)));
+
+sub cleanup
+{
+    # Remove the files this test creates; rm -f ignores any that are
+    # already absent.
+    my $command = "rm -f a concat concat.encoded empty";
+    system($command);
+}
+
+sub concatenate
+{
+    # Write the contents of file $a followed by the contents of file $b
+    # to $out, byte for byte. Dies if any file cannot be opened or if
+    # the output cannot be closed, so that a missing input can't
+    # silently produce a short fixture.
+    my ($a, $b, $out) = @_;
+    open(my $out_fh, '>', $out)
+        or die "can't open $out for writing: $!\n";
+    # The encoded files are binary data; prevent any newline
+    # translation.
+    binmode($out_fh);
+    foreach my $f ($a, $b)
+    {
+        open(my $in_fh, '<', $f)
+            or die "can't open $f for reading: $!\n";
+        binmode($in_fh);
+        # Slurp the whole file in a single read.
+        local $/ = undef;
+        print $out_fh <$in_fh>;
+        close($in_fh);
+    }
+    close($out_fh) or die "can't close $out: $!\n";
+}
diff --git a/libtests/qtest/runlength/01 b/libtests/qtest/runlength/01
new file mode 100644
index 00000000..c39144df
--- /dev/null
+++ b/libtests/qtest/runlength/01
@@ -0,0 +1 @@
+wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstv \ No newline at end of file
diff --git a/libtests/qtest/runlength/01.encoded b/libtests/qtest/runlength/01.encoded
new file mode 100644
index 00000000..fc26c6f6
--- /dev/null
+++ b/libtests/qtest/runlength/01.encoded
@@ -0,0 +1 @@
+čwüqrstv€ \ No newline at end of file
diff --git a/libtests/qtest/runlength/02 b/libtests/qtest/runlength/02
new file mode 100644
index 00000000..b5d6cfc8
--- /dev/null
+++ b/libtests/qtest/runlength/02
@@ -0,0 +1 @@
+wwwwwwwwwwwwwwwwwwwwwwwwwqqqqqrstvxxxxxxxxxxxxxxxxxxxxxxx \ No newline at end of file
diff --git a/libtests/qtest/runlength/02.encoded b/libtests/qtest/runlength/02.encoded
new file mode 100644
index 00000000..85bb323e
--- /dev/null
+++ b/libtests/qtest/runlength/02.encoded
@@ -0,0 +1 @@
+čwüqrstvęx€ \ No newline at end of file
diff --git a/libtests/qtest/runlength/03 b/libtests/qtest/runlength/03
new file mode 100644
index 00000000..b8dbdb28
--- /dev/null
+++ b/libtests/qtest/runlength/03
Binary files differ
diff --git a/libtests/qtest/runlength/03.encoded b/libtests/qtest/runlength/03.encoded
new file mode 100644
index 00000000..0fac6b58
--- /dev/null
+++ b/libtests/qtest/runlength/03.encoded
Binary files differ
diff --git a/libtests/qtest/runlength/04 b/libtests/qtest/runlength/04
new file mode 100644
index 00000000..9ad537ac
--- /dev/null
+++ b/libtests/qtest/runlength/04
@@ -0,0 +1 @@
+wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwabababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab \ No newline at end of file
diff --git a/libtests/qtest/runlength/04.encoded b/libtests/qtest/runlength/04.encoded
new file mode 100644
index 00000000..0c97dde2
--- /dev/null
+++ b/libtests/qtest/runlength/04.encoded
@@ -0,0 +1 @@
+wabababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababab€ \ No newline at end of file
diff --git a/libtests/qtest/runlength/05 b/libtests/qtest/runlength/05
new file mode 100644
index 00000000..979bf52b
--- /dev/null
+++ b/libtests/qtest/runlength/05
@@ -0,0 +1 @@
+ababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababbbbbb \ No newline at end of file
diff --git a/libtests/qtest/runlength/05.encoded b/libtests/qtest/runlength/05.encoded
new file mode 100644
index 00000000..2a645eef
--- /dev/null
+++ b/libtests/qtest/runlength/05.encoded
@@ -0,0 +1 @@
+~abababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababababaűb€ \ No newline at end of file
diff --git a/libtests/qtest/runlength/empty.encoded b/libtests/qtest/runlength/empty.encoded
new file mode 100644
index 00000000..5416677b
--- /dev/null
+++ b/libtests/qtest/runlength/empty.encoded
@@ -0,0 +1 @@
+€ \ No newline at end of file
diff --git a/libtests/runlength.cc b/libtests/runlength.cc
new file mode 100644
index 00000000..3dfe0563
--- /dev/null
+++ b/libtests/runlength.cc
@@ -0,0 +1,47 @@
+#include <qpdf/Pl_RunLength.hh>
+#include <qpdf/Pl_StdioFile.hh>
+#include <qpdf/QUtil.hh>
+
+#include <stdio.h>
+#include <string.h>
+#include <iostream>
+#include <stdlib.h>
+
+// Test driver for Pl_RunLength: reads infile, run-length encodes or
+// decodes it according to the mode flag, and writes the result to
+// outfile. Exits with status 2 on a usage error or a read error.
+int main(int argc, char* argv[])
+{
+    // The mode flag must be one of the two documented spellings; an
+    // unrecognized flag is a usage error rather than an implicit
+    // request to decode.
+    bool encode = false;
+    bool args_ok = (argc == 4);
+    if (args_ok)
+    {
+        if (strcmp("-encode", argv[1]) == 0)
+        {
+            encode = true;
+        }
+        else if (strcmp("-decode", argv[1]) != 0)
+        {
+            args_ok = false;
+        }
+    }
+    if (! args_ok)
+    {
+        std::cerr << "Usage: runlength {-encode|-decode} infile outfile"
+                  << std::endl;
+        exit(2);
+    }
+
+    char* infilename = argv[2];
+    char* outfilename = argv[3];
+
+    FILE* infile = QUtil::safe_fopen(infilename, "rb");
+    FILE* outfile = QUtil::safe_fopen(outfilename, "wb");
+    Pl_StdioFile out("stdout", outfile);
+    Pl_RunLength rl(
+        "runlength", &out,
+        (encode ? Pl_RunLength::a_encode : Pl_RunLength::a_decode));
+
+    // Feed the input through the pipeline in 100-byte pieces, so any
+    // input longer than that reaches the pipeline in several write()
+    // calls.
+    unsigned char buf[100];
+    size_t len = 0;
+    while ((len = fread(buf, 1, sizeof(buf), infile)) > 0)
+    {
+        rl.write(buf, len);
+    }
+    // fread returns 0 both at end of file and on failure; report a
+    // failure instead of presenting truncated output as success.
+    if (ferror(infile))
+    {
+        std::cerr << "runlength: error reading " << infilename
+                  << std::endl;
+        exit(2);
+    }
+    rl.finish();
+    fclose(infile);
+    fclose(outfile);
+    return 0;
+}