aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2022-05-02 21:46:07 +0200
committerJay Berkenbilt <ejb@ql.org>2022-05-04 00:31:22 +0200
commit3d9bac43da5937235c962a53e68475f796c370aa (patch)
treebc05d9b248a0a5bad0f5ae56c490823c701cff21
parentf07284da18d1f326420efcf672b9c7c4d4232566 (diff)
downloadqpdf-3d9bac43da5937235c962a53e68475f796c370aa.tar.zst
Add internal Pl_Base64
Bidirectional base64; will be used by JSON v2.
-rw-r--r--TODO1
-rw-r--r--libqpdf/CMakeLists.txt1
-rw-r--r--libqpdf/Pl_Base64.cc191
-rw-r--r--libqpdf/qpdf/Pl_Base64.hh30
-rw-r--r--libtests/CMakeLists.txt1
-rw-r--r--libtests/base64.cc81
-rw-r--r--libtests/qtest/base64.test58
-rw-r--r--libtests/qtest/base64/1.dec1
-rw-r--r--libtests/qtest/base64/1.enc1
-rw-r--r--libtests/qtest/base64/2.dec1
-rw-r--r--libtests/qtest/base64/2.enc1
-rw-r--r--libtests/qtest/base64/3.dec1
-rw-r--r--libtests/qtest/base64/3.enc1
-rw-r--r--libtests/qtest/base64/4.dec1
-rw-r--r--libtests/qtest/base64/4.enc1
-rw-r--r--libtests/qtest/base64/5.decbin0 -> 350 bytes
-rw-r--r--libtests/qtest/base64/5.enc1
17 files changed, 371 insertions, 1 deletions
diff --git a/TODO b/TODO
index ef70b7ac..14b7cfcb 100644
--- a/TODO
+++ b/TODO
@@ -45,7 +45,6 @@ notes from 5/2:
Need new pipelines:
* Pl_OStream(std::ostream) with semantics like Pl_StdioFile
* Pl_String to std::string with semantics like Pl_Buffer
-* Pl_Base64
New Pipeline methods:
* writeString(std::string const&)
diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt
index 305977de..72b87975 100644
--- a/libqpdf/CMakeLists.txt
+++ b/libqpdf/CMakeLists.txt
@@ -35,6 +35,7 @@ set(libqpdf_SOURCES
Pl_AES_PDF.cc
Pl_ASCII85Decoder.cc
Pl_ASCIIHexDecoder.cc
+ Pl_Base64.cc
Pl_Buffer.cc
Pl_Concatenate.cc
Pl_Count.cc
diff --git a/libqpdf/Pl_Base64.cc b/libqpdf/Pl_Base64.cc
new file mode 100644
index 00000000..bfacc1db
--- /dev/null
+++ b/libqpdf/Pl_Base64.cc
@@ -0,0 +1,191 @@
+#include <qpdf/Pl_Base64.hh>
+
+#include <qpdf/QIntC.hh>
+#include <qpdf/QUtil.hh>
+#include <algorithm>
+#include <cstring>
+#include <stdexcept>
+
+// Narrow an unsigned integer to a plain char.
+static char
+to_c(unsigned int ch)
+{
+    char const narrowed = static_cast<char>(ch);
+    return narrowed;
+}
+
+// Narrow an int to an unsigned char; only the low eight bits survive.
+static unsigned char
+to_uc(int ch)
+{
+    unsigned char const narrowed = static_cast<unsigned char>(ch);
+    return narrowed;
+}
+
+// Hand back the argument as an int. Arguments of narrower integer
+// types are promoted at the call site, so no cast is needed here.
+static int
+to_i(int i)
+{
+    return i;
+}
+
+// Construct a base64 stage. identifier and next are handed to the
+// Pipeline base class; action selects whether write() encodes or
+// decodes.
+Pl_Base64::Pl_Base64(
+    char const* identifier, Pipeline* next, action_e action) :
+    Pipeline(identifier, next),
+    action(action),
+    pos(0),
+    end_of_data(false),
+    finished(false)
+{
+    // buf is not covered by the initializer list; reset() zeroes it.
+    this->reset();
+}
+
+// Feed len bytes at data into the encoder or decoder, depending on
+// the action chosen at construction. Throws std::logic_error once
+// finish() has been called.
+void
+Pl_Base64::write(unsigned char* data, size_t len)
+{
+    if (this->finished) {
+        throw std::logic_error("Pl_Base64 used after finished");
+    }
+    switch (this->action) {
+    case a_decode:
+        decode(data, len);
+        break;
+    default:
+        encode(data, len);
+        break;
+    }
+}
+
+// Collect base64 characters into buf, skipping anything
+// QUtil::is_space() reports as whitespace, and flush each time four
+// characters have accumulated.
+void
+Pl_Base64::decode(unsigned char* data, size_t len)
+{
+    unsigned char const* const stop = data + len;
+    for (unsigned char const* cur = data; cur != stop; ++cur) {
+        if (QUtil::is_space(to_c(*cur))) {
+            continue;
+        }
+        this->buf[this->pos] = *cur;
+        ++this->pos;
+        if (this->pos == 4) {
+            flush();
+        }
+    }
+}
+
+// Collect raw bytes into buf and flush each time three bytes (one
+// full base64 group) have accumulated.
+void
+Pl_Base64::encode(unsigned char* data, size_t len)
+{
+    for (size_t offset = 0; offset < len; ++offset) {
+        this->buf[this->pos] = data[offset];
+        ++this->pos;
+        if (this->pos == 3) {
+            flush();
+        }
+    }
+}
+
+// Emit the buffered group through flush_decode() or flush_encode(),
+// according to this->action, and then clear the buffer.
+//
+// The buffer is cleared even when the conversion or the downstream
+// write throws. decode() and encode() only test pos for equality with
+// the group size, so if pos were left at that size after a failure, a
+// caller that caught the exception and kept writing would index past
+// the end of buf. The exception itself is rethrown unchanged.
+void
+Pl_Base64::flush()
+{
+    try {
+        if (this->action == a_decode) {
+            flush_decode();
+        } else {
+            flush_encode();
+        }
+    } catch (...) {
+        reset();
+        throw;
+    }
+    reset();
+}
+
+// Decode the four buffered characters into up to three bytes and pass
+// them to the next pipeline. Both the standard ('+', '/') and the
+// URL-safe ('-', '_') alphabets are accepted. Throws
+// std::runtime_error if an earlier group already ended in padding, if
+// a character is outside the accepted alphabet, or if '=' appears
+// anywhere other than the tail of the group.
+void
+Pl_Base64::flush_decode()
+{
+    if (this->end_of_data) {
+        throw std::runtime_error(
+            getIdentifier() + ": base64 decode: data follows pad characters");
+    }
+    int n_pad = 0;
+    int combined = 0;
+    for (size_t idx = 0; idx < 4; ++idx) {
+        char const c = to_c(this->buf[idx]);
+        int sextet = 0;
+        if (('A' <= c) && (c <= 'Z')) {
+            sextet = c - 'A';
+        } else if (('a' <= c) && (c <= 'z')) {
+            sextet = 26 + (c - 'a');
+        } else if (('0' <= c) && (c <= '9')) {
+            sextet = 52 + (c - '0');
+        } else if ((c == '+') || (c == '-')) {
+            sextet = 62;
+        } else if ((c == '/') || (c == '_')) {
+            sextet = 63;
+        } else {
+            // '=' is legal only as the fourth character, or as the
+            // third character when the fourth is '=' as well.
+            bool const pad_allowed =
+                (idx == 3) || ((idx == 2) && (this->buf[3] == '='));
+            if ((c != '=') || !pad_allowed) {
+                throw std::runtime_error(
+                    getIdentifier() + ": base64 decode: invalid input");
+            }
+            ++n_pad;
+            this->end_of_data = true;
+        }
+        // Shifting the accumulator each round leaves the first sextet
+        // in bits 23..18 and the last in bits 5..0.
+        combined = (combined << 6) | sextet;
+    }
+
+    unsigned char out[3];
+    out[0] = to_uc(combined >> 16);
+    out[1] = to_uc(0xff & (combined >> 8));
+    out[2] = to_uc(0xff & combined);
+
+    // Each pad character stands for one byte that was never present.
+    getNext()->write(out, QIntC::to_size(3 - n_pad));
+}
+
+// Encode the buffered bytes as one four-character base64 group and
+// pass it to the next pipeline. this->pos says how many of the three
+// slots hold data; the remaining slots are zero (reset() clears buf)
+// and their output positions are replaced with '=' padding.
+void
+Pl_Base64::flush_encode()
+{
+    static char const alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    int const group =
+        (this->buf[0] << 16) | (this->buf[1] << 8) | this->buf[2];
+    unsigned char out[4];
+    // Walk the 24-bit group from its most significant sextet down.
+    int shift = 18;
+    for (unsigned char& slot : out) {
+        slot = to_uc(alphabet[0x3f & (group >> shift)]);
+        shift -= 6;
+    }
+    // n input bytes produce n + 1 significant characters; pad the rest.
+    for (size_t k = this->pos + 1; k < 4; ++k) {
+        out[k] = '=';
+    }
+    getNext()->write(out, 4);
+}
+
+// Flush any partial group, mark the pipeline finished, and finish the
+// next pipeline. When decoding, a short final group is completed with
+// '=' first, so unpadded input is handled by the same validation as
+// padded input.
+void
+Pl_Base64::finish()
+{
+    bool const have_partial_group = (this->pos > 0);
+    if (have_partial_group) {
+        if (this->finished) {
+            throw std::logic_error("Pl_Base64 used after finished");
+        }
+        if (this->action == a_decode) {
+            size_t fill = this->pos;
+            while (fill < 4) {
+                this->buf[fill] = '=';
+                ++fill;
+            }
+        }
+        flush();
+    }
+    this->finished = true;
+    getNext()->finish();
+}
+
+// Discard the buffered group: zero every slot of buf and rewind pos.
+void
+Pl_Base64::reset()
+{
+    memset(this->buf, 0, sizeof(this->buf));
+    this->pos = 0;
+}
diff --git a/libqpdf/qpdf/Pl_Base64.hh b/libqpdf/qpdf/Pl_Base64.hh
new file mode 100644
index 00000000..313bd2cb
--- /dev/null
+++ b/libqpdf/qpdf/Pl_Base64.hh
@@ -0,0 +1,30 @@
+#ifndef PL_BASE64_HH
+#define PL_BASE64_HH
+
+#include <qpdf/Pipeline.hh>
+
+// Pipeline stage that base64-encodes or base64-decodes the bytes
+// written to it and passes the result to the next pipeline.
+class Pl_Base64: public Pipeline
+{
+  public:
+    enum action_e { a_encode, a_decode };
+    // identifier and next are forwarded to Pipeline; action selects
+    // the direction of the conversion.
+    Pl_Base64(char const* identifier, Pipeline* next, action_e action);
+    virtual ~Pl_Base64() = default;
+    // Convert len bytes starting at data. Throws std::logic_error if
+    // called after finish().
+    virtual void write(unsigned char* data, size_t len) override;
+    // Flush any partial group and finish the next pipeline.
+    virtual void finish() override;
+
+  private:
+    // Parameter names match the definitions and avoid shadowing the
+    // buf member below.
+    void decode(unsigned char* data, size_t len);
+    void encode(unsigned char* data, size_t len);
+    // Emit the buffered group in the configured direction, then reset.
+    void flush();
+    void flush_decode();
+    void flush_encode();
+    // Zero buf and set pos back to 0.
+    void reset();
+
+    action_e action;
+    // Pending group: up to 4 characters when decoding, up to 3 bytes
+    // when encoding.
+    unsigned char buf[4];
+    // Number of slots of buf currently in use.
+    size_t pos;
+    // Set once a decoded group contained '=' padding; any later group
+    // is rejected.
+    bool end_of_data;
+    // Set by finish(); write() refuses further input afterwards.
+    bool finished;
+};
+
+#endif // PL_BASE64_HH
diff --git a/libtests/CMakeLists.txt b/libtests/CMakeLists.txt
index 96f93482..9eb9a490 100644
--- a/libtests/CMakeLists.txt
+++ b/libtests/CMakeLists.txt
@@ -3,6 +3,7 @@ set(TEST_PROGRAMS
aes
arg_parser
ascii85
+ base64
bits
buffer
closed_file_input_source
diff --git a/libtests/base64.cc b/libtests/base64.cc
new file mode 100644
index 00000000..66f2d828
--- /dev/null
+++ b/libtests/base64.cc
@@ -0,0 +1,81 @@
+#include <qpdf/Pl_Base64.hh>
+
+#include <qpdf/Pl_StdioFile.hh>
+#include <qpdf/QUtil.hh>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <stdexcept>
+
+// Read up to bytes bytes (at most 1000) from f and write whatever
+// arrived to p. Returns true when the full count was read. On a short
+// read, exits with status 2 if the stream reports an error; otherwise
+// finishes p and returns false.
+static bool
+write_some(FILE* f, size_t bytes, Pipeline* p)
+{
+    unsigned char chunk[1000];
+    assert(bytes <= sizeof(chunk));
+    size_t const got = fread(chunk, 1, bytes, f);
+    if (got > 0) {
+        p->write(chunk, got);
+    }
+    if (got >= bytes) {
+        return (got == bytes);
+    }
+    // Short read: either an I/O error or end of input.
+    if (ferror(f)) {
+        std::cerr << "error reading file" << std::endl;
+        exit(2);
+    }
+    p->finish();
+    return false;
+}
+
+// Print the command-line synopsis to stderr and terminate with exit
+// status 2.
+static void
+usage()
+{
+    char const* const synopsis = "Usage: base64 encode|decode";
+    std::cerr << synopsis << std::endl;
+    exit(2);
+}
+
+// Test driver: reads stdin in deliberately uneven chunks, runs it
+// through Pl_Base64 in the direction named by argv[1] ("encode" or
+// "decode"), and writes the result to stdout. Exits with status 2 on
+// a usage error or when the pipeline throws; the exception text is
+// printed on stdout.
+int
+main(int argc, char* argv[])
+{
+    if (argc != 2) {
+        usage();
+    }
+    QUtil::binary_stdout();
+    QUtil::binary_stdin();
+    Pl_Base64::action_e action = Pl_Base64::a_decode;
+    if (strcmp(argv[1], "encode") == 0) {
+        action = Pl_Base64::a_encode;
+    } else if (strcmp(argv[1], "decode") != 0) {
+        usage();
+    }
+
+    try {
+        Pl_StdioFile out("stdout", stdout);
+        // The same object serves both directions. Its identifier is
+        // left as "decode" because Pl_Base64 prefixes its error
+        // messages with the identifier.
+        Pl_Base64 pipeline("decode", &out, action);
+        // The comments are "n: n%4 n%3", where n is the number of
+        // bytes read at the end of the call, and are there to
+        // indicate that we are reading in chunks that exercise
+        // various boundary conditions around subsequent writes and
+        // the state of buf and pos. There are some writes that don't
+        // do flush at all, some that call flush multiple times, and
+        // some that start in the middle and do flush, and this is
+        // true for both encode and decode.
+        if (write_some(stdin, 1, &pipeline) && // 1: 1 1
+            write_some(stdin, 4, &pipeline) && // 5: 1 2
+            write_some(stdin, 2, &pipeline) && // 7: 3 1
+            write_some(stdin, 2, &pipeline) && // 9: 1 0
+            write_some(stdin, 7, &pipeline) && // 16: 0 1
+            write_some(stdin, 1, &pipeline) && // 17: 1 2
+            write_some(stdin, 9, &pipeline) && // 26: 2 2
+            write_some(stdin, 2, &pipeline)) { // 28: 0 1
+            // Drain the rest of the input in large chunks.
+            while (write_some(stdin, 1000, &pipeline)) {
+            }
+        }
+    } catch (std::exception const& e) {
+        std::cout << "exception: " << e.what() << std::endl;
+        exit(2);
+    }
+
+    return 0;
+}
diff --git a/libtests/qtest/base64.test b/libtests/qtest/base64.test
new file mode 100644
index 00000000..9e709c73
--- /dev/null
+++ b/libtests/qtest/base64.test
@@ -0,0 +1,58 @@
+#!/usr/bin/env perl
+require 5.008;
+use warnings;
+use strict;
+
+# Run from the fixture directory so the relative paths used by the
+# tests below resolve.
+chdir("base64") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = TestDriver->new('base64');
+
+# Remove scratch files left behind by an earlier run.
+cleanup();
+
+# ** Do not use normalize newlines on these tests. **
+
+# Round-trip each numbered fixture: N.dec must encode to N.enc, and
+# N.enc must decode back to N.dec.
+my $n = 5;
+for (my $i = 1; $i <= $n; ++$i)
+{
+    $td->runtest("encode $i",
+                 {$td->COMMAND => "base64 encode < $i.dec"},
+                 {$td->FILE => "$i.enc", $td->EXIT_STATUS => 0});
+    $td->runtest("decode $i",
+                 {$td->COMMAND => "base64 decode < $i.enc"},
+                 {$td->FILE => "$i.dec", $td->EXIT_STATUS => 0});
+}
+
+# The canonical encoding of "salad" is c2FsYWQ=. The final 'R' here
+# carries bits that fall outside the decoded bytes; the decoder is
+# expected to ignore them rather than reject the input.
+$td->runtest("non-zero discard bits",
+ {$td->COMMAND => "echo c2FsYWR= | base64 decode"},
+ {$td->STRING => "salad", $td->EXIT_STATUS => 0});
+# Decode the same value written with the standard alphabet (+/) and
+# with the URL-safe alphabet (-_). The output goes to files a and b,
+# so nothing is expected on stdout.
+$td->runtest("write with +/",
+ {$td->COMMAND => "echo +/== | base64 decode > a"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+$td->runtest("write with -_",
+ {$td->COMMAND => "echo -_== | base64 decode > b"},
+ {$td->STRING => "", $td->EXIT_STATUS => 0});
+# Both spellings must have produced identical bytes.
+$td->runtest("interchangeability of +/ and -_",
+ {$td->FILE => "a"},
+ {$td->FILE => "b"});
+
+# Error paths: the test program prints the exception text and exits
+# with status 2.
+$td->runtest("invalid characters",
+ {$td->COMMAND => "echo aaaaa! | base64 decode"},
+ {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
+# '=' as the second character of a group is not valid padding.
+$td->runtest("invalid pad",
+ {$td->COMMAND => "echo a= | base64 decode"},
+ {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
+# Nothing may follow a group that ended in padding.
+$td->runtest("data after pad",
+ {$td->COMMAND => "echo aa==potato | base64 decode"},
+ {$td->REGEXP => ".*data follows pad characters.*",
+ $td->EXIT_STATUS => 2});
+
+cleanup();
+
+# Seven stand-alone tests above plus two per numbered fixture.
+$td->report(7 + (2 * $n));
+
+# Delete the scratch files written by the alphabet tests.
+sub cleanup
+{
+    my @scratch = ('a', 'b');
+    unlink(@scratch);
+}
diff --git a/libtests/qtest/base64/1.dec b/libtests/qtest/base64/1.dec
new file mode 100644
index 00000000..d800886d
--- /dev/null
+++ b/libtests/qtest/base64/1.dec
@@ -0,0 +1 @@
+123 \ No newline at end of file
diff --git a/libtests/qtest/base64/1.enc b/libtests/qtest/base64/1.enc
new file mode 100644
index 00000000..e644af9e
--- /dev/null
+++ b/libtests/qtest/base64/1.enc
@@ -0,0 +1 @@
+MTIz \ No newline at end of file
diff --git a/libtests/qtest/base64/2.dec b/libtests/qtest/base64/2.dec
new file mode 100644
index 00000000..274c0052
--- /dev/null
+++ b/libtests/qtest/base64/2.dec
@@ -0,0 +1 @@
+1234 \ No newline at end of file
diff --git a/libtests/qtest/base64/2.enc b/libtests/qtest/base64/2.enc
new file mode 100644
index 00000000..9c06465b
--- /dev/null
+++ b/libtests/qtest/base64/2.enc
@@ -0,0 +1 @@
+MTIzNA== \ No newline at end of file
diff --git a/libtests/qtest/base64/3.dec b/libtests/qtest/base64/3.dec
new file mode 100644
index 00000000..11d0d991
--- /dev/null
+++ b/libtests/qtest/base64/3.dec
@@ -0,0 +1 @@
+This file has a multiple of four bytes and is longer than four bytes...
diff --git a/libtests/qtest/base64/3.enc b/libtests/qtest/base64/3.enc
new file mode 100644
index 00000000..6dd9347a
--- /dev/null
+++ b/libtests/qtest/base64/3.enc
@@ -0,0 +1 @@
+VGhpcyBmaWxlIGhhcyBhIG11bHRpcGxlIG9mIGZvdXIgYnl0ZXMgYW5kIGlzIGxvbmdlciB0aGFuIGZvdXIgYnl0ZXMuLi4K \ No newline at end of file
diff --git a/libtests/qtest/base64/4.dec b/libtests/qtest/base64/4.dec
new file mode 100644
index 00000000..a5e2af49
--- /dev/null
+++ b/libtests/qtest/base64/4.dec
@@ -0,0 +1 @@
+This file has a non-multiple of four bytes and is longer than four bytes.
diff --git a/libtests/qtest/base64/4.enc b/libtests/qtest/base64/4.enc
new file mode 100644
index 00000000..e43b9c1f
--- /dev/null
+++ b/libtests/qtest/base64/4.enc
@@ -0,0 +1 @@
+VGhpcyBmaWxlIGhhcyBhIG5vbi1tdWx0aXBsZSBvZiBmb3VyIGJ5dGVzIGFuZCBpcyBsb25nZXIgdGhhbiBmb3VyIGJ5dGVzLgo= \ No newline at end of file
diff --git a/libtests/qtest/base64/5.dec b/libtests/qtest/base64/5.dec
new file mode 100644
index 00000000..ed99eb1a
--- /dev/null
+++ b/libtests/qtest/base64/5.dec
Binary files differ
diff --git a/libtests/qtest/base64/5.enc b/libtests/qtest/base64/5.enc
new file mode 100644
index 00000000..e399235c
--- /dev/null
+++ b/libtests/qtest/base64/5.enc
@@ -0,0 +1 @@
+VGhpcyBmaWxlIGFzIG9uZSBvZiBldmVyeSBieXRlIGluIGl0LiBOb3QgdGhhdCBpdCByZWFsbHkgbWFrZXMgYW55CmRpZmZlcmVuY2UsIGJ1dCB3aHkgbm90LgoKAAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/wo= \ No newline at end of file