aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2022-05-07 17:12:15 +0200
committerJay Berkenbilt <ejb@ql.org>2022-05-08 19:45:20 +0200
commit1bc8abfdd3eb9b5a6af5d274c85cd1708bdb9e0c (patch)
treef60184566462e0c5df996ca54bc8f6ba3f3a356f
parent3246923cf2189554f7c348ebf51c9774c09deec8 (diff)
downloadqpdf-1bc8abfdd3eb9b5a6af5d274c85cd1708bdb9e0c.tar.zst
Implement JSON v2 for Stream
Not fully exercised in this commit
-rw-r--r--TODO2
-rw-r--r--include/qpdf/Constants.h6
-rw-r--r--include/qpdf/QPDFObjectHandle.hh43
-rw-r--r--libqpdf/QPDFObjectHandle.cc13
-rw-r--r--libqpdf/QPDF_Stream.cc118
-rw-r--r--libqpdf/qpdf/QPDF_Stream.hh6
-rw-r--r--qpdf/qtest/qpdf/direct-pages-json-objects.out4
-rw-r--r--qpdf/qtest/qpdf/direct-pages-json-pages.out4
-rw-r--r--qpdf/qtest/qpdf/page_api_2-json-objects.out8
-rw-r--r--qpdf/qtest/qpdf/page_api_2-json-pages.out8
10 files changed, 199 insertions, 13 deletions
diff --git a/TODO b/TODO
index db022f5a..94aa2dec 100644
--- a/TODO
+++ b/TODO
@@ -63,6 +63,8 @@ General things to remember:
* Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt.
+* Test stream with invalid data
+
* Consider using camelCase in multi-word key names to be consistent
with job JSON and with how JSON is often represented in languages
that use it more natively.
diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h
index c50a9563..babf215c 100644
--- a/include/qpdf/Constants.h
+++ b/include/qpdf/Constants.h
@@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e {
qpdf_dl_specialized, /* also decode other non-lossy filters */
qpdf_dl_all /* also decode lossy filters */
};
+/* For JSON encoding: selects how a stream's data is represented when
+ * a stream is serialized with getStreamJSON(). */
+enum qpdf_stream_data_json_e {
+ qpdf_sj_none = 0, /* stream data is not represented */
+ qpdf_sj_inline, /* base64-encoded data is included in the "data" key */
+ qpdf_sj_file, /* data is written to a pipeline; "datafile" key names it */
+};
/* R3 Encryption Parameters */
diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh
index 82f4e365..eb16ad39 100644
--- a/include/qpdf/QPDFObjectHandle.hh
+++ b/include/qpdf/QPDFObjectHandle.hh
@@ -1339,8 +1339,8 @@ class QPDFObjectHandle
// unambiguous. The getStreamJSON() call can be used to add
// encoding of the stream's data.
// * Object types that are only valid in content streams (inline
- // image, operator) as well as "reserved" objects are not
- // representable and will be serialized as "null".
+ // image, operator) are serialized as "null". Attempting to
+ // serialize a "reserved" object is an error.
// If dereference_indirect is true and this is an indirect object,
// show the actual contents of the object. The effect of
// dereference_indirect applies only to this object. It is not
@@ -1350,9 +1350,42 @@ class QPDFObjectHandle
// Deprecated version uses v1 for backward compatibility.
// ABI: remove for qpdf 12
- [[deprecated("Use getJSON(int version)")]]
- QPDF_DLL
- JSON getJSON(bool dereference_indirect = false);
+ [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
+ getJSON(bool dereference_indirect = false);
+
+ // This method can be called on a stream to get a more extended
+ // JSON representation of the stream that includes the stream's
+ // data. The JSON object returned is always a dictionary whose
+ // "dict" key is an encoding of the stream's dictionary. The
+ // representation of the data is determined by the json_data
+ // parameter.
+ //
+ // The json_data parameter may have the value qpdf_sj_none,
+ // qpdf_sj_inline, or qpdf_sj_file.
+ //
+ // If json_data is qpdf_sj_none, stream data is not represented.
+ //
+ // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream
+ // data is filtered or not based on the value of decode_level,
+ // which has the same meaning as with pipeStreamData.
+ //
+ // If json_data is qpdf_sj_inline, the base64-encoded stream data
+ // is included in the "data" field of the dictionary that is
+ // returned.
+ //
+ // If json_data is qpdf_sj_file, then the Pipeline ("p") and
+ // data_filename arguments must be supplied. The value of
+ // data_filename is stored in the resulting json in the "datafile"
+ // key but is not otherwise used. The stream data itself (raw or
+ // filtered depending on decode level), is written to the
+ // pipeline via pipeStreamData().
+ //
+ // A std::logic_error is thrown if a pipeline is supplied when
+ // json_data is not qpdf_sj_file, or if the pipeline is null or
+ // data_filename is empty when json_data is qpdf_sj_file.
+ QPDF_DLL
+ JSON getStreamJSON(
+ int json_version,
+ qpdf_stream_data_json_e json_data,
+ qpdf_stream_decode_level_e decode_level,
+ Pipeline* p,
+ std::string const& data_filename);
// Legacy helper methods for commonly performed operations on
// pages. Newer code should use QPDFPageObjectHelper instead. The
diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc
index 33155097..1d6a9ccf 100644
--- a/libqpdf/QPDFObjectHandle.cc
+++ b/libqpdf/QPDFObjectHandle.cc
@@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
}
}
+// Public wrapper: check that this handle refers to a stream, then
+// forward every argument unchanged to the underlying QPDF_Stream.
+JSON
+QPDFObjectHandle::getStreamJSON(
+    int json_version,
+    qpdf_stream_data_json_e json_data,
+    qpdf_stream_decode_level_e decode_level,
+    Pipeline* p,
+    std::string const& data_filename)
+{
+    assertStream();
+    auto* stream_obj = dynamic_cast<QPDF_Stream*>(obj.get());
+    return stream_obj->getStreamJSON(
+        json_version, json_data, decode_level, p, data_filename);
+}
+
QPDFObjectHandle
QPDFObjectHandle::wrapInArray()
{
diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc
index 8940b7cf..67a3ad0d 100644
--- a/libqpdf/QPDF_Stream.cc
+++ b/libqpdf/QPDF_Stream.cc
@@ -2,8 +2,10 @@
#include <qpdf/ContentNormalizer.hh>
#include <qpdf/Pipeline.hh>
+#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_Count.hh>
+#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_Flate.hh>
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QIntC.hh>
@@ -54,6 +56,18 @@ namespace
return nullptr;
}
};
+
+ // Callable that pipes a stream's data to a pipeline on demand. It
+ // is handed to JSON::makeBlob() by QPDF_Stream::getStreamJSON() to
+ // supply the contents of the "data" key.
+ // NOTE(review): only a raw pointer to the stream is held;
+ // presumably the stream must outlive any JSON object that holds
+ // this provider -- confirm against JSON::makeBlob's contract.
+ class StreamBlobProvider
+ {
+ public:
+ StreamBlobProvider(
+ QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level);
+ // Write the stream's data to the given pipeline using the
+ // decode level supplied at construction.
+ void operator()(Pipeline*);
+
+ private:
+ // Stream whose data is piped; not owned.
+ QPDF_Stream* stream;
+ // Decode level passed to pipeStreamData().
+ qpdf_stream_decode_level_e decode_level;
+ };
} // namespace
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
@@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>>
{"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
};
+StreamBlobProvider::StreamBlobProvider(
+ QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) :
+ stream(stream),
+ decode_level(decode_level)
+{
+}
+
+// Pipe the stream's data into p at the decode level captured at
+// construction time.
+void
+StreamBlobProvider::operator()(Pipeline* p)
+{
+    QPDF_Stream* const source = this->stream;
+    source->pipeStreamData(
+        p, nullptr, 0, this->decode_level, false, false);
+}
+
QPDF_Stream::QPDF_Stream(
QPDF* qpdf,
int objid,
@@ -153,8 +180,95 @@ QPDF_Stream::unparse()
JSON
QPDF_Stream::getJSON(int json_version)
{
- // QXXXQ
- return this->stream_dict.getJSON(json_version);
+ // JSON version 1 represents a stream as just its dictionary.
+ if (json_version == 1) {
+ return this->stream_dict.getJSON(json_version);
+ }
+ // Other versions use the extended form without stream data, i.e.
+ // a dictionary whose "dict" key holds the stream dictionary.
+ return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, "");
+}
+
+// Build the extended JSON representation of this stream.
+//
+// The result is always a JSON dictionary with a "dict" key holding the
+// stream dictionary. Depending on json_data it may also contain:
+//   * qpdf_sj_none:   nothing else; p must be null.
+//   * qpdf_sj_inline: a "data" blob that pipes the stream data when the
+//                     JSON is written; p must be null.
+//   * qpdf_sj_file:   a "datafile" key set to data_filename; the stream
+//                     data is written to p, which must be non-null, and
+//                     data_filename must be non-empty.
+//
+// When stream data is represented, "/Length" is removed from the
+// emitted dictionary, and "/Filter" and "/DecodeParms" are removed
+// only if the data was actually decoded.
+//
+// Throws std::logic_error when the arguments are inconsistent with
+// json_data, or if no data could be captured in file mode.
+JSON
+QPDF_Stream::getStreamJSON(
+    int json_version,
+    qpdf_stream_data_json_e json_data,
+    qpdf_stream_decode_level_e decode_level,
+    Pipeline* p,
+    std::string const& data_filename)
+{
+    switch (json_data) {
+    case qpdf_sj_none:
+    case qpdf_sj_inline:
+        if (p != nullptr) {
+            throw std::logic_error(
+                "QPDF_Stream::getStreamJSON: pipeline should "
+                "only be supplied when json_data is file");
+        }
+        break;
+    case qpdf_sj_file:
+        if (p == nullptr) {
+            throw std::logic_error(
+                "QPDF_Stream::getStreamJSON: pipeline must "
+                "be supplied when json_data is file");
+        }
+        if (data_filename.empty()) {
+            throw std::logic_error(
+                "QPDF_Stream::getStreamJSON: data_filename "
+                "must be supplied when json_data is file");
+        }
+        break;
+    }
+
+    auto dict = this->stream_dict;
+    JSON result = JSON::makeDictionary();
+    if (json_data != qpdf_sj_none) {
+        std::shared_ptr<Buffer> buf;
+        // Set by pipeStreamData(): whether filtering was applied.
+        bool filtered = false;
+        // Whether we are (still) asking for decoded data.
+        bool filter = (decode_level != qpdf_dl_none);
+        for (int attempt = 1; attempt <= 2; ++attempt) {
+            Pl_Discard discard;
+            std::shared_ptr<Pl_Buffer> buf_pl;
+            Pipeline* data_pipeline = &discard;
+            if (json_data == qpdf_sj_file) {
+                // We need to capture the data to write
+                buf_pl = std::make_shared<Pl_Buffer>("stream data");
+                data_pipeline = buf_pl.get();
+            }
+            // pipeStreamData() returns overall success and reports
+            // through its second argument whether the data was
+            // filtered; the two must not be conflated.
+            bool succeeded = pipeStreamData(
+                data_pipeline,
+                &filtered,
+                0,
+                decode_level,
+                false,
+                (attempt == 1));
+            if ((attempt == 1) &&
+                ((!succeeded) || (filter && (!filtered)))) {
+                // Try again, this time asking for the raw data. The
+                // decode level has to be lowered as well; otherwise
+                // the retry repeats the failed request.
+                filter = false;
+                decode_level = qpdf_dl_none;
+            } else {
+                // Either the data is good or this was the last
+                // attempt; keep whatever was produced.
+                if (buf_pl.get()) {
+                    buf = buf_pl->getBufferSharedPointer();
+                }
+                break;
+            }
+        }
+        // We can use unsafeShallowCopy because we are only
+        // touching top-level keys.
+        dict = this->stream_dict.unsafeShallowCopy();
+        dict.removeKey("/Length");
+        if (filter && filtered) {
+            // The emitted data is decoded, so the filter description
+            // no longer applies to it.
+            dict.removeKey("/Filter");
+            dict.removeKey("/DecodeParms");
+        }
+        if (json_data == qpdf_sj_file) {
+            result.addDictionaryMember(
+                "datafile", JSON::makeString(data_filename));
+            if (!buf.get()) {
+                throw std::logic_error(
+                    "QPDF_Stream: failed to get stream data in json file mode");
+            }
+            p->write(buf->getBuffer(), buf->getSize());
+        } else if (json_data == qpdf_sj_inline) {
+            // Use the decode level that actually worked so the blob
+            // matches the dictionary emitted below.
+            result.addDictionaryMember(
+                "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
+        } else {
+            throw std::logic_error(
+                "QPDF_Stream: unexpected value of json_data");
+        }
+    }
+    result.addDictionaryMember("dict", dict.getJSON(json_version));
+    return result;
}
QPDFObject::object_type_e
diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh
index 5d8de669..fcf98ffa 100644
--- a/libqpdf/qpdf/QPDF_Stream.hh
+++ b/libqpdf/qpdf/QPDF_Stream.hh
@@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject
QPDFObjectHandle const& decode_parms);
void
addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
+ // Return a JSON dictionary whose "dict" key encodes the stream
+ // dictionary, optionally with the stream data inline ("data") or
+ // written to pipeline p and referenced by name ("datafile"),
+ // according to json_data. Called by
+ // QPDFObjectHandle::getStreamJSON().
+ JSON getStreamJSON(
+ int json_version,
+ qpdf_stream_data_json_e json_data,
+ qpdf_stream_decode_level_e decode_level,
+ Pipeline* p,
+ std::string const& data_filename);
void replaceDict(QPDFObjectHandle const& new_dict);
diff --git a/qpdf/qtest/qpdf/direct-pages-json-objects.out b/qpdf/qtest/qpdf/direct-pages-json-objects.out
index 1e0fe469..a7cf4e96 100644
--- a/qpdf/qtest/qpdf/direct-pages-json-objects.out
+++ b/qpdf/qtest/qpdf/direct-pages-json-objects.out
@@ -49,7 +49,9 @@
"/Type": "/Pages"
},
"3 0 R": {
- "/Length": "4 0 R"
+ "dict": {
+ "/Length": "4 0 R"
+ }
},
"4 0 R": 44,
"5 0 R": {
diff --git a/qpdf/qtest/qpdf/direct-pages-json-pages.out b/qpdf/qtest/qpdf/direct-pages-json-pages.out
index d58aafb1..4ebc4d29 100644
--- a/qpdf/qtest/qpdf/direct-pages-json-pages.out
+++ b/qpdf/qtest/qpdf/direct-pages-json-pages.out
@@ -39,7 +39,9 @@
"/Type": "/Pages"
},
"3 0 R": {
- "/Length": "4 0 R"
+ "dict": {
+ "/Length": "4 0 R"
+ }
},
"4 0 R": 44,
"5 0 R": {
diff --git a/qpdf/qtest/qpdf/page_api_2-json-objects.out b/qpdf/qtest/qpdf/page_api_2-json-objects.out
index 995a00e4..3fc137ac 100644
--- a/qpdf/qtest/qpdf/page_api_2-json-objects.out
+++ b/qpdf/qtest/qpdf/page_api_2-json-objects.out
@@ -62,7 +62,9 @@
"/Type": "/Page"
},
"6 0 R": {
- "/Length": "7 0 R"
+ "dict": {
+ "/Length": "7 0 R"
+ }
},
"7 0 R": 47,
"8 0 R": {
@@ -72,7 +74,9 @@
"/Type": "/Font"
},
"9 0 R": {
- "/Length": "10 0 R"
+ "dict": {
+ "/Length": "10 0 R"
+ }
},
"10 0 R": 47,
"trailer": {
diff --git a/qpdf/qtest/qpdf/page_api_2-json-pages.out b/qpdf/qtest/qpdf/page_api_2-json-pages.out
index caf27100..c4b7632c 100644
--- a/qpdf/qtest/qpdf/page_api_2-json-pages.out
+++ b/qpdf/qtest/qpdf/page_api_2-json-pages.out
@@ -94,7 +94,9 @@
"/Type": "/Page"
},
"6 0 R": {
- "/Length": "7 0 R"
+ "dict": {
+ "/Length": "7 0 R"
+ }
},
"7 0 R": 47,
"8 0 R": {
@@ -104,7 +106,9 @@
"/Type": "/Font"
},
"9 0 R": {
- "/Length": "10 0 R"
+ "dict": {
+ "/Length": "10 0 R"
+ }
},
"10 0 R": 47,
"11 0 R": {