From 1bc8abfdd3eb9b5a6af5d274c85cd1708bdb9e0c Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 7 May 2022 11:12:15 -0400 Subject: Implement JSON v2 for Stream Not fully exercised in this commit --- TODO | 2 + include/qpdf/Constants.h | 6 ++ include/qpdf/QPDFObjectHandle.hh | 43 ++++++++-- libqpdf/QPDFObjectHandle.cc | 13 +++ libqpdf/QPDF_Stream.cc | 118 +++++++++++++++++++++++++- libqpdf/qpdf/QPDF_Stream.hh | 6 ++ qpdf/qtest/qpdf/direct-pages-json-objects.out | 4 +- qpdf/qtest/qpdf/direct-pages-json-pages.out | 4 +- qpdf/qtest/qpdf/page_api_2-json-objects.out | 8 +- qpdf/qtest/qpdf/page_api_2-json-pages.out | 8 +- 10 files changed, 199 insertions(+), 13 deletions(-) diff --git a/TODO b/TODO index db022f5a..94aa2dec 100644 --- a/TODO +++ b/TODO @@ -63,6 +63,8 @@ General things to remember: * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt. +* Test stream with invalid data + * Consider using camelCase in multi-word key names to be consistent with job JSON and with how JSON is often represented in languages that use it more natively. diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index c50a9563..babf215c 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e { qpdf_dl_specialized, /* also decode other non-lossy filters */ qpdf_dl_all /* also decode lossy filters */ }; +/* For JSON encoding */ +enum qpdf_stream_data_json_e { + qpdf_sj_none = 0, + qpdf_sj_inline, + qpdf_sj_file, +}; /* R3 Encryption Parameters */ diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 82f4e365..eb16ad39 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -1339,8 +1339,8 @@ class QPDFObjectHandle // unambiguous. The getStreamJSON() call can be used to add // encoding of the stream's data. 
// * Object types that are only valid in content streams (inline - // image, operator) as well as "reserved" objects are not - // representable and will be serialized as "null". + // image, operator) are serialized as "null". Attempting to + // serialize a "reserved" object is an error. // If dereference_indirect is true and this is an indirect object, // show the actual contents of the object. The effect of // dereference_indirect applies only to this object. It is not @@ -1350,9 +1350,42 @@ class QPDFObjectHandle // Deprecated version uses v1 for backward compatibility. // ABI: remove for qpdf 12 - [[deprecated("Use getJSON(int version)")]] - QPDF_DLL - JSON getJSON(bool dereference_indirect = false); + [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON + getJSON(bool dereference_indirect = false); + + // This method can be called on a stream to get a more extended + // JSON representation of the stream that includes the stream's + // data. The JSON object returned is always a dictionary whose + // "dict" key is an encoding of the stream's dictionary. The + // representation of the data is determined by the json_data + // field. + // + // The json_data field may have the value qpdf_sj_none, + // qpdf_sj_inline, or qpdf_sj_file. + // + // If json_data is qpdf_sj_none, stream data is not represented. + // + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream + // data is filtered or not based on the value of decode_level, + // which has the same meaning as with pipeStreamData. + // + // If json_data is qpdf_sj_inline, the base64-encoded stream data + // is included in the "data" field of the dictionary that is + // returned. + // + // If json_data is qpdf_sj_file, then the Pipeline ("p") and + // data_filename arguments must be supplied. The value of + // data_filename is stored in the resulting json in the "datafile" + // key but is not otherwise used. 
The stream data itself (raw or + // filtered depending on decode level), is written to the + // pipeline via pipeStreamData(). + QPDF_DLL + JSON getStreamJSON( + int json_version, + qpdf_stream_data_json_e json_data, + qpdf_stream_decode_level_e decode_level, + Pipeline* p, + std::string const& data_filename); // Legacy helper methods for commonly performed operations on // pages. Newer code should use QPDFPageObjectHelper instead. The diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 33155097..1d6a9ccf 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect) } } +JSON +QPDFObjectHandle::getStreamJSON( + int json_version, + qpdf_stream_data_json_e json_data, + qpdf_stream_decode_level_e decode_level, + Pipeline* p, + std::string const& data_filename) +{ + assertStream(); + return dynamic_cast(obj.get())->getStreamJSON( + json_version, json_data, decode_level, p, data_filename); +} + QPDFObjectHandle QPDFObjectHandle::wrapInArray() { diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 8940b7cf..67a3ad0d 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -2,8 +2,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -54,6 +56,18 @@ namespace return nullptr; } }; + + class StreamBlobProvider + { + public: + StreamBlobProvider( + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level); + void operator()(Pipeline*); + + private: + QPDF_Stream* stream; + qpdf_stream_decode_level_e decode_level; + }; } // namespace std::map QPDF_Stream::filter_abbreviations = { @@ -81,6 +95,19 @@ std::map()>> {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, }; +StreamBlobProvider::StreamBlobProvider( + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) : + stream(stream), + decode_level(decode_level) +{ +} + +void +StreamBlobProvider::operator()(Pipeline* p) +{ + 
this->stream->pipeStreamData(p, nullptr, 0, decode_level, false, false); +} + QPDF_Stream::QPDF_Stream( QPDF* qpdf, int objid, @@ -153,8 +180,95 @@ QPDF_Stream::unparse() JSON QPDF_Stream::getJSON(int json_version) { - // QXXXQ - return this->stream_dict.getJSON(json_version); + if (json_version == 1) { + return this->stream_dict.getJSON(json_version); + } + return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, ""); +} + +JSON +QPDF_Stream::getStreamJSON( + int json_version, + qpdf_stream_data_json_e json_data, + qpdf_stream_decode_level_e decode_level, + Pipeline* p, + std::string const& data_filename) +{ + switch (json_data) { + case qpdf_sj_none: + case qpdf_sj_inline: + if (p != nullptr) { + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline should " + "only be suppiled json_data is file"); + } + break; + case qpdf_sj_file: + if (p == nullptr) { + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline must " + "be be suppiled json_data is file"); + } + if (data_filename.empty()) { + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename " + "must be supplied when json_data is file"); + } + break; + } + + auto dict = this->stream_dict; + JSON result = JSON::makeDictionary(); + if (json_data != qpdf_sj_none) { + std::shared_ptr buf; + bool filtered = false; + bool filter = (decode_level != qpdf_dl_none); + for (int attempt = 1; attempt <= 2; ++attempt) { + Pl_Discard discard; + std::shared_ptr buf_pl; + Pipeline* data_pipeline = nullptr; + if (json_data == qpdf_sj_file) { + // We need to capture the data to write + buf_pl = std::make_shared("stream data"); + data_pipeline = buf_pl.get(); + } else { + data_pipeline = &discard; + } + filtered = pipeStreamData( + data_pipeline, nullptr, 0, decode_level, false, (attempt == 1)); + if (filter && (!filtered)) { + // Try again + filter = false; + } else { + if (buf_pl.get()) { + buf = buf_pl->getBufferSharedPointer(); + } + break; + } + } + // We can use unsafeShallowCopy 
because we are only + // touching top-level keys. + dict = this->stream_dict.unsafeShallowCopy(); + dict.removeKey("/Length"); + if (filtered) { + dict.removeKey("/Filter"); + dict.removeKey("/DecodeParms"); + } + if (json_data == qpdf_sj_file) { + result.addDictionaryMember( + "datafile", JSON::makeString(data_filename)); + if (!buf.get()) { + throw std::logic_error( + "QPDF_Stream: failed to get stream data in json file mode"); + } + p->write(buf->getBuffer(), buf->getSize()); + } else if (json_data == qpdf_sj_inline) { + result.addDictionaryMember( + "data", JSON::makeBlob(StreamBlobProvider(this, decode_level))); + } else { + throw std::logic_error( + "QPDF_Stream: unexpected value of json_data"); + } + } + result.addDictionaryMember("dict", dict.getJSON(json_version)); + return result; } QPDFObject::object_type_e diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index 5d8de669..fcf98ffa 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject QPDFObjectHandle const& decode_parms); void addTokenFilter(std::shared_ptr token_filter); + JSON getStreamJSON( + int json_version, + qpdf_stream_data_json_e json_data, + qpdf_stream_decode_level_e decode_level, + Pipeline* p, + std::string const& data_filename); void replaceDict(QPDFObjectHandle const& new_dict); diff --git a/qpdf/qtest/qpdf/direct-pages-json-objects.out b/qpdf/qtest/qpdf/direct-pages-json-objects.out index 1e0fe469..a7cf4e96 100644 --- a/qpdf/qtest/qpdf/direct-pages-json-objects.out +++ b/qpdf/qtest/qpdf/direct-pages-json-objects.out @@ -49,7 +49,9 @@ "/Type": "/Pages" }, "3 0 R": { - "/Length": "4 0 R" + "dict": { + "/Length": "4 0 R" + } }, "4 0 R": 44, "5 0 R": { diff --git a/qpdf/qtest/qpdf/direct-pages-json-pages.out b/qpdf/qtest/qpdf/direct-pages-json-pages.out index d58aafb1..4ebc4d29 100644 --- a/qpdf/qtest/qpdf/direct-pages-json-pages.out +++ b/qpdf/qtest/qpdf/direct-pages-json-pages.out @@ -39,7 
+39,9 @@ "/Type": "/Pages" }, "3 0 R": { - "/Length": "4 0 R" + "dict": { + "/Length": "4 0 R" + } }, "4 0 R": 44, "5 0 R": { diff --git a/qpdf/qtest/qpdf/page_api_2-json-objects.out b/qpdf/qtest/qpdf/page_api_2-json-objects.out index 995a00e4..3fc137ac 100644 --- a/qpdf/qtest/qpdf/page_api_2-json-objects.out +++ b/qpdf/qtest/qpdf/page_api_2-json-objects.out @@ -62,7 +62,9 @@ "/Type": "/Page" }, "6 0 R": { - "/Length": "7 0 R" + "dict": { + "/Length": "7 0 R" + } }, "7 0 R": 47, "8 0 R": { @@ -72,7 +74,9 @@ "/Type": "/Font" }, "9 0 R": { - "/Length": "10 0 R" + "dict": { + "/Length": "10 0 R" + } }, "10 0 R": 47, "trailer": { diff --git a/qpdf/qtest/qpdf/page_api_2-json-pages.out b/qpdf/qtest/qpdf/page_api_2-json-pages.out index caf27100..c4b7632c 100644 --- a/qpdf/qtest/qpdf/page_api_2-json-pages.out +++ b/qpdf/qtest/qpdf/page_api_2-json-pages.out @@ -94,7 +94,9 @@ "/Type": "/Page" }, "6 0 R": { - "/Length": "7 0 R" + "dict": { + "/Length": "7 0 R" + } }, "7 0 R": 47, "8 0 R": { @@ -104,7 +106,9 @@ "/Type": "/Font" }, "9 0 R": { - "/Length": "10 0 R" + "dict": { + "/Length": "10 0 R" + } }, "10 0 R": 47, "11 0 R": { -- cgit v1.2.3-70-g09d2