From 16f4f94cd99b4d0f633596074e8d9358db135517 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 7 May 2022 07:53:45 -0400 Subject: Prepare code for JSON v2 Update getJSON() methods and calls to them --- include/qpdf/JSON.hh | 2 ++ include/qpdf/QPDFJob.hh | 5 +++++ include/qpdf/QPDFObject.hh | 2 +- include/qpdf/QPDFObjectHandle.hh | 47 +++++++++++++++++++++++++++++++++------- 4 files changed, 47 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh index fdacd442..79f2a25f 100644 --- a/include/qpdf/JSON.hh +++ b/include/qpdf/JSON.hh @@ -51,6 +51,8 @@ class InputSource; class JSON { public: + static int constexpr LATEST = 2; + QPDF_DLL std::string unparse() const; diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 352348ab..b664ab50 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -520,6 +521,10 @@ class QPDFJob void doJSONAcroform(Pipeline* p, bool& first, QPDF& pdf); void doJSONEncrypt(Pipeline* p, bool& first, QPDF& pdf); void doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf); + void addOutlinesToJson( + std::vector outlines, + JSON& j, + std::map& page_numbers); enum remove_unref_e { re_auto, re_yes, re_no }; diff --git a/include/qpdf/QPDFObject.hh b/include/qpdf/QPDFObject.hh index 43146e53..982cd126 100644 --- a/include/qpdf/QPDFObject.hh +++ b/include/qpdf/QPDFObject.hh @@ -64,7 +64,7 @@ class QPDFObject virtual ~QPDFObject() = default; virtual std::string unparse() = 0; - virtual JSON getJSON() = 0; + virtual JSON getJSON(int json_version) = 0; // Return a unique type code for the object virtual object_type_e getTypeCode() const = 0; diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 77bef52b..82f4e365 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -1304,15 +1304,40 @@ class QPDFObjectHandle QPDF_DLL 
std::string unparseBinary(); - // Return encoded as JSON. For most object types, there is an - // obvious mapping. The JSON is generated as follows: - // * Names are encoded as strings representing the normalized name - // in PDF syntax as returned by unparse() + // Return encoded as JSON. The constant JSON::LATEST can be used + // to specify the latest available JSON version. The JSON is + // generated as follows: + // * Arrays, dictionaries, booleans, nulls, integers, and real + // numbers are represented by their native JSON types. + // * Names are encoded as strings representing the canonical + // representation (after parsing #xx) and preceded by a slash, + // just as unparse() returns. For example, the JSON for the + // PDF-syntax name /Text#2fPlain would be "/Text/Plain". // * Indirect references are encoded as strings containing "obj gen R" - // * Strings are encoded as UTF-8 strings with unrepresentable binary - // characters encoded as \uHHHH - // * Encoding streams just encodes the stream's dictionary; the stream - // data is not represented + // * Strings + // * JSON v1: Strings are encoded as UTF-8 strings with + // unrepresentable binary characters encoded as \uHHHH. + // Characters in PDF Doc encoding that don't have + // bidirectional unicode mappings are not reversible. There is + // no way to tell the difference between a string that looks + // like a name or indirect object and an actual name or + // indirect object. + // * JSON v2: + // * Unicode strings and strings encoded with PDF Doc encoding + // that can be bidirectionally mapped to Unicode (which is + // all strings without undefined characters) are represented + // as "u:" followed by the UTF-8 encoded string. Example: + // "u:potato". + // * All other strings are represented as "b:" followed by a + // hexadecimal encoding of the string. Example: "b:0102cacb" + // * Streams + // * JSON v1: Only the stream's dictionary is encoded. 
There is + // no way to tell a stream from a dictionary other than context. + // * JSON v2: A stream is encoded as {"dict": {...}} with the + // value being the encoding of the stream's dictionary. Since + // "dict" does not otherwise represent anything, this is + // unambiguous. The getStreamJSON() call can be used to add + // encoding of the stream's data. // * Object types that are only valid in content streams (inline // image, operator) as well as "reserved" objects are not // representable and will be serialized as "null". @@ -1321,6 +1346,12 @@ class QPDFObjectHandle // dereference_indirect applies only to this object. It is not // recursive. QPDF_DLL + JSON getJSON(int json_version, bool dereference_indirect = false); + + // Deprecated version uses v1 for backward compatibility. + // ABI: remove for qpdf 12 + [[deprecated("Use getJSON(int version)")]] + QPDF_DLL JSON getJSON(bool dereference_indirect = false); // Legacy helper methods for commonly performed operations on -- cgit v1.2.3-70-g09d2