aboutsummaryrefslogtreecommitdiffstats
path: root/include/qpdf/JSON.hh
blob: 4b829017ec9835681b20657fb0976d1398929995 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
// Copyright (c) 2005-2022 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.

#ifndef JSON_HH
#define JSON_HH

// This is a simple JSON serializer and parser, primarily designed for
// serializing QPDF Objects as JSON. While it may work as a
// general-purpose JSON parser/serializer, there are better options.
// JSON objects contain their data as smart pointers. When one JSON object
// is added to another, this pointer is copied. This means you can
// create temporary JSON objects on the stack, add them to other
// objects, and let them go out of scope safely. It also means that if
// a JSON object is added in more than one place, all copies
// share the underlying data. This makes them similar in structure and
// behavior to QPDFObjectHandle and may feel natural within the QPDF
// codebase, but it is also a good reason not to use this as a
// general-purpose JSON package.

#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
#include <qpdf/Types.h>

#include <functional>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

class Pipeline;
class InputSource;

class JSON
{
  public:
    static int constexpr LATEST = 2;
    JSON() = default;

    // Serialize this JSON object and return the result as a string.
    QPDF_DLL
    std::string unparse() const;

    // Write the JSON object through a pipeline. The `depth` parameter
    // specifies how deeply nested this is in another JSON structure,
    // which makes it possible to write clean-looking JSON
    // incrementally.
    QPDF_DLL
    void write(Pipeline*, size_t depth = 0) const;

    // Helper methods for writing JSON incrementally.
    //
    // "first" -- Several methods take a `bool& first` parameter. The
    // open methods always set it to true, and the methods to output
    // items always set it to false. This way, the item and close
    // methods can always know whether or not a first item is being
    // written. The intended mode of operation is to start with a new
    // `bool first = true` each time a new container is opened and
    // to pass that `first` through to all the methods that are
    // called to add top-level items to the container as well as to
    // close the container. This lets the JSON object use it to keep
    // track of when it's writing a first object and when it's not. If
    // incrementally writing multiple levels of depth, a new `first`
    // should be used for each new container that is opened.
    //
    // "depth" -- Indicate the level of depth. This is used for
    // consistent indentation. When writing incrementally, whenever
    // you call a method to add an item to a container, the value of
    // `depth` should be one more than whatever value is passed to the
    // container open and close methods.

    // Open methods ignore the incoming value of first and set it to
    // true, indicating that no item has been written to the newly
    // opened container yet.
    QPDF_DLL
    static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
    QPDF_DLL
    static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
    // Close methods don't modify first. A true value indicates that
    // we are closing an empty object.
    QPDF_DLL
    static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
    QPDF_DLL
    static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
    // The item methods use the value of first to determine if this is
    // the first item and always set it to false.
    QPDF_DLL
    static void writeDictionaryItem(
        Pipeline*,
        bool& first,
        std::string const& key,
        JSON const& value,
        size_t depth = 0);
    // Write just the key of a new dictionary item, useful if writing
    // nested structures. Calls writeNext.
    QPDF_DLL
    static void writeDictionaryKey(
        Pipeline* p, bool& first, std::string const& key, size_t depth = 0);
    QPDF_DLL
    static void writeArrayItem(
        Pipeline*, bool& first, JSON const& element, size_t depth = 0);
    // If writing nested structures incrementally, call writeNext
    // before opening a new array or container in the midst of an
    // existing one. The `first` you pass to writeNext should be the
    // one for the parent object. The depth should be the one for the
    // child object. Then start a new `first` for the nested item.
    // Note that writeDictionaryKey and writeArrayItem call writeNext
    // for you, so this is most important when writing subsequent
    // items or container openers to an array.
    QPDF_DLL
    static void writeNext(Pipeline* p, bool& first, size_t depth = 0);

    // The JSON spec calls dictionaries "objects", but that creates
    // too much confusion when referring to instances of the JSON
    // class.
    QPDF_DLL
    static JSON makeDictionary();
    // addDictionaryMember returns the newly added item.
    QPDF_DLL
    JSON addDictionaryMember(std::string const& key, JSON const&);
    QPDF_DLL
    static JSON makeArray();
    // addArrayElement returns the newly added item.
    QPDF_DLL
    JSON addArrayElement(JSON const&);
    QPDF_DLL
    static JSON makeString(std::string const& utf8);
    QPDF_DLL
    static JSON makeInt(long long int value);
    QPDF_DLL
    static JSON makeReal(double value);
    QPDF_DLL
    static JSON makeNumber(std::string const& encoded);
    QPDF_DLL
    static JSON makeBool(bool value);
    QPDF_DLL
    static JSON makeNull();

    // A blob serializes as a string. The function will be called by
    // JSON with a pipeline and should write binary data to the
    // pipeline but not call finish(). JSON will call finish() at the
    // right time.
    QPDF_DLL
    static JSON makeBlob(std::function<void(Pipeline*)>);

    QPDF_DLL
    bool isArray() const;

    QPDF_DLL
    bool isDictionary() const;

    // If the key is already in the dictionary, return true.
    // Otherwise, mark it as seen and return false. This is primarily
    // intended to be used by the parser to detect duplicate keys when
    // the reactor blocks them from being added to the final
    // dictionary.
    QPDF_DLL
    bool checkDictionaryKeySeen(std::string const& key);

    // Accessors. Accessor behavior:
    //
    // - If this object is the wrong type, including null, return false
    // - If this object is the right type, return true and initialize
    //   the value
    QPDF_DLL
    bool getString(std::string& utf8) const;
    QPDF_DLL
    bool getNumber(std::string& value) const;
    QPDF_DLL
    bool getBool(bool& value) const;
    QPDF_DLL
    bool isNull() const;
    QPDF_DLL
    bool forEachDictItem(
        std::function<void(std::string const& key, JSON value)> fn) const;
    QPDF_DLL
    bool forEachArrayItem(std::function<void(JSON value)> fn) const;

    // Check this JSON object against a "schema". This is not a schema
    // according to any standard. It's just a template of what the
    // JSON is supposed to contain. The checking does the following:
    //
    //   * The schema is a nested structure containing dictionaries,
    //     single-element arrays, and strings only.
    //   * Recursively walk the schema. In the items below, "schema
    //     object" refers to an object in the schema, and "checked
    //     object" refers to the corresponding part of the object
    //     being checked.
    //   * If the schema object is a dictionary, the checked object
    //     must have a dictionary in the same place with the same
    //     keys. If flags contains f_optional, a key in the schema
    //     does not have to be present in the object. Otherwise, all
    //     keys have to be present. Any key in the object must be
    //     present in the schema.
    //   * If the schema object is an array of length 1, the checked
    //     object may either be a single item or an array of items.
    //     The single item or each element of the checked object's
    //     array is validated against the single element of the
    //     schema's array. The rationale behind this logic is that a
    //     single element may appear wherever the schema allows a
    //     variable-length array. This makes it possible to start
    //     allowing an array in the future where a single element was
    //     previously required without breaking backward
    //     compatibility.
    //   * If the schema object is an array of length > 1, the checked
    //     object must be an array of the same length. In this case,
    //     each element of the checked object array is validated
    //     against the corresponding element of the schema array.
    //   * Otherwise, the value must be a string whose value is a
    //     description of the object's corresponding value, which may
    //     have any type.
    //
    // QPDF's JSON output conforms to certain strict compatibility
    // rules as discussed in the manual. The idea is that a JSON
    // structure created manually in qpdf.cc doubles as both JSON help
    // information and a schema for validating the JSON that qpdf
    // generates. Any discrepancies are a bug in qpdf.
    //
    // Flags is a bitwise or of values from check_flags_e.
    enum check_flags_e {
        f_none = 0,
        f_optional = 1 << 0,
    };
    QPDF_DLL
    bool checkSchema(
        JSON schema, unsigned long flags, std::list<std::string>& errors);

    // Same as passing 0 for flags
    QPDF_DLL
    bool checkSchema(JSON schema, std::list<std::string>& errors);

    // A pointer to a Reactor class can be passed to parse, which
    // will enable the caller to react to incremental events in the
    // construction of the JSON object. This makes it possible to
    // implement SAX-like handling of very large JSON objects.
    class QPDF_DLL_CLASS Reactor
    {
      public:
        QPDF_DLL
        virtual ~Reactor() = default;

        // The start/end methods are called when parsing of a
        // dictionary or array is started or ended. The item methods
        // are called when an item is added to a dictionary or array.
        // When adding a container to another container, the item
        // method is called with an empty container before the lower
        // container's start method is called. See important notes in
        // "Item methods" below.

        // During parsing of a JSON string, the parser is operating on
        // a single object at a time. When a dictionary or array is
        // started, a new context begins, and when that dictionary or
        // array is ended, the previous context is resumed. So, for
        // example, if you have `{"a": [1]}`, you will receive the
        // following method calls
        //
        // dictionaryStart -- current object is the top-level dictionary
        // dictionaryItem  -- called with "a" and an empty array
        // arrayStart      -- current object is the array
        // arrayItem       -- called with the "1" object
        // containerEnd    -- now current object is the dictionary again
        // containerEnd    -- current object is undefined
        //
        // If the top-level item in a JSON string is a scalar, the
        // topLevelScalar() method will be called. No argument is
        // passed since the object is the same as what is returned by
        // parse().

        QPDF_DLL
        virtual void dictionaryStart() = 0;
        QPDF_DLL
        virtual void arrayStart() = 0;
        QPDF_DLL
        virtual void containerEnd(JSON const& value) = 0;
        QPDF_DLL
        virtual void topLevelScalar() = 0;

        // Item methods:
        //
        // The return value of the item methods indicates whether the
        // item has been "consumed". If the item method returns true,
        // then the item will not be added to the containing JSON
        // object. This is what allows arbitrarily large JSON objects
        // to be parsed and not have to be kept in memory.
        //
        // NOTE: When a dictionary or an array is added to a
        // container, the dictionaryItem or arrayItem method is called
        // when the child item's start delimiter is encountered, so
        // the JSON object passed in at that time will always be in
        // its initial, empty state. Additionally, the child item's
        // start method is not called until after the parent item's
        // item method is called. This makes it possible to keep track
        // of the current depth level by incrementing level on start
        // methods and decrementing on end methods.

        QPDF_DLL
        virtual bool
        dictionaryItem(std::string const& key, JSON const& value) = 0;
        QPDF_DLL
        virtual bool arrayItem(JSON const& value) = 0;
    };

    // Create a JSON object from a string.
    QPDF_DLL
    static JSON parse(std::string const&);
    // Create a JSON object from an input source. See above for
    // information about how to use the Reactor.
    QPDF_DLL
    static JSON parse(InputSource&, Reactor* reactor = nullptr);

    // parse calls setStart and setEnd to set the inclusive start and
    // non-inclusive end offsets of an object relative to its input
    // string. Otherwise, both values are 0.
    QPDF_DLL
    void setStart(qpdf_offset_t);
    QPDF_DLL
    void setEnd(qpdf_offset_t);
    QPDF_DLL
    qpdf_offset_t getStart() const;
    QPDF_DLL
    qpdf_offset_t getEnd() const;

  private:
    static std::string encode_string(std::string const& utf8);
    static void
    writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter);

    // Abstract base for the value held by a JSON object. Each
    // concrete value type below supplies its own write().
    struct JSON_value
    {
        virtual ~JSON_value() = default;
        virtual void write(Pipeline*, size_t depth) const = 0;
    };
    struct JSON_dictionary: public JSON_value
    {
        ~JSON_dictionary() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::map<std::string, std::shared_ptr<JSON_value>> members;
        // NOTE(review): presumably the set of keys recorded by
        // checkDictionaryKeySeen -- confirm against JSON.cc.
        std::set<std::string> parsed_keys;
    };
    struct JSON_array: public JSON_value
    {
        ~JSON_array() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::vector<std::shared_ptr<JSON_value>> elements;
    };
    struct JSON_string: public JSON_value
    {
        JSON_string(std::string const& utf8);
        ~JSON_string() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::string utf8;
        std::string encoded;
    };
    struct JSON_number: public JSON_value
    {
        JSON_number(long long val);
        JSON_number(double val);
        JSON_number(std::string const& val);
        ~JSON_number() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::string encoded;
    };
    struct JSON_bool: public JSON_value
    {
        JSON_bool(bool val);
        ~JSON_bool() override = default;
        void write(Pipeline*, size_t depth) const override;
        bool value;
    };
    struct JSON_null: public JSON_value
    {
        ~JSON_null() override = default;
        void write(Pipeline*, size_t depth) const override;
    };
    struct JSON_blob: public JSON_value
    {
        JSON_blob(std::function<void(Pipeline*)> fn);
        ~JSON_blob() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::function<void(Pipeline*)> fn;
    };

    JSON(std::shared_ptr<JSON_value>);

    static bool checkSchemaInternal(
        JSON_value* this_v,
        JSON_value* sch_v,
        unsigned long flags,
        std::list<std::string>& errors,
        std::string prefix);

    class Members
    {
        friend class JSON;

      public:
        QPDF_DLL
        ~Members() = default;

      private:
        Members(std::shared_ptr<JSON_value>);
        Members(Members const&) = delete;

        std::shared_ptr<JSON_value> value;
        // start and end are only populated for objects created by
        // parse. They are initialized to 0 here so that they have a
        // defined value for every other object.
        qpdf_offset_t start{0};
        qpdf_offset_t end{0};
    };

    std::shared_ptr<Members> m;
};

#endif // JSON_HH