aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2018-12-17 23:40:54 +0100
committerJay Berkenbilt <ejb@ql.org>2018-12-22 01:11:57 +0100
commit6580ffe983aa9c2885555b6b4d7cf68fd3f16301 (patch)
treec1f65c4ec5e7433f94c98ec0f9ffe37951a59b8e
parentfa3664357b6fd23b6d74c6835bbf6c8e911892aa (diff)
downloadqpdf-6580ffe983aa9c2885555b6b4d7cf68fd3f16301.tar.zst
Preliminary implementation of json mode
The json mode implemented in this commit is not the final version, nor are the command line arguments used to invoke it.
-rw-r--r--qpdf/qpdf.cc239
1 files changed, 239 insertions, 0 deletions
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index b2945419..8914e601 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -17,6 +17,7 @@
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDFPageLabelDocumentHelper.hh>
+#include <qpdf/QPDFOutlineDocumentHelper.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFWriter.hh>
@@ -117,6 +118,7 @@ struct Options
show_filtered_stream_data(false),
show_pages(false),
show_page_images(false),
+ show_json(false),
check(false),
require_outfile(true),
infilename(0),
@@ -189,6 +191,7 @@ struct Options
bool show_filtered_stream_data;
bool show_pages;
bool show_page_images;
+ bool show_json;
bool check;
std::vector<PageSpec> page_specs;
std::map<std::string, RotationSpec> rotations;
@@ -549,6 +552,75 @@ void usage(std::string const& msg)
exit(EXIT_ERROR);
}
+// Build the JSON object that describes the structure of the output
+// produced by do_show_json. Every dictionary key that do_show_json
+// emits must be listed here; string values are human-readable
+// descriptions of the corresponding field, and a one-element array
+// describes the shape of each element of an array.
+static JSON json_schema()
+{
+    // This JSON object doubles as a schema and as documentation for
+    // our JSON output. Any schema mismatch is a bug in qpdf. This
+    // helps to enforce our policy of consistently providing a known
+    // structure where every documented key will always be present,
+    // which makes it easier to consume our JSON. This is discussed in
+    // more depth in the manual.
+    JSON schema = JSON::makeDictionary();
+    schema.addDictionaryMember(
+        "version", JSON::makeString(
+            "JSON format serial number; increased for non-compatible changes"));
+    schema.addDictionaryMember(
+        "objects", JSON::makeString(
+            "Original objects; keys are 'trailer' or 'n n R'"));
+
+    // pages: one dictionary per page
+    JSON page = schema.addDictionaryMember("pages", JSON::makeArray()).
+        addArrayElement(JSON::makeDictionary());
+    page.addDictionaryMember(
+        "object",
+        JSON::makeString("reference to original page object"));
+
+    // pages[].images: one dictionary per image on the page
+    JSON image = page.addDictionaryMember("images", JSON::makeArray()).
+        addArrayElement(JSON::makeDictionary());
+    // "name" is emitted for every image by do_show_json, so it has to
+    // be part of the schema as well.
+    image.addDictionaryMember(
+        "name",
+        JSON::makeString("name of image in XObject table"));
+    image.addDictionaryMember(
+        "object",
+        JSON::makeString("reference to image stream"));
+    image.addDictionaryMember(
+        "width",
+        JSON::makeString("image width"));
+    image.addDictionaryMember(
+        "height",
+        JSON::makeString("image height"));
+    image.addDictionaryMember("filter", JSON::makeArray()).
+        addArrayElement(
+            JSON::makeString("filters applied to image data"));
+    image.addDictionaryMember("decodeparms", JSON::makeArray()).
+        addArrayElement(
+            JSON::makeString("decode parameters for image data"));
+    image.addDictionaryMember(
+        "filterable",
+        JSON::makeString("whether image data can be decoded"
+                         " using the decode level qpdf was invoked with"));
+
+    page.addDictionaryMember("contents", JSON::makeArray()).
+        addArrayElement(
+            JSON::makeString("reference to each content stream"));
+    page.addDictionaryMember(
+        "label",
+        JSON::makeString("page label dictionary, or null if none"));
+
+    // pages[].outlines: one dictionary per outline targeting the page
+    JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()).
+        addArrayElement(JSON::makeDictionary());
+    outline.addDictionaryMember(
+        "object",
+        JSON::makeString("reference to outline that targets this page"));
+    outline.addDictionaryMember(
+        "title",
+        JSON::makeString("outline title"));
+    outline.addDictionaryMember(
+        "destination",
+        JSON::makeString("outline destination dictionary"));
+
+    // pagelabels: one dictionary per page label range
+    JSON labels = schema.addDictionaryMember("pagelabels", JSON::makeArray()).
+        addArrayElement(JSON::makeDictionary());
+    labels.addDictionaryMember(
+        "index",
+        JSON::makeString("starting page position starting from zero"));
+    labels.addDictionaryMember(
+        "label",
+        JSON::makeString("page label dictionary"));
+    return schema;
+}
+
static std::string show_bool(bool v)
{
return v ? "allowed" : "not allowed";
@@ -1613,6 +1685,11 @@ static void parse_options(int argc, char* argv[], Options& o)
{
o.show_page_images = true;
}
+ else if (strcmp(arg, "show-json") == 0)
+ {
+ o.show_json = true;
+ o.require_outfile = false;
+ }
else if (strcmp(arg, "check") == 0)
{
o.check = true;
@@ -1884,6 +1961,164 @@ static void do_show_pages(QPDF& pdf, Options& o)
}
}
+// Write a JSON description of the PDF file to standard output.
+//
+// The output contains the format version, every object in the file
+// (plus the trailer), per-page information (images, content streams,
+// label, outlines) and the page label ranges. Before it is printed,
+// the result is checked against json_schema(); any mismatch is
+// reported on standard error but the JSON is still written.
+//
+// pdf: the file to describe. Inherited page attributes are pushed
+//      down to the pages as a side effect.
+// o:   command-line options; only o.decode_level is read here.
+static void do_show_json(QPDF& pdf, Options& o)
+{
+    JSON j = JSON::makeDictionary();
+    // This version is updated every time a non-backward-compatible
+    // change is made to the JSON format. Clients of the JSON are to
+    // ignore unrecognized keys, so we only update the version if a
+    // key disappears or if its value changes meaning.
+    j.addDictionaryMember("version", JSON::makeInt(1));
+
+    // Objects
+
+    // Add all objects. Do this first before other code below modifies
+    // things by doing stuff like calling
+    // pushInheritedAttributesToPage.
+    JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary());
+    j_objects.addDictionaryMember("trailer", pdf.getTrailer().getJSON(true));
+    std::vector<QPDFObjectHandle> objects = pdf.getAllObjects();
+    for (std::vector<QPDFObjectHandle>::iterator oh_iter = objects.begin();
+         oh_iter != objects.end(); ++oh_iter)
+    {
+        j_objects.addDictionaryMember(
+            (*oh_iter).unparse(), (*oh_iter).getJSON(true));
+    }
+
+    // Pages
+
+    JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray());
+    QPDFPageDocumentHelper dh(pdf);
+    QPDFPageLabelDocumentHelper pldh(pdf);
+    QPDFOutlineDocumentHelper odh(pdf);
+    dh.pushInheritedAttributesToPage();
+    std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
+    size_t pageno = 0;
+    for (std::vector<QPDFPageObjectHelper>::iterator page_iter = pages.begin();
+         page_iter != pages.end(); ++page_iter, ++pageno)
+    {
+        // As everywhere in this function, the JSON handles returned by
+        // addArrayElement/addDictionaryMember are attached first and
+        // filled in afterwards.
+        JSON j_page = j_pages.addArrayElement(JSON::makeDictionary());
+        QPDFPageObjectHelper& ph(*page_iter);
+        QPDFObjectHandle page = ph.getObjectHandle();
+        j_page.addDictionaryMember("object", page.getJSON());
+
+        // Images on this page
+        JSON j_images = j_page.addDictionaryMember(
+            "images", JSON::makeArray());
+        std::map<std::string, QPDFObjectHandle> images =
+            ph.getPageImages();
+        for (std::map<std::string, QPDFObjectHandle>::iterator img_iter =
+                 images.begin();
+             img_iter != images.end(); ++img_iter)
+        {
+            JSON j_image = j_images.addArrayElement(JSON::makeDictionary());
+            j_image.addDictionaryMember(
+                "name", JSON::makeString((*img_iter).first));
+            QPDFObjectHandle image = (*img_iter).second;
+            QPDFObjectHandle dict = image.getDict();
+            j_image.addDictionaryMember("object", image.getJSON());
+            j_image.addDictionaryMember(
+                "width", dict.getKey("/Width").getJSON());
+            j_image.addDictionaryMember(
+                "height", dict.getKey("/Height").getJSON());
+            QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray();
+            j_image.addDictionaryMember(
+                "filter", filters.getJSON());
+            // Always report decode parameters as an array. When
+            // /DecodeParms is not already an array, repeat it once per
+            // filter so the two arrays have matching lengths.
+            QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms");
+            QPDFObjectHandle dp_array;
+            if (decode_parms.isArray())
+            {
+                dp_array = decode_parms;
+            }
+            else
+            {
+                dp_array = QPDFObjectHandle::newArray();
+                int const n_filters = filters.getArrayNItems();
+                for (int i = 0; i < n_filters; ++i)
+                {
+                    dp_array.appendItem(decode_parms);
+                }
+            }
+            j_image.addDictionaryMember("decodeparms", dp_array.getJSON());
+            // Passing a null pipeline only asks whether the stream
+            // data can be filtered at the requested decode level.
+            j_image.addDictionaryMember(
+                "filterable",
+                JSON::makeBool(
+                    image.pipeStreamData(0, 0, o.decode_level, true)));
+        }
+
+        // Content streams
+        JSON j_contents = j_page.addDictionaryMember(
+            "contents", JSON::makeArray());
+        std::vector<QPDFObjectHandle> content = ph.getPageContents();
+        for (std::vector<QPDFObjectHandle>::iterator c_iter = content.begin();
+             c_iter != content.end(); ++c_iter)
+        {
+            j_contents.addArrayElement((*c_iter).getJSON());
+        }
+
+        // Page label
+        j_page.addDictionaryMember(
+            "label", pldh.getLabelForPage(pageno).getJSON());
+
+        // Outlines that target this page
+        JSON j_outlines = j_page.addDictionaryMember(
+            "outlines", JSON::makeArray());
+        std::list<QPDFOutlineObjectHelper> outlines =
+            odh.getOutlinesForPage(page.getObjGen());
+        for (std::list<QPDFOutlineObjectHelper>::iterator ol_iter =
+                 outlines.begin();
+             ol_iter != outlines.end(); ++ol_iter)
+        {
+            JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary());
+            j_outline.addDictionaryMember(
+                "object", (*ol_iter).getObjectHandle().getJSON());
+            j_outline.addDictionaryMember(
+                "title", JSON::makeString((*ol_iter).getTitle()));
+            j_outline.addDictionaryMember(
+                "destination", (*ol_iter).getDest().getJSON(true));
+        }
+    }
+
+    // Page labels
+
+    JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray());
+    // Skip files without pages: pages.size() - 1 would wrap around
+    // for an empty page list since size() is unsigned.
+    if (pldh.hasPageLabels() && (! pages.empty()))
+    {
+        std::vector<QPDFObjectHandle> labels;
+        pldh.getLabelsForPageRange(0, pages.size() - 1, 0, labels);
+        // labels holds (index, label) pairs stored consecutively.
+        for (std::vector<QPDFObjectHandle>::iterator l_iter = labels.begin();
+             l_iter != labels.end(); ++l_iter)
+        {
+            std::vector<QPDFObjectHandle>::iterator next = l_iter;
+            ++next;
+            if (next == labels.end())
+            {
+                // This can't happen, so ignore it. This could only
+                // happen if getLabelsForPageRange somehow returned an
+                // odd number of items.
+                break;
+            }
+            JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
+            j_label.addDictionaryMember("index", (*l_iter).getJSON());
+            j_label.addDictionaryMember("label", (*next).getJSON());
+            // Step onto the label; the loop increment then moves on
+            // to the index of the next pair.
+            l_iter = next;
+        }
+    }
+
+    // Check against schema
+
+    JSON schema = json_schema();
+    std::list<std::string> errors;
+    if (! j.checkSchema(schema, errors))
+    {
+        std::cerr
+            << whoami << " didn't create JSON that complies with its own\n"
+            "rules. Please report this as a bug at\n"
+            " https://github.com/qpdf/qpdf/issues/new\n"
+            "ideally with the file that caused the error and the output below. Thanks!\n"
+            "\n";
+        for (std::list<std::string>::iterator e_iter = errors.begin();
+             e_iter != errors.end(); ++e_iter)
+        {
+            std::cerr << (*e_iter) << std::endl;
+        }
+    }
+
+    std::cout << j.serialize() << std::endl;
+}
+
static void do_inspection(QPDF& pdf, Options& o)
{
int exit_code = 0;
@@ -1891,6 +2126,10 @@ static void do_inspection(QPDF& pdf, Options& o)
{
do_check(pdf, o, exit_code);
}
+ if (o.show_json)
+ {
+ do_show_json(pdf, o);
+ }
if (o.show_npages)
{
QTC::TC("qpdf", "qpdf npages");