about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2022-05-18 01:18:02 +0200
committer Jay Berkenbilt <ejb@ql.org> 2022-05-20 15:16:25 +0200
commit 0fe8d4476205c97e402e555aac41a88e70e3e9b2 (patch)
tree 87c9ee190bdfe4deeb8c517a3da6ab6b2a2230eb
parent 63c7eefe9db8d8e87d07198355627af01cc1814d (diff)
download qpdf-0fe8d4476205c97e402e555aac41a88e70e3e9b2.tar.zst
Support stream data -- not tested
There are no automated tests yet, but committing work so far in preparation for some refactoring.
-rw-r--r-- TODO 9
-rw-r--r-- include/qpdf/QPDF.hh 5
-rw-r--r-- libqpdf/QPDF_Dictionary.cc 7
-rw-r--r-- libqpdf/QPDF_Name.cc 6
-rw-r--r-- libqpdf/QPDF_json.cc 68
5 files changed, 77 insertions, 18 deletions
diff --git a/TODO b/TODO
index 64d537a1..f3eaebaf 100644
--- a/TODO
+++ b/TODO
@@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work"
Output JSON v2
==============
-XXX
-
* Reread from perspective of update
* Test all ignore cases with QTC
* Test case of correct file with dict before data/datafile
* Have a test case if possible that exercises the object description
which means we need some kind of semantic error that gets caught
after creation.
+* Test invalid data, invalid data file
+* Tests: round-trip through json, round-trip through qpdf --qdf
Try to never flatten pages tree. Make sure we do something reasonable
with pages tree repair. The problem is that if pages tree repair is
@@ -236,6 +236,11 @@ Other documentation fodder:
You can't create a PDF from v1 json because
+* Change: names are written in canonical form with a leading slash
+ just as they are treated in the code. In v1, they were written in
+ PDF syntax in the json file. Example: /text#2fplain in pdf will be
+ written as /text/plain in json v2 and as /text#2fplain in json v1.
+
* The PDF version header is not recorded
* Strings cannot be unambiguously encoded/decoded
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index f3ce4684..146015dc 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -998,7 +998,8 @@ class QPDF
class JSONReactor: public JSON::Reactor
{
public:
- JSONReactor(QPDF&, std::string const& filename, bool must_be_complete);
+ JSONReactor(
+ QPDF&, std::shared_ptr<InputSource> is, bool must_be_complete);
virtual ~JSONReactor() = default;
virtual void dictionaryStart() override;
virtual void arrayStart() override;
@@ -1033,7 +1034,7 @@ class QPDF
QPDFObjectHandle to_replace, QPDFObjectHandle replacement);
QPDF& pdf;
- std::string filename;
+ std::shared_ptr<InputSource> is;
bool must_be_complete;
bool errors;
bool parse_error;
diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc
index 67d59a2d..26239317 100644
--- a/libqpdf/QPDF_Dictionary.cc
+++ b/libqpdf/QPDF_Dictionary.cc
@@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version)
JSON j = JSON::makeDictionary();
for (auto& iter: this->items) {
if (!iter.second.isNull()) {
- j.addDictionaryMember(
- QPDF_Name::normalizeName(iter.first),
- iter.second.getJSON(json_version));
+ std::string key =
+ (json_version == 1 ? QPDF_Name::normalizeName(iter.first)
+ : iter.first);
+ j.addDictionaryMember(key, iter.second.getJSON(json_version));
}
}
return j;
diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc
index 8dc48faa..236d6133 100644
--- a/libqpdf/QPDF_Name.cc
+++ b/libqpdf/QPDF_Name.cc
@@ -42,7 +42,11 @@ QPDF_Name::unparse()
JSON
QPDF_Name::getJSON(int json_version)
{
- return JSON::makeString(normalizeName(this->name));
+ if (json_version == 1) {
+ return JSON::makeString(normalizeName(this->name));
+ } else {
+ return JSON::makeString(this->name);
+ }
}
QPDFObject::object_type_e
diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc
index d71c75ba..1037a2cf 100644
--- a/libqpdf/QPDF_json.cc
+++ b/libqpdf/QPDF_json.cc
@@ -1,9 +1,11 @@
#include <qpdf/QPDF.hh>
#include <qpdf/FileInputSource.hh>
+#include <qpdf/Pl_Base64.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
+#include <algorithm>
#include <regex>
// This chart shows an example of the state transitions that would
@@ -52,17 +54,40 @@ static char const* JSON_PDF = (
"9\n"
"%%EOF\n");
+// Note use of [\\s\\S] rather than . to match any character since .
+// doesn't match newlines.
static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$");
static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$");
static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$");
-static std::regex UNICODE_RE("^u:(.*)$");
+static std::regex UNICODE_RE("^u:([\\s\\S]*)$");
static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$");
-static std::regex NAME_RE("^/.*$");
+static std::regex NAME_RE("^/[\\s\\S]*$");
+
+static std::function<void(Pipeline*)> // Returns a callback that copies a byte range of `is` into a pipeline.
+provide_data(std::shared_ptr<InputSource> is, size_t start, size_t end) // forwards bytes [start, end); assumes end >= start, since the size_t subtraction below wraps otherwise
+{
+ return [is, start, end](Pipeline* p) { // captures by value: the callback is returned and runs after this function exits
+ Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode); // decoder stage constructed in a_decode mode, writing on to the caller's pipeline
+ p = &decode; // from here on, every write goes through the decoder stage
+ size_t bytes = end - start; // bytes still to be forwarded
+ char buf[8192];
+ is->seek(QIntC::to_offset(start), SEEK_SET); // NOTE(review): moves the shared input source's position; confirm no other reader depends on it
+ size_t len = 0;
+ while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) { // never request more than what remains of the range
+ p->write(buf, len);
+ bytes -= len;
+ if (bytes == 0) { // whole range forwarded; stop before reading past `end`
+ break;
+ }
+ } // NOTE(review): if read() returns 0 while bytes > 0, the loop ends without reporting the short read
+ decode.finish(); // called on the local decoder stage once the copy loop ends
+ };
+}
QPDF::JSONReactor::JSONReactor(
- QPDF& pdf, std::string const& filename, bool must_be_complete) :
+ QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
pdf(pdf),
- filename(filename),
+ is(is),
must_be_complete(must_be_complete),
errors(false),
parse_error(false),
@@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
replacement =
pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
replaceObject(tos, replacement);
- replacement.replaceStreamData(
- "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ
}
} else {
// Ignore unknown keys for forward compatibility
@@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
throw std::logic_error("no object on stack in st_stream");
}
auto tos = object_stack.back();
+ auto uninitialized = QPDFObjectHandle();
if (!tos.isStream()) {
// QXXXQ QTC in update mode
error(value.getStart(), "this object is not a stream");
@@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
}
} else if (key == "data") {
this->saw_data = true;
- // QXXXQ
+ std::string v;
+ if (!value.getString(v)) {
+ error(value.getStart(), "\"stream.data\" must be a string");
+ } else {
+ // The range includes the quotes.
+ auto start = value.getStart() + 1;
+ auto end = value.getEnd() - 1;
+ if (end < start) {
+ throw std::logic_error("QPDF_json: JSON string length < 0");
+ }
+ tos.replaceStreamData(
+ provide_data(is, start, end), uninitialized, uninitialized);
+ }
} else if (key == "datafile") {
this->saw_datafile = true;
- // QXXXQ
+ std::string filename;
+ if (value.getString(filename)) {
+ tos.replaceStreamData(
+ QUtil::file_provider(filename),
+ uninitialized,
+ uninitialized);
+ } else {
+ error(
+ value.getStart(),
+ "\"stream.datafile\" must be a string containing a file "
+ "name");
+ }
} else {
// Ignore unknown keys for forward compatibility.
// QXXXQ QTC
@@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value)
// QXXXQ include object number in description
result.setObjectDescription(
&this->pdf,
- this->filename + " offset " + QUtil::uint_to_string(value.getStart()));
+ this->is->getName() + " offset " +
+ QUtil::uint_to_string(value.getStart()));
return result;
}
@@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
void
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
{
- JSONReactor reactor(*this, is->getName(), must_be_complete);
+ JSONReactor reactor(*this, is, must_be_complete);
try {
JSON::parse(*is, &reactor);
} catch (std::runtime_error& e) {