aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author m-holger <m-holger@kubitscheck.org> 2024-02-10 13:03:28 +0100
committer m-holger <m-holger@kubitscheck.org> 2024-02-16 11:52:44 +0100
commit 431987475b392daf4094570565881e1ebfc9528a (patch)
tree f0e6fc17e4abd1a6908d2b799847949b6f4cdfd7
parent e2737ab646bff6aa07ba72e0cc15cc955d9afcc0 (diff)
download qpdf-431987475b392daf4094570565881e1ebfc9528a.tar.zst
Add new method QPDF_Name::analyzeJSONEncoding
Provide a custom method to check whether a name is valid utf8. Integrate checking for characters that need to be escaped in JSON.
-rw-r--r-- libqpdf/QPDF_Dictionary.cc 31
-rw-r--r-- libqpdf/QPDF_Name.cc 73
-rw-r--r-- libqpdf/qpdf/QPDF_Name.hh 5
3 files changed, 78 insertions, 31 deletions
diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc
index 53d78a2b..ca7fa04a 100644
--- a/libqpdf/QPDF_Dictionary.cc
+++ b/libqpdf/QPDF_Dictionary.cc
@@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version)
if (json_version == 1) {
j.addDictionaryMember(
QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
+ } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
+ j.addDictionaryMember(iter.first, iter.second.getJSON(json_version));
} else {
- bool has_8bit_chars;
- bool is_valid_utf8;
- bool is_utf16;
- QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);
- std::string key = !has_8bit_chars || is_valid_utf8
- ? iter.first
- : "n:" + QPDF_Name::normalizeName(iter.first);
- j.addDictionaryMember(key, iter.second.getJSON(json_version));
+ j.addDictionaryMember(
+ "n:" + QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
}
}
}
@@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p)
if (!iter.second.isNull()) {
p.writeNext();
if (json_version == 1) {
- p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) << "\": ";
- } else {
- bool has_8bit_chars;
- bool is_valid_utf8;
- bool is_utf16;
- QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);
- if (!has_8bit_chars || is_valid_utf8) {
- p << "\"" << JSON::Writer::encode_string(iter.first) << "\": ";
+ p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
+ << "\": ";
+ } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
+ if (res.second) {
+ p << "\"" << iter.first << "\": ";
} else {
- p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
- << "\": ";
+ p << "\"" << JSON::Writer::encode_string(iter.first) << "\": ";
}
+ } else {
+ p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
+ << "\": ";
}
iter.second.writeJSON(json_version, p);
}
diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc
index 458b1428..04614769 100644
--- a/libqpdf/QPDF_Name.cc
+++ b/libqpdf/QPDF_Name.cc
@@ -3,6 +3,8 @@
#include <qpdf/JSON_writer.hh>
#include <qpdf/QUtil.hh>
+#include <string_view>
+
QPDF_Name::QPDF_Name(std::string const& name) :
QPDFValue(::ot_name, "name"),
name(name)
@@ -52,20 +54,65 @@ QPDF_Name::unparse()
return normalizeName(this->name);
}
+// Classify a name for JSON output.
+//
+// Returns {false, false} if name is not well-formed UTF-8. Overlong encodings, encoded UTF-16
+// surrogates (U+D800..U+DFFF), code points above U+10FFFF, stray continuation bytes and
+// sequences truncated at the end of the string are all rejected. Otherwise returns {true, true}
+// if no character needs escaping inside a JSON string, or {true, false} if the name contains a
+// control character (< 0x20), a double quote or a backslash.
+std::pair<bool, bool>
+QPDF_Name::analyzeJSONEncoding(const std::string& name)
+{
+    // Default range of a UTF-8 continuation byte.
+    constexpr unsigned char cont_min = 0x80;
+    constexpr unsigned char cont_max = 0xbf;
+
+    int tail = 0; // Number of continuation bytes still expected.
+    // Permitted range for the next continuation byte. Only the first continuation byte after
+    // certain lead bytes is restricted; the range is reset to the default once it is consumed.
+    unsigned char lo = cont_min;
+    unsigned char hi = cont_max;
+    bool needs_escaping = false;
+
+    // Iterate over the std::string directly and convert each byte to unsigned char. A
+    // std::basic_string_view<unsigned char> would depend on std::char_traits<unsigned char>,
+    // which the standard library is not required to provide.
+    for (char const ch: name) {
+        auto const c = static_cast<unsigned char>(ch);
+        if (tail) {
+            if (c < lo || c > hi) {
+                return {false, false};
+            }
+            lo = cont_min;
+            hi = cont_max;
+            --tail;
+        } else if (c < 0x80) {
+            // ASCII: control characters, '"' and '\\' must be escaped in a JSON string.
+            if (c < 0x20 || c == '"' || c == '\\') {
+                needs_escaping = true;
+            }
+        } else if (c >= 0xc2 && c <= 0xdf) {
+            // Two-octet sequence. 0xc0 and 0xc1 could only start overlong encodings.
+            tail = 1;
+        } else if ((c & 0xf0) == 0xe0) {
+            // Three-octet sequence.
+            tail = 2;
+            if (c == 0xe0) {
+                lo = 0xa0; // Anything lower would be an overlong encoding.
+            } else if (c == 0xed) {
+                hi = 0x9f; // Anything higher would encode a UTF-16 surrogate.
+            }
+        } else if (c >= 0xf0 && c <= 0xf4) {
+            // Four-octet sequence. Lead bytes above 0xf4 would exceed U+10FFFF.
+            tail = 3;
+            if (c == 0xf0) {
+                lo = 0x90; // Anything lower would be an overlong encoding.
+            } else if (c == 0xf4) {
+                hi = 0x8f; // Anything higher would exceed U+10FFFF.
+            }
+        } else {
+            // Stray continuation byte, or a lead byte that is never valid in UTF-8.
+            return {false, false};
+        }
+    }
+    if (tail != 0) {
+        // The name ended in the middle of a multi-octet sequence.
+        return {false, false};
+    }
+    return {true, !needs_escaping};
+}
+
JSON
QPDF_Name::getJSON(int json_version)
{
if (json_version == 1) {
return JSON::makeString(normalizeName(this->name));
} else {
- bool has_8bit_chars;
- bool is_valid_utf8;
- bool is_utf16;
- QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
- if (!has_8bit_chars || is_valid_utf8) {
- return JSON::makeString(this->name);
+ if (auto res = analyzeJSONEncoding(name); res.first) {
+ return JSON::makeString(name);
} else {
- return JSON::makeString("n:" + normalizeName(this->name));
+ return JSON::makeString("n:" + normalizeName(name));
}
}
}
@@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer& p)
if (json_version == 1) {
p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
} else {
- bool has_8bit_chars;
- bool is_valid_utf8;
- bool is_utf16;
- QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
- if (!has_8bit_chars || is_valid_utf8) {
- p << "\"" << JSON::Writer::encode_string(name) << "\"";
+ if (auto res = analyzeJSONEncoding(name); res.first) {
+ if (res.second) {
+ p << "\"" << name << "\"";
+ } else {
+ p << "\"" << JSON::Writer::encode_string(name) << "\"";
+ }
} else {
p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
}
diff --git a/libqpdf/qpdf/QPDF_Name.hh b/libqpdf/qpdf/QPDF_Name.hh
index 167ddef5..fd4ac458 100644
--- a/libqpdf/qpdf/QPDF_Name.hh
+++ b/libqpdf/qpdf/QPDF_Name.hh
@@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue
// Put # into strings with characters unsuitable for name token
static std::string normalizeName(std::string const& name);
+
+ // Check whether name is valid UTF-8 and whether it contains characters that require escaping in
+ // JSON. Return {false, false} if the name is not valid UTF-8; otherwise return {true, true} if
+ // no characters require escaping, or {true, false} if escaping is required.
+ static std::pair<bool, bool> analyzeJSONEncoding(std::string const& name);
std::string
getStringValue() const override
{