aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/QPDF_String.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libqpdf/QPDF_String.cc')
-rw-r--r--libqpdf/QPDF_String.cc178
1 files changed, 178 insertions, 0 deletions
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
new file mode 100644
index 00000000..cc8ca042
--- /dev/null
+++ b/libqpdf/QPDF_String.cc
@@ -0,0 +1,178 @@
+
+#include <qpdf/QPDF_String.hh>
+
+#include <qpdf/QUtil.hh>
+// DO NOT USE ctype -- it is locale dependent for some things, and
+// it's not worth the risk of including it in case it may accidentally
+// be used.
+#include <string.h>
+
+// Locale-independent printability test (see the note above about
+// avoiding ctype).  A byte is treated as printable when it lies in
+// the range 32..126 or is 160 or greater.
+static bool is_iso_latin1_printable(unsigned char ch)
+{
+    if (ch >= 160)
+    {
+        return true;
+    }
+    return ((ch >= 32) && (ch <= 126));
+}
+
+// Build a string object that holds its own copy of the given bytes.
+QPDF_String::QPDF_String(std::string const& val)
+    : val(val)
+{
+}
+
+// The destructor body is empty; no explicit cleanup is performed here.
+QPDF_String::~QPDF_String()
+{
+    // intentionally empty
+}
+
+// Produce the PDF representation of this string without forcing hex
+// notation; unparse(bool) decides between literal and hex form.
+std::string
+QPDF_String::unparse()
+{
+    bool const force_binary = false;
+    return this->unparse(force_binary);
+}
+
+// Produce the PDF representation of this string.
+//
+// If force_binary is true, the result is always a hex string
+// ("<...>", two lowercase hex digits per byte).  Otherwise a
+// heuristic picks the form: hex notation is used when more than 20%
+// of the bytes are non-printable, unless a run of more than five
+// consecutive printable bytes is found, in which case a literal
+// string ("(...)") is used.  In literal form, \n \r \t \b \f,
+// parentheses and backslash are backslash-escaped, and any other
+// non-printable byte is written as a three-digit octal escape.
+//
+// Digits are generated by hand rather than with sprintf so that the
+// output cannot depend on locale and no fixed-size scratch buffer or
+// stdio declaration is needed.
+std::string
+QPDF_String::unparse(bool force_binary)
+{
+    static char const hex_digits[] = "0123456789abcdef";
+    size_t const len = this->val.length();
+
+    bool use_hexstring = force_binary;
+    if (! use_hexstring)
+    {
+        size_t nonprintable = 0;
+        int consecutive_printable = 0;
+        for (size_t i = 0; i < len; ++i)
+        {
+            char ch = this->val[i];
+            // Note: do not use locale to determine printability.  The
+            // PDF specification accepts arbitrary binary data.  Some
+            // locales imply multibyte characters.  We'll consider
+            // something printable if it is printable in ISO-Latin-1.
+            // We'll code this manually rather than being rude and
+            // setting locale.  The explicit test for 0 is required
+            // because strchr would otherwise match the terminating
+            // null of the character set.
+            if ((ch == 0) || (! (is_iso_latin1_printable(ch) ||
+                                 strchr("\n\r\t\b\f", ch))))
+            {
+                ++nonprintable;
+                consecutive_printable = 0;
+            }
+            else
+            {
+                if (++consecutive_printable > 5)
+                {
+                    // If there are more than 5 consecutive printable
+                    // characters, I want to see them as such.
+                    nonprintable = 0;
+                    break;
+                }
+            }
+        }
+
+        // Use hex notation if more than 20% of the characters are
+        // neither printable in ISO-Latin-1 nor one of the standard
+        // escaped characters.  Uniformly distributed random
+        // characters will not pass this test even with ISO-Latin-1 in
+        // which 76% are either printable or in the set of standard
+        // escaped characters.  Written as a division so that the
+        // comparison cannot overflow; for integers this is equivalent
+        // to 5 * nonprintable > len.
+        if (nonprintable > len / 5)
+        {
+            use_hexstring = true;
+        }
+    }
+
+    std::string result;
+    if (use_hexstring)
+    {
+        result += "<";
+        for (size_t i = 0; i < len; ++i)
+        {
+            unsigned char byte = static_cast<unsigned char>(this->val[i]);
+            result += hex_digits[byte >> 4];
+            result += hex_digits[byte & 0x0f];
+        }
+        result += ">";
+    }
+    else
+    {
+        result += "(";
+        for (size_t i = 0; i < len; ++i)
+        {
+            char ch = this->val[i];
+            switch (ch)
+            {
+              case '\n':
+                result += "\\n";
+                break;
+
+              case '\r':
+                result += "\\r";
+                break;
+
+              case '\t':
+                result += "\\t";
+                break;
+
+              case '\b':
+                result += "\\b";
+                break;
+
+              case '\f':
+                result += "\\f";
+                break;
+
+              case '(':
+                result += "\\(";
+                break;
+
+              case ')':
+                result += "\\)";
+                break;
+
+              case '\\':
+                result += "\\\\";
+                break;
+
+              default:
+                if (is_iso_latin1_printable(ch))
+                {
+                    result += ch;
+                }
+                else
+                {
+                    // Three-digit octal escape; a byte never needs
+                    // more than three octal digits (maximum 377).
+                    unsigned char byte = static_cast<unsigned char>(ch);
+                    result += '\\';
+                    result += static_cast<char>('0' + ((byte >> 6) & 0x07));
+                    result += static_cast<char>('0' + ((byte >> 3) & 0x07));
+                    result += static_cast<char>('0' + (byte & 0x07));
+                }
+                break;
+            }
+        }
+        result += ")";
+    }
+
+    return result;
+}
+
+// Return a copy of the raw bytes stored in this string object.
+std::string
+QPDF_String::getVal() const
+{
+    std::string copy(this->val);
+    return copy;
+}
+
+// Return the contents of this string converted to UTF-8.
+//
+// If the value has even length and starts with the bytes 0xfe 0xff,
+// the remainder is decoded as big-endian UTF-16: a high surrogate
+// (0xd800-0xdbff) immediately followed by a low surrogate
+// (0xdc00-0xdfff) is combined into a single code point above 0xffff,
+// and any unpaired surrogate is passed to QUtil::toUTF8 unchanged.
+// Otherwise each byte is converted individually, with its unsigned
+// value used as the code point.
+//
+// NOTE(review): combining surrogates assumes QUtil::toUTF8 accepts
+// code points greater than 0xffff -- confirm against QUtil.
+std::string
+QPDF_String::getUTF8Val() const
+{
+    std::string result;
+    size_t len = this->val.length();
+    if ((len >= 2) && (len % 2 == 0) &&
+        (this->val[0] == '\xfe') && (this->val[1] == '\xff'))
+    {
+        // High surrogate waiting for its partner; 0 means none.  A
+        // real high surrogate is never 0, so 0 is a safe sentinel.
+        unsigned long pending_high = 0;
+        for (size_t i = 2; i < len; i += 2)
+        {
+            unsigned long unit =
+                (static_cast<unsigned long>(
+                    static_cast<unsigned char>(this->val[i])) << 8) +
+                static_cast<unsigned long>(
+                    static_cast<unsigned char>(this->val[i+1]));
+
+            if (pending_high != 0)
+            {
+                if ((unit >= 0xdc00) && (unit <= 0xdfff))
+                {
+                    // Valid surrogate pair: emit one code point.
+                    result += QUtil::toUTF8(
+                        0x10000 +
+                        ((pending_high - 0xd800) << 10) +
+                        (unit - 0xdc00));
+                    pending_high = 0;
+                    continue;
+                }
+                // Unpaired high surrogate: emit it by itself, then
+                // handle the current unit normally below.
+                result += QUtil::toUTF8(pending_high);
+                pending_high = 0;
+            }
+
+            if ((unit >= 0xd800) && (unit <= 0xdbff))
+            {
+                pending_high = unit;
+            }
+            else
+            {
+                result += QUtil::toUTF8(unit);
+            }
+        }
+        if (pending_high != 0)
+        {
+            // The string ended in the middle of a surrogate pair.
+            result += QUtil::toUTF8(pending_high);
+        }
+    }
+    else
+    {
+        for (size_t i = 0; i < len; ++i)
+        {
+            result += QUtil::toUTF8(
+                static_cast<unsigned char>(this->val[i]));
+        }
+    }
+    return result;
+}