1 files changed, 178 insertions, 0 deletions
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc
new file mode 100644
index 00000000..cc8ca042
--- /dev/null
+++ b/libqpdf/QPDF_String.cc
@@ -0,0 +1,178 @@
+
+#include <qpdf/QPDF_String.hh>
+
+#include <qpdf/QUtil.hh>
+// DO NOT USE ctype -- it is locale dependent for some things, and
+// it's not worth the risk of including it in case it may accidentally
+// be used.
+#include <string.h>
+
+// Locale-free printability test (see the note above about ctype).
+static bool is_iso_latin1_printable(unsigned char ch)
+{
+    return ! ((ch < 32) || ((ch > 126) && (ch < 160)));
+}
+
+// Construct a string object that stores its own copy of val.
+QPDF_String::QPDF_String(std::string const& val) : val(val)
+{
+}
+
+// The destructor body is intentionally empty.
+QPDF_String::~QPDF_String()
+{}
+
+// Unparse without forcing hex notation; unparse(bool) picks the form.
+std::string QPDF_String::unparse()
+{
+    return this->unparse(/* force_binary */ false);
+}
+
+// Report whether data reads better as a hex string.  A byte counts as
+// nonprintable if it is NUL or is neither printable in ISO-Latin-1 nor
+// one of the standard escaped characters (\n, \r, \t, \b, \f).  Hex
+// notation is preferred when more than 20% of the bytes are
+// nonprintable, unless a run of more than five printable bytes is
+// found first, in which case the text should stay readable as such.
+static bool prefers_hexstring(std::string const& data)
+{
+    // Deliberately locale-free: PDF strings are arbitrary binary data
+    // and some locales imply multibyte characters.
+    size_t nonprintable = 0;
+    int consecutive_printable = 0;
+    for (size_t i = 0; i < data.length(); ++i)
+    {
+        char const ch = data[i];
+        bool const printable = (ch != 0) &&
+            (is_iso_latin1_printable(ch) || strchr("\n\r\t\b\f", ch));
+        if (! printable)
+        {
+            ++nonprintable;
+            consecutive_printable = 0;
+        }
+        else if (++consecutive_printable > 5)
+        {
+            return false;
+        }
+    }
+    // Same test as 5 * nonprintable > length, but it cannot overflow.
+    return nonprintable > data.length() / 5;
+}
+
+// Return this string in PDF syntax.  The result is a hex string
+// (<...>) when force_binary is true or when the value is judged to
+// be mostly binary; otherwise it is a literal string ((...)) with
+// backslash escapes for the delimiters, the backslash, the standard
+// control characters, and (as three-digit octal) any other byte that
+// is not printable in ISO-Latin-1.
+std::string
+QPDF_String::unparse(bool force_binary)
+{
+    std::string const& data = this->val;
+    std::string result;
+    if (force_binary || prefers_hexstring(data))
+    {
+        // Digits are emitted by hand so that no stdio formatting (and
+        // no fixed-size scratch buffer) is needed.
+        static char const hex_digits[] = "0123456789abcdef";
+        result += "<";
+        for (size_t i = 0; i < data.length(); ++i)
+        {
+            unsigned char const uch = (unsigned char) data[i];
+            result += hex_digits[uch >> 4];
+            result += hex_digits[uch & 0x0f];
+        }
+        result += ">";
+        return result;
+    }
+
+    result += "(";
+    for (size_t i = 0; i < data.length(); ++i)
+    {
+        char const ch = data[i];
+        switch (ch)
+        {
+          case '\n':
+            result += "\\n";
+            break;
+
+          case '\r':
+            result += "\\r";
+            break;
+
+          case '\t':
+            result += "\\t";
+            break;
+
+          case '\b':
+            result += "\\b";
+            break;
+
+          case '\f':
+            result += "\\f";
+            break;
+
+          case '(':
+            result += "\\(";
+            break;
+
+          case ')':
+            result += "\\)";
+            break;
+
+          case '\\':
+            result += "\\\\";
+            break;
+
+          default:
+            if (is_iso_latin1_printable(ch))
+            {
+                result += ch;
+            }
+            else
+            {
+                unsigned char const uch = (unsigned char) ch;
+                result += '\\';
+                result += (char) ('0' + (uch >> 6));
+                result += (char) ('0' + ((uch >> 3) & 7));
+                result += (char) ('0' + (uch & 7));
+            }
+            break;
+        }
+    }
+    result += ")";
+    return result;
+}
+
+// Return a copy of the stored value exactly as it is held.
+std::string QPDF_String::getVal() const
+{
+    return val;
+}
+
+// Return the value as UTF-8: big-endian UTF-16 (surrogate pairs combined)
+// if it has even length and starts with FE FF, else one code point per byte.
+std::string QPDF_String::getUTF8Val() const
+{
+    std::string const& s = this->val;
+    std::string result;
+    size_t const len = s.length();
+    bool const utf16 = (len % 2 == 0) && (s.compare(0, 2, "\xfe\xff") == 0);
+    for (size_t i = (utf16 ? 2 : 0); i < len; ++i)
+    {
+        unsigned long cp = (unsigned char) s[i];
+        if (utf16)
+        {
+            cp = (cp << 8) + (unsigned char) s[++i];
+            unsigned long const lo = (i + 2 >= len) ? 0 :
+                (((unsigned char) s[i+1] << 8) + (unsigned char) s[i+2]);
+            if (((cp & 0xfc00) == 0xd800) && ((lo & 0xfc00) == 0xdc00))
+            {
+                cp = 0x10000 + ((cp & 0x3ff) << 10) + (lo & 0x3ff);
+                i += 2;
+            }
+        }
+        result += QUtil::toUTF8(cp); // NOTE(review): cp may exceed 0xffff
+    }
+    return result;
+}