diff options
author | Jay Berkenbilt <ejb@ql.org> | 2008-04-29 14:55:25 +0200 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2008-04-29 14:55:25 +0200 |
commit | 9a0b88bf7777c153dc46ace22db74ef24d51583a (patch) | |
tree | f567ac1cf2bf5071a611eb49323a935b6ac938ff /libqpdf/QPDF_String.cc | |
download | qpdf-9a0b88bf7777c153dc46ace22db74ef24d51583a.tar.zst |
update release date to actual date [tag: release-qpdf-2.0]
git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649
Diffstat (limited to 'libqpdf/QPDF_String.cc')
-rw-r--r-- | libqpdf/QPDF_String.cc | 178 |
1 files changed, 178 insertions, 0 deletions
diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc new file mode 100644 index 00000000..cc8ca042 --- /dev/null +++ b/libqpdf/QPDF_String.cc @@ -0,0 +1,178 @@ + +#include <qpdf/QPDF_String.hh> + +#include <qpdf/QUtil.hh> +// DO NOT USE ctype -- it is locale dependent for some things, and +// it's not worth the risk of including it in case it may accidentally +// be used. +#include <string.h> + +// See above about ctype. +static bool is_iso_latin1_printable(unsigned char ch) +{ + return (((ch >= 32) && (ch <= 126)) || (ch >= 160)); +} + +QPDF_String::QPDF_String(std::string const& val) : + val(val) +{ +} + +QPDF_String::~QPDF_String() +{ +} + +std::string +QPDF_String::unparse() +{ + return unparse(false); +} + +std::string +QPDF_String::unparse(bool force_binary) +{ + bool use_hexstring = force_binary; + if (! use_hexstring) + { + unsigned int nonprintable = 0; + int consecutive_printable = 0; + for (unsigned int i = 0; i < this->val.length(); ++i) + { + char ch = this->val[i]; + // Note: do not use locale to determine printability. The PDF + // specification accepts arbitrary binary data. Some locales + // imply multibyte characters. We'll consider something + // printable if it is printable in ISO-Latin-1. We'll code + // this manually rather than being rude and setting locale. + if ((ch == 0) || (! (is_iso_latin1_printable(ch) || + strchr("\n\r\t\b\f", ch)))) + { + ++nonprintable; + consecutive_printable = 0; + } + else + { + if (++consecutive_printable > 5) + { + // If there are more than 5 consecutive printable + // characters, I want to see them as such. + nonprintable = 0; + break; + } + } + } + + // Use hex notation if more than 20% of the characters are not + // printable in the current locale. Uniformly distributed random + // characters will not pass this test even with ISO-Latin-1 in + // which 76% are either printable or in the set of standard + // escaped characters. 
+ if (5 * nonprintable > val.length()) + { + use_hexstring = true; + } + } + std::string result; + if (use_hexstring) + { + result += "<"; + char num[3]; + for (unsigned int i = 0; i < this->val.length(); ++i) + { + sprintf(num, "%02x", (unsigned char) this->val[i]); + result += num; + } + result += ">"; + } + else + { + result += "("; + char num[5]; + for (unsigned int i = 0; i < this->val.length(); ++i) + { + char ch = this->val[i]; + switch (ch) + { + case '\n': + result += "\\n"; + break; + + case '\r': + result += "\\r"; + break; + + case '\t': + result += "\\t"; + break; + + case '\b': + result += "\\b"; + break; + + case '\f': + result += "\\f"; + break; + + case '(': + result += "\\("; + break; + + case ')': + result += "\\)"; + break; + + case '\\': + result += "\\\\"; + break; + + default: + if (is_iso_latin1_printable(ch)) + { + result += this->val[i]; + } + else + { + sprintf(num, "\\%03o", (unsigned char)ch); + result += num; + } + break; + } + } + result += ")"; + } + + return result; +} + +std::string +QPDF_String::getVal() const +{ + return this->val; +} + +std::string +QPDF_String::getUTF8Val() const +{ + std::string result; + unsigned int len = this->val.length(); + if ((len >= 2) && (len % 2 == 0) && + (this->val[0] == '\xfe') && (this->val[1] == '\xff')) + { + // This is a Unicode string using big-endian UTF-16. This + // code is not actually correct as it doesn't properly handle + // characters past 0xffff. + for (unsigned int i = 2; i < len; i += 2) + { + result += QUtil::toUTF8(((unsigned char) this->val[i] << 8) + + ((unsigned char) this->val[i+1])); + } + } + else + { + for (unsigned int i = 0; i < len; ++i) + { + result += QUtil::toUTF8((unsigned char) this->val[i]); + } + } + return result; +} |