From 27e8d4bbfffef1072043ef21725ab85eabaee63b Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 17 Oct 2009 17:31:52 +0000 Subject: tweak when we decide to use hex strings vs literal strings git-svn-id: svn+q:///qpdf/trunk@810 71b93d88-0707-0410-a8cf-f5a4172ac649 --- libqpdf/QPDF_String.cc | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'libqpdf') diff --git a/libqpdf/QPDF_String.cc b/libqpdf/QPDF_String.cc index 69c2ecb7..274b2375 100644 --- a/libqpdf/QPDF_String.cc +++ b/libqpdf/QPDF_String.cc @@ -9,6 +9,10 @@ #include <string.h> // See above about ctype. +static bool is_ascii_printable(unsigned char ch) +{ + return ((ch >= 32) && (ch <= 126)); +} static bool is_iso_latin1_printable(unsigned char ch) { return (((ch >= 32) && (ch <= 126)) || (ch >= 160)); @@ -40,12 +44,13 @@ QPDF_String::unparse(bool force_binary) for (unsigned int i = 0; i < this->val.length(); ++i) { char ch = this->val[i]; - // Note: do not use locale to determine printability. The PDF - // specification accepts arbitrary binary data. Some locales - // imply multibyte characters. We'll consider something - // printable if it is printable in ISO-Latin-1. We'll code - // this manually rather than being rude and setting locale. - if ((ch == 0) || (! (is_iso_latin1_printable(ch) || + // Note: do not use locale to determine printability. The + // PDF specification accepts arbitrary binary data. Some + // locales imply multibyte characters. We'll consider + // something printable if it is printable in 7-bit ASCII. + // We'll code this manually rather than being rude and + // setting locale. + if ((ch == 0) || (! (is_ascii_printable(ch) || strchr("\n\r\t\b\f", ch)))) { ++nonprintable; @@ -64,10 +69,7 @@ QPDF_String::unparse(bool force_binary) } // Use hex notation if more than 20% of the characters are not - // printable in the current locale. 
Uniformly distributed random - // characters will not pass this test even with ISO-Latin-1 in - // which 76% are either printable or in the set of standard - // escaped characters. + // printable in plain ASCII. if (5 * nonprintable > val.length()) { use_hexstring = true; -- cgit v1.2.3-54-g00ecf