aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--include/qpdf/QUtil.hh6
-rw-r--r--libqpdf/QUtil.cc23
-rw-r--r--libtests/qtest/qutil/qutil.out4
-rw-r--r--libtests/qutil.cc13
5 files changed, 52 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 0d9053e0..9eedd250 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2019-01-03 Jay Berkenbilt <ejb@ql.org>
+
+ * Add method QUtil::utf8_to_ascii, which returns an ASCII string
+ for a UTF-8 string, replacing out-of-range characters with a
+ specified substitute.
+
2019-01-02 Jay Berkenbilt <ejb@ql.org>
* Add method QPDFObjectHandle::getResourceNames that returns a set
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index 96c0530b..5532149c 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -152,6 +152,12 @@ namespace QUtil
QPDF_DLL
std::string toUTF16(unsigned long uval);
+ // Convert a UTF-8 encoded string to ASCII by replacing each
+ // character outside the ASCII range with the given unknown_char.
+ QPDF_DLL
+ std::string utf8_to_ascii(
+ std::string const& utf8, char unknown_char = '?');
+
// If secure random number generation is supported on your
// platform and qpdf was not compiled with insecure random number
// generation, this returns a cryptographically secure random
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 44ffec7f..7c2d9bc9 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max)
}
return result;
}
+
+std::string
+QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
+{ // Returns a copy of utf8 with 7-bit bytes kept as-is and, for well-formed UTF-8, each multi-byte character replaced by a single unknown_char.
+ std::string ascii_value;
+ for (size_t i = 0; i < utf8.length(); ++i)
+ {
+ unsigned char ch = static_cast<unsigned char>(utf8.at(i)); // unsigned so a byte with the high bit set cannot appear negative in the tests below
+ if (ch < 128)
+ {
+ ascii_value.append(1, ch); // 7-bit byte: copied through unchanged
+ }
+ else if ((ch & 0xc0) == 0x80)
+ {
+ // Byte of the form 10xxxxxx, i.e. a UTF-8 continuation byte: emit nothing, so a multi-byte character yields only the one substitute written for its first byte. A continuation byte with no preceding lead byte is dropped the same way.
+ }
+ else
+ {
+ ascii_value.append(1, unknown_char); // any other byte >= 0x80 (for valid UTF-8, the lead byte of a multi-byte character): one substitute
+ }
+ }
+ return ascii_value;
+}
diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out
index 8223bf5b..f47301e4 100644
--- a/libtests/qtest/qutil/qutil.out
+++ b/libtests/qtest/qutil/qutil.out
@@ -47,6 +47,10 @@ HAGOOGAMAGOOGLE: 0
0xdead -> ff fd
0x7fffffff -> ff fd
0x80000000 -> ff fd
+---- utf8_to_ascii
+Does π have fingers?
+Does ? have fingers?
+Does * have fingers?
---- whoami
quack1
quack2
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index 025f4e43..de51da58 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -220,6 +220,17 @@ void to_utf16_test()
print_utf16(0x80000000UL);
}
+void utf8_to_ascii_test()
+{ // Prints the raw input, then its conversion with the default substitute, then its conversion with '*', one per line.
+ char const* input = "Does \317\200 have fingers?"; // octal \317\200 = bytes 0xCF 0x80, the UTF-8 encoding of U+03C0 (Greek small letter pi)
+ std::cout << input
+ << std::endl
+ << QUtil::utf8_to_ascii(input)
+ << std::endl
+ << QUtil::utf8_to_ascii(input, '*')
+ << std::endl;
+}
+
void print_whoami(char const* str)
{
PointerHolder<char> dup(true, QUtil::copy_string(str));
@@ -328,6 +339,8 @@ int main(int argc, char* argv[])
to_utf8_test();
std::cout << "---- utf16" << std::endl;
to_utf16_test();
+ std::cout << "---- utf8_to_ascii" << std::endl;
+ utf8_to_ascii_test();
std::cout << "---- whoami" << std::endl;
get_whoami_test();
std::cout << "---- file" << std::endl;