aboutsummaryrefslogtreecommitdiffstats
path: root/libtests
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2022-09-26 14:05:28 +0200
committer Jay Berkenbilt <ejb@ql.org> 2022-09-26 14:06:47 +0200
commit f4ca04cec1a0c4a3c8341ff15f68c06bed89c0d7 (patch)
tree 4699cc60ca8e4779db4635a7342f4ff9dfffceb1 /libtests
parent 4fb7d1335a4660bb8748773294f2dea979fcdbb7 (diff)
download qpdf-f4ca04cec1a0c4a3c8341ff15f68c06bed89c0d7.tar.zst
Fix edge case in character encoding (fixes #778)
Avoid representing as PDF Doc encoding any string whose PDF Doc encoding representation starts with a UTF-16 or UTF-8 marker.
Diffstat (limited to 'libtests')
-rw-r--r-- libtests/qutil.cc 15
1 files changed, 15 insertions, 0 deletions
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index 82c2dd1a..972046b9 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -436,6 +436,21 @@ transcoding_test()
assert(!QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8));
std::cout << other_to_utf8 << std::endl;
std::cout << "done other characters" << std::endl;
+ // These valid UTF8 strings when converted to PDFDoc would end up
+ // with a byte sequence that would be recognized as UTF-8 or
+ // UTF-16 rather than PDFDoc. A special case is required to store
+ // them as UTF-16 rather than PDFDoc.
+ static std::string fe_ff("\xc3\xbe\xc3\xbf potato");
+ static std::string ff_fe("\xc3\xbf\xc3\xbe potato");
+ static std::string ef_bb_bf("\xc3\xaf\xc2\xbb\xc2\xbf potato");
+ assert(!QUtil::utf8_to_pdf_doc(fe_ff, pdfdoc));
+ assert(pdfdoc == "?\xfe\xff potato");
+ assert(!QUtil::utf8_to_pdf_doc(ff_fe, pdfdoc));
+ assert(pdfdoc == "?\xff\xfe potato");
+ assert(!QUtil::utf8_to_pdf_doc(ef_bb_bf, pdfdoc));
+ assert(pdfdoc == "?\xef\xbb\xbf potato");
+ assert(QUtil::utf8_to_pdf_doc("\xc3\xbe\xc3\xbe", pdfdoc));
+ assert(QUtil::utf8_to_pdf_doc("\xc3\xaf\xc2\xbb\xc2\xbe", pdfdoc));
}
void