diff options
author | Jay Berkenbilt <ejb@ql.org> | 2022-09-26 14:05:28 +0200 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2022-09-26 14:06:47 +0200 |
commit | f4ca04cec1a0c4a3c8341ff15f68c06bed89c0d7 (patch) | |
tree | 4699cc60ca8e4779db4635a7342f4ff9dfffceb1 /libtests/qutil.cc | |
parent | 4fb7d1335a4660bb8748773294f2dea979fcdbb7 (diff) | |
download | qpdf-f4ca04cec1a0c4a3c8341ff15f68c06bed89c0d7.tar.zst |
Fix edge case in character encoding (fixes #778)
Avoid representing as PDF Doc encoding any string whose PDF Doc
encoding representation starts with a UTF-16 or UTF-8 marker.
Diffstat (limited to 'libtests/qutil.cc')
-rw-r--r-- | libtests/qutil.cc | 15 |
1 file changed, 15 insertions, 0 deletions
```diff
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index 82c2dd1a..972046b9 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -436,6 +436,21 @@ transcoding_test()
     assert(!QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8));
     std::cout << other_to_utf8 << std::endl;
     std::cout << "done other characters" << std::endl;
+    // These valid UTF8 strings when converted to PDFDoc would end up
+    // with a byte sequence that would be recognized as UTF-8 or
+    // UTF-16 rather than PDFDoc. A special case is required to store
+    // them as UTF-16 rather than PDFDoc.
+    static std::string fe_ff("\xc3\xbe\xc3\xbf potato");
+    static std::string ff_fe("\xc3\xbf\xc3\xbe potato");
+    static std::string ef_bb_bf("\xc3\xaf\xc2\xbb\xc2\xbf potato");
+    assert(!QUtil::utf8_to_pdf_doc(fe_ff, pdfdoc));
+    assert(pdfdoc == "?\xfe\xff potato");
+    assert(!QUtil::utf8_to_pdf_doc(ff_fe, pdfdoc));
+    assert(pdfdoc == "?\xff\xfe potato");
+    assert(!QUtil::utf8_to_pdf_doc(ef_bb_bf, pdfdoc));
+    assert(pdfdoc == "?\xef\xbb\xbf potato");
+    assert(QUtil::utf8_to_pdf_doc("\xc3\xbe\xc3\xbe", pdfdoc));
+    assert(QUtil::utf8_to_pdf_doc("\xc3\xaf\xc2\xbb\xc2\xbe", pdfdoc));
 }
 
 void
```