aboutsummaryrefslogtreecommitdiffstats
path: root/libtests
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2022-02-15 14:29:29 +0100
committerJay Berkenbilt <ejb@ql.org>2022-02-15 14:32:38 +0100
commit1065bbb0165b4608bd715866332751be9213cd51 (patch)
tree1d6a5687ec36503843abd1629e7b42e376708042 /libtests
parent2b8d0f385b56d2a7307679ace4c50adbdbbddd03 (diff)
downloadqpdf-1065bbb0165b4608bd715866332751be9213cd51.tar.zst
Handle odd PDFDoc codepoints in UTF-8 during transcoding (fixes #650)
There are code points in PDFDoc that are not valid UTF-8 but map to valid UTF-8. We were handling those correctly with bidirectional mapping. However, if those same code points appeared in UTF-8, where they have no meaning, they were left as fixed points when converting to PDFDoc, where they do have meaning. This change recognizes them as errors.
Diffstat (limited to 'libtests')
-rw-r--r--libtests/qtest/qutil/qutil.out3
-rw-r--r--libtests/qutil.cc13
2 files changed, 12 insertions, 4 deletions
diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out
index aedf49e1..fa284237 100644
--- a/libtests/qtest/qutil/qutil.out
+++ b/libtests/qtest/qutil/qutil.out
@@ -88,7 +88,8 @@ alternatives
2: 83a9e99e
0: 717561636b
done alternatives
-w˘wˇwˆw˙w˝w˛w˚w˜w�w�w
+w˘wˇwˆw˙w˝w˛w˚w˜w�w�w�w
+w?w?w?w?w?w?w?w?w?w?w-w
done other characters
---- whoami
quack1
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index 2142346e..2e4d9cdd 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -418,9 +418,16 @@ void transcoding_test()
print_alternatives(utf8);
print_alternatives("quack");
std::cout << "done alternatives" << std::endl;
- std::string other = QUtil::pdf_doc_to_utf8(
- "w\030w\031w\032w\033w\034w\035w\036w\037w\177w\255w");
- std::cout << other << std::endl;
+ // These characters are either valid in PDFDoc and invalid in
+ // UTF-8 or the other way around.
+ std::string other("w\x18w\x19w\x1aw\x1bw\x1cw\x1dw\x1ew\x1fw\x7fw");
+ std::string other_doc = other + "\x9fw\xadw";
+ std::cout << QUtil::pdf_doc_to_utf8(other_doc) << std::endl;
+ std::string other_utf8 =
+ other + QUtil::toUTF8(0x9f) + "w" + QUtil::toUTF8(0xad) + "w";
+ std::string other_to_utf8;
+ assert(! QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8));
+ std::cout << other_to_utf8 << std::endl;
std::cout << "done other characters" << std::endl;
}