about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- libqpdf/QUtil.cc 46
-rw-r--r-- libtests/qtest/qutil/qutil.out 3
-rw-r--r-- libtests/qutil.cc 17
-rw-r--r-- qpdf/qtest/qpdf/json-image-streams-all.out 2
-rw-r--r-- qpdf/qtest/qpdf/json-image-streams-small.out 4
-rw-r--r-- qpdf/qtest/qpdf/json-image-streams-specialized.out 2
-rw-r--r-- qpdf/qtest/qpdf/json-image-streams.out 2
-rw-r--r-- qpdf/qtest/qpdf/json-page-labels-num-tree.out 4
-rw-r--r-- qpdf/qtest/qpdf/page_api_2-json.out 4
10 files changed, 72 insertions, 17 deletions
diff --git a/ChangeLog b/ChangeLog
index 546c5658..2ef10600 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2022-01-11 Jay Berkenbilt <ejb@ql.org>
+
+ * Bug fix: add missing characters from PDF doc encoding.
+ Fixes #606.
+
2021-12-29 Jay Berkenbilt <ejb@ql.org>
* Add method QUtil::file_can_be_opened
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index daa663a3..c71e7923 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -37,8 +37,20 @@
# include <sys/stat.h>
#endif
-// First element is 128
+// First element is 24
+static unsigned short pdf_doc_low_to_unicode[] = {
+ 0x02d8, // 0x18 BREVE
+ 0x02c7, // 0x19 CARON
+ 0x02c6, // 0x1a MODIFIER LETTER CIRCUMFLEX ACCENT
+ 0x02d9, // 0x1b DOT ABOVE
+ 0x02dd, // 0x1c DOUBLE ACUTE ACCENT
+ 0x02db, // 0x1d OGONEK
+ 0x02da, // 0x1e RING ABOVE
+ 0x02dc, // 0x1f SMALL TILDE
+};
+// First element is 127
static unsigned short pdf_doc_to_unicode[] = {
+ 0xfffd, // 0x7f UNDEFINED
0x2022, // 0x80 BULLET
0x2020, // 0x81 DAGGER
0x2021, // 0x82 DOUBLE DAGGER
@@ -2032,6 +2044,30 @@ encode_pdfdoc(unsigned long codepoint)
unsigned char ch = '\0';
switch (codepoint)
{
+ case 0x02d8:
+ ch = 0x18;
+ break;
+ case 0x02c7:
+ ch = 0x19;
+ break;
+ case 0x02c6:
+ ch = 0x1a;
+ break;
+ case 0x02d9:
+ ch = 0x1b;
+ break;
+ case 0x02dd:
+ ch = 0x1c;
+ break;
+ case 0x02db:
+ ch = 0x1d;
+ break;
+ case 0x02da:
+ ch = 0x1e;
+ break;
+ case 0x02dc:
+ ch = 0x1f;
+ break;
case 0x2022:
ch = 0x80;
break;
@@ -2427,9 +2463,13 @@ QUtil::pdf_doc_to_utf8(std::string const& val)
{
unsigned char ch = static_cast<unsigned char>(val.at(i));
unsigned short ch_short = ch;
- if ((ch >= 128) && (ch <= 160))
+ if ((ch >= 127) && (ch <= 160))
+ {
+ ch_short = pdf_doc_to_unicode[ch - 127];
+ }
+ else if ((ch >= 24) && (ch <= 31))
{
- ch_short = pdf_doc_to_unicode[ch - 128];
+ ch_short = pdf_doc_low_to_unicode[ch - 24];
}
result += QUtil::toUTF8(ch_short);
}
diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out
index 90f1fd16..bcb89def 100644
--- a/libtests/qtest/qutil/qutil.out
+++ b/libtests/qtest/qutil/qutil.out
@@ -69,6 +69,7 @@ HAGOOGAMAGOOGLE: 0
<c0>Does * have fingers?
---- transcoding
bidirectional pdf doc done
+bidirectional pdf doc low done
bidirectional win ansi done
bidirectional mac roman done
analysis done
@@ -85,6 +86,8 @@ alternatives
2: 83a9e99e
0: 717561636b
done alternatives
+w˘wˇwˆw˙w˝w˛w˚w˜w�w
+done low characters
---- whoami
quack1
quack2
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index 46eb840c..cd2b7796 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -308,12 +308,12 @@ void utf8_to_ascii_test()
void transcoding_test(std::string (*to_utf8)(std::string const&),
std::string (*from_utf8)(std::string const&, char),
- int last, std::string unknown)
+ int first, int last, std::string unknown)
{
std::string in(" ");
std::string out;
std::string back;
- for (int i = 128; i <= last; ++i)
+ for (int i = first; i <= last; ++i)
{
in.at(0) = static_cast<char>(static_cast<unsigned char>(i));
out = (*to_utf8)(in);
@@ -355,13 +355,16 @@ void print_alternatives(std::string const& str)
void transcoding_test()
{
transcoding_test(&QUtil::pdf_doc_to_utf8,
- &QUtil::utf8_to_pdf_doc, 160, "\x9f");
+ &QUtil::utf8_to_pdf_doc, 127, 160, "\x9f");
std::cout << "bidirectional pdf doc done" << std::endl;
+ transcoding_test(&QUtil::pdf_doc_to_utf8,
+ &QUtil::utf8_to_pdf_doc, 24, 31, "?");
+ std::cout << "bidirectional pdf doc low done" << std::endl;
transcoding_test(&QUtil::win_ansi_to_utf8,
- &QUtil::utf8_to_win_ansi, 160, "?");
+ &QUtil::utf8_to_win_ansi, 128, 160, "?");
std::cout << "bidirectional win ansi done" << std::endl;
transcoding_test(&QUtil::mac_roman_to_utf8,
- &QUtil::utf8_to_mac_roman, 255, "?");
+ &QUtil::utf8_to_mac_roman, 128, 255, "?");
std::cout << "bidirectional mac roman done" << std::endl;
check_analyze("pi = \317\200", true, true, false);
check_analyze("pi != \317", true, false, false);
@@ -396,6 +399,10 @@ void transcoding_test()
print_alternatives(utf8);
print_alternatives("quack");
std::cout << "done alternatives" << std::endl;
+ std::string low = QUtil::pdf_doc_to_utf8(
+ "w\030w\031w\032w\033w\034w\035w\036w\037w\177w");
+ std::cout << low << std::endl;
+ std::cout << "done low characters" << std::endl;
}
void print_whoami(char const* str)
diff --git a/qpdf/qtest/qpdf/json-image-streams-all.out b/qpdf/qtest/qpdf/json-image-streams-all.out
index 3dea8852..fa5a211c 100644
--- a/qpdf/qtest/qpdf/json-image-streams-all.out
+++ b/qpdf/qtest/qpdf/json-image-streams-all.out
@@ -604,7 +604,7 @@
"trailer": {
"/ID": [
"S¶Ł”łîð\u000e¢¬\u0007}_)\u0012¶",
- "'+“‰¤V2«PP ç`m\"\u001d"
+ "'+“‰¤V2«PP ç`m\"˛"
],
"/Root": "1 0 R",
"/Size": 31
diff --git a/qpdf/qtest/qpdf/json-image-streams-small.out b/qpdf/qtest/qpdf/json-image-streams-small.out
index 92d0c4f3..be7aaabb 100644
--- a/qpdf/qtest/qpdf/json-image-streams-small.out
+++ b/qpdf/qtest/qpdf/json-image-streams-small.out
@@ -615,8 +615,8 @@
},
"trailer": {
"/ID": [
- "Z§¯•Py»’~’46\u001dı\u0011¢",
- "Z§¯•Py»’~’46\u001dı\u0011¢"
+ "Z§¯•Py»’~’46˛ı\u0011¢",
+ "Z§¯•Py»’~’46˛ı\u0011¢"
],
"/Root": "1 0 R",
"/Size": 31
diff --git a/qpdf/qtest/qpdf/json-image-streams-specialized.out b/qpdf/qtest/qpdf/json-image-streams-specialized.out
index c342f9e6..50a1fc0d 100644
--- a/qpdf/qtest/qpdf/json-image-streams-specialized.out
+++ b/qpdf/qtest/qpdf/json-image-streams-specialized.out
@@ -604,7 +604,7 @@
"trailer": {
"/ID": [
"S¶Ł”łîð\u000e¢¬\u0007}_)\u0012¶",
- "'+“‰¤V2«PP ç`m\"\u001d"
+ "'+“‰¤V2«PP ç`m\"˛"
],
"/Root": "1 0 R",
"/Size": 31
diff --git a/qpdf/qtest/qpdf/json-image-streams.out b/qpdf/qtest/qpdf/json-image-streams.out
index 2cfbd531..ac8ca2b9 100644
--- a/qpdf/qtest/qpdf/json-image-streams.out
+++ b/qpdf/qtest/qpdf/json-image-streams.out
@@ -604,7 +604,7 @@
"trailer": {
"/ID": [
"S¶Ł”łîð\u000e¢¬\u0007}_)\u0012¶",
- "'+“‰¤V2«PP ç`m\"\u001d"
+ "'+“‰¤V2«PP ç`m\"˛"
],
"/Root": "1 0 R",
"/Size": 31
diff --git a/qpdf/qtest/qpdf/json-page-labels-num-tree.out b/qpdf/qtest/qpdf/json-page-labels-num-tree.out
index d0f73a61..cc474335 100644
--- a/qpdf/qtest/qpdf/json-page-labels-num-tree.out
+++ b/qpdf/qtest/qpdf/json-page-labels-num-tree.out
@@ -1518,8 +1518,8 @@
"99 0 R": 47,
"trailer": {
"/ID": [
- "’ù\u0019Þxtó¼\\·¯½\u001eŁ7»",
- "\rþ\u0018©LÞ\u000fKýÈl\u0003¯\u0019\u0001\u000e"
+ "’ùˇÞxtó¼\\·¯½˚Ł7»",
+ "\rþ˘©LÞ\u000fKýÈl\u0003¯ˇ\u0001\u000e"
],
"/Root": "1 0 R",
"/Size": 100
diff --git a/qpdf/qtest/qpdf/page_api_2-json.out b/qpdf/qtest/qpdf/page_api_2-json.out
index 172ce1c1..bef00d02 100644
--- a/qpdf/qtest/qpdf/page_api_2-json.out
+++ b/qpdf/qtest/qpdf/page_api_2-json.out
@@ -178,8 +178,8 @@
},
"trailer": {
"/ID": [
- "û\u0018·ƒÿ{5⁄\u0005Ú−S*º‘o",
- "÷\u0017ž³QY¿ÔÀ\u000f\u0012−¼ý\u001f\u0002"
+ "û˘·ƒÿ{5⁄\u0005Ú−S*º‘o",
+ "÷\u0017ž³QY¿ÔÀ\u000f\u0012−¼ý˜\u0002"
],
"/Info": "2 0 R",
"/Root": "1 0 R",