about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf/QUtil.cc
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2022-01-11 21:06:17 +0100
committer Jay Berkenbilt <jberkenbilt@users.noreply.github.com> 2022-01-11 21:55:19 +0100
commit 370710657a7e7c771668107d1b6407fc350a2891 (patch)
tree fe258f61196f155b67d3fcd193d0dd052c72a251 /libqpdf/QUtil.cc
parent 77c31305fe1e9fde7ebf221fca94e7628cbf5a28 (diff)
download qpdf-370710657a7e7c771668107d1b6407fc350a2891.tar.zst
Add missing characters from PDF doc encoding (fixes #606)
Diffstat (limited to 'libqpdf/QUtil.cc')
-rw-r--r-- libqpdf/QUtil.cc 46
1 files changed, 43 insertions, 3 deletions
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index daa663a3..c71e7923 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -37,8 +37,20 @@
# include <sys/stat.h>
#endif
-// First element is 128
+// Unicode values for PDF doc encoding bytes 0x18-0x1f; first element is byte 24 (0x18)
+static unsigned short pdf_doc_low_to_unicode[] = {
+ 0x02d8, // 0x18 BREVE
+ 0x02c7, // 0x19 CARON
+ 0x02c6, // 0x1a MODIFIER LETTER CIRCUMFLEX ACCENT
+ 0x02d9, // 0x1b DOT ABOVE
+ 0x02dd, // 0x1c DOUBLE ACUTE ACCENT
+ 0x02db, // 0x1d OGONEK
+ 0x02da, // 0x1e RING ABOVE
+ 0x02dc, // 0x1f SMALL TILDE
+};
+// First element is 127
static unsigned short pdf_doc_to_unicode[] = {
+ 0xfffd, // 0x7f UNDEFINED
0x2022, // 0x80 BULLET
0x2020, // 0x81 DAGGER
0x2021, // 0x82 DOUBLE DAGGER
@@ -2032,6 +2044,30 @@ encode_pdfdoc(unsigned long codepoint)
unsigned char ch = '\0';
switch (codepoint)
{
+ case 0x02d8:
+ ch = 0x18;
+ break;
+ case 0x02c7:
+ ch = 0x19;
+ break;
+ case 0x02c6:
+ ch = 0x1a;
+ break;
+ case 0x02d9:
+ ch = 0x1b;
+ break;
+ case 0x02dd:
+ ch = 0x1c;
+ break;
+ case 0x02db:
+ ch = 0x1d;
+ break;
+ case 0x02da:
+ ch = 0x1e;
+ break;
+ case 0x02dc:
+ ch = 0x1f;
+ break;
case 0x2022:
ch = 0x80;
break;
@@ -2427,9 +2463,13 @@ QUtil::pdf_doc_to_utf8(std::string const& val)
{
unsigned char ch = static_cast<unsigned char>(val.at(i));
unsigned short ch_short = ch;
- if ((ch >= 128) && (ch <= 160))
+ if ((ch >= 127) && (ch <= 160))
+ {
+ ch_short = pdf_doc_to_unicode[ch - 127];
+ }
+ else if ((ch >= 24) && (ch <= 31))
{
- ch_short = pdf_doc_to_unicode[ch - 128];
+ ch_short = pdf_doc_low_to_unicode[ch - 24];
}
result += QUtil::toUTF8(ch_short);
}