diff options
author | Jay Berkenbilt <ejb@ql.org> | 2022-01-11 21:06:17 +0100 |
---|---|---|
committer | Jay Berkenbilt <jberkenbilt@users.noreply.github.com> | 2022-01-11 21:55:19 +0100 |
commit | 370710657a7e7c771668107d1b6407fc350a2891 (patch) | |
tree | fe258f61196f155b67d3fcd193d0dd052c72a251 /libqpdf/QUtil.cc | |
parent | 77c31305fe1e9fde7ebf221fca94e7628cbf5a28 (diff) | |
download | qpdf-370710657a7e7c771668107d1b6407fc350a2891.tar.zst |
Add missing characters from PDF doc encoding (fixes #606)
Diffstat (limited to 'libqpdf/QUtil.cc')
-rw-r--r-- | libqpdf/QUtil.cc | 46 |
1 files changed, 43 insertions, 3 deletions
```diff
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index daa663a3..c71e7923 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -37,8 +37,20 @@
 # include <sys/stat.h>
 #endif
 
-// First element is 128
+// First element is 24
+static unsigned short pdf_doc_low_to_unicode[] = {
+    0x02d8, // 0x18 BREVE
+    0x02c7, // 0x19 CARON
+    0x02c6, // 0x1a MODIFIER LETTER CIRCUMFLEX ACCENT
+    0x02d9, // 0x1b DOT ABOVE
+    0x02dd, // 0x1c DOUBLE ACUTE ACCENT
+    0x02db, // 0x1d OGONEK
+    0x02da, // 0x1e RING ABOVE
+    0x02dc, // 0x1f SMALL TILDE
+};
+// First element is 127
 static unsigned short pdf_doc_to_unicode[] = {
+    0xfffd, // 0x7f UNDEFINED
     0x2022, // 0x80 BULLET
     0x2020, // 0x81 DAGGER
     0x2021, // 0x82 DOUBLE DAGGER
@@ -2032,6 +2044,30 @@ encode_pdfdoc(unsigned long codepoint)
     unsigned char ch = '\0';
     switch (codepoint)
     {
+      case 0x02d8:
+        ch = 0x18;
+        break;
+      case 0x02c7:
+        ch = 0x19;
+        break;
+      case 0x02c6:
+        ch = 0x1a;
+        break;
+      case 0x02d9:
+        ch = 0x1b;
+        break;
+      case 0x02dd:
+        ch = 0x1c;
+        break;
+      case 0x02db:
+        ch = 0x1d;
+        break;
+      case 0x02da:
+        ch = 0x1e;
+        break;
+      case 0x02dc:
+        ch = 0x1f;
+        break;
       case 0x2022:
         ch = 0x80;
         break;
@@ -2427,9 +2463,13 @@ QUtil::pdf_doc_to_utf8(std::string const& val)
     {
         unsigned char ch = static_cast<unsigned char>(val.at(i));
         unsigned short ch_short = ch;
-        if ((ch >= 128) && (ch <= 160))
+        if ((ch >= 127) && (ch <= 160))
+        {
+            ch_short = pdf_doc_to_unicode[ch - 127];
+        }
+        else if ((ch >= 24) && (ch <= 31))
         {
-            ch_short = pdf_doc_to_unicode[ch - 128];
+            ch_short = pdf_doc_low_to_unicode[ch - 24];
         }
         result += QUtil::toUTF8(ch_short);
     }
```