about | summary | refs | log | tree | commit | diff | stats
path: root/libqpdf/QUtil.cc
diff options
context:
space:
mode:
author Jay Berkenbilt <ejb@ql.org> 2022-04-23 22:03:02 +0200
committer Jay Berkenbilt <ejb@ql.org> 2022-04-24 00:25:43 +0200
commit 5bbb0d4c307bff58e9928a1c757438d033687ce3 (patch)
tree 3b9baa1845f9eefc42748039c053dcfbe5f80e62 /libqpdf/QUtil.cc
parent 37f05e67d87518a26ab7f50da8c39d9c9a0570db (diff)
download qpdf-5bbb0d4c307bff58e9928a1c757438d033687ce3.tar.zst
Replace switch statements with static map initializers
Character transcoding from Unicode to single-byte characters used hard-coded switch statements because the code predated our adoption of C++11. Now we have thread-safe, static initialization of map literals, so use that instead.
Diffstat (limited to 'libqpdf/QUtil.cc')
-rw-r--r-- libqpdf/QUtil.cc 636
1 files changed, 69 insertions, 567 deletions
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 51bd304c..a9e77777 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -16,6 +16,7 @@
#include <fstream>
#include <iomanip>
#include <locale>
+#include <map>
#include <memory>
#include <regex>
#include <set>
@@ -251,6 +252,59 @@ static unsigned short mac_roman_to_unicode[] = {
0x02c7, // 0xff
};
+static std::map<unsigned long, unsigned char> unicode_to_win_ansi = {
+ {0x20ac, 0x80}, {0x201a, 0x82}, {0x192, 0x83}, {0x201e, 0x84},
+ {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87}, {0x2c6, 0x88},
+ {0x2030, 0x89}, {0x160, 0x8a}, {0x2039, 0x8b}, {0x152, 0x8c},
+ {0x17d, 0x8e}, {0x2018, 0x91}, {0x2019, 0x92}, {0x201c, 0x93},
+ {0x201d, 0x94}, {0x2022, 0x95}, {0x2013, 0x96}, {0x2014, 0x97},
+ {0x303, 0x98}, {0x2122, 0x99}, {0x161, 0x9a}, {0x203a, 0x9b},
+ {0x153, 0x9c}, {0x17e, 0x9e}, {0x178, 0x9f}, {0xa0, 0xa0},
+};
+static std::map<unsigned long, unsigned char> unicode_to_mac_roman = {
+ {0xc4, 0x80}, {0xc5, 0x81}, {0xc7, 0x82}, {0xc9, 0x83},
+ {0xd1, 0x84}, {0xd6, 0x85}, {0xdc, 0x86}, {0xe1, 0x87},
+ {0xe0, 0x88}, {0xe2, 0x89}, {0xe4, 0x8a}, {0xe3, 0x8b},
+ {0xe5, 0x8c}, {0xe7, 0x8d}, {0xe9, 0x8e}, {0xe8, 0x8f},
+ {0xea, 0x90}, {0xeb, 0x91}, {0xed, 0x92}, {0xec, 0x93},
+ {0xee, 0x94}, {0xef, 0x95}, {0xf1, 0x96}, {0xf3, 0x97},
+ {0xf2, 0x98}, {0xf4, 0x99}, {0xf6, 0x9a}, {0xf5, 0x9b},
+ {0xfa, 0x9c}, {0xf9, 0x9d}, {0xfb, 0x9e}, {0xfc, 0x9f},
+ {0x2020, 0xa0}, {0xb0, 0xa1}, {0xa2, 0xa2}, {0xa3, 0xa3},
+ {0xa7, 0xa4}, {0x2022, 0xa5}, {0xb6, 0xa6}, {0xdf, 0xa7},
+ {0xae, 0xa8}, {0xa9, 0xa9}, {0x2122, 0xaa}, {0x301, 0xab},
+ {0x308, 0xac}, {0xc6, 0xae}, {0xd8, 0xaf}, {0xb1, 0xb1},
+ {0xa5, 0xb4}, {0x3bc, 0xb5}, {0x1d43, 0xbb}, {0x1d52, 0xbc},
+ {0xe6, 0xbe}, {0xf8, 0xbf}, {0xbf, 0xc0}, {0xa1, 0xc1},
+ {0xac, 0xc2}, {0x192, 0xc4}, {0xab, 0xc7}, {0xbb, 0xc8},
+ {0x2026, 0xc9}, {0xc0, 0xcb}, {0xc3, 0xcc}, {0xd5, 0xcd},
+ {0x152, 0xce}, {0x153, 0xcf}, {0x2013, 0xd0}, {0x2014, 0xd1},
+ {0x201c, 0xd2}, {0x201d, 0xd3}, {0x2018, 0xd4}, {0x2019, 0xd5},
+ {0xf7, 0xd6}, {0xff, 0xd8}, {0x178, 0xd9}, {0x2044, 0xda},
+ {0xa4, 0xdb}, {0x2039, 0xdc}, {0x203a, 0xdd}, {0xfb01, 0xde},
+ {0xfb02, 0xdf}, {0x2021, 0xe0}, {0xb7, 0xe1}, {0x201a, 0xe2},
+ {0x201e, 0xe3}, {0x2030, 0xe4}, {0xc2, 0xe5}, {0xca, 0xe6},
+ {0xc1, 0xe7}, {0xcb, 0xe8}, {0xc8, 0xe9}, {0xcd, 0xea},
+ {0xce, 0xeb}, {0xcf, 0xec}, {0xcc, 0xed}, {0xd3, 0xee},
+ {0xd4, 0xef}, {0xd2, 0xf1}, {0xda, 0xf2}, {0xdb, 0xf3},
+ {0xd9, 0xf4}, {0x131, 0xf5}, {0x2c6, 0xf6}, {0x303, 0xf7},
+ {0x304, 0xf8}, {0x306, 0xf9}, {0x307, 0xfa}, {0x30a, 0xfb},
+ {0x327, 0xfc}, {0x30b, 0xfd}, {0x328, 0xfe}, {0x2c7, 0xff},
+};
+static std::map<unsigned long, unsigned char> unicode_to_pdf_doc = {
+ {0x02d8, 0x18}, {0x02c7, 0x19}, {0x02c6, 0x1a}, {0x02d9, 0x1b},
+ {0x02dd, 0x1c}, {0x02db, 0x1d}, {0x02da, 0x1e}, {0x02dc, 0x1f},
+ {0x2022, 0x80}, {0x2020, 0x81}, {0x2021, 0x82}, {0x2026, 0x83},
+ {0x2014, 0x84}, {0x2013, 0x85}, {0x0192, 0x86}, {0x2044, 0x87},
+ {0x2039, 0x88}, {0x203a, 0x89}, {0x2212, 0x8a}, {0x2030, 0x8b},
+ {0x201e, 0x8c}, {0x201c, 0x8d}, {0x201d, 0x8e}, {0x2018, 0x8f},
+ {0x2019, 0x90}, {0x201a, 0x91}, {0x2122, 0x92}, {0xfb01, 0x93},
+ {0xfb02, 0x94}, {0x0141, 0x95}, {0x0152, 0x96}, {0x0160, 0x97},
+ {0x0178, 0x98}, {0x017d, 0x99}, {0x0131, 0x9a}, {0x0142, 0x9b},
+ {0x0153, 0x9c}, {0x0161, 0x9d}, {0x017e, 0x9e}, {0xfffd, 0x9f},
+ {0x20ac, 0xa0},
+};
+
namespace
{
class FileCloser
@@ -1447,583 +1501,31 @@ enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc };
static unsigned char
encode_winansi(unsigned long codepoint)
{
- // Use this ugly switch statement to avoid a static, which is not
- // thread-safe.
- unsigned char ch = '\0';
- switch (codepoint) {
- case 0x20ac:
- ch = 0x80;
- break;
- case 0x201a:
- ch = 0x82;
- break;
- case 0x192:
- ch = 0x83;
- break;
- case 0x201e:
- ch = 0x84;
- break;
- case 0x2026:
- ch = 0x85;
- break;
- case 0x2020:
- ch = 0x86;
- break;
- case 0x2021:
- ch = 0x87;
- break;
- case 0x2c6:
- ch = 0x88;
- break;
- case 0x2030:
- ch = 0x89;
- break;
- case 0x160:
- ch = 0x8a;
- break;
- case 0x2039:
- ch = 0x8b;
- break;
- case 0x152:
- ch = 0x8c;
- break;
- case 0x17d:
- ch = 0x8e;
- break;
- case 0x2018:
- ch = 0x91;
- break;
- case 0x2019:
- ch = 0x92;
- break;
- case 0x201c:
- ch = 0x93;
- break;
- case 0x201d:
- ch = 0x94;
- break;
- case 0x2022:
- ch = 0x95;
- break;
- case 0x2013:
- ch = 0x96;
- break;
- case 0x2014:
- ch = 0x97;
- break;
- case 0x303:
- ch = 0x98;
- break;
- case 0x2122:
- ch = 0x99;
- break;
- case 0x161:
- ch = 0x9a;
- break;
- case 0x203a:
- ch = 0x9b;
- break;
- case 0x153:
- ch = 0x9c;
- break;
- case 0x17e:
- ch = 0x9e;
- break;
- case 0x178:
- ch = 0x9f;
- break;
- case 0xa0:
- ch = 0xa0;
- break;
- default:
- break;
- }
- return ch;
+ auto i = unicode_to_win_ansi.find(codepoint);
+ if (i != unicode_to_win_ansi.end()) {
+ return i->second;
+ }
+ return '\0';
}
static unsigned char
encode_macroman(unsigned long codepoint)
{
- // Use this ugly switch statement to avoid a static, which is not
- // thread-safe.
- unsigned char ch = '\0';
- switch (codepoint) {
- case 0xc4:
- ch = 0x80;
- break;
- case 0xc5:
- ch = 0x81;
- break;
- case 0xc7:
- ch = 0x82;
- break;
- case 0xc9:
- ch = 0x83;
- break;
- case 0xd1:
- ch = 0x84;
- break;
- case 0xd6:
- ch = 0x85;
- break;
- case 0xdc:
- ch = 0x86;
- break;
- case 0xe1:
- ch = 0x87;
- break;
- case 0xe0:
- ch = 0x88;
- break;
- case 0xe2:
- ch = 0x89;
- break;
- case 0xe4:
- ch = 0x8a;
- break;
- case 0xe3:
- ch = 0x8b;
- break;
- case 0xe5:
- ch = 0x8c;
- break;
- case 0xe7:
- ch = 0x8d;
- break;
- case 0xe9:
- ch = 0x8e;
- break;
- case 0xe8:
- ch = 0x8f;
- break;
- case 0xea:
- ch = 0x90;
- break;
- case 0xeb:
- ch = 0x91;
- break;
- case 0xed:
- ch = 0x92;
- break;
- case 0xec:
- ch = 0x93;
- break;
- case 0xee:
- ch = 0x94;
- break;
- case 0xef:
- ch = 0x95;
- break;
- case 0xf1:
- ch = 0x96;
- break;
- case 0xf3:
- ch = 0x97;
- break;
- case 0xf2:
- ch = 0x98;
- break;
- case 0xf4:
- ch = 0x99;
- break;
- case 0xf6:
- ch = 0x9a;
- break;
- case 0xf5:
- ch = 0x9b;
- break;
- case 0xfa:
- ch = 0x9c;
- break;
- case 0xf9:
- ch = 0x9d;
- break;
- case 0xfb:
- ch = 0x9e;
- break;
- case 0xfc:
- ch = 0x9f;
- break;
- case 0x2020:
- ch = 0xa0;
- break;
- case 0xb0:
- ch = 0xa1;
- break;
- case 0xa2:
- ch = 0xa2;
- break;
- case 0xa3:
- ch = 0xa3;
- break;
- case 0xa7:
- ch = 0xa4;
- break;
- case 0x2022:
- ch = 0xa5;
- break;
- case 0xb6:
- ch = 0xa6;
- break;
- case 0xdf:
- ch = 0xa7;
- break;
- case 0xae:
- ch = 0xa8;
- break;
- case 0xa9:
- ch = 0xa9;
- break;
- case 0x2122:
- ch = 0xaa;
- break;
- case 0x301:
- ch = 0xab;
- break;
- case 0x308:
- ch = 0xac;
- break;
- case 0xc6:
- ch = 0xae;
- break;
- case 0xd8:
- ch = 0xaf;
- break;
- case 0xb1:
- ch = 0xb1;
- break;
- case 0xa5:
- ch = 0xb4;
- break;
- case 0x3bc:
- ch = 0xb5;
- break;
- case 0x1d43:
- ch = 0xbb;
- break;
- case 0x1d52:
- ch = 0xbc;
- break;
- case 0xe6:
- ch = 0xbe;
- break;
- case 0xf8:
- ch = 0xbf;
- break;
- case 0xbf:
- ch = 0xc0;
- break;
- case 0xa1:
- ch = 0xc1;
- break;
- case 0xac:
- ch = 0xc2;
- break;
- case 0x192:
- ch = 0xc4;
- break;
- case 0xab:
- ch = 0xc7;
- break;
- case 0xbb:
- ch = 0xc8;
- break;
- case 0x2026:
- ch = 0xc9;
- break;
- case 0xc0:
- ch = 0xcb;
- break;
- case 0xc3:
- ch = 0xcc;
- break;
- case 0xd5:
- ch = 0xcd;
- break;
- case 0x152:
- ch = 0xce;
- break;
- case 0x153:
- ch = 0xcf;
- break;
- case 0x2013:
- ch = 0xd0;
- break;
- case 0x2014:
- ch = 0xd1;
- break;
- case 0x201c:
- ch = 0xd2;
- break;
- case 0x201d:
- ch = 0xd3;
- break;
- case 0x2018:
- ch = 0xd4;
- break;
- case 0x2019:
- ch = 0xd5;
- break;
- case 0xf7:
- ch = 0xd6;
- break;
- case 0xff:
- ch = 0xd8;
- break;
- case 0x178:
- ch = 0xd9;
- break;
- case 0x2044:
- ch = 0xda;
- break;
- case 0xa4:
- ch = 0xdb;
- break;
- case 0x2039:
- ch = 0xdc;
- break;
- case 0x203a:
- ch = 0xdd;
- break;
- case 0xfb01:
- ch = 0xde;
- break;
- case 0xfb02:
- ch = 0xdf;
- break;
- case 0x2021:
- ch = 0xe0;
- break;
- case 0xb7:
- ch = 0xe1;
- break;
- case 0x201a:
- ch = 0xe2;
- break;
- case 0x201e:
- ch = 0xe3;
- break;
- case 0x2030:
- ch = 0xe4;
- break;
- case 0xc2:
- ch = 0xe5;
- break;
- case 0xca:
- ch = 0xe6;
- break;
- case 0xc1:
- ch = 0xe7;
- break;
- case 0xcb:
- ch = 0xe8;
- break;
- case 0xc8:
- ch = 0xe9;
- break;
- case 0xcd:
- ch = 0xea;
- break;
- case 0xce:
- ch = 0xeb;
- break;
- case 0xcf:
- ch = 0xec;
- break;
- case 0xcc:
- ch = 0xed;
- break;
- case 0xd3:
- ch = 0xee;
- break;
- case 0xd4:
- ch = 0xef;
- break;
- case 0xd2:
- ch = 0xf1;
- break;
- case 0xda:
- ch = 0xf2;
- break;
- case 0xdb:
- ch = 0xf3;
- break;
- case 0xd9:
- ch = 0xf4;
- break;
- case 0x131:
- ch = 0xf5;
- break;
- case 0x2c6:
- ch = 0xf6;
- break;
- case 0x303:
- ch = 0xf7;
- break;
- case 0x304:
- ch = 0xf8;
- break;
- case 0x306:
- ch = 0xf9;
- break;
- case 0x307:
- ch = 0xfa;
- break;
- case 0x30a:
- ch = 0xfb;
- break;
- case 0x327:
- ch = 0xfc;
- break;
- case 0x30b:
- ch = 0xfd;
- break;
- case 0x328:
- ch = 0xfe;
- break;
- case 0x2c7:
- ch = 0xff;
- break;
- default:
- break;
- }
- return ch;
+ auto i = unicode_to_mac_roman.find(codepoint);
+ if (i != unicode_to_mac_roman.end()) {
+ return i->second;
+ }
+ return '\0';
}
static unsigned char
encode_pdfdoc(unsigned long codepoint)
{
- // Use this ugly switch statement to avoid a static, which is not
- // thread-safe.
- unsigned char ch = '\0';
- switch (codepoint) {
- case 0x02d8:
- ch = 0x18;
- break;
- case 0x02c7:
- ch = 0x19;
- break;
- case 0x02c6:
- ch = 0x1a;
- break;
- case 0x02d9:
- ch = 0x1b;
- break;
- case 0x02dd:
- ch = 0x1c;
- break;
- case 0x02db:
- ch = 0x1d;
- break;
- case 0x02da:
- ch = 0x1e;
- break;
- case 0x02dc:
- ch = 0x1f;
- break;
- case 0x2022:
- ch = 0x80;
- break;
- case 0x2020:
- ch = 0x81;
- break;
- case 0x2021:
- ch = 0x82;
- break;
- case 0x2026:
- ch = 0x83;
- break;
- case 0x2014:
- ch = 0x84;
- break;
- case 0x2013:
- ch = 0x85;
- break;
- case 0x0192:
- ch = 0x86;
- break;
- case 0x2044:
- ch = 0x87;
- break;
- case 0x2039:
- ch = 0x88;
- break;
- case 0x203a:
- ch = 0x89;
- break;
- case 0x2212:
- ch = 0x8a;
- break;
- case 0x2030:
- ch = 0x8b;
- break;
- case 0x201e:
- ch = 0x8c;
- break;
- case 0x201c:
- ch = 0x8d;
- break;
- case 0x201d:
- ch = 0x8e;
- break;
- case 0x2018:
- ch = 0x8f;
- break;
- case 0x2019:
- ch = 0x90;
- break;
- case 0x201a:
- ch = 0x91;
- break;
- case 0x2122:
- ch = 0x92;
- break;
- case 0xfb01:
- ch = 0x93;
- break;
- case 0xfb02:
- ch = 0x94;
- break;
- case 0x0141:
- ch = 0x95;
- break;
- case 0x0152:
- ch = 0x96;
- break;
- case 0x0160:
- ch = 0x97;
- break;
- case 0x0178:
- ch = 0x98;
- break;
- case 0x017d:
- ch = 0x99;
- break;
- case 0x0131:
- ch = 0x9a;
- break;
- case 0x0142:
- ch = 0x9b;
- break;
- case 0x0153:
- ch = 0x9c;
- break;
- case 0x0161:
- ch = 0x9d;
- break;
- case 0x017e:
- ch = 0x9e;
- break;
- case 0xfffd:
- ch = 0x9f;
- break;
- case 0x20ac:
- ch = 0xa0;
- break;
- default:
- break;
- }
- return ch;
+ auto i = unicode_to_pdf_doc.find(codepoint);
+ if (i != unicode_to_pdf_doc.end()) {
+ return i->second;
+ }
+ return '\0';
}
unsigned long