aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 3
-rw-r--r-- libqpdf/QUtil.cc 33
-rw-r--r-- qpdf/qtest/qpdf.test 14
-rw-r--r-- qpdf/qtest/qpdf/check-unicode-filename-1.out 6
-rw-r--r-- qpdf/qtest/qpdf/check-unicode-filename-2.out 6
5 files changed, 59 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index e62317b8..8514e379 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2019-04-20 Jay Berkenbilt <ejb@ql.org>
+ * Handle Unicode characters in filenames. The changes to support
+ Unicode on the CLI in Windows broke Unicode filenames. Fixes #298.
+
* Slightly tighten logic that determines whether an object is a
page. The previous logic was sometimes failing to preserve
annotations because they were passing the overly loose test for
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 7ea3f5e7..816c2dfa 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -354,11 +354,42 @@ FILE*
QUtil::safe_fopen(char const* filename, char const* mode)
{
FILE* f = 0;
+#ifdef _WIN32
+ // Convert the utf-8 encoded filename argument to wchar_t*. First,
+ // convert to utf16, then to wchar_t*. Note that u16 will start
+ // with the UTF16 marker, which we skip.
+ std::string u16 = utf8_to_utf16(filename);
+ size_t len = u16.length();
+ size_t wlen = (len / 2) - 1;
+ PointerHolder<wchar_t> wfilenamep(true, new wchar_t[wlen + 1]);
+ wchar_t* wfilename = wfilenamep.getPointer();
+ wfilename[wlen] = 0;
+ for (unsigned int i = 2; i < len; i += 2)
+ {
+ wfilename[(i/2) - 1] =
+ static_cast<wchar_t>(
+ (static_cast<unsigned char>(u16.at(i)) << 8) +
+ static_cast<unsigned char>(u16.at(i+1)));
+ }
+ // Widen mode; array new (not "new wchar_t(n)", which makes one element).
+ PointerHolder<wchar_t> wmodep(true, new wchar_t[strlen(mode) + 1]);
+ wchar_t* wmode = wmodep.getPointer();
+ for (size_t i = 0; i <= strlen(mode); ++i) // <= also copies the null
+ {
+ wmode[i] = mode[i];
+ }
+
#ifdef _MSC_VER
- errno_t err = fopen_s(&f, filename, mode);
+ errno_t err = _wfopen_s(&f, wfilename, wmode);
if (err != 0)
{
errno = err;
+ }
+#else
+ f = _wfopen(wfilename, wmode);
+#endif
+ if (f == 0)
+ {
throw_system_error(std::string("open ") + filename);
}
#else
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 43219109..4dd0909f 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -135,7 +135,7 @@ foreach my $c (@completion_tests)
show_ntests();
# ----------
$td->notify("--- Argument Parsing ---");
-$n_tests += 6;
+$n_tests += 8;
$td->runtest("required argument",
{$td->COMMAND => "qpdf --password minimal.pdf"},
@@ -167,6 +167,16 @@ $td->runtest("extra overlay filename",
{$td->REGEXP => ".*overlay file already specified.*",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
+foreach my $d (['auto-ü', 1], ['auto-öπ', 2])
+{
+ my ($u, $n) = @$d;
+ copy('minimal.pdf', "$u.pdf");
+ $td->runtest("unicode filename $u",
+ {$td->COMMAND => "qpdf --check $u.pdf"},
+ {$td->FILE => "check-unicode-filename-$n.out",
+ $td->EXIT_STATUS => 0},
+ $td->NORMALIZE_NEWLINES);
+}
show_ntests();
# ----------
@@ -4093,5 +4103,5 @@ sub get_md5_checksum
sub cleanup
{
system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache");
- system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf");
+ system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf");
}
diff --git a/qpdf/qtest/qpdf/check-unicode-filename-1.out b/qpdf/qtest/qpdf/check-unicode-filename-1.out
new file mode 100644
index 00000000..7ff8c445
--- /dev/null
+++ b/qpdf/qtest/qpdf/check-unicode-filename-1.out
@@ -0,0 +1,6 @@
+checking auto-ü.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/check-unicode-filename-2.out b/qpdf/qtest/qpdf/check-unicode-filename-2.out
new file mode 100644
index 00000000..62c32cb5
--- /dev/null
+++ b/qpdf/qtest/qpdf/check-unicode-filename-2.out
@@ -0,0 +1,6 @@
+checking auto-öπ.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect