aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Berkenbilt <ejb@ql.org>2018-06-22 18:15:22 +0200
committerJay Berkenbilt <ejb@ql.org>2018-06-22 18:52:45 +0200
commit4ccc8b1a44109a913c87f31029c9c17db30ecc43 (patch)
treefb2259c3507896c252037aca9b14e2d93acb2259
parent32ddcec91e77b9f6a52480c3393ab17cbc105880 (diff)
downloadqpdf-4ccc8b1a44109a913c87f31029c9c17db30ecc43.tar.zst
Add ClosedFileInputSource
ClosedFileInputSource is an input source that keeps the file closed when not reading it.
-rw-r--r--ChangeLog5
-rw-r--r--include/qpdf/ClosedFileInputSource.hh83
-rw-r--r--libqpdf/ClosedFileInputSource.cc103
-rw-r--r--libqpdf/build.mk1
-rw-r--r--libtests/build.mk1
-rw-r--r--libtests/closed_file_input_source.cc62
-rw-r--r--libtests/qtest/closedfile.test16
-rw-r--r--libtests/qtest/closedfile/input5
-rw-r--r--libtests/qtest/closedfile/output3
9 files changed, 279 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index db9bee08..66966f7b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2018-06-22 Jay Berkenbilt <ejb@ql.org>
+ * Add ClosedFileInputSource class, an input source that keeps its
+ input file closed when not reading it. At the expense of some
+ performance, this allows you to operate on many files without
+ opening too many files at the operating system level.
+
* Add new option --preserved-unreferenced-resources, which
suppresses removal of unreferenced objects from page resource
dictionaries during page splitting operations.
diff --git a/include/qpdf/ClosedFileInputSource.hh b/include/qpdf/ClosedFileInputSource.hh
new file mode 100644
index 00000000..349b938a
--- /dev/null
+++ b/include/qpdf/ClosedFileInputSource.hh
@@ -0,0 +1,83 @@
+// Copyright (c) 2005-2018 Jay Berkenbilt
+//
+// This file is part of qpdf.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Versions of qpdf prior to version 7 were released under the terms
+// of version 2.0 of the Artistic License. At your option, you may
+// continue to consider qpdf to be licensed under those terms. Please
+// see the manual for additional information.
+
+#ifndef __QPDF_CLOSEDFILEINPUTSOURCE_HH__
+#define __QPDF_CLOSEDFILEINPUTSOURCE_HH__
+
+// This is an input source that reads from files, like
+// FileInputSource, except that it opens and closes the file
+// surrounding every operation. This decreases efficiency, but it allows
+// many more of these to exist at once than the maximum number of open
+// file descriptors. This is used for merging large numbers of files.
+
+#include <qpdf/InputSource.hh>
+#include <qpdf/PointerHolder.hh>
+
+class FileInputSource;
+
+// InputSource implementation backed by a named file. The file name
+// and the current position are remembered in Members so that the
+// underlying FileInputSource does not have to exist between
+// operations.
+class ClosedFileInputSource: public InputSource
+{
+  public:
+    // filename is the path handed to the underlying FileInputSource
+    // each time the file is opened.
+    QPDF_DLL ClosedFileInputSource(char const* filename);
+    QPDF_DLL virtual ~ClosedFileInputSource();
+
+    // InputSource operations.
+    QPDF_DLL virtual qpdf_offset_t findAndSkipNextEOL();
+    QPDF_DLL virtual std::string const& getName() const;
+    QPDF_DLL virtual qpdf_offset_t tell();
+    QPDF_DLL virtual void seek(qpdf_offset_t offset, int whence);
+    QPDF_DLL virtual void rewind();
+    QPDF_DLL virtual size_t read(char* buffer, size_t length);
+    QPDF_DLL virtual void unreadCh(char ch);
+
+  private:
+    // Private copy constructor and assignment operator keep outside
+    // code from copying this object.
+    ClosedFileInputSource(ClosedFileInputSource const&);
+    ClosedFileInputSource& operator=(ClosedFileInputSource const&);
+
+    // before() makes the underlying file available for an operation;
+    // after() records its position and releases it again.
+    void before();
+    void after();
+
+    // All data lives in Members, which only ClosedFileInputSource
+    // can construct.
+    class Members
+    {
+        friend class ClosedFileInputSource;
+
+      public:
+        QPDF_DLL ~Members();
+
+      private:
+        Members(char const* filename);
+
+        // Path of the file to read.
+        std::string filename;
+        // Position to restore the next time the file is opened.
+        qpdf_offset_t offset;
+        // The open file, or 0 while the file is closed.
+        FileInputSource* fis;
+    };
+    PointerHolder<Members> m;
+};
+
+#endif // __QPDF_CLOSEDFILEINPUTSOURCE_HH__
diff --git a/libqpdf/ClosedFileInputSource.cc b/libqpdf/ClosedFileInputSource.cc
new file mode 100644
index 00000000..ea79a840
--- /dev/null
+++ b/libqpdf/ClosedFileInputSource.cc
@@ -0,0 +1,103 @@
+#include <qpdf/ClosedFileInputSource.hh>
+#include <qpdf/FileInputSource.hh>
+
+// Remember the file name and start at offset 0 with no file open.
+ClosedFileInputSource::Members::Members(char const* filename) :
+    filename(filename), offset(0), fis(0)
+{
+}
+
+// Destroy the FileInputSource if one is still allocated.
+ClosedFileInputSource::Members::~Members()
+{
+    // Deleting a null pointer is a no-op, so no explicit test is
+    // needed for the "file already closed" case.
+    delete this->fis;
+}
+
+// Allocate the Members state for the given file name. No
+// FileInputSource is created here; that happens in before().
+ClosedFileInputSource::ClosedFileInputSource(char const* filename) :
+    m(new Members(filename))
+{
+}
+
+// No explicit cleanup: releasing a FileInputSource that is still
+// allocated is the job of Members::~Members().
+ClosedFileInputSource::~ClosedFileInputSource()
+{
+    // intentionally empty
+}
+
+// Make sure the underlying FileInputSource exists and is positioned
+// where the previous operation left off. Does nothing if it already
+// exists (unreadCh() leaves it in place).
+void
+ClosedFileInputSource::before()
+{
+    if (this->m->fis)
+    {
+        return;
+    }
+    // Set the source up through a local pointer and publish it in
+    // m->fis only once every step has succeeded. If a step throws
+    // (presumably setFilename() does when the file cannot be opened --
+    // confirm against FileInputSource), the partial object is destroyed
+    // and m->fis stays 0, so the next operation retries the open
+    // instead of using a source that has no open file.
+    FileInputSource* opened = new FileInputSource();
+    try
+    {
+        opened->setFilename(this->m->filename.c_str());
+        opened->seek(this->m->offset, SEEK_SET);
+        opened->setLastOffset(this->last_offset);
+    }
+    catch (...)
+    {
+        delete opened;
+        throw;
+    }
+    this->m->fis = opened;
+}
+
+// Copy the position state of the open FileInputSource into this
+// object, then destroy it. m->fis is dereferenced unconditionally,
+// so before() must have been called first.
+void
+ClosedFileInputSource::after()
+{
+    FileInputSource* open_file = this->m->fis;
+    this->last_offset = open_file->getLastOffset();
+    this->m->offset = open_file->tell();
+    delete open_file;
+    // Mark the file as closed so the next before() reopens it.
+    this->m->fis = 0;
+}
+
+// Open the file, forward to the underlying source's
+// findAndSkipNextEOL(), close the file again and return the
+// forwarded result.
+qpdf_offset_t
+ClosedFileInputSource::findAndSkipNextEOL()
+{
+    this->before();
+    qpdf_offset_t const result = this->m->fis->findAndSkipNextEOL();
+    this->after();
+    return result;
+}
+
+// Return the file name supplied at construction.
+std::string const&
+ClosedFileInputSource::getName() const
+{
+    // The name is stored in Members, so the file is not opened here.
+    return m->filename;
+}
+
+// Open the file, ask the underlying source for its current
+// position, close the file again and return that position.
+qpdf_offset_t
+ClosedFileInputSource::tell()
+{
+    this->before();
+    qpdf_offset_t const position = this->m->fis->tell();
+    this->after();
+    return position;
+}
+
+// Open the file, forward the seek to the underlying source, then
+// close the file; after() records the resulting position.
+void
+ClosedFileInputSource::seek(qpdf_offset_t offset, int whence)
+{
+    this->before();
+    FileInputSource* open_file = this->m->fis;
+    open_file->seek(offset, whence);
+    this->after();
+}
+
+// Reset the read position to the beginning of the file.
+void
+ClosedFileInputSource::rewind()
+{
+    // Position to restore the next time before() opens the file.
+    this->m->offset = 0;
+    // unreadCh() deliberately leaves the file open. In that state
+    // before() does not reposition, and after() overwrites m->offset
+    // from the open file, so the open file must be rewound as well or
+    // the rewind would be lost.
+    if (this->m->fis)
+    {
+        this->m->fis->rewind();
+    }
+}
+
+// Open the file, forward the read of up to length bytes into buffer
+// to the underlying source, close the file again and return the
+// forwarded result.
+size_t
+ClosedFileInputSource::read(char* buffer, size_t length)
+{
+    this->before();
+    size_t const bytes_read = this->m->fis->read(buffer, length);
+    this->after();
+    return bytes_read;
+}
+
+// Forward unreadCh() to the underlying source, opening the file
+// first if necessary.
+void
+ClosedFileInputSource::unreadCh(char ch)
+{
+    this->before();
+    // Deliberately no after() here: the file has to stay open after
+    // this operation.
+    this->m->fis->unreadCh(ch);
+}
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index 437c683e..528456f8 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -9,6 +9,7 @@ SRCS_libqpdf = \
libqpdf/BitWriter.cc \
libqpdf/Buffer.cc \
libqpdf/BufferInputSource.cc \
+ libqpdf/ClosedFileInputSource.cc \
libqpdf/ContentNormalizer.cc \
libqpdf/FileInputSource.cc \
libqpdf/InputSource.cc \
diff --git a/libtests/build.mk b/libtests/build.mk
index 0b895225..e8f20270 100644
--- a/libtests/build.mk
+++ b/libtests/build.mk
@@ -3,6 +3,7 @@ BINS_libtests = \
ascii85 \
bits \
buffer \
+ closed_file_input_source \
concatenate \
dct_compress \
dct_uncompress \
diff --git a/libtests/closed_file_input_source.cc b/libtests/closed_file_input_source.cc
new file mode 100644
index 00000000..9fd3eec7
--- /dev/null
+++ b/libtests/closed_file_input_source.cc
@@ -0,0 +1,62 @@
+#include <qpdf/ClosedFileInputSource.hh>
+#include <qpdf/FileInputSource.hh>
+
+#include <stdio.h>
+#include <string.h>
+#include <iostream>
+#include <stdlib.h>
+
+// Print "FAIL: <what>" on standard output when result is false;
+// print nothing when it is true.
+void check(std::string const& what, bool result)
+{
+    if (result)
+    {
+        return;
+    }
+    std::cout << "FAIL: " << what << std::endl;
+}
+
+// Exercise an InputSource against the fixed test file "input". The
+// expected offsets and strings below are tied to that file's exact
+// contents, and each step depends on the position left by the
+// previous one, so the order of the statements matters.
+void do_tests(InputSource* is)
+{
+    // Name, initial position and absolute seek.
+    check("get name", is->getName() == "input");
+    check("initial tell", is->tell() == 0);
+    is->seek(11, SEEK_SET);
+    check("tell after SEEK_SET", is->tell() == 11);
+
+    // Reading a line and skipping to the end of the next one.
+    check("read offset 11", is->readLine(100) == "Offset 11");
+    check("last offset after read 11", is->getLastOffset() == 11);
+    check("tell after read", is->tell() == 21);
+    is->findAndSkipNextEOL();
+    check("tell after findAndSkipNextEOL", is->tell() == 522);
+
+    // Push one character back and read it again.
+    is->unreadCh('Q');
+    char ch_buf[1] = {'\0'};
+    check("read unread character", is->read(ch_buf, 1) == 1);
+    check("last offset after read unread", is->getLastOffset() == 521);
+    check("got character", ch_buf[0] == 'Q');
+
+    // Seeks relative to the end and to the current position.
+    is->seek(0, SEEK_END);
+    check("tell at end", is->tell() == 556);
+    is->seek(-25, SEEK_END);
+    check("tell before end", is->tell() == 531);
+    check("last offset unchanged after seek", is->getLastOffset() == 521);
+    is->seek(-9, SEEK_CUR);
+    check("tell after SEEK_CUR", is->tell() == 522);
+    check("read offset 522", is->readLine(100) == "9 before");
+    check("last offset after read", is->getLastOffset() == 522);
+
+    // Rewind and read from the beginning.
+    is->rewind();
+    check("last offset unchanged after rewind", is->getLastOffset() == 522);
+    check("tell after rewind", is->tell() == 0);
+    check("read offset at beginning", is->readLine(100) == "!00000000?");
+    check("last offset after read 0", is->getLastOffset() == 0);
+}
+
+// Run the same checks against ClosedFileInputSource and against
+// FileInputSource. Both read the file named "input" from the
+// current directory.
+int main()
+{
+    // This test is designed to work with a specified input file.
+    std::cout << "testing with ClosedFileInputSource\n";
+    ClosedFileInputSource closed_source("input");
+    do_tests(&closed_source);
+
+    std::cout << "testing with FileInputSource\n";
+    FileInputSource plain_source;
+    plain_source.setFilename("input");
+    do_tests(&plain_source);
+
+    // Printed unconditionally; failures show up as additional
+    // "FAIL: ..." lines printed by check().
+    std::cout << "all assertions passed" << std::endl;
+    return 0;
+}
diff --git a/libtests/qtest/closedfile.test b/libtests/qtest/closedfile.test
new file mode 100644
index 00000000..864f9c12
--- /dev/null
+++ b/libtests/qtest/closedfile.test
@@ -0,0 +1,16 @@
+#!/usr/bin/env perl
+# Runs the closed_file_input_source program from the closedfile
+# directory and compares what it prints with the file "output".
+require 5.008;
+use warnings;
+use strict;
+
+chdir("closedfile") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = TestDriver->new('closed_file_input_source');
+
+# Expect exit status 0 and output matching the "output" file.
+$td->runtest(
+    "closed file input source",
+    {$td->COMMAND => "closed_file_input_source"},
+    {$td->FILE => "output", $td->EXIT_STATUS => 0},
+    $td->NORMALIZE_NEWLINES);
+
+# The argument is presumably the expected number of tests -- confirm
+# against TestDriver.
+$td->report(1);
diff --git a/libtests/qtest/closedfile/input b/libtests/qtest/closedfile/input
new file mode 100644
index 00000000..25dbd5c2
--- /dev/null
+++ b/libtests/qtest/closedfile/input
@@ -0,0 +1,5 @@
+!00000000?
+Offset 11
+wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
+9 before
+This is 25 from the end.
diff --git a/libtests/qtest/closedfile/output b/libtests/qtest/closedfile/output
new file mode 100644
index 00000000..7a5554af
--- /dev/null
+++ b/libtests/qtest/closedfile/output
@@ -0,0 +1,3 @@
+testing with ClosedFileInputSource
+testing with FileInputSource
+all assertions passed