aboutsummaryrefslogtreecommitdiffstats
path: root/libqpdf/QPDF.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libqpdf/QPDF.cc')
-rw-r--r-- libqpdf/QPDF.cc 403
1 files changed, 349 insertions, 54 deletions
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 308b3dd2..e9d0b77a 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -18,8 +18,9 @@
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDF_Null.hh>
#include <qpdf/QPDF_Dictionary.hh>
+#include <qpdf/QPDF_Stream.hh>
-std::string QPDF::qpdf_version = "8.1.0";
+std::string QPDF::qpdf_version = "8.4.0";
static char const* EMPTY_PDF =
"%PDF-1.3\n"
@@ -39,13 +40,50 @@ static char const* EMPTY_PDF =
"110\n"
"%%EOF\n";
+// Bundle the information describing one stream of a foreign QPDF.
+// Each constructor argument is stored unchanged in the member of the
+// same name; the constructor does no other work.
+QPDF::ForeignStreamData::ForeignStreamData(
+ PointerHolder<EncryptionParameters> encp,
+ PointerHolder<InputSource> file,
+ int foreign_objid,
+ int foreign_generation,
+ qpdf_offset_t offset,
+ size_t length,
+ bool is_attachment_stream,
+ QPDFObjectHandle local_dict)
+ :
+ encp(encp),
+ file(file),
+ foreign_objid(foreign_objid),
+ foreign_generation(foreign_generation),
+ offset(offset),
+ length(length),
+ is_attachment_stream(is_attachment_stream),
+ local_dict(local_dict)
+{
+}
+
+// Remember the QPDF through which provideStreamData() pipes data for
+// streams registered with a ForeignStreamData.
+QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(
+ QPDF& destination_qpdf) :
+ destination_qpdf(destination_qpdf)
+{
+}
+
+// Write the data for the copied stream (objid, generation) to
+// pipeline. If a ForeignStreamData was registered for the object, the
+// data is piped through destination_qpdf.pipeForeignStreamData();
+// otherwise the registered foreign stream handle pipes its own data.
void
QPDF::CopiedStreamDataProvider::provideStreamData(
int objid, int generation, Pipeline* pipeline)
{
- QPDFObjectHandle foreign_stream =
- this->foreign_streams[QPDFObjGen(objid, generation)];
- foreign_stream.pipeStreamData(pipeline, 0, qpdf_dl_none);
+ // NOTE(review): both lookups use operator[]. If these members are
+ // std::map, a lookup for an unregistered object inserts an empty
+ // entry -- confirm that this is acceptable.
+ PointerHolder<ForeignStreamData> foreign_data =
+ this->foreign_stream_data[QPDFObjGen(objid, generation)];
+ if (foreign_data.getPointer())
+ {
+ destination_qpdf.pipeForeignStreamData(
+ foreign_data, pipeline, 0, qpdf_dl_none);
+ }
+ else
+ {
+ QPDFObjectHandle foreign_stream =
+ this->foreign_streams[QPDFObjGen(objid, generation)];
+ foreign_stream.pipeStreamData(pipeline, 0, qpdf_dl_none);
+ }
}
void
@@ -55,6 +93,14 @@ QPDF::CopiedStreamDataProvider::registerForeignStream(
this->foreign_streams[local_og] = foreign_stream;
}
+// Associate the local object local_og with a ForeignStreamData. Any
+// entry previously stored for local_og in foreign_stream_data is
+// overwritten. provideStreamData() checks this table first.
+void
+QPDF::CopiedStreamDataProvider::registerForeignStream(
+ QPDFObjGen const& local_og,
+ PointerHolder<ForeignStreamData> foreign_stream)
+{
+ this->foreign_stream_data[local_og] = foreign_stream;
+}
+
QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) :
qpdf(qpdf),
objid(objid),
@@ -74,15 +120,9 @@ QPDF::QPDFVersion()
return QPDF::qpdf_version;
}
-QPDF::Members::Members() :
- provided_password_is_hex_key(false),
+QPDF::EncryptionParameters::EncryptionParameters() :
encrypted(false),
encryption_initialized(false),
- ignore_xref_streams(false),
- suppress_warnings(false),
- out_stream(&std::cout),
- err_stream(&std::cerr),
- attempt_recovery(true),
encryption_V(0),
encryption_R(0),
encrypt_metadata(true),
@@ -90,10 +130,24 @@ QPDF::Members::Members() :
cf_string(e_none),
cf_file(e_none),
cached_key_objid(0),
- cached_key_generation(0),
+ cached_key_generation(0)
+{
+}
+
+QPDF::Members::Members() :
+ unique_id(0),
+ provided_password_is_hex_key(false),
+ ignore_xref_streams(false),
+ suppress_warnings(false),
+ out_stream(&std::cout),
+ err_stream(&std::cerr),
+ attempt_recovery(true),
+ encp(new EncryptionParameters),
pushed_inherited_attributes_to_pages(false),
copied_stream_data_provider(0),
reconstructed_xref(false),
+ fixed_dangling_refs(false),
+ immediate_copy_from(false),
first_xref_item_offset(0),
uncompressed_after_compressed(false)
{
@@ -107,6 +161,12 @@ QPDF::QPDF() :
m(new Members())
{
m->tokenizer.allowEOF();
+ // Generate a unique ID. It just has to be unique among all QPDF
+ // objects allocated throughout the lifetime of this running
+ // application.
+ m->unique_id = static_cast<unsigned long>(QUtil::get_current_time());
+ m->unique_id <<= 32;
+ m->unique_id |= static_cast<unsigned long>(QUtil::random());
}
QPDF::~QPDF()
@@ -210,6 +270,12 @@ QPDF::setAttemptRecovery(bool val)
this->m->attempt_recovery = val;
}
+// Set the immediate_copy_from flag. replaceForeignIndirectObjects()
+// reads this flag from the *foreign* (source) QPDF: when it is true
+// and the foreign stream has no data buffer yet, the stream's raw data
+// is loaded into a buffer on the source stream before it is copied.
+void
+QPDF::setImmediateCopyFrom(bool val)
+{
+ this->m->immediate_copy_from = val;
+}
+
std::vector<QPDFExc>
QPDF::getWarnings()
{
@@ -292,7 +358,7 @@ QPDF::parse(char const* password)
{
if (password)
{
- this->m->provided_password = password;
+ this->m->encp->provided_password = password;
}
// Find the header anywhere in the first 1024 bytes of the file.
@@ -1218,29 +1284,129 @@ QPDF::showXRefTable()
}
}
+// Walk the trailer and every known indirect object and visit each
+// indirect reference found in their dictionaries and arrays, so that
+// referenced objects end up in obj_cache (callers in this file rely on
+// that: after this runs, everything in the xref table is also in
+// obj_cache). The walk is performed at most once unless force is true.
+// NOTE(review): no resolve call is visible here; presumably the type
+// queries (isDictionary(), isArray(), isStream()) resolve the handle
+// as a side effect -- confirm in QPDFObjectHandle.
+void
+QPDF::fixDanglingReferences(bool force)
+{
+ // Already done and not forced: nothing to do.
+ if (this->m->fixed_dangling_refs && (! force))
+ {
+ return;
+ }
+ this->m->fixed_dangling_refs = true;
+
+ // Create a set of all known indirect objects including those
+ // we've previously resolved and those that we have created.
+ // The keys are copied into a separate set first, so the loops
+ // below never iterate over obj_cache or xref_table directly.
+ std::set<QPDFObjGen> to_process;
+ for (std::map<QPDFObjGen, ObjCache>::iterator iter =
+ this->m->obj_cache.begin();
+ iter != this->m->obj_cache.end(); ++iter)
+ {
+ to_process.insert((*iter).first);
+ }
+ for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter =
+ this->m->xref_table.begin();
+ iter != this->m->xref_table.end(); ++iter)
+ {
+ to_process.insert((*iter).first);
+ }
+
+ // For each non-scalar item to process, put it in the queue.
+ // The trailer is queued as well. For streams, only the stream
+ // dictionary is queued.
+ std::list<QPDFObjectHandle> queue;
+ queue.push_back(this->m->trailer);
+ for (std::set<QPDFObjGen>::iterator iter = to_process.begin();
+ iter != to_process.end(); ++iter)
+ {
+ QPDFObjectHandle obj = QPDFObjectHandle::Factory::newIndirect(
+ this, (*iter).getObj(), (*iter).getGen());
+ if (obj.isDictionary() || obj.isArray())
+ {
+ queue.push_back(obj);
+ }
+ else if (obj.isStream())
+ {
+ queue.push_back(obj.getDict());
+ }
+ }
+
+ // Process the queue by recursively resolving all object
+ // references. We don't need to do loop detection because we don't
+ // traverse known indirect objects when processing the queue.
+ while (! queue.empty())
+ {
+ QPDFObjectHandle obj = queue.front();
+ queue.pop_front();
+ // Collect the direct children of this container; anything that
+ // is neither a dictionary nor an array contributes nothing.
+ std::list<QPDFObjectHandle> to_check;
+ if (obj.isDictionary())
+ {
+ std::map<std::string, QPDFObjectHandle> members =
+ obj.getDictAsMap();
+ for (std::map<std::string, QPDFObjectHandle>::iterator iter =
+ members.begin();
+ iter != members.end(); ++iter)
+ {
+ to_check.push_back((*iter).second);
+ }
+ }
+ else if (obj.isArray())
+ {
+ std::vector<QPDFObjectHandle> elements = obj.getArrayAsVector();
+ for (std::vector<QPDFObjectHandle>::iterator iter =
+ elements.begin();
+ iter != elements.end(); ++iter)
+ {
+ to_check.push_back(*iter);
+ }
+ }
+ for (std::list<QPDFObjectHandle>::iterator iter = to_check.begin();
+ iter != to_check.end(); ++iter)
+ {
+ QPDFObjectHandle sub = *iter;
+ if (sub.isIndirect())
+ {
+ // Indirect children are queued only when they belong to
+ // this QPDF and have no obj_cache entry yet; indirect
+ // objects already in the cache are not traversed again.
+ if (sub.getOwningQPDF() == this)
+ {
+ QPDFObjGen og(sub.getObjGen());
+ if (this->m->obj_cache.count(og) == 0)
+ {
+ QTC::TC("qpdf", "QPDF detected dangling ref");
+ queue.push_back(sub);
+ }
+ }
+ }
+ else
+ {
+ // Direct children are always queued so that nested
+ // containers get examined.
+ queue.push_back(sub);
+ }
+ }
+
+ }
+}
+
+// Returns the object number of the last entry in obj_cache (in the
+// map's key order), or that of QPDFObjGen(0, 0) when the cache is
+// empty. fixDanglingReferences() is called first (not forced).
size_t
QPDF::getObjectCount()
{
// This method returns the next available indirect object number.
- // makeIndirectObject uses it for this purpose.
- QPDFObjGen o1(0, 0);
+ // makeIndirectObject uses it for this purpose. After
+ // fixDanglingReferences is called, all objects in the xref table
+ // will also be in obj_cache.
+ fixDanglingReferences();
+ QPDFObjGen og(0, 0);
if (! this->m->obj_cache.empty())
{
- o1 = (*(this->m->obj_cache.rbegin())).first;
+ og = (*(this->m->obj_cache.rbegin())).first;
}
- QPDFObjGen o2 = (*(this->m->xref_table.rbegin())).first;
- QTC::TC("qpdf", "QPDF indirect last obj from xref",
- (o2.getObj() > o1.getObj()) ? 1 : 0);
- return std::max(o1.getObj(), o2.getObj());
+ return og.getObj();
}
std::vector<QPDFObjectHandle>
QPDF::getAllObjects()
{
+ // After fixDanglingReferences is called, all objects are in the
+ // object cache.
+ fixDanglingReferences(true);
std::vector<QPDFObjectHandle> result;
- for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter =
- this->m->xref_table.begin();
- iter != this->m->xref_table.end(); ++iter)
+ for (std::map<QPDFObjGen, ObjCache>::iterator iter =
+ this->m->obj_cache.begin();
+ iter != this->m->obj_cache.end(); ++iter)
{
QPDFObjGen const& og = (*iter).first;
@@ -1282,7 +1448,7 @@ QPDF::readObject(PointerHolder<InputSource> input,
bool empty = false;
PointerHolder<StringDecrypter> decrypter_ph;
StringDecrypter* decrypter = 0;
- if (this->m->encrypted && (! in_object_stream))
+ if (this->m->encp->encrypted && (! in_object_stream))
{
decrypter_ph = new StringDecrypter(this, objid, generation);
decrypter = decrypter_ph.getPointer();
@@ -1748,7 +1914,6 @@ QPDF::resolve(int objid, int generation)
}
ResolveRecorder rr(this, og);
- // PDF spec says unknown objects resolve to the null object.
if ((! this->m->obj_cache.count(og)) && this->m->xref_table.count(og))
{
QPDFXRefEntry const& entry = this->m->xref_table[og];
@@ -1796,6 +1961,7 @@ QPDF::resolve(int objid, int generation)
}
if (this->m->obj_cache.count(og) == 0)
{
+ // PDF spec says unknown objects resolve to the null object.
QTC::TC("qpdf", "QPDF resolve failure to null");
QPDFObjectHandle oh = QPDFObjectHandle::newNull();
this->m->obj_cache[og] =
@@ -1975,14 +2141,18 @@ QPDF::replaceReserved(QPDFObjectHandle reserved,
}
QPDFObjectHandle
-QPDF::copyForeignObject(QPDFObjectHandle foreign)
+QPDF::copyForeignObject(QPDFObjectHandle foreign, bool)
{
- return copyForeignObject(foreign, false);
+ // This method will be removed next time the ABI is changed.
+ // The unnamed bool argument is ignored; the call is forwarded
+ // unchanged to the single-argument overload.
+ return copyForeignObject(foreign);
}
QPDFObjectHandle
-QPDF::copyForeignObject(QPDFObjectHandle foreign, bool allow_page)
+QPDF::copyForeignObject(QPDFObjectHandle foreign)
{
+ // Do not preclude use of copyForeignObject on page objects. It is
+ // a documented use case to copy pages this way if the intention
+ // is to not update the pages tree.
if (! foreign.isIndirect())
{
QTC::TC("qpdf", "QPDF copyForeign direct");
@@ -1997,7 +2167,7 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign, bool allow_page)
"QPDF::copyForeign called with object from this QPDF");
}
- ObjCopier& obj_copier = this->m->object_copiers[other];
+ ObjCopier& obj_copier = this->m->object_copiers[other->m->unique_id];
if (! obj_copier.visiting.empty())
{
throw std::logic_error("obj_copier.visiting is not empty"
@@ -2194,15 +2364,80 @@ QPDF::replaceForeignIndirectObjects(
if (this->m->copied_stream_data_provider == 0)
{
this->m->copied_stream_data_provider =
- new CopiedStreamDataProvider();
+ new CopiedStreamDataProvider(*this);
this->m->copied_streams = this->m->copied_stream_data_provider;
}
QPDFObjGen local_og(result.getObjGen());
- this->m->copied_stream_data_provider->registerForeignStream(
- local_og, foreign);
- result.replaceStreamData(this->m->copied_streams,
- dict.getKey("/Filter"),
- dict.getKey("/DecodeParms"));
+ // Copy information from the foreign stream so we can pipe its
+ // data later without keeping the original QPDF object around.
+ QPDF* foreign_stream_qpdf = foreign.getOwningQPDF();
+ if (! foreign_stream_qpdf)
+ {
+ throw std::logic_error("unable to retrieve owning qpdf"
+ " from foreign stream");
+ }
+ QPDF_Stream* stream =
+ dynamic_cast<QPDF_Stream*>(
+ QPDFObjectHandle::ObjAccessor::getObject(
+ foreign).getPointer());
+ if (! stream)
+ {
+ throw std::logic_error("unable to retrieve underlying"
+ " stream object from foreign stream");
+ }
+ PointerHolder<Buffer> stream_buffer =
+ stream->getStreamDataBuffer();
+ if ((foreign_stream_qpdf->m->immediate_copy_from) &&
+ (stream_buffer.getPointer() == 0))
+ {
+ // Pull the stream data into a buffer before attempting
+ // the copy operation. Do it on the source stream so that
+ // if the source stream is copied multiple times, we don't
+ // have to keep duplicating the memory.
+ QTC::TC("qpdf", "QPDF immediate copy stream data");
+ foreign.replaceStreamData(foreign.getRawStreamData(),
+ dict.getKey("/Filter"),
+ dict.getKey("/DecodeParms"));
+ stream_buffer = stream->getStreamDataBuffer();
+ }
+ PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider =
+ stream->getStreamDataProvider();
+ if (stream_buffer.getPointer())
+ {
+ QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
+ result.replaceStreamData(stream_buffer,
+ dict.getKey("/Filter"),
+ dict.getKey("/DecodeParms"));
+ }
+ else if (stream_provider.getPointer())
+ {
+ // In this case, the remote stream's QPDF must stay in scope.
+ QTC::TC("qpdf", "QPDF copy foreign stream with provider");
+ this->m->copied_stream_data_provider->registerForeignStream(
+ local_og, foreign);
+ result.replaceStreamData(this->m->copied_streams,
+ dict.getKey("/Filter"),
+ dict.getKey("/DecodeParms"));
+ }
+ else
+ {
+ PointerHolder<ForeignStreamData> foreign_stream_data =
+ new ForeignStreamData(
+ foreign_stream_qpdf->m->encp,
+ foreign_stream_qpdf->m->file,
+ foreign.getObjectID(),
+ foreign.getGeneration(),
+ stream->getOffset(),
+ stream->getLength(),
+ (foreign_stream_qpdf->m->attachment_streams.count(
+ foreign.getObjGen()) > 0),
+ dict);
+ this->m->copied_stream_data_provider->registerForeignStream(
+ local_og, foreign_stream_data);
+ result.replaceStreamData(this->m->copied_streams,
+ dict.getKey("/Filter"),
+ dict.getKey("/DecodeParms"));
+ }
}
else
{
@@ -2239,6 +2474,12 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
this->m->obj_cache[og2] = t;
}
+// Return the unique_id value stored for this QPDF object.
+// copyForeignObject() uses the foreign QPDF's unique_id as the key
+// into object_copiers.
+unsigned long long
+QPDF::getUniqueId() const
+{
+ return this->m->unique_id;
+}
+
std::string
QPDF::getFilename() const
{
@@ -2399,34 +2640,40 @@ QPDF::getCompressibleObjGens()
}
bool
-QPDF::pipeStreamData(int objid, int generation,
+QPDF::pipeStreamData(PointerHolder<EncryptionParameters> encp,
+ PointerHolder<InputSource> file,
+ QPDF& qpdf_for_warning,
+ int objid, int generation,
qpdf_offset_t offset, size_t length,
QPDFObjectHandle stream_dict,
+ bool is_attachment_stream,
Pipeline* pipeline,
bool suppress_warnings,
bool will_retry)
{
- bool success = false;
std::vector<PointerHolder<Pipeline> > to_delete;
- if (this->m->encrypted)
+ if (encp->encrypted)
{
- decryptStream(pipeline, objid, generation, stream_dict, to_delete);
+ decryptStream(encp, file, qpdf_for_warning,
+ pipeline, objid, generation,
+ stream_dict, is_attachment_stream, to_delete);
}
+ bool success = false;
try
{
- this->m->file->seek(offset, SEEK_SET);
+ file->seek(offset, SEEK_SET);
char buf[10240];
while (length > 0)
{
size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length);
- size_t len = this->m->file->read(buf, to_read);
+ size_t len = file->read(buf, to_read);
if (len == 0)
{
throw QPDFExc(qpdf_e_damaged_pdf,
- this->m->file->getName(),
- this->m->last_object_description,
- this->m->file->getLastOffset(),
+ file->getName(),
+ "",
+ file->getLastOffset(),
"unexpected EOF reading stream data");
}
length -= len;
@@ -2439,7 +2686,7 @@ QPDF::pipeStreamData(int objid, int generation,
{
if (! suppress_warnings)
{
- warn(e);
+ qpdf_for_warning.warn(e);
}
}
catch (std::exception& e)
@@ -2447,17 +2694,19 @@ QPDF::pipeStreamData(int objid, int generation,
if (! suppress_warnings)
{
QTC::TC("qpdf", "QPDF decoding error warning");
- warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
- "", this->m->file->getLastOffset(),
- "error decoding stream data for object " +
- QUtil::int_to_string(objid) + " " +
- QUtil::int_to_string(generation) + ": " + e.what()));
+ qpdf_for_warning.warn(
+ QPDFExc(qpdf_e_damaged_pdf, file->getName(),
+ "", file->getLastOffset(),
+ "error decoding stream data for object " +
+ QUtil::int_to_string(objid) + " " +
+ QUtil::int_to_string(generation) + ": " + e.what()));
if (will_retry)
{
- warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
- "", this->m->file->getLastOffset(),
- "stream will be re-processed without"
- " filtering to avoid data loss"));
+ qpdf_for_warning.warn(
+ QPDFExc(qpdf_e_damaged_pdf, file->getName(),
+ "", file->getLastOffset(),
+ "stream will be re-processed without"
+ " filtering to avoid data loss"));
}
}
}
@@ -2475,6 +2724,42 @@ QPDF::pipeStreamData(int objid, int generation,
return success;
}
+// Pipe the data of one of this QPDF's own streams to pipeline.
+// Convenience overload: it supplies this object's encryption
+// parameters and input file, uses *this as the QPDF that receives
+// warnings, looks up whether (objid, generation) is an attachment
+// stream, and forwards every other argument unchanged to the general
+// pipeStreamData overload, whose result it returns.
+bool
+QPDF::pipeStreamData(int objid, int generation,
+                     qpdf_offset_t offset, size_t length,
+                     QPDFObjectHandle stream_dict,
+                     Pipeline* pipeline,
+                     bool suppress_warnings,
+                     bool will_retry)
+{
+    // Compare the count explicitly rather than relying on an implicit
+    // integer-to-bool conversion; this is how the same flag is
+    // computed where a ForeignStreamData is constructed.
+    bool is_attachment_stream =
+        (this->m->attachment_streams.count(
+            QPDFObjGen(objid, generation)) > 0);
+    return pipeStreamData(
+        this->m->encp, this->m->file, *this,
+        objid, generation, offset, length,
+        stream_dict, is_attachment_stream,
+        pipeline, suppress_warnings, will_retry);
+}
+
+// Pipe the data described by a ForeignStreamData to pipeline using the
+// general pipeStreamData overload. The encryption parameters, input
+// file, object id/generation, offset, length, dictionary and
+// attachment flag all come from foreign; warnings are issued on *this.
+// suppress_warnings and will_retry are both passed as false.
+// NOTE(review): encode_flags and decode_level are accepted but not
+// used anywhere in this body.
+bool
+QPDF::pipeForeignStreamData(
+ PointerHolder<ForeignStreamData> foreign,
+ Pipeline* pipeline,
+ unsigned long encode_flags,
+ qpdf_stream_decode_level_e decode_level)
+{
+ // Test-coverage marker only; it does not change what is piped.
+ if (foreign->encp->encrypted)
+ {
+ QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
+ }
+ return pipeStreamData(
+ foreign->encp, foreign->file, *this,
+ foreign->foreign_objid, foreign->foreign_generation,
+ foreign->offset, foreign->length,
+ foreign->local_dict, foreign->is_attachment_stream,
+ pipeline, false, false);
+}
+
void
QPDF::findAttachmentStreams()
{
@@ -2508,3 +2793,13 @@ QPDF::findAttachmentStreams()
}
}
}
+
+// Always throws QPDFExc with code qpdf_e_damaged_pdf, built from the
+// input file's name, an empty object description, the file's last
+// offset, and message. Never returns normally.
+void
+QPDF::stopOnError(std::string const& message)
+{
+ // Throw a generic exception when we lack context for something
+ // more specific. New code should not use this. This method exists
+ // to improve somewhat from calling assert in very old code.
+ throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
+ "", this->m->file->getLastOffset(), message);
+}