about summary refs log tree commit diff stats
path: root/libqpdf/QPDFPageObjectHelper.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libqpdf/QPDFPageObjectHelper.cc')
-rw-r--r-- libqpdf/QPDFPageObjectHelper.cc 611
1 file changed, 605 insertions, 6 deletions
diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc
index 4093622d..9543d294 100644
--- a/libqpdf/QPDFPageObjectHelper.cc
+++ b/libqpdf/QPDFPageObjectHelper.cc
@@ -1,5 +1,286 @@
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QTC.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/Pl_Concatenate.hh>
+#include <qpdf/Pl_Buffer.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QPDFMatrix.hh>
+
+// Stream data provider backed by a page object. It keeps a handle to
+// the page and writes that page's content to a pipeline when
+// provideStreamData() is invoked.
+class ContentProvider: public QPDFObjectHandle::StreamDataProvider
+{
+  private:
+    // Page object whose /Contents is piped out on request.
+    QPDFObjectHandle from_page;
+
+  public:
+    ContentProvider(QPDFObjectHandle from_page) :
+        from_page(from_page)
+    {
+    }
+    virtual ~ContentProvider() {}
+
+    // Write the page's content to the given pipeline. The
+    // implementation does not use the object ID or generation.
+    virtual void provideStreamData(
+        int objid, int generation, Pipeline* pipeline);
+};
+
+// Pipe everything found under the page's /Contents key through a
+// Pl_Concatenate into the supplied pipeline. The object ID and
+// generation arguments are ignored.
+void
+ContentProvider::provideStreamData(int, int, Pipeline* p)
+{
+    // Description handed to pipeContentStreams to identify the page.
+    std::string description("contents from page object ");
+    description += QUtil::int_to_string(from_page.getObjectID());
+    description += " ";
+    description += QUtil::int_to_string(from_page.getGeneration());
+
+    std::string all_description;
+    Pl_Concatenate joined("concatenate", p);
+    QPDFObjectHandle contents = from_page.getKey("/Contents");
+    contents.pipeContentStreams(&joined, description, all_description);
+    // Explicitly finish the concatenating pipeline once every
+    // content stream has been written.
+    joined.manualFinish();
+}
+
+// Token filter that follows inline images (BI ... ID ... EI) in a
+// content stream. Inline images whose data is at least min_size
+// bytes are turned into image XObjects stored in the supplied
+// resources dictionary; smaller ones are passed through.
+class InlineImageTracker: public QPDFObjectHandle::TokenFilter
+{
+  public:
+    InlineImageTracker(QPDF* qpdf, size_t min_size,
+                       QPDFObjectHandle resources);
+    virtual ~InlineImageTracker() {}
+
+    virtual void handleToken(QPDFTokenizer::Token const& token);
+    // Build an image XObject dictionary from an inline image
+    // dictionary, expanding abbreviated keys and values.
+    QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);
+
+    // QPDF that new image streams are created in
+    QPDF* qpdf;
+    // minimum image data length for externalization
+    size_t min_size;
+    // resources dictionary receiving new /XObject entries
+    QPDFObjectHandle resources;
+    // text of the inline image dictionary collected since BI
+    std::string dict_str;
+    // text of the inline image operator sequence collected since BI
+    std::string bi_str;
+    // starting suffix used when requesting unique resource names
+    int min_suffix;
+    // set once at least one inline image has been externalized
+    bool any_images;
+    // st_top: outside an inline image; st_bi: between BI and EI
+    enum { st_top, st_bi } state;
+};
+
+// Start in the top-level state with no images externalized yet and
+// resource name suffixes beginning at 1.
+InlineImageTracker::InlineImageTracker(
+    QPDF* qpdf, size_t min_size, QPDFObjectHandle resources) :
+    qpdf(qpdf), min_size(min_size), resources(resources),
+    min_suffix(1), any_images(false), state(st_top)
+{
+}
+
+// Pairing of an abbreviated name allowed in an inline image
+// dictionary with the full name used in an image XObject.
+struct IIAbbreviation
+{
+    char const* abbreviated;
+    char const* full;
+};
+
+// Look up name in a table of abbreviations. Returns the full name
+// when name is one of the abbreviations and an empty string
+// otherwise.
+static std::string
+expandIIName(IIAbbreviation const* table, size_t count,
+             std::string const& name)
+{
+    for (size_t i = 0; i < count; ++i)
+    {
+        if (name == table[i].abbreviated)
+        {
+            return table[i].full;
+        }
+    }
+    return std::string();
+}
+
+// Create an image XObject dictionary from an inline image
+// dictionary. /Type and /Subtype are added, abbreviated keys are
+// replaced by their full names, and abbreviated color space and
+// filter names are expanded. Keys and values that are not known
+// abbreviations are copied unchanged.
+QPDFObjectHandle
+InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
+{
+    static IIAbbreviation const key_names[] = {
+        {"/BPC", "/BitsPerComponent"},
+        {"/CS", "/ColorSpace"},
+        {"/D", "/Decode"},
+        {"/DP", "/DecodeParms"},
+        {"/F", "/Filter"},
+        {"/H", "/Height"},
+        {"/IM", "/ImageMask"},
+        {"/I", "/Interpolate"},
+        {"/W", "/Width"},
+    };
+    static IIAbbreviation const colorspace_names[] = {
+        {"/G", "/DeviceGray"},
+        {"/RGB", "/DeviceRGB"},
+        {"/CMYK", "/DeviceCMYK"},
+        {"/I", "/Indexed"},
+    };
+    static IIAbbreviation const filter_names[] = {
+        {"/AHx", "/ASCIIHexDecode"},
+        {"/A85", "/ASCII85Decode"},
+        {"/LZW", "/LZWDecode"},
+        {"/Fl", "/FlateDecode"},
+        {"/RL", "/RunLengthDecode"},
+        {"/CCF", "/CCITTFaxDecode"},
+        {"/DCT", "/DCTDecode"},
+    };
+    size_t const n_keys = sizeof(key_names) / sizeof(key_names[0]);
+    size_t const n_colorspaces =
+        sizeof(colorspace_names) / sizeof(colorspace_names[0]);
+    size_t const n_filters = sizeof(filter_names) / sizeof(filter_names[0]);
+
+    QPDFObjectHandle xobj_dict = QPDFObjectHandle::newDictionary();
+    xobj_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
+    xobj_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));
+
+    std::set<std::string> const source_keys = odict.getKeys();
+    for (std::set<std::string>::const_iterator k = source_keys.begin();
+         k != source_keys.end(); ++k)
+    {
+        QPDFObjectHandle value = odict.getKey(*k);
+
+        // Use the full key name when the key is an abbreviation.
+        std::string key = expandIIName(key_names, n_keys, *k);
+        if (key.empty())
+        {
+            key = *k;
+        }
+
+        if (key == "/ColorSpace")
+        {
+            // Only a name can be an abbreviated color space;
+            // anything else is copied as is.
+            if (value.isName())
+            {
+                std::string full = expandIIName(
+                    colorspace_names, n_colorspaces, value.getName());
+                if (! full.empty())
+                {
+                    value = QPDFObjectHandle::newName(full);
+                }
+            }
+        }
+        else if (key == "/Filter")
+        {
+            // A filter is either a single name or an array; handle
+            // both through a vector and rebuild the original shape
+            // afterwards.
+            std::vector<QPDFObjectHandle> filters;
+            if (value.isName())
+            {
+                filters.push_back(value);
+            }
+            else if (value.isArray())
+            {
+                filters = value.getArrayAsVector();
+            }
+            for (size_t i = 0; i < filters.size(); ++i)
+            {
+                if (! filters.at(i).isName())
+                {
+                    continue;
+                }
+                std::string full = expandIIName(
+                    filter_names, n_filters, filters.at(i).getName());
+                if (! full.empty())
+                {
+                    filters.at(i) = QPDFObjectHandle::newName(full);
+                }
+            }
+            if (value.isName() && (filters.size() == 1))
+            {
+                value = filters.at(0);
+            }
+            else if (value.isArray())
+            {
+                value = QPDFObjectHandle::newArray(filters);
+            }
+        }
+        xobj_dict.replaceKey(key, value);
+    }
+    return xobj_dict;
+}
+
+// Token filter callback. Works as a two-state machine:
+//
+//  * st_top: tokens are passed through unchanged until a BI operator
+//    is seen, which starts collecting an inline image.
+//  * st_bi: tokens between BI and ID are accumulated both as the
+//    original operator text (bi_str) and as dictionary text
+//    (dict_str). When the image data token arrives, the image is
+//    either externalized (data length >= min_size) or written back
+//    unchanged.
+//
+// When an image is externalized, the state stays st_bi so that the
+// following EI operator is consumed rather than written; when it is
+// kept, the state returns to st_top so EI is passed through.
+void
+InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
+{
+    if (state == st_bi)
+    {
+        if (token.getType() == QPDFTokenizer::tt_inline_image)
+        {
+            std::string image_data(token.getValue());
+            size_t len = image_data.length();
+            if (len >= this->min_size)
+            {
+                QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
+                Pl_Buffer b("image_data");
+                b.write(QUtil::unsigned_char_pointer(image_data), len);
+                b.finish();
+                QPDFObjectHandle dict =
+                    convertIIDict(QPDFObjectHandle::parse(dict_str));
+                dict.replaceKey("/Length", QPDFObjectHandle::newInteger(len));
+                std::string name = resources.getUniqueResourceName(
+                    "/IIm", this->min_suffix);
+                QPDFObjectHandle image = QPDFObjectHandle::newStream(
+                    this->qpdf, b.getBuffer());
+                image.replaceDict(dict);
+                resources.getKey("/XObject").replaceKey(name, image);
+                // Replace the inline image with a reference to the
+                // newly created image XObject.
+                write(name);
+                write(" Do\n");
+                any_images = true;
+            }
+            else
+            {
+                QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
+                write(bi_str);
+                writeToken(token);
+                state = st_top;
+            }
+        }
+        else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID"))
+        {
+            bi_str += token.getValue();
+            dict_str += " >>";
+        }
+        else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI"))
+        {
+            state = st_top;
+        }
+        else
+        {
+            // Accumulate the token exactly as it appeared in the
+            // content stream. The processed value of a token can
+            // differ from its source text (for example, a string
+            // token's value has no delimiters), which would corrupt
+            // both the preserved inline image and the dictionary
+            // text that is parsed later.
+            bi_str += token.getRawValue();
+            dict_str += token.getRawValue();
+        }
+    }
+    else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI"))
+    {
+        bi_str = token.getValue();
+        dict_str = "<< ";
+        state = st_bi;
+    }
+    else
+    {
+        writeToken(token);
+    }
+}
QPDFPageObjectHelper::Members::~Members()
{
@@ -14,12 +295,95 @@ QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle oh) :
{
}
+// Look up an attribute of this page. /MediaBox, /CropBox, /Resources
+// and /Rotate are inheritable: when the page itself has no value,
+// the chain of /Parent nodes is searched, stopping if a node is
+// visited twice. When copy_if_shared is true and the value was
+// inherited or is an indirect object, a shallow copy is stored
+// directly in the page and returned instead.
+QPDFObjectHandle
+QPDFPageObjectHelper::getAttribute(std::string const& name,
+                                   bool copy_if_shared)
+{
+    bool const inheritable =
+        (name == "/MediaBox") || (name == "/CropBox") ||
+        (name == "/Resources") || (name == "/Rotate");
+
+    QPDFObjectHandle current = this->oh;
+    QPDFObjectHandle found(current.getKey(name));
+    bool from_ancestor = false;
+    if (inheritable)
+    {
+        // Object IDs of nodes already visited, used to detect loops
+        // in the /Parent chain.
+        std::set<QPDFObjGen> visited;
+        while (found.isNull() && current.hasKey("/Parent"))
+        {
+            visited.insert(current.getObjGen());
+            current = current.getKey("/Parent");
+            if (visited.count(current.getObjGen()) != 0)
+            {
+                break;
+            }
+            found = current.getKey(name);
+            if (found.isNull())
+            {
+                continue;
+            }
+            QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance");
+            from_ancestor = true;
+        }
+    }
+
+    bool const shared = (from_ancestor || found.isIndirect());
+    if (! (copy_if_shared && shared))
+    {
+        return found;
+    }
+    QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute");
+    found = found.shallowCopy();
+    this->oh.replaceKey(name, found);
+    return found;
+}
+
+// Return the page's /TrimBox, falling back to the crop box when the
+// page has no /TrimBox. copy_if_shared is forwarded to both lookups.
+QPDFObjectHandle
+QPDFPageObjectHelper::getTrimBox(bool copy_if_shared)
+{
+    QPDFObjectHandle trim_box = getAttribute("/TrimBox", copy_if_shared);
+    if (! trim_box.isNull())
+    {
+        return trim_box;
+    }
+    return getCropBox(copy_if_shared);
+}
+
+// Return the page's /CropBox, falling back to the media box when no
+// /CropBox is present. copy_if_shared is forwarded to the fallback
+// lookup as well, so a caller that asked for a private copy does not
+// get back a shared or inherited /MediaBox. This matches how
+// getTrimBox forwards the flag to getCropBox.
+QPDFObjectHandle
+QPDFPageObjectHelper::getCropBox(bool copy_if_shared)
+{
+    QPDFObjectHandle result = getAttribute("/CropBox", copy_if_shared);
+    if (result.isNull())
+    {
+        result = getMediaBox(copy_if_shared);
+    }
+    return result;
+}
+
+// Return the page's /MediaBox as found by getAttribute, which also
+// searches ancestor nodes for this inheritable attribute.
+QPDFObjectHandle
+QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
+{
+    QPDFObjectHandle media_box = getAttribute("/MediaBox", copy_if_shared);
+    return media_box;
+}
+
+// Forward to getPageImages() on the underlying page object handle
+// and return its result unchanged.
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getPageImages()
{
return this->oh.getPageImages();
}
+// Convert inline images whose data is at least min_size bytes into
+// image XObjects. The page content is filtered through an
+// InlineImageTracker, and /Contents is replaced with the filtered
+// content only if at least one image was externalized.
+void
+QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
+{
+    QPDFObjectHandle page_resources = getAttribute("/Resources", true);
+    // Calling mergeResources also ensures that /XObject becomes
+    // direct and is not shared with other pages.
+    QPDFObjectHandle empty_xobject =
+        QPDFObjectHandle::parse("<< /XObject << >> >>");
+    page_resources.mergeResources(empty_xobject);
+
+    InlineImageTracker tracker(
+        this->oh.getOwningQPDF(), min_size, page_resources);
+    Pl_Buffer filtered("new page content");
+    filterPageContents(&tracker, &filtered);
+    if (! tracker.any_images)
+    {
+        // Nothing was externalized; leave the page content alone.
+        return;
+    }
+    QPDFObjectHandle new_contents = QPDFObjectHandle::newStream(
+        this->oh.getOwningQPDF(), filtered.getBuffer());
+    getObjectHandle().replaceKey("/Contents", new_contents);
+}
+
std::vector<QPDFAnnotationObjectHelper>
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
{
@@ -98,11 +462,16 @@ QPDFPageObjectHelper::addContentTokenFilter(
+ // Token filter that passes tokens through while collecting
+ // information about them: handleToken() adds names to the names
+ // set and sets saw_bad when it encounters a token of type tt_bad.
class NameWatcher: public QPDFObjectHandle::TokenFilter
{
public:
+ NameWatcher() :
+ saw_bad(false)
+ {
+ }
virtual ~NameWatcher()
{
}
virtual void handleToken(QPDFTokenizer::Token const&);
+ // Names recorded by handleToken().
std::set<std::string> names;
+ // True once a bad token has been seen; starts out false.
+ bool saw_bad;
};
void
@@ -115,6 +484,10 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
this->names.insert(
QPDFObjectHandle::newName(token.getValue()).getName());
}
+ else if (token.getType() == QPDFTokenizer::tt_bad)
+ {
+ saw_bad = true;
+ }
writeToken(token);
}
@@ -133,6 +506,14 @@ QPDFPageObjectHelper::removeUnreferencedResources()
"; not attempting to remove unreferenced objects from this page");
return;
}
+ if (nw.saw_bad)
+ {
+ QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
+ this->oh.warnIfPossible(
+ "Bad token found while scanning content stream; "
+ "not attempting to remove unreferenced objects from this page");
+ return;
+ }
// Walk through /Font and /XObject dictionaries, removing any
// resources that are not referenced. We must make copies of
// resource dictionaries down into the dictionaries are mutating
@@ -141,12 +522,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
std::vector<std::string> to_filter;
to_filter.push_back("/Font");
to_filter.push_back("/XObject");
- QPDFObjectHandle resources = this->oh.getKey("/Resources");
- if (resources.isDictionary())
- {
- resources = resources.shallowCopy();
- this->oh.replaceKey("/Resources", resources);
- }
+ QPDFObjectHandle resources = getAttribute("/Resources", true);
for (std::vector<std::string>::iterator d_iter = to_filter.begin();
d_iter != to_filter.end(); ++d_iter)
{
@@ -155,6 +531,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
{
continue;
}
+ dict = dict.shallowCopy();
resources.replaceKey(*d_iter, dict);
std::set<std::string> keys = dict.getKeys();
for (std::set<std::string>::iterator k_iter = keys.begin();
@@ -167,3 +544,225 @@ QPDFPageObjectHelper::removeUnreferencedResources()
}
}
}
+
+// Make a shallow copy of the page object, add it to the owning QPDF
+// as a new indirect object, and return a helper for it. Throws
+// std::runtime_error if the page object has no owning QPDF.
+QPDFPageObjectHelper
+QPDFPageObjectHelper::shallowCopyPage()
+{
+    QPDF* owner = this->oh.getOwningQPDF();
+    if (owner)
+    {
+        QPDFObjectHandle page_copy = this->oh.shallowCopy();
+        QPDFObjectHandle indirect_copy = owner->makeIndirectObject(page_copy);
+        return QPDFPageObjectHelper(indirect_copy);
+    }
+    throw std::runtime_error(
+        "QPDFPageObjectHelper::shallowCopyPage"
+        " called with a direct object");
+}
+
+// Compute the matrix corresponding to the page's /Rotate and
+// /UserUnit values, relative to the page's trim box. The identity
+// matrix is returned when the trim box is not a rectangle, when
+// neither attribute is present, or when invert is requested and the
+// scale factor is zero. With invert set, the reciprocal scale and
+// the opposite rotation are used.
+QPDFObjectHandle::Matrix
+QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
+{
+    QPDFObjectHandle::Matrix result(1, 0, 0, 1, 0, 0);
+    QPDFObjectHandle trim_box = getTrimBox(false);
+    if (! trim_box.isRectangle())
+    {
+        return result;
+    }
+    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
+    QPDFObjectHandle user_unit_obj = getAttribute("/UserUnit", false);
+    if (rotate_obj.isNull() && user_unit_obj.isNull())
+    {
+        // No rotation or scaling specified
+        return result;
+    }
+
+    QPDFObjectHandle::Rectangle box = trim_box.getArrayAsRectangle();
+    double const box_width = box.urx - box.llx;
+    double const box_height = box.ury - box.lly;
+
+    double factor = 1.0;
+    if (user_unit_obj.isNumber())
+    {
+        factor = user_unit_obj.getNumericValue();
+    }
+    int angle = 0;
+    if (rotate_obj.isInteger())
+    {
+        angle = rotate_obj.getIntValue();
+    }
+
+    if (invert)
+    {
+        if (factor == 0.0)
+        {
+            // A zero scale factor cannot be inverted.
+            return result;
+        }
+        factor = 1.0 / factor;
+        angle = 360 - angle;
+    }
+
+    // Any angle other than 90, 180, or 270 is treated as no
+    // rotation, which also ignores invalid rotation angles.
+    if (angle == 90)
+    {
+        result = QPDFObjectHandle::Matrix(
+            0, -factor, factor, 0, 0, box_width * factor);
+    }
+    else if (angle == 180)
+    {
+        result = QPDFObjectHandle::Matrix(
+            -factor, 0, 0, -factor,
+            box_width * factor, box_height * factor);
+    }
+    else if (angle == 270)
+    {
+        result = QPDFObjectHandle::Matrix(
+            0, factor, -factor, 0, box_height * factor, 0);
+    }
+    else
+    {
+        result = QPDFObjectHandle::Matrix(
+            factor, 0, 0, factor, 0, 0);
+    }
+    return result;
+}
+
+// Create a form XObject stream from this page. Its dictionary gets
+// shallow copies of the page's /Resources and /Group and uses the
+// trim box as /BBox; its data is supplied by a ContentProvider for
+// the page. When handle_transformations is true and the page has
+// /Rotate or /UserUnit, a /Matrix from getMatrixForTransformations()
+// is added. Throws std::runtime_error if the page object has no
+// owning QPDF.
+QPDFObjectHandle
+QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
+{
+    QPDF* owner = this->oh.getOwningQPDF();
+    if (owner == 0)
+    {
+        throw std::runtime_error(
+            "QPDFPageObjectHelper::getFormXObjectForPage"
+            " called with a direct object");
+    }
+
+    QPDFObjectHandle form = QPDFObjectHandle::newStream(owner);
+    QPDFObjectHandle form_dict = form.getDict();
+    form_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
+    form_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
+
+    QPDFObjectHandle resources_copy =
+        getAttribute("/Resources", false).shallowCopy();
+    form_dict.replaceKey("/Resources", resources_copy);
+    QPDFObjectHandle group_copy =
+        getAttribute("/Group", false).shallowCopy();
+    form_dict.replaceKey("/Group", group_copy);
+
+    QPDFObjectHandle form_bbox = getTrimBox(false).shallowCopy();
+    if (! form_bbox.isRectangle())
+    {
+        // The box is stored anyway; the warning tells the user the
+        // result is not usable.
+        this->oh.warnIfPossible(
+            "bounding box is invalid; form"
+            " XObject created from page will not work");
+    }
+    form_dict.replaceKey("/BBox", form_bbox);
+
+    PointerHolder<QPDFObjectHandle::StreamDataProvider> content_provider =
+        new ContentProvider(this->oh);
+    form.replaceStreamData(
+        content_provider,
+        QPDFObjectHandle::newNull(),
+        QPDFObjectHandle::newNull());
+
+    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
+    QPDFObjectHandle user_unit_obj = getAttribute("/UserUnit", false);
+    bool const page_is_transformed =
+        ! (rotate_obj.isNull() && user_unit_obj.isNull());
+    if (handle_transformations && page_is_transformed)
+    {
+        QPDFObjectHandle matrix_array =
+            QPDFObjectHandle::newArray(getMatrixForTransformations());
+        form_dict.replaceKey("/Matrix", matrix_array);
+    }
+
+    return form;
+}
+
+// Return content stream text ("q ... cm <name> Do Q") that draws the
+// form XObject fo, referenced by the resource name given in name,
+// inside rect: shrunk if necessary (never enlarged) and centered.
+// Returns an empty string when the form has no valid /BBox or when
+// its transformed bounding box has zero width or height.
+std::string
+QPDFPageObjectHelper::placeFormXObject(
+    QPDFObjectHandle fo, std::string name,
+    QPDFObjectHandle::Rectangle rect,
+    bool invert_transformations)
+{
+    // When a form XObject is rendered, the transformation in the
+    // graphics state (cm) is applied first -- at that point the PDF
+    // interpreter does not yet know a form XObject is about to be
+    // drawn -- and then the form's own matrix is applied. The
+    // combined matrix maps the form's bounding box to a new
+    // rectangle. The goal here is a cm matrix that makes that
+    // rectangle land in exactly the right spot.
+
+    QPDFObjectHandle form_dict = fo.getDict();
+    QPDFObjectHandle bbox_handle = form_dict.getKey("/BBox");
+    if (! bbox_handle.isRectangle())
+    {
+        return "";
+    }
+
+    // page_inverse: undoes scaling and rotation of the destination
+    //   page so that the overlaid form is placed in absolute terms
+    //   rather than relative to the destination page's properties.
+    //   It is part of the computed cm matrix.
+    // form_matrix: the form XObject's own /Matrix. It is needed for
+    //   the calculations but is not part of the final result because
+    //   the PDF interpreter applies it automatically, last, before
+    //   drawing the form.
+    // combined: work matrix made of the two above.
+    QPDFMatrix page_inverse;
+    QPDFMatrix form_matrix;
+    QPDFMatrix combined;
+    if (invert_transformations)
+    {
+        page_inverse = QPDFMatrix(getMatrixForTransformations(true));
+        combined.concat(page_inverse);
+    }
+    QPDFObjectHandle matrix_handle = form_dict.getKey("/Matrix");
+    if (matrix_handle.isMatrix())
+    {
+        form_matrix = QPDFMatrix(matrix_handle.getArrayAsMatrix());
+        combined.concat(form_matrix);
+    }
+
+    // combined now accounts for the form XObject's matrix and, if
+    // requested, the destination page. What remains is scale and
+    // position.
+
+    // Step 1: determine the scale factor that makes the transformed
+    // bounding box fit within the destination rectangle.
+    QPDFObjectHandle::Rectangle form_box = bbox_handle.getArrayAsRectangle();
+    QPDFObjectHandle::Rectangle placed =
+        combined.transformRectangle(form_box);
+    if ((placed.urx == placed.llx) || (placed.ury == placed.lly))
+    {
+        // avoid division by zero
+        return "";
+    }
+    double const placed_width = placed.urx - placed.llx;
+    double const placed_height = placed.ury - placed.lly;
+    double const fit_x = (rect.urx - rect.llx) / placed_width;
+    double const fit_y = (rect.ury - rect.lly) / placed_height;
+    // Use the smaller of the two ratios, and never expand.
+    double scale = fit_y;
+    if (fit_x < fit_y)
+    {
+        scale = fit_x;
+    }
+    if (scale > 1.0)
+    {
+        scale = 1.0;
+    }
+
+    // Step 2: determine the translation that centers the scaled,
+    // transformed bounding box within the destination rectangle.
+    QPDFMatrix scaled;
+    scaled.scale(scale, scale);
+    scaled.concat(page_inverse);
+    scaled.concat(form_matrix);
+    placed = scaled.transformRectangle(form_box);
+    double const tx =
+        ((rect.llx + rect.urx) / 2.0) - ((placed.llx + placed.urx) / 2.0);
+    double const ty =
+        ((rect.lly + rect.ury) / 2.0) - ((placed.lly + placed.ury) / 2.0);
+
+    // Assemble the final matrix. form_matrix is deliberately left
+    // out because the PDF interpreter applies it automatically.
+    QPDFMatrix cm;
+    cm.translate(tx, ty);
+    cm.scale(scale, scale);
+    cm.concat(page_inverse);
+
+    std::string content("q\n");
+    content += cm.unparse();
+    content += " cm\n";
+    content += name;
+    content += " Do\n";
+    content += "Q\n";
+    return content;
+}