1 files changed, 77 insertions, 0 deletions
diff --git a/libqpdf/ContentNormalizer.cc b/libqpdf/ContentNormalizer.cc
new file mode 100644
index 00000000..35a8ad74
--- /dev/null
+++ b/libqpdf/ContentNormalizer.cc
@@ -0,0 +1,77 @@
+#include <qpdf/ContentNormalizer.hh>
+#include <qpdf/QUtil.hh>
+
+// Constructor: the body is empty, so nothing is set up explicitly here.
+ContentNormalizer::ContentNormalizer()
+{}
+
+// Destructor: the body is empty, so no explicit cleanup happens here.
+ContentNormalizer::~ContentNormalizer()
+{}
+
+// Token handler: re-emits each token through write()/writeToken().
+// Whitespace has CR and CRLF line endings rewritten as LF; strings and
+// names are rebuilt from getValue(); all other tokens pass unchanged.
+void
+ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
+{
+    // Fetched once: the token is a const reference and is not modified
+    // here, so the same raw text serves both uses below.
+    std::string const raw = token.getRawValue();
+    QPDFTokenizer::token_type_e const token_type = token.getType();
+
+    switch (token_type)
+    {
+      case QPDFTokenizer::tt_space:
+        {
+            size_t const len = raw.length();
+            for (size_t i = 0; i < len; ++i)
+            {
+                char ch = raw[i];
+                if (ch != '\r')
+                {
+                    write(&ch, 1);
+                }
+                else if ((i + 1 >= len) || (raw[i + 1] != '\n'))
+                {
+                    // A lone CR becomes LF. The CR of a CRLF pair is
+                    // dropped; its LF is written on the next pass.
+                    write("\n");
+                }
+            }
+        }
+        break;
+
+      case QPDFTokenizer::tt_string:
+        // Replacing string and name tokens in this way normalizes
+        // their representation as this will automatically handle
+        // quoting of unprintable characters, etc.
+        writeToken(QPDFTokenizer::Token(
+                       QPDFTokenizer::tt_string, token.getValue()));
+        break;
+
+      case QPDFTokenizer::tt_name:
+        writeToken(QPDFTokenizer::Token(
+                       QPDFTokenizer::tt_name, token.getValue()));
+        break;
+
+      default:
+        writeToken(token);
+        break;
+    }
+
+    // A string or name whose raw text contained a CR or LF is followed
+    // by a newline in the output.
+    if (((token_type == QPDFTokenizer::tt_string) ||
+         (token_type == QPDFTokenizer::tt_name)) &&
+        (raw.find_first_of("\r\n") != std::string::npos))
+    {
+        write("\n");
+    }
+}
+
+// End-of-input handler: its only action is to call finish().
+void ContentNormalizer::handleEOF()
+{
+    finish();
+}