diff options
author | Jay Berkenbilt <ejb@ql.org> | 2008-04-29 14:55:25 +0200 |
---|---|---|
committer | Jay Berkenbilt <ejb@ql.org> | 2008-04-29 14:55:25 +0200 |
commit | 9a0b88bf7777c153dc46ace22db74ef24d51583a (patch) | |
tree | f567ac1cf2bf5071a611eb49323a935b6ac938ff /libqpdf/PCRE.cc | |
download | qpdf-9a0b88bf7777c153dc46ace22db74ef24d51583a.tar.zst |
update release date to actual date [release-qpdf-2.0]
git-svn-id: svn+q:///qpdf/trunk@599 71b93d88-0707-0410-a8cf-f5a4172ac649
Diffstat (limited to 'libqpdf/PCRE.cc')
-rw-r--r-- | libqpdf/PCRE.cc | 365 |
1 files changed, 365 insertions, 0 deletions
diff --git a/libqpdf/PCRE.cc b/libqpdf/PCRE.cc new file mode 100644 index 00000000..afa6e954 --- /dev/null +++ b/libqpdf/PCRE.cc @@ -0,0 +1,365 @@ + + +#include <qpdf/PCRE.hh> +#include <qpdf/QUtil.hh> + +#include <iostream> + +PCRE::Exception::Exception(std::string const& message) +{ + this->setMessage("PCRE error: " + message); +} + +PCRE::NoBackref::NoBackref() : + Exception("no match") +{ +} + +PCRE::Match::Match(int nbackrefs, char const* subject) +{ + this->init(-1, nbackrefs, subject); +} + +PCRE::Match::~Match() +{ + this->destroy(); +} + +PCRE::Match::Match(Match const& rhs) +{ + this->copy(rhs); +} + +PCRE::Match& +PCRE::Match::operator=(Match const& rhs) +{ + if (this != &rhs) + { + this->destroy(); + this->copy(rhs); + } + return *this; +} + +void +PCRE::Match::init(int nmatches, int nbackrefs, char const* subject) +{ + this->nmatches = nmatches; + this->nbackrefs = nbackrefs; + this->subject = subject; + this->ovecsize = 3 * (1 + nbackrefs); + this->ovector = 0; + if (this->ovecsize) + { + this->ovector = new int[this->ovecsize]; + } +} + +void +PCRE::Match::copy(Match const& rhs) +{ + this->init(rhs.nmatches, rhs.nbackrefs, rhs.subject); + int i; + for (i = 0; i < this->ovecsize; ++i) + { + this->ovector[i] = rhs.ovector[i]; + } +} + +void +PCRE::Match::destroy() +{ + delete [] this->ovector; +} + +PCRE::Match::operator bool() +{ + return (this->nmatches >= 0); +} + + +std::string +PCRE::Match::getMatch(int n, int flags) + throw(QEXC::General, Exception) +{ + // This method used to be implemented in terms of + // pcre_get_substring, but that function gives you an empty string + // for an unmatched backreference that is in range. 
+ + int offset; + int length; + try + { + getOffsetLength(n, offset, length); + } + catch (NoBackref&) + { + if (flags & gm_no_substring_returns_empty) + { + return ""; + } + else + { + throw; + } + } + + return std::string(this->subject).substr(offset, length); +} + +void +PCRE::Match::getOffsetLength(int n, int& offset, int& length) throw(Exception) +{ + if ((this->nmatches < 0) || + (n > this->nmatches - 1) || + (this->ovector[n * 2] == -1)) + { + throw NoBackref(); + } + offset = this->ovector[n * 2]; + length = this->ovector[n * 2 + 1] - offset; +} + + +int +PCRE::Match::getOffset(int n) throw(Exception) +{ + int offset; + int length; + this->getOffsetLength(n, offset, length); + return offset; +} + + +int +PCRE::Match::getLength(int n) throw(Exception) +{ + int offset; + int length; + this->getOffsetLength(n, offset, length); + return length; +} + + +int +PCRE::Match::nMatches() const +{ + return this->nmatches; +} + +PCRE::PCRE(char const* pattern, int options) throw (Exception) +{ + char const *errptr; + int erroffset; + this->code = pcre_compile(pattern, options, &errptr, &erroffset, 0); + if (this->code) + { + this->nbackrefs = pcre_info(this->code, 0, 0); + } + else + { + std::string message = (std::string("compilation of ") + pattern + + " failed at offset " + + QUtil::int_to_string(erroffset) + ": " + + errptr); + throw Exception(message); + } +} + +PCRE::~PCRE() +{ + pcre_free(this->code); +} + +PCRE::Match +PCRE::match(char const* subject, int options, int startoffset, int size) + throw (QEXC::General, Exception) +{ + if (size == -1) + { + size = strlen(subject); + } + + Match result(this->nbackrefs, subject); + int status = pcre_exec(this->code, 0, subject, size, + startoffset, options, + result.ovector, result.ovecsize); + if (status >= 0) + { + result.nmatches = status; + } + else + { + std::string message; + + switch (status) + { + case PCRE_ERROR_NOMATCH: + break; + + case PCRE_ERROR_BADOPTION: + message = "bad option passed to PCRE::match()"; + 
throw Exception(message); + break; + + case PCRE_ERROR_NOMEMORY: + message = "insufficient memory"; + throw Exception(message); + break; + + case PCRE_ERROR_NULL: + case PCRE_ERROR_BADMAGIC: + case PCRE_ERROR_UNKNOWN_NODE: + default: + message = "pcre_exec returned " + QUtil::int_to_string(status); + throw QEXC::Internal(message); + } + } + + return result; +} + +void +PCRE::test(int n) +{ + try + { + if (n == 1) + { + static char const* utf8 = "abπdefq"; + PCRE u1("^([[:alpha:]]+)"); + PCRE u2("^([\\p{L}]+)", PCRE_UTF8); + PCRE::Match m1 = u1.match(utf8); + if (m1) + { + std::cout << "no utf8: " << m1.getMatch(1) << std::endl; + } + PCRE::Match m2 = u2.match(utf8); + if (m2) + { + std::cout << "utf8: " << m2.getMatch(1) << std::endl; + } + return; + } + + try + { + PCRE pcre1("a**"); + } + catch (Exception& e) + { + std::cout << e.unparse() << std::endl; + } + + PCRE pcre2("^([^\\s:]*)\\s*:\\s*(.*?)\\s*$"); + PCRE::Match m2 = pcre2.match("key: value one two three "); + if (m2) + { + std::cout << m2.nMatches() << std::endl; + std::cout << m2.getMatch(0) << std::endl; + std::cout << m2.getOffset(0) << std::endl; + std::cout << m2.getLength(0) << std::endl; + std::cout << m2.getMatch(1) << std::endl; + std::cout << m2.getOffset(1) << std::endl; + std::cout << m2.getLength(1) << std::endl; + std::cout << m2.getMatch(2) << std::endl; + std::cout << m2.getOffset(2) << std::endl; + std::cout << m2.getLength(2) << std::endl; + try + { + std::cout << m2.getMatch(3) << std::endl; + } + catch (Exception& e) + { + std::cout << e.unparse() << std::endl; + } + try + { + std::cout << m2.getOffset(3) << std::endl; + } + catch (Exception& e) + { + std::cout << e.unparse() << std::endl; + } + } + PCRE pcre3("^(a+)(b+)?$"); + PCRE::Match m3 = pcre3.match("aaa"); + try + { + if (m3) + { + std::cout << m3.nMatches() << std::endl; + std::cout << m3.getMatch(0) << std::endl; + std::cout << m3.getMatch(1) << std::endl; + std::cout << "-" + << m3.getMatch( + 2, 
Match::gm_no_substring_returns_empty) + << "-" << std::endl; + std::cout << "hello" << std::endl; + std::cout << m3.getMatch(2) << std::endl; + std::cout << "can't see this" << std::endl; + } + } + catch (Exception& e) + { + std::cout << e.unparse() << std::endl; + } + + // backref: 1 2 3 4 5 + PCRE pcre4("^((?:(a(b)?)(?:,(c))?)|(c))?$"); + static char const* candidates[] = { + "qqqcqqq", // no match + "ab,c", // backrefs: 0, 1, 2, 3, 4 + "ab", // backrefs: 0, 1, 2, 3 + "a", // backrefs: 0, 1, 2 + "a,c", // backrefs: 0, 1, 2, 4 + "c", // backrefs: 0, 1, 5 + "", // backrefs: 0 + 0 + }; + for (char const** p = candidates; *p; ++p) + { + PCRE::Match m(pcre4.match(*p)); + if (m) + { + int nmatches = m.nMatches(); + for (int i = 0; i < nmatches; ++i) + { + std::cout << *p << ": " << i << ": "; + try + { + std::string match = m.getMatch(i); + std::cout << match; + } + catch (NoBackref&) + { + std::cout << "no backref (getMatch)"; + } + std::cout << std::endl; + + std::cout << *p << ": " << i << ": "; + try + { + int offset; + int length; + m.getOffsetLength(i, offset, length); + std::cout << offset << ", " << length; + } + catch (NoBackref&) + { + std::cout << "no backref (getOffsetLength)"; + } + std:: cout << std::endl; + } + } + else + { + std::cout << *p << ": no match" << std::endl; + } + } + } + catch (QEXC::General& e) + { + std::cout << "unexpected exception: " << e.unparse() << std::endl; + } +} |