提交 7d3d51f8 编写于 作者: A Alexey Milovidov

dbms: using in-tree re2 library [#METR-17973].

上级 21d37dba
# This is the official list of RE2 authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Google Inc.
Samsung Electronics
Stefano Rivera <stefano.rivera@gmail.com>
# This is the official list of people who can contribute
# (and typically have contributed) code to the RE2 repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Dominic Battré <battre@chromium.org>
Dmitriy Vyukov <dvyukov@google.com>
John Millikin <jmillikin@gmail.com>
Mike Nazarewicz <mpn@google.com>
Pawel Hajdan <phajdan.jr@gmail.com>
Rob Pike <r@google.com>
Russ Cox <rsc@swtch.com>
Sanjay Ghemawat <sanjay@google.com>
Stefano Rivera <stefano.rivera@gmail.com>
Srinivasan Venkatachary <vsri@google.com>
Viatcheslav Ostapenko <sl.ostapenko@samsung.com>
// Copyright (c) 2009 The RE2 Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This is the source code repository for RE2, a regular expression library.
For documentation about how to install and use RE2,
visit http://code.google.com/p/re2/.
The short version is:
make
make test
make install
make testinstall
Unless otherwise noted, the RE2 source files are distributed
under the BSD-style license found in the LICENSE file.
RE2's native language is C++.
An Inferno wrapper is at http://code.google.com/p/inferno-re2/.
A Python wrapper is at http://github.com/facebook/pyre2/.
A Ruby wrapper is at http://github.com/axic/rre2/.
// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tested by search_test.cc, exhaustive_test.cc, tester.cc
// Prog::SearchBitState is a regular expression search with submatch
// tracking for small regular expressions and texts. Like
// testing/backtrack.cc, it allocates a bit vector with (length of
// text) * (length of prog) bits, to make sure it never explores the
// same (character position, instruction) state multiple times. This
// limits the search to run in time linear in the length of the text.
//
// Unlike testing/backtrack.cc, SearchBitState is not recursive
// on the text.
//
// SearchBitState is a fast replacement for the NFA code on small
// regexps and texts when SearchOnePass cannot be used.
#include "re2/prog.h"
#include "re2/regexp.h"
namespace re2 {
// One entry on BitState's explicit backtracking stack.
// BitState::Push fills in all three fields; BitState::TrySearch pops them.
struct Job {
int id;  // index of the program instruction to run (passed to prog_->inst())
int arg;  // 0 for a fresh visit; > 0 when resuming an earlier visit
const char* p;  // text position; for a resumed capture, the saved old register value
};
// State for a single backtracking search over one (prog, text) pair.
// A bitmap of visited (instruction, text position) pairs keeps any pair
// from being explored more than once.  The scratch arrays are allocated
// by Search() and released by the destructor.
class BitState {
public:
explicit BitState(Prog* prog);
~BitState();
// The usual Search prototype.
// Can only call Search once per BitState.
bool Search(const StringPiece& text, const StringPiece& context,
bool anchored, bool longest,
StringPiece* submatch, int nsubmatch);
private:
// Returns true the first time (id, p) is seen and marks it as visited.
inline bool ShouldVisit(int id, const char* p);
// Pushes the job (id, p, arg) onto job_, growing the stack if it is full.
void Push(int id, const char* p, int arg);
// Doubles the capacity of job_; returns false if there is still no room.
bool GrowStack();
// Runs the backtracking search from instruction id at text position p.
bool TrySearch(int id, const char* p);
// Search parameters
Prog* prog_; // program being run
StringPiece text_; // text being searched
StringPiece context_; // greater context of text being searched
bool anchored_; // whether search is anchored at text.begin()
bool longest_; // whether search wants leftmost-longest match
bool endmatch_; // whether match must end at text.end()
StringPiece *submatch_; // submatches to fill in
int nsubmatch_; // # of submatches to fill in
// Search state
const char** cap_; // capture registers
int ncap_; // # of entries in cap_
static const int VisitedBits = 32; // # of bits in each word of visited_
uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked
int nvisited_; // # of words in bitmap
Job *job_; // stack of text positions to explore
int njob_; // # of jobs currently on the stack
int maxjob_; // # of jobs job_ has room for
};
// Binds the search to prog and leaves every scratch pointer NULL and every
// counter zero; the scratch arrays are allocated later, in Search().
BitState::BitState(Prog* prog)
: prog_(prog),
anchored_(false),
longest_(false),
endmatch_(false),
submatch_(NULL),
nsubmatch_(0),
cap_(NULL),
ncap_(0),
visited_(NULL),
nvisited_(0),
job_(NULL),
njob_(0),
maxjob_(0) {
}
// Releases the three scratch arrays.  Each pointer is either NULL (Search
// was never run) or the result of new[], so delete[] is always valid.
BitState::~BitState() {
  delete[] cap_;
  delete[] job_;
  delete[] visited_;
}
// Should the search visit the pair ip, p?
// If so, remember that it was visited so that the next time,
// we don't repeat the visit.
bool BitState::ShouldVisit(int id, const char* p) {
uint n = id * (text_.size() + 1) + (p - text_.begin());
if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
return false;
visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
return true;
}
// Grow the stack.
bool BitState::GrowStack() {
// VLOG(0) << "Reallocate.";
maxjob_ *= 2;
Job* newjob = new Job[maxjob_];
memmove(newjob, job_, njob_*sizeof job_[0]);
delete[] job_;
job_ = newjob;
if (njob_ >= maxjob_) {
LOG(DFATAL) << "Job stack overflow.";
return false;
}
return true;
}
// Push the triple (id, p, arg) onto the stack, growing it if necessary.
void BitState::Push(int id, const char* p, int arg) {
if (njob_ >= maxjob_) {
if (!GrowStack())
return;
}
int op = prog_->inst(id)->opcode();
if (op == kInstFail)
return;
// Only check ShouldVisit when arg == 0.
// When arg > 0, we are continuing a previous visit.
if (arg == 0 && !ShouldVisit(id, p))
return;
Job* j = &job_[njob_++];
j->id = id;
j->p = p;
j->arg = arg;
}
// Try a search from instruction id0 in state p0.
// Return whether it succeeded.
//
// The search is a depth-first walk over (instruction, text position)
// states, driven by the explicit job_ stack rather than by recursion.
// When nsubmatch_ > 0 the best match found so far is copied from the
// capture registers into submatch_[0..nsubmatch_-1].
bool BitState::TrySearch(int id0, const char* p0) {
  bool matched = false;
  const char* end = text_.end();
  // Start from an empty stack holding only the initial state.
  njob_ = 0;
  Push(id0, p0, 0);
  while (njob_ > 0) {
    // Pop job off stack.
    --njob_;
    int id = job_[njob_].id;
    const char* p = job_[njob_].p;
    int arg = job_[njob_].arg;

    // Optimization: rather than push and pop,
    // code that is going to Push and continue
    // the loop simply updates id, p, and arg
    // and jumps to CheckAndLoop.  We have to
    // do the ShouldVisit check that Push
    // would have, but we avoid the stack
    // manipulation.
    if (0) {
    CheckAndLoop:
      if (!ShouldVisit(id, p))
        continue;
    }

    // Visit ip, p.
    // VLOG(0) << "Job: " << ip->id() << " "
    //         << (p - text_.begin()) << " " << arg;
    Prog::Inst* ip = prog_->inst(id);
    switch (ip->opcode()) {
      case kInstFail:
      default:
        LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg;
        return false;

      case kInstAlt:
        // Cannot just
        //   Push(ip->out1(), p, 0);
        //   Push(ip->out(), p, 0);
        // If, during the processing of ip->out(), we encounter
        // ip->out1() via another path, we want to process it then.
        // Pushing it here will inhibit that.  Instead, re-push
        // ip with arg==1 as a reminder to push ip->out1() later.
        switch (arg) {
          case 0:
            Push(id, p, 1);  // come back when we're done
            id = ip->out();
            goto CheckAndLoop;

          case 1:
            // Finished ip->out(); try ip->out1().
            arg = 0;
            id = ip->out1();
            goto CheckAndLoop;
        }
        // Reached only for an arg other than 0 or 1; name the right opcode.
        LOG(DFATAL) << "Bad arg in kInstAlt: " << arg;
        continue;

      case kInstAltMatch:
        // One opcode is byte range; the other leads to match.
        if (ip->greedy(prog_)) {
          // out1 is the match
          Push(ip->out1(), p, 0);
          id = ip->out1();
          p = end;
          goto CheckAndLoop;
        }
        // out is the match - non-greedy
        Push(ip->out(), end, 0);
        id = ip->out();
        goto CheckAndLoop;

      case kInstByteRange: {
        // c stays -1 at end of text, so the range is tested against -1.
        int c = -1;
        if (p < end)
          c = *p & 0xFF;
        if (ip->Matches(c)) {
          id = ip->out();
          p++;
          goto CheckAndLoop;
        }
        continue;
      }

      case kInstCapture:
        switch (arg) {
          case 0:
            if (0 <= ip->cap() && ip->cap() < ncap_) {
              // Capture p to register, but save old value.
              // The old value travels in the job's p field.
              Push(id, cap_[ip->cap()], 1);  // come back when we're done
              cap_[ip->cap()] = p;
            }
            // Continue on.
            id = ip->out();
            goto CheckAndLoop;
          case 1:
            // Finished ip->out(); restore the old value.
            cap_[ip->cap()] = p;
            continue;
        }
        LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
        continue;

      case kInstEmptyWidth:
        // Give up on this path if any required empty-width flag is not
        // set at position p.
        if (ip->empty() & ~Prog::EmptyFlags(context_, p))
          continue;
        id = ip->out();
        goto CheckAndLoop;

      case kInstNop:
        id = ip->out();
        goto CheckAndLoop;

      case kInstMatch: {
        if (endmatch_ && p != text_.end())
          continue;

        // VLOG(0) << "Found match.";
        // We found a match. If the caller doesn't care
        // where the match is, no point going further.
        if (nsubmatch_ == 0)
          return true;

        // Record best match so far.
        // Only need to check end point, because this entire
        // call is only considering one start position.
        matched = true;
        cap_[1] = p;
        if (submatch_[0].data() == NULL ||
            (longest_ && p > submatch_[0].end())) {
          for (int i = 0; i < nsubmatch_; i++)
            submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
        }

        // If going for first match, we're done.
        if (!longest_)
          return true;

        // If we used the entire text, no longer match is possible.
        if (p == text_.end())
          return true;

        // Otherwise, continue on in hope of a longer match.
        continue;
      }
    }
  }
  return matched;
}
// Search text (within context) for prog_.
// Stores the search parameters, allocates the scratch arrays, and then
// runs TrySearch either once (anchored search) or from each successive
// start position (unanchored search).  Returns whether a match was found.
bool BitState::Search(const StringPiece& text, const StringPiece& context,
                      bool anchored, bool longest,
                      StringPiece* submatch, int nsubmatch) {
  // Search parameters.
  text_ = text;
  context_ = context;
  if (context_.begin() == NULL)
    context_ = text;

  // A program anchored at the start (end) can only match when text begins
  // (ends) exactly where the context does.
  if (prog_->anchor_start() && context_.begin() != text.begin())
    return false;
  if (prog_->anchor_end() && context_.end() != text.end())
    return false;

  anchored_ = anchored || prog_->anchor_start();
  longest_ = longest || prog_->anchor_end();
  endmatch_ = prog_->anchor_end();
  submatch_ = submatch;
  nsubmatch_ = nsubmatch;
  int cleared = 0;
  while (cleared < nsubmatch_) {
    submatch_[cleared] = NULL;
    cleared++;
  }

  // Allocate scratch space: one bit per (instruction, text position) pair,
  // rounded up to whole words.
  nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits;
  visited_ = new uint32[nvisited_];
  memset(visited_, 0, nvisited_*sizeof visited_[0]);
  // VLOG(0) << "nvisited_ = " << nvisited_;

  // Two capture registers per submatch, and never fewer than two.
  ncap_ = (nsubmatch < 1) ? 2 : 2*nsubmatch;
  cap_ = new const char*[ncap_];
  memset(cap_, 0, ncap_*sizeof cap_[0]);

  maxjob_ = 256;
  job_ = new Job[maxjob_];

  if (!anchored_) {
    // Unanchored search, starting from each possible text position.
    // The empty string at the end of the text has to be tried too, so
    // the loop runs while start <= text.end(), not start < text.end().
    // This looks quadratic in the size of the text, but visited_ is not
    // cleared between calls to TrySearch, so no work is duplicated and
    // the total stays linear.
    const char* start = text.begin();
    while (start <= text.end()) {
      cap_[0] = start;
      if (TrySearch(prog_->start(), start))  // Match must be leftmost; done.
        return true;
      start++;
    }
    return false;
  }

  // Anchored search must start at text.begin().
  cap_[0] = text.begin();
  return TrySearch(prog_->start(), text.begin());
}
// Bit-state search.
bool Prog::SearchBitState(const StringPiece& text,
const StringPiece& context,
Anchor anchor,
MatchKind kind,
StringPiece* match,
int nmatch) {
// If full match, we ask for an anchored longest match
// and then check that match[0] == text.
// So make sure match[0] exists.
StringPiece sp0;
if (kind == kFullMatch) {
anchor = kAnchored;
if (nmatch < 1) {
match = &sp0;
nmatch = 1;
}
}
// Run the search.
BitState b(this);
bool anchored = anchor == kAnchored;
bool longest = kind != kFirstMatch;
if (!b.Search(text, context, anchored, longest, match, nmatch))
return false;
if (kind == kFullMatch && match[0].end() != text.end())
return false;
return true;
}
} // namespace re2
此差异已折叠。
此差异已折叠。
// Copyright 2009 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <string>
#include "util/util.h"
#include "re2/filtered_re2.h"
#include "re2/prefilter.h"
#include "re2/prefilter_tree.h"
namespace re2 {
// Starts out uncompiled, with no regexps and a freshly allocated
// PrefilterTree that the destructor deletes.
FilteredRE2::FilteredRE2()
: compiled_(false),
prefilter_tree_(new PrefilterTree()) {
}
// Deletes every RE2 object stored by Add, then the prefilter tree.
FilteredRE2::~FilteredRE2() {
  for (vector<RE2*>::iterator it = re2_vec_.begin();
       it != re2_vec_.end(); ++it) {
    delete *it;
  }
  delete prefilter_tree_;
}
// Compiles pattern with options and, on success, appends the resulting RE2
// to re2_vec_ and stores its index in *id.
// Returns the RE2's error code.  On failure the RE2 is deleted, *id is
// left untouched, and the failure is logged if options.log_errors() is set.
RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
                                const RE2::Options& options, int* id) {
  RE2* re = new RE2(pattern, options);
  RE2::ErrorCode code = re->error_code();
  if (!re->ok()) {
    if (options.log_errors()) {
      // Log the pattern text; streaming the RE2* itself would only print
      // the pointer's address.
      LOG(ERROR) << "Couldn't compile regular expression, skipping: "
                 << re->pattern() << " due to error " << re->error();
    }
    delete re;
  } else {
    *id = re2_vec_.size();
    re2_vec_.push_back(re);
  }
  return code;
}
// Builds the prefilter tree from every regexp added so far and stores the
// tree's atoms in *atoms.  Does nothing except log when Compile has
// already run or when no regexps have been added.
void FilteredRE2::Compile(vector<string>* atoms) {
  const bool nothing_to_do = compiled_ || re2_vec_.empty();
  if (nothing_to_do) {
    LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size();
    return;
  }

  // Feed one prefilter per regexp into the tree, in the order added.
  for (vector<RE2*>::const_iterator it = re2_vec_.begin();
       it != re2_vec_.end(); ++it) {
    prefilter_tree_->Add(Prefilter::FromRE2(*it));
  }

  atoms->clear();
  prefilter_tree_->Compile(atoms);
  compiled_ = true;
}
// Tries every regexp in the order it was added, with no prefiltering.
// Returns the index of the first one that partially matches text, or -1.
int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
  int index = 0;
  while (index < re2_vec_.size()) {
    if (RE2::PartialMatch(text, *re2_vec_[index]))
      return index;
    index++;
  }
  return -1;
}
int FilteredRE2::FirstMatch(const StringPiece& text,
const vector<int>& atoms) const {
if (!compiled_) {
LOG(DFATAL) << "FirstMatch called before Compile";
return -1;
}
vector<int> regexps;
prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
for (int i = 0; i < regexps.size(); i++)
if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
return regexps[i];
return -1;
}
bool FilteredRE2::AllMatches(
const StringPiece& text,
const vector<int>& atoms,
vector<int>* matching_regexps) const {
matching_regexps->clear();
vector<int> regexps;
prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
for (int i = 0; i < regexps.size(); i++)
if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
matching_regexps->push_back(regexps[i]);
return !matching_regexps->empty();
}
// Forwards to PrefilterTree::RegexpsGivenStrings: given the atoms in
// matched_atoms, the tree writes its answer into *passed_regexps.
void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms,
vector<int>* passed_regexps) {
prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);
}
// Forwards to PrefilterTree::PrintPrefilter for the regexp with index
// regexpid.
void FilteredRE2::PrintPrefilter(int regexpid) {
prefilter_tree_->PrintPrefilter(regexpid);
}
} // namespace re2
// Copyright 2009 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
// It provides a prefilter mechanism that helps in cutting down the
// number of regexps that need to be actually searched.
//
// By design, it does not include a string matching engine. This is to
// allow the user of the class to use their favorite string match
// engine. The overall flow is: Add all the regexps using Add, then
// Compile the FilteredRE2. The compile returns strings that need to
// be matched. Note that all returned strings are lowercase. For
// applying regexps to a search text, the caller does the string
// matching using the strings returned. When doing the string match,
// note that the caller has to do that on lower cased version of the
// search text. Then call FirstMatch or AllMatches with a vector of
// indices of strings that were found in the text to get the actual
// regexp matches.
#ifndef RE2_FILTERED_RE2_H_
#define RE2_FILTERED_RE2_H_
#include <vector>
#include "re2/re2.h"
namespace re2 {
using std::vector;
class PrefilterTree;
// Wrapper around a set of RE2 regexps plus a prefilter tree of string
// atoms.  Typical use: Add every regexp, call Compile once, then call
// FirstMatch or AllMatches with the indices of the atoms found in a text.
// Copying is disabled.
class FilteredRE2 {
public:
FilteredRE2();
~FilteredRE2();
// Uses RE2 constructor to create a RE2 object (re). Returns
// re->error_code(). If error_code is other than NoError, then re is
// deleted and not added to re2_vec_. On success *id receives the index
// of the new regexp.
RE2::ErrorCode Add(const StringPiece& pattern,
const RE2::Options& options,
int *id);
// Prepares the regexps added by Add for filtering. Returns a set
// of strings that the caller should check for in candidate texts.
// The returned strings are lowercased. When doing string matching,
// the search text should be lowercased first to find matching
// strings from the set of strings returned by Compile. Call after
// all Add calls are done.
void Compile(vector<string>* strings_to_match);
// Returns the index of the first matching regexp.
// Returns -1 on no match. Can be called prior to Compile.
// Does not do any filtering: simply tries to Match the
// regexps in a loop.
int SlowFirstMatch(const StringPiece& text) const;
// Returns the index of the first matching regexp.
// Returns -1 on no match. Compile has to be called before
// calling this.
int FirstMatch(const StringPiece& text,
const vector<int>& atoms) const;
// Returns the indices of all matching regexps, after first clearing
// matching_regexps. The return value is true if any regexp matched.
bool AllMatches(const StringPiece& text,
const vector<int>& atoms,
vector<int>* matching_regexps) const;
// The number of regexps added.
int NumRegexps() const { return re2_vec_.size(); }
private:
// Get the individual RE2 objects. Useful for testing.
// regexpid is used as an index into re2_vec_ without a bounds check.
RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; }
// Print prefilter.
void PrintPrefilter(int regexpid);
// Useful for testing and debugging.
void RegexpsGivenStrings(const vector<int>& matched_atoms,
vector<int>* passed_regexps);
// All the regexps in the FilteredRE2.
vector<RE2*> re2_vec_;
// Has the FilteredRE2 been compiled using Compile()
bool compiled_;
// An AND-OR tree of string atoms used for filtering regexps.
PrefilterTree* prefilter_tree_;
// Copy construction and assignment are declared private and left
// undefined here, in place of the macro below.
//DISALLOW_EVIL_CONSTRUCTORS(FilteredRE2);
FilteredRE2(const FilteredRE2&);
void operator=(const FilteredRE2&);
};
} // namespace re2
#endif // RE2_FILTERED_RE2_H_
// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Determine whether this library should match PCRE exactly
// for a particular Regexp. (If so, the testing framework can
// check that it does.)
//
// This library matches PCRE except in these cases:
// * the regexp contains a repetition of an empty string,
// like (a*)* or (a*)+. In this case, PCRE will treat
// the repetition sequence as ending with an empty string,
// while this library does not.
// * Perl and PCRE differ on whether \v matches \n.
// For historical reasons, this library implements the Perl behavior.
// * Perl and PCRE allow $ in one-line mode to match either the very
// end of the text or just before a \n at the end of the text.
// This library requires it to match only the end of the text.
// * Similarly, Perl and PCRE do not allow ^ in multi-line mode to
// match the end of the text if the last character is a \n.
// This library does allow it.
//
// Regexp::MimicsPCRE checks for any of these conditions.
#include "util/util.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
namespace re2 {
// Returns whether re might match an empty string.
static bool CanBeEmptyString(Regexp *re);
// Walker class to compute whether library handles a regexp
// exactly as PCRE would.  See comment at top for conditions.
// The bool computed for each node is true when that subexpression is
// handled exactly as PCRE would handle it.
class PCREWalker : public Regexp::Walker<bool> {
 public:
  PCREWalker() {}

  // Combines the children's results with checks on re itself.
  bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
                 int nchild_args);

  // Logs and returns its argument unchanged.
  bool ShortVisit(Regexp* re, bool a) {
    // Should never be called: we use Walk not WalkExponential.
    // Report this class's own name so the log points at the right walker.
    LOG(DFATAL) << "PCREWalker::ShortVisit called";
    return a;
  }
};
// Called after visiting each of re's children; child_args holds, for each
// child, whether this library mimics PCRE on that subexpression.
// Returns false if any child already failed or if re itself is one of the
// constructs on which the library differs from PCRE; true otherwise.
bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
                           bool* child_args, int nchild_args) {
  // A failing child makes the parent fail too.
  int child = 0;
  while (child < nchild_args) {
    if (!child_args[child])
      return false;
    child++;
  }

  // Innocent until one of the cases below says otherwise.
  bool mimics = true;
  switch (re->op()) {
    // Repetition of something that can be the empty string.
    case kRegexpStar:
    case kRegexpPlus:
    case kRegexpQuest:
      mimics = !CanBeEmptyString(re->sub()[0]);
      break;

    // Same, but only for repeats whose max() is -1.
    case kRegexpRepeat:
      mimics = !(re->max() == -1 && CanBeEmptyString(re->sub()[0]));
      break;

    // \v
    case kRegexpLiteral:
      mimics = (re->rune() != '\v');
      break;

    // $ in single-line mode.
    case kRegexpEndText:
    case kRegexpEmptyMatch:
      mimics = !(re->parse_flags() & Regexp::WasDollar);
      break;

    // ^ in multi-line mode.
    // No condition: in single-line mode ^ becomes kRegexpBeginText.
    case kRegexpBeginLine:
      mimics = false;
      break;

    default:
      break;
  }
  return mimics;
}
// Returns whether this regexp's behavior will mimic PCRE's exactly.
bool Regexp::MimicsPCRE() {
PCREWalker w;
return w.Walk(this, true);
}
// Walker class to compute whether a Regexp can match an empty string.
// It is okay to overestimate. For example, \b\B cannot match an empty
// string, because \b and \B are mutually exclusive, but this isn't
// that smart and will say it can. Spurious empty strings
// will reduce the number of regexps we sanity check against PCRE,
// but they won't break anything.
class EmptyStringWalker : public Regexp::Walker<bool> {
public:
EmptyStringWalker() { }
// Decides, from re's operator and its children's results, whether re
// can match an empty string.
bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
bool* child_args, int nchild_args);
// Logs and returns its argument unchanged.
bool ShortVisit(Regexp* re, bool a) {
// Should never be called: we use Walk not WalkExponential.
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
return a;
}
private:
DISALLOW_EVIL_CONSTRUCTORS(EmptyStringWalker);
};
// Called after visiting re's children.  child_args[i] says whether child i
// can match an empty string; the result says the same about re itself.
// Operators not listed in the switch are reported as unable to match empty.
bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
                                  bool* child_args, int nchild_args) {
  switch (re->op()) {
    // Never empty.
    case kRegexpNoMatch:
    case kRegexpLiteral:
    case kRegexpAnyChar:
    case kRegexpAnyByte:
    case kRegexpCharClass:
    case kRegexpLiteralString:
      return false;

    // Always empty (the assertions: when they match at all), or a
    // repetition that can always be empty.
    case kRegexpEmptyMatch:
    case kRegexpBeginLine:
    case kRegexpEndLine:
    case kRegexpNoWordBoundary:
    case kRegexpWordBoundary:
    case kRegexpBeginText:
    case kRegexpEndText:
    case kRegexpStar:
    case kRegexpQuest:
    case kRegexpHaveMatch:
      return true;

    // A concatenation can be empty only if every child can.
    case kRegexpConcat: {
      bool all_can = true;
      for (int i = 0; all_can && i < nchild_args; i++)
        all_can = child_args[i];
      return all_can;
    }

    // An alternation can be empty if any child can.
    case kRegexpAlternate: {
      bool any_can = false;
      for (int i = 0; !any_can && i < nchild_args; i++)
        any_can = child_args[i];
      return any_can;
    }

    // These can be empty exactly when their only child can.
    case kRegexpPlus:
    case kRegexpCapture:
      return child_args[0];

    // A repeat can be empty if its child can or if its minimum count is 0.
    case kRegexpRepeat:
      return child_args[0] || re->min() == 0;
  }
  return false;
}
// Returns whether re can match an empty string, as decided by walking re
// with an EmptyStringWalker (which may overestimate).
static bool CanBeEmptyString(Regexp* re) {
  EmptyStringWalker walker;
  const bool can_be_empty = walker.Walk(re, true);
  return can_be_empty;
}
} // namespace re2
此差异已折叠。
此差异已折叠。
此差异已折叠。
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc
#include "re2/unicode_groups.h"
namespace re2 {
// Range tables for the Perl character classes.  Each entry is a pair of
// code points (presumably an inclusive lo/hi range; confirm against the
// URange16 definition in unicode_groups.h).
static const URange16 code1[] = { /* \d */
{ 0x30, 0x39 },
};
static const URange16 code2[] = { /* \s */
{ 0x9, 0xa },
{ 0xc, 0xd },
{ 0x20, 0x20 },
};
static const URange16 code3[] = { /* \w */
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
{ 0x61, 0x7a },
};
// One entry per Perl class name: { name, sign, range table, number of
// ranges in that table }.  The sign is +1 for the lower-case name and -1
// for the upper-case name that shares the same table (presumably meaning
// "negated"; confirm against the UGroup definition).
const UGroup perl_groups[] = {
{ "\\d", +1, code1, 1 },
{ "\\D", -1, code1, 1 },
{ "\\s", +1, code2, 3 },
{ "\\S", -1, code2, 3 },
{ "\\w", +1, code3, 4 },
{ "\\W", -1, code3, 4 },
};
// Number of entries in perl_groups; must be kept in step with the table.
const int num_perl_groups = 6;
// Range tables for the POSIX bracket classes.  Each entry is a pair of
// code points (presumably an inclusive lo/hi range; confirm against the
// URange16 definition in unicode_groups.h).
static const URange16 code4[] = { /* [:alnum:] */
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x61, 0x7a },
};
static const URange16 code5[] = { /* [:alpha:] */
{ 0x41, 0x5a },
{ 0x61, 0x7a },
};
static const URange16 code6[] = { /* [:ascii:] */
{ 0x0, 0x7f },
};
static const URange16 code7[] = { /* [:blank:] */
{ 0x9, 0x9 },
{ 0x20, 0x20 },
};
static const URange16 code8[] = { /* [:cntrl:] */
{ 0x0, 0x1f },
{ 0x7f, 0x7f },
};
static const URange16 code9[] = { /* [:digit:] */
{ 0x30, 0x39 },
};
static const URange16 code10[] = { /* [:graph:] */
{ 0x21, 0x7e },
};
static const URange16 code11[] = { /* [:lower:] */
{ 0x61, 0x7a },
};
static const URange16 code12[] = { /* [:print:] */
{ 0x20, 0x7e },
};
static const URange16 code13[] = { /* [:punct:] */
{ 0x21, 0x2f },
{ 0x3a, 0x40 },
{ 0x5b, 0x60 },
{ 0x7b, 0x7e },
};
static const URange16 code14[] = { /* [:space:] */
{ 0x9, 0xd },
{ 0x20, 0x20 },
};
static const URange16 code15[] = { /* [:upper:] */
{ 0x41, 0x5a },
};
static const URange16 code16[] = { /* [:word:] */
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
{ 0x61, 0x7a },
};
static const URange16 code17[] = { /* [:xdigit:] */
{ 0x30, 0x39 },
{ 0x41, 0x46 },
{ 0x61, 0x66 },
};
// One entry per POSIX class name: { name, sign, range table, number of
// ranges in that table }.  The sign is +1 for [:name:] and -1 for
// [:^name:], which shares the same table (presumably meaning "negated";
// confirm against the UGroup definition).
const UGroup posix_groups[] = {
{ "[:alnum:]", +1, code4, 3 },
{ "[:^alnum:]", -1, code4, 3 },
{ "[:alpha:]", +1, code5, 2 },
{ "[:^alpha:]", -1, code5, 2 },
{ "[:ascii:]", +1, code6, 1 },
{ "[:^ascii:]", -1, code6, 1 },
{ "[:blank:]", +1, code7, 2 },
{ "[:^blank:]", -1, code7, 2 },
{ "[:cntrl:]", +1, code8, 2 },
{ "[:^cntrl:]", -1, code8, 2 },
{ "[:digit:]", +1, code9, 1 },
{ "[:^digit:]", -1, code9, 1 },
{ "[:graph:]", +1, code10, 1 },
{ "[:^graph:]", -1, code10, 1 },
{ "[:lower:]", +1, code11, 1 },
{ "[:^lower:]", -1, code11, 1 },
{ "[:print:]", +1, code12, 1 },
{ "[:^print:]", -1, code12, 1 },
{ "[:punct:]", +1, code13, 4 },
{ "[:^punct:]", -1, code13, 4 },
{ "[:space:]", +1, code14, 2 },
{ "[:^space:]", -1, code14, 2 },
{ "[:upper:]", +1, code15, 1 },
{ "[:^upper:]", -1, code15, 1 },
{ "[:word:]", +1, code16, 4 },
{ "[:^word:]", -1, code16, 4 },
{ "[:xdigit:]", +1, code17, 3 },
{ "[:^xdigit:]", -1, code17, 3 },
};
// Number of entries in posix_groups; must be kept in step with the table.
const int num_posix_groups = 28;
} // namespace re2
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
// Copyright 2009 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifndef RE2_UTIL_BENCHMARK_H__
#define RE2_UTIL_BENCHMARK_H__
namespace testing {

// Description of one benchmark.  Both constructors reset every field via
// Clear(), fill in the function (and range) they were given, and then
// call Register(), which is declared here and defined elsewhere.
struct Benchmark {
  const char* name;       // benchmark name
  void (*fn)(int);        // set by the two-argument constructor, else 0
  void (*fnr)(int, int);  // set by the four-argument constructor, else 0
  int lo;                 // range bounds given to the four-argument constructor
  int hi;
  int threadlo;           // bounds stored by ThreadRange()
  int threadhi;

  void Register();

  Benchmark(const char* name, void (*f)(int)) {
    Clear(name);
    fn = f;
    Register();
  }

  Benchmark(const char* name, void (*f)(int, int), int l, int h) {
    Clear(name);
    fnr = f;
    lo = l;
    hi = h;
    Register();
  }

  // Stores n as the name and zeroes every other field.
  void Clear(const char* n) {
    name = n;
    fn = 0;
    fnr = 0;
    lo = hi = 0;
    threadlo = threadhi = 0;
  }

  // Records the thread bounds and returns this so calls can be chained.
  Benchmark* ThreadRange(int lo, int hi) {
    threadlo = lo;
    threadhi = hi;
    return this;
  }
};

}  // namespace testing
// Benchmark support functions.  Only the declarations live in this header;
// the definitions are not visible here (presumably they are supplied by
// the benchmark driver; confirm).
void SetBenchmarkBytesProcessed(long long);
void StopBenchmarkTiming();
void StartBenchmarkTiming();
void BenchmarkMemoryUsage();
void SetBenchmarkItemsProcessed(int);
int NumCPUs();
// BENCHMARK(f) defines a pointer variable named _benchmark_<f> that is
// initialized with a heap-allocated testing::Benchmark whose name is the
// spelling of f.  Constructing the Benchmark calls its Register().
#define BENCHMARK(f) \
::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f))
// BENCHMARK_RANGE(f, lo, hi) does the same for a two-argument benchmark
// function, passing lo and hi through to the Benchmark constructor.
#define BENCHMARK_RANGE(f, lo, hi) \
::testing::Benchmark* _benchmark_##f = \
(new ::testing::Benchmark(#f, f, lo, hi))
#endif // RE2_UTIL_BENCHMARK_H__
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册