diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2019-09-25 15:36:14 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2019-09-25 15:36:14 +0900 |
commit | 25519175010718a309b1616cfae68d75cdb1f046 (patch) | |
tree | 04a3edb2dc219d9c1c84f51432f22745ffd42099 | |
parent | de99325ffe35c346d71d37e5f60b67e29313c1f2 (diff) | |
download | re2-25519175010718a309b1616cfae68d75cdb1f046.tar.gz re2-25519175010718a309b1616cfae68d75cdb1f046.tar.bz2 re2-25519175010718a309b1616cfae68d75cdb1f046.zip |
Imported Upstream version 20190401 (upstream/20190401)
55 files changed, 892 insertions, 755 deletions
diff --git a/re2/bitmap256.h b/re2/bitmap256.h index 1abae99..f649b4c 100644 --- a/re2/bitmap256.h +++ b/re2/bitmap256.h @@ -19,6 +19,11 @@ namespace re2 { class Bitmap256 { public: Bitmap256() { + Clear(); + } + + // Clears all of the bits. + void Clear() { memset(words_, 0, sizeof words_); } diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 6e1b44c..6f045b1 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -5,10 +5,10 @@ // Tested by search_test.cc, exhaustive_test.cc, tester.cc // Prog::SearchBitState is a regular expression search with submatch -// tracking for small regular expressions and texts. Like -// testing/backtrack.cc, it allocates a bit vector with (length of -// text) * (length of prog) bits, to make sure it never explores the -// same (character position, instruction) state multiple times. This +// tracking for small regular expressions and texts. Similarly to +// testing/backtrack.cc, it allocates a bitmap with (count of +// lists) * (length of prog) bits to make sure it never explores the +// same (instruction list, character position) multiple times. This // limits the search to run in time linear in the length of the text. 
// // Unlike testing/backtrack.cc, SearchBitState is not recursive @@ -20,6 +20,7 @@ #include <stddef.h> #include <stdint.h> #include <string.h> +#include <limits> #include <utility> #include "util/logging.h" @@ -31,7 +32,7 @@ namespace re2 { struct Job { int id; - int arg; + int rle; // run length encoding const char* p; }; @@ -47,7 +48,7 @@ class BitState { private: inline bool ShouldVisit(int id, const char* p); - void Push(int id, const char* p, int arg); + void Push(int id, const char* p); void GrowStack(); bool TrySearch(int id, const char* p); @@ -63,7 +64,7 @@ class BitState { // Search state static const int VisitedBits = 32; - PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited + PODArray<uint32_t> visited_; // bitmap: (list ID, char*) pairs visited PODArray<const char*> cap_; // capture registers PODArray<Job> job_; // stack of text positions to explore int njob_; // stack size @@ -79,11 +80,12 @@ BitState::BitState(Prog* prog) njob_(0) { } -// Should the search visit the pair ip, p? +// Given id, which *must* be a list head, we can look up its list ID. +// Then the question is: Should the search visit the (list ID, p) pair? // If so, remember that it was visited so that the next time, // we don't repeat the visit. bool BitState::ShouldVisit(int id, const char* p) { - int n = id * static_cast<int>(text_.size()+1) + + int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) + static_cast<int>(p-text_.begin()); if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1)))) return false; @@ -98,8 +100,8 @@ void BitState::GrowStack() { job_ = std::move(tmp); } -// Push the triple (id, p, arg) onto the stack, growing it if necessary. -void BitState::Push(int id, const char* p, int arg) { +// Push (id, p) onto the stack, growing it if necessary. 
+void BitState::Push(int id, const char* p) { if (njob_ >= job_.size()) { GrowStack(); if (njob_ >= job_.size()) { @@ -109,93 +111,81 @@ void BitState::Push(int id, const char* p, int arg) { return; } } - int op = prog_->inst(id)->opcode(); - if (op == kInstFail) - return; - - // Only check ShouldVisit when arg == 0. - // When arg > 0, we are continuing a previous visit. - if (arg == 0 && !ShouldVisit(id, p)) - return; - - Job* j = &job_[njob_++]; - j->id = id; - j->p = p; - j->arg = arg; + + // If id < 0, it's undoing a Capture, + // so we mustn't interfere with that. + if (id >= 0 && njob_ > 0) { + Job* top = &job_[njob_-1]; + if (id == top->id && + p == top->p + top->rle + 1 && + top->rle < std::numeric_limits<int>::max()) { + ++top->rle; + return; + } + } + + Job* top = &job_[njob_++]; + top->id = id; + top->rle = 0; + top->p = p; } // Try a search from instruction id0 in state p0. // Return whether it succeeded. bool BitState::TrySearch(int id0, const char* p0) { bool matched = false; - bool inaltmatch = false; const char* end = text_.end(); njob_ = 0; - Push(id0, p0, 0); + // Push() no longer checks ShouldVisit(), + // so we must perform the check ourselves. + if (ShouldVisit(id0, p0)) + Push(id0, p0); while (njob_ > 0) { // Pop job off stack. --njob_; int id = job_[njob_].id; + int& rle = job_[njob_].rle; const char* p = job_[njob_].p; - int arg = job_[njob_].arg; - - // Optimization: rather than push and pop, - // code that is going to Push and continue - // the loop simply updates ip, p, and arg - // and jumps to CheckAndLoop. We have to - // do the ShouldVisit check that Push - // would have, but we avoid the stack - // manipulation. - if (0) { - Next: - // If the Match of a non-greedy AltMatch failed, - // we stop ourselves from trying the ByteRange, - // which would steer us off the short circuit. 
- if (prog_->inst(id)->last() || inaltmatch) - continue; - id++; - - CheckAndLoop: - if (!ShouldVisit(id, p)) - continue; + + if (id < 0) { + // Undo the Capture. + cap_[prog_->inst(-id)->cap()] = p; + continue; + } + + if (rle > 0) { + p += rle; + // Revivify job on stack. + --rle; + ++njob_; } - // Visit ip, p. + Loop: + // Visit id, p. Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { default: - LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg; + LOG(DFATAL) << "Unexpected opcode: " << ip->opcode(); return false; case kInstFail: - continue; + break; case kInstAltMatch: - switch (arg) { - case 0: - inaltmatch = true; - Push(id, p, 1); // come back when we're done - - // One opcode is ByteRange; the other leads to Match - // (possibly via Nop or Capture). - if (ip->greedy(prog_)) { - // out1 is the match - Push(ip->out1(), p, 0); - id = ip->out1(); - p = end; - goto CheckAndLoop; - } - // out is the match - non-greedy - Push(ip->out(), end, 0); - id = ip->out(); - goto CheckAndLoop; - - case 1: - inaltmatch = false; - continue; + if (ip->greedy(prog_)) { + // out1 is the Match instruction. + id = ip->out1(); + p = end; + goto Loop; } - LOG(DFATAL) << "Bad arg in kInstAltMatch: " << arg; - continue; + if (longest_) { + // ip must be non-greedy... + // out is the Match instruction. + id = ip->out(); + p = end; + goto Loop; + } + goto Next; case kInstByteRange: { int c = -1; @@ -204,54 +194,50 @@ bool BitState::TrySearch(int id0, const char* p0) { if (!ip->Matches(c)) goto Next; - if (!ip->last()) - Push(id+1, p, 0); // try the next when we're done + if (ip->hint() != 0) + Push(id+ip->hint(), p); // try the next when we're done id = ip->out(); p++; goto CheckAndLoop; } case kInstCapture: - switch (arg) { - case 0: - if (!ip->last()) - Push(id+1, p, 0); // try the next when we're done - - if (0 <= ip->cap() && ip->cap() < cap_.size()) { - // Capture p to register, but save old value. 
- Push(id, cap_[ip->cap()], 1); // come back when we're done - cap_[ip->cap()] = p; - } - - // Continue on. - id = ip->out(); - goto CheckAndLoop; - - case 1: - // Finished ip->out(); restore the old value. - cap_[ip->cap()] = p; - continue; + if (!ip->last()) + Push(id+1, p); // try the next when we're done + + if (0 <= ip->cap() && ip->cap() < cap_.size()) { + // Capture p to register, but save old value first. + Push(-id, cap_[ip->cap()]); // undo when we're done + cap_[ip->cap()] = p; } - LOG(DFATAL) << "Bad arg in kInstCapture: " << arg; - continue; + + id = ip->out(); + goto CheckAndLoop; case kInstEmptyWidth: if (ip->empty() & ~Prog::EmptyFlags(context_, p)) goto Next; if (!ip->last()) - Push(id+1, p, 0); // try the next when we're done + Push(id+1, p); // try the next when we're done id = ip->out(); goto CheckAndLoop; case kInstNop: if (!ip->last()) - Push(id+1, p, 0); // try the next when we're done + Push(id+1, p); // try the next when we're done id = ip->out(); - goto CheckAndLoop; + + CheckAndLoop: + // Sanity check: id is the head of its list, which must + // be the case if id-1 is the last of *its* list. :) + DCHECK(id == 0 || prog_->inst(id-1)->last()); + if (ShouldVisit(id, p)) + goto Loop; + break; case kInstMatch: { - if (endmatch_ && p != text_.end()) + if (endmatch_ && p != end) goto Next; // We found a match. If the caller doesn't care @@ -277,11 +263,18 @@ bool BitState::TrySearch(int id0, const char* p0) { return true; // If we used the entire text, no longer match is possible. - if (p == text_.end()) + if (p == end) return true; // Otherwise, continue on in hope of a longer match. - goto Next; + // Note the absence of the ShouldVisit() check here + // due to execution remaining in the same list. + Next: + if (!ip->last()) { + id++; + goto Loop; + } + break; } } } @@ -310,7 +303,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, submatch_[i] = StringPiece(); // Allocate scratch space. 
- int nvisited = prog_->size() * static_cast<int>(text.size()+1); + int nvisited = prog_->list_count() * static_cast<int>(text.size()+1); nvisited = (nvisited + VisitedBits-1) / VisitedBits; visited_ = PODArray<uint32_t>(nvisited); memset(visited_.data(), 0, nvisited*sizeof visited_[0]); @@ -321,8 +314,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, cap_ = PODArray<const char*>(ncap); memset(cap_.data(), 0, ncap*sizeof cap_[0]); - // When sizeof(Job) == 16, we start with a nice round 4KiB. :) - job_ = PODArray<Job>(256); + // When sizeof(Job) == 16, we start with a nice round 1KiB. :) + job_ = PODArray<Job>(64); // Anchored search must start at text.begin(). if (anchored_) { diff --git a/re2/compile.cc b/re2/compile.cc index 3f8e0cc..ab18cef 100644 --- a/re2/compile.cc +++ b/re2/compile.cc @@ -1202,7 +1202,10 @@ Prog* Compiler::Finish() { if (max_mem_ <= 0) { prog_->set_dfa_mem(1<<20); } else { - int64_t m = max_mem_ - sizeof(Prog) - prog_->size_*sizeof(Prog::Inst); + int64_t m = max_mem_ - sizeof(Prog); + m -= prog_->size_*sizeof(Prog::Inst); // account for inst_ + if (prog_->CanBitState()) + m -= prog_->size_*sizeof(uint16_t); // account for list_heads_ if (m < 0) m = 0; prog_->set_dfa_mem(m); @@ -106,7 +106,7 @@ class DFA { // Computes min and max for matching strings. Won't return strings // bigger than maxlen. - bool PossibleMatchRange(string* min, string* max, int maxlen); + bool PossibleMatchRange(std::string* min, std::string* max, int maxlen); // These data structures are logically private, but C++ makes it too // difficult to mark them as such. @@ -241,10 +241,10 @@ class DFA { void AddToQueue(Workq* q, int id, uint32_t flag); // For debugging, returns a text representation of State. - static string DumpState(State* state); + static std::string DumpState(State* state); // For debugging, returns a text representation of a Workq. 
- static string DumpWorkq(Workq* q); + static std::string DumpWorkq(Workq* q); // Search parameters struct SearchParams { @@ -505,8 +505,8 @@ DFA::~DFA() { // Debugging printouts // For debugging, returns a string representation of the work queue. -string DFA::DumpWorkq(Workq* q) { - string s; +std::string DFA::DumpWorkq(Workq* q) { + std::string s; const char* sep = ""; for (Workq::iterator it = q->begin(); it != q->end(); ++it) { if (q->is_mark(*it)) { @@ -521,14 +521,14 @@ string DFA::DumpWorkq(Workq* q) { } // For debugging, returns a string representation of the state. -string DFA::DumpState(State* state) { +std::string DFA::DumpState(State* state) { if (state == NULL) return "_"; if (state == DeadState) return "X"; if (state == FullMatchState) return "*"; - string s; + std::string s; const char* sep = ""; StringAppendF(&s, "(%p)", state); for (int i = 0; i < state->ninst_; i++) { @@ -1769,7 +1769,7 @@ bool DFA::Search(const StringPiece& text, if (ExtraDebug) { fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str()); fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n", - string(text).c_str(), anchored, want_earliest_match, + std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_); } @@ -1995,7 +1995,7 @@ void Prog::TEST_dfa_should_bail_when_slow(bool b) { // Computes min and max for matching string. // Won't return strings bigger than maxlen. -bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { +bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { if (!ok()) return false; @@ -2132,7 +2132,7 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { } // PossibleMatchRange for a Prog. -bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) { +bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // Have to use dfa_longest_ to get all strings for full matches. // For example, (a|aa) never matches aa in first-match mode. 
return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen); diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc index 12f638a..e5d8de5 100644 --- a/re2/filtered_re2.cc +++ b/re2/filtered_re2.cc @@ -49,7 +49,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, return code; } -void FilteredRE2::Compile(std::vector<string>* atoms) { +void FilteredRE2::Compile(std::vector<std::string>* atoms) { if (compiled_) { LOG(ERROR) << "Compile called already."; return; diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h index b1317cc..965b5c9 100644 --- a/re2/filtered_re2.h +++ b/re2/filtered_re2.h @@ -49,7 +49,7 @@ class FilteredRE2 { // the search text should be lowercased first to find matching // strings from the set of strings returned by Compile. Call after // all Add calls are done. - void Compile(std::vector<string>* strings_to_match); + void Compile(std::vector<std::string>* strings_to_match); // Returns the index of the first matching regexp. // Returns -1 on no match. Can be called prior to Compile. diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc index 5e5d324..2faebe0 100644 --- a/re2/fuzzing/re2_fuzzer.cc +++ b/re2/fuzzing/re2_fuzzer.cc @@ -13,7 +13,6 @@ #include "re2/re2.h" using re2::StringPiece; -using std::string; // NOT static, NOT signed. uint8_t dummy = 0; @@ -87,12 +86,12 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) { RE2::FindAndConsume(&sp, re, &d); } - string s = string(text); + std::string s = std::string(text); RE2::Replace(&s, re, ""); - s = string(text); // Reset. + s = std::string(text); // Reset. RE2::GlobalReplace(&s, re, ""); - string min, max; + std::string min, max; re.PossibleMatchRange(&min, &max, /*maxlen=*/9); // Exercise some other API functionality. @@ -105,7 +105,7 @@ class NFA { const char* p); // Returns text version of capture information, for debugging. 
- string FormatCapture(const char** capture); + std::string FormatCapture(const char** capture); inline void CopyCapture(const char** dst, const char** src); @@ -288,15 +288,25 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, case kInstByteRange: if (!ip->Matches(c)) goto Next; - FALLTHROUGH_INTENDED; - case kInstMatch: // Save state; will pick up at next byte. t = Incref(t0); *tp = t; if (ExtraDebug) fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str()); + if (ip->hint() == 0) + break; + a = {id+ip->hint(), NULL}; + goto Loop; + + case kInstMatch: + // Save state; will pick up at next byte. + t = Incref(t0); + *tp = t; + if (ExtraDebug) + fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str()); + Next: if (ip->last()) break; @@ -415,9 +425,8 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, return 0; } -string NFA::FormatCapture(const char** capture) { - string s; - +std::string NFA::FormatCapture(const char** capture) { + std::string s; for (int i = 0; i < ncapture_; i+=2) { if (capture[i] == NULL) StringAppendF(&s, "(?,?)"); @@ -482,7 +491,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, if (ExtraDebug) fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", - string(text).c_str(), string(context).c_str(), anchored, longest); + std::string(text).c_str(), std::string(context).c_str(), anchored, + longest); // Set up search. 
Threadq* runq = &q0_; diff --git a/re2/onepass.cc b/re2/onepass.cc index edd2c48..e04c56d 100644 --- a/re2/onepass.cc +++ b/re2/onepass.cc @@ -590,7 +590,7 @@ bool Prog::IsOnePass() { if (nodebyid[i] != -1) idmap[nodebyid[i]] = i; - string dump; + std::string dump; for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { int id = *it; int nodeindex = nodebyid[id]; diff --git a/re2/parse.cc b/re2/parse.cc index c8dea7e..f0a1387 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -610,7 +610,7 @@ bool Regexp::ParseState::DoLeftParen(const StringPiece& name) { Regexp* re = new Regexp(kLeftParen, flags_); re->cap_ = ++ncap_; if (name.data() != NULL) - re->name_ = new string(name); + re->name_ = new std::string(name); return PushRegexp(re); } @@ -1790,7 +1790,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, // Look up the group in the ICU Unicode data. Because ICU provides full // Unicode properties support, this could be more than a lookup by name. ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8( - string("\\p{") + string(name) + string("}")); + std::string("\\p{") + std::string(name) + std::string("}")); UErrorCode uerr = U_ZERO_ERROR; ::icu::UnicodeSet uset(ustr, uerr); if (U_FAILURE(uerr)) { @@ -2181,7 +2181,7 @@ BadPerlOp: // into UTF8 encoding in string. // Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is // deprecated and because it rejects code points 0x80-0x9F. -void ConvertLatin1ToUTF8(const StringPiece& latin1, string* utf) { +void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) { char buf[UTFmax]; utf->clear(); @@ -2208,7 +2208,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, // Convert regexp to UTF-8 (easier on the rest of the parser). 
if (global_flags & Latin1) { - string* tmp = new string; + std::string* tmp = new std::string; ConvertLatin1ToUTF8(t, tmp); status->set_tmp(tmp); t = *tmp; diff --git a/re2/prefilter.cc b/re2/prefilter.cc index b657357..4d6df8d 100644 --- a/re2/prefilter.cc +++ b/re2/prefilter.cc @@ -21,8 +21,8 @@ namespace re2 { static const bool ExtraDebug = false; -typedef std::set<string>::iterator SSIter; -typedef std::set<string>::const_iterator ConstSSIter; +typedef std::set<std::string>::iterator SSIter; +typedef std::set<std::string>::const_iterator ConstSSIter; // Initializes a Prefilter, allocating subs_ as necessary. Prefilter::Prefilter(Op op) { @@ -140,7 +140,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { return AndOr(OR, a, b); } -static void SimplifyStringSet(std::set<string> *ss) { +static void SimplifyStringSet(std::set<std::string> *ss) { // Now make sure that the strings aren't redundant. For example, if // we know "ab" is a required string, then it doesn't help at all to // know that "abc" is also a required string, so delete "abc". This @@ -155,13 +155,13 @@ static void SimplifyStringSet(std::set<string> *ss) { // Increment j early so that we can erase the element it points to. SSIter old_j = j; ++j; - if (old_j->find(*i) != string::npos) + if (old_j->find(*i) != std::string::npos) ss->erase(old_j); } } } -Prefilter* Prefilter::OrStrings(std::set<string>* ss) { +Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) { SimplifyStringSet(ss); Prefilter* or_prefilter = NULL; if (!ss->empty()) { @@ -191,7 +191,7 @@ static Rune ToLowerRuneLatin1(Rune r) { return r; } -Prefilter* Prefilter::FromString(const string& str) { +Prefilter* Prefilter::FromString(const std::string& str) { Prefilter* m = new Prefilter(Prefilter::ATOM); m->atom_ = str; return m; @@ -221,19 +221,19 @@ class Prefilter::Info { static Info* AnyMatch(); // Format Info as a string. - string ToString(); + std::string ToString(); // Caller takes ownership of the Prefilter. 
Prefilter* TakeMatch(); - std::set<string>& exact() { return exact_; } + std::set<std::string>& exact() { return exact_; } bool is_exact() const { return is_exact_; } class Walker; private: - std::set<string> exact_; + std::set<std::string> exact_; // When is_exact_ is true, the strings that match // are placed in exact_. When it is no longer an exact @@ -268,13 +268,11 @@ Prefilter* Prefilter::Info::TakeMatch() { } // Format a Info in string form. -string Prefilter::Info::ToString() { +std::string Prefilter::Info::ToString() { if (is_exact_) { int n = 0; - string s; - for (std::set<string>::iterator i = exact_.begin(); - i != exact_.end(); - ++i) { + std::string s; + for (SSIter i = exact_.begin(); i != exact_.end(); ++i) { if (n++ > 0) s += ","; s += *i; @@ -289,17 +287,17 @@ string Prefilter::Info::ToString() { } // Add the strings from src to dst. -static void CopyIn(const std::set<string>& src, - std::set<string>* dst) { +static void CopyIn(const std::set<std::string>& src, + std::set<std::string>* dst) { for (ConstSSIter i = src.begin(); i != src.end(); ++i) dst->insert(*i); } // Add the cross-product of a and b to dst. // (For each string i in a and j in b, add i+j.) 
-static void CrossProduct(const std::set<string>& a, - const std::set<string>& b, - std::set<string>* dst) { +static void CrossProduct(const std::set<std::string>& a, + const std::set<std::string>& b, + std::set<std::string>* dst) { for (ConstSSIter i = a.begin(); i != a.end(); ++i) for (ConstSSIter j = b.begin(); j != b.end(); ++j) dst->insert(*i + *j); @@ -390,15 +388,15 @@ Prefilter::Info* Prefilter::Info::Plus(Info *a) { return ab; } -static string RuneToString(Rune r) { +static std::string RuneToString(Rune r) { char buf[UTFmax]; int n = runetochar(buf, &r); - return string(buf, n); + return std::string(buf, n); } -static string RuneToStringLatin1(Rune r) { +static std::string RuneToStringLatin1(Rune r) { char c = r & 0xff; - return string(&c, 1); + return std::string(&c, 1); } // Constructs Info for literal rune. @@ -662,7 +660,7 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) { return m; } -string Prefilter::DebugString() const { +std::string Prefilter::DebugString() const { switch (op_) { default: LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; @@ -674,7 +672,7 @@ string Prefilter::DebugString() const { case ALL: return ""; case AND: { - string s = ""; + std::string s = ""; for (size_t i = 0; i < subs_->size(); i++) { if (i > 0) s += " "; @@ -684,7 +682,7 @@ string Prefilter::DebugString() const { return s; } case OR: { - string s = "("; + std::string s = "("; for (size_t i = 0; i < subs_->size(); i++) { if (i > 0) s += "|"; diff --git a/re2/prefilter.h b/re2/prefilter.h index ead09e1..4fedeb4 100644 --- a/re2/prefilter.h +++ b/re2/prefilter.h @@ -37,7 +37,7 @@ class Prefilter { ~Prefilter(); Op op() { return op_; } - const string& atom() const { return atom_; } + const std::string& atom() const { return atom_; } void set_unique_id(int id) { unique_id_ = id; } int unique_id() const { return unique_id_; } @@ -57,7 +57,7 @@ class Prefilter { static Prefilter* FromRE2(const RE2* re2); // Returns a readable debug string of the prefilter. 
- string DebugString() const; + std::string DebugString() const; private: class Info; @@ -75,9 +75,9 @@ class Prefilter { static Prefilter* FromRegexp(Regexp* a); - static Prefilter* FromString(const string& str); + static Prefilter* FromString(const std::string& str); - static Prefilter* OrStrings(std::set<string>* ss); + static Prefilter* OrStrings(std::set<std::string>* ss); static Info* BuildInfo(Regexp* re); @@ -90,7 +90,7 @@ class Prefilter { std::vector<Prefilter*>* subs_; // Actual string to match in leaf node. - string atom_; + std::string atom_; // If different prefilters have the same string atom, or if they are // structurally the same (e.g., OR of same atom strings) they are diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc index a07de40..187e2ec 100644 --- a/re2/prefilter_tree.cc +++ b/re2/prefilter_tree.cc @@ -54,7 +54,7 @@ void PrefilterTree::Add(Prefilter* prefilter) { prefilter_vec_.push_back(prefilter); } -void PrefilterTree::Compile(std::vector<string>* atom_vec) { +void PrefilterTree::Compile(std::vector<std::string>* atom_vec) { if (compiled_) { LOG(DFATAL) << "Compile called already."; return; @@ -106,16 +106,16 @@ void PrefilterTree::Compile(std::vector<string>* atom_vec) { } Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) { - string node_string = NodeString(node); - std::map<string, Prefilter*>::iterator iter = nodes->find(node_string); + std::string node_string = NodeString(node); + std::map<std::string, Prefilter*>::iterator iter = nodes->find(node_string); if (iter == nodes->end()) return NULL; return (*iter).second; } -string PrefilterTree::NodeString(Prefilter* node) const { +std::string PrefilterTree::NodeString(Prefilter* node) const { // Adding the operation disambiguates AND/OR/atom nodes. 
- string s = StringPrintf("%d", node->op()) + ":"; + std::string s = StringPrintf("%d", node->op()) + ":"; if (node->op() == Prefilter::ATOM) { s += node->atom(); } else { @@ -166,7 +166,7 @@ bool PrefilterTree::KeepNode(Prefilter* node) const { } void PrefilterTree::AssignUniqueIds(NodeMap* nodes, - std::vector<string>* atom_vec) { + std::vector<std::string>* atom_vec) { atom_vec->clear(); // Build vector of all filter nodes, sorted topologically @@ -377,15 +377,14 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) { LOG(ERROR) << it->first; } LOG(ERROR) << "Map:"; - for (std::map<string, Prefilter*>::const_iterator iter = nodes->begin(); + for (std::map<std::string, Prefilter*>::const_iterator iter = nodes->begin(); iter != nodes->end(); ++iter) LOG(ERROR) << "NodeId: " << (*iter).second->unique_id() << " Str: " << (*iter).first; } -string PrefilterTree::DebugNodeString(Prefilter* node) const { - string node_string = ""; - +std::string PrefilterTree::DebugNodeString(Prefilter* node) const { + std::string node_string = ""; if (node->op() == Prefilter::ATOM) { DCHECK(!node->atom().empty()); node_string += node->atom(); diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h index f81e134..10d6f7c 100644 --- a/re2/prefilter_tree.h +++ b/re2/prefilter_tree.h @@ -43,7 +43,7 @@ class PrefilterTree { // The caller should use the returned set of strings to do string matching. // Each time a string matches, the corresponding index then has to be // and passed to RegexpsGivenStrings below. - void Compile(std::vector<string>* atom_vec); + void Compile(std::vector<std::string>* atom_vec); // Given the indices of the atoms that matched, returns the indexes // of regexps that should be searched. 
The matched_atoms should @@ -60,7 +60,7 @@ class PrefilterTree { private: typedef SparseArray<int> IntMap; typedef std::map<int, int> StdIntMap; - typedef std::map<string, Prefilter*> NodeMap; + typedef std::map<std::string, Prefilter*> NodeMap; // Each unique node has a corresponding Entry that helps in // passing the matching trigger information along the tree. @@ -90,7 +90,7 @@ class PrefilterTree { // This function assigns unique ids to various parts of the // prefilter, by looking at if these nodes are already in the // PrefilterTree. - void AssignUniqueIds(NodeMap* nodes, std::vector<string>* atom_vec); + void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec); // Given the matching atoms, find the regexps to be triggered. void PropagateMatch(const std::vector<int>& atom_ids, @@ -102,10 +102,10 @@ class PrefilterTree { // A string that uniquely identifies the node. Assumes that the // children of node has already been assigned unique ids. - string NodeString(Prefilter* node) const; + std::string NodeString(Prefilter* node) const; // Recursively constructs a readable prefilter string. - string DebugNodeString(Prefilter* node) const; + std::string DebugNodeString(Prefilter* node) const; // Used for debugging. 
void PrintDebugInfo(NodeMap* nodes); diff --git a/re2/prog.cc b/re2/prog.cc index 9729aa4..9853d6d 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -34,7 +34,7 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) { set_out_opcode(out, kInstByteRange); lo_ = lo & 0xFF; hi_ = hi & 0xFF; - foldcase_ = foldcase & 0xFF; + hint_foldcase_ = foldcase&1; } void Prog::Inst::InitCapture(int cap, uint32_t out) { @@ -65,7 +65,7 @@ void Prog::Inst::InitFail() { set_opcode(kInstFail); } -string Prog::Inst::Dump() { +std::string Prog::Inst::Dump() { switch (opcode()) { default: return StringPrintf("opcode %d", static_cast<int>(opcode())); @@ -77,9 +77,9 @@ string Prog::Inst::Dump() { return StringPrintf("altmatch -> %d | %d", out(), out1_); case kInstByteRange: - return StringPrintf("byte%s [%02x-%02x] -> %d", - foldcase_ ? "/i" : "", - lo_, hi_, out()); + return StringPrintf("byte%s [%02x-%02x] %d -> %d", + foldcase() ? "/i" : "", + lo_, hi_, hint(), out()); case kInstCapture: return StringPrintf("capture %d -> %d", cap_, out()); @@ -129,8 +129,8 @@ static inline void AddToQueue(Workq* q, int id) { q->insert(id); } -static string ProgToString(Prog* prog, Workq* q) { - string s; +static std::string ProgToString(Prog* prog, Workq* q) { + std::string s; for (Workq::iterator i = q->begin(); i != q->end(); ++i) { int id = *i; Prog::Inst* ip = prog->inst(id); @@ -142,8 +142,8 @@ static string ProgToString(Prog* prog, Workq* q) { return s; } -static string FlattenedProgToString(Prog* prog, int start) { - string s; +static std::string FlattenedProgToString(Prog* prog, int start) { + std::string s; for (int id = start; id < prog->size(); id++) { Prog::Inst* ip = prog->inst(id); if (ip->last()) @@ -154,7 +154,7 @@ static string FlattenedProgToString(Prog* prog, int start) { return s; } -string Prog::Dump() { +std::string Prog::Dump() { if (did_flatten_) return FlattenedProgToString(this, start_); @@ -163,7 +163,7 @@ string Prog::Dump() { return ProgToString(this, &q); 
} -string Prog::DumpUnanchored() { +std::string Prog::DumpUnanchored() { if (did_flatten_) return FlattenedProgToString(this, start_unanchored_); @@ -172,8 +172,8 @@ string Prog::DumpUnanchored() { return ProgToString(this, &q); } -string Prog::DumpByteMap() { - string map; +std::string Prog::DumpByteMap() { + std::string map; for (int c = 0; c < 256; c++) { int b = bytemap_[c]; int lo = c; @@ -341,7 +341,6 @@ class ByteMapBuilder { // This will avoid problems during the second phase, // in which we assign byte classes numbered from 0. splits_.Set(255); - colors_.resize(256); colors_[255] = 256; nextcolor_ = 257; } @@ -354,7 +353,7 @@ class ByteMapBuilder { int Recolor(int oldcolor); Bitmap256 splits_; - std::vector<int> colors_; + int colors_[256]; int nextcolor_; std::vector<std::pair<int, int>> colormap_; std::vector<std::pair<int, int>> ranges_; @@ -468,8 +467,11 @@ void Prog::ComputeByteMap() { foldlo = 'a'; if (foldhi > 'z') foldhi = 'z'; - if (foldlo <= foldhi) - builder.Mark(foldlo + 'A' - 'a', foldhi + 'A' - 'a'); + if (foldlo <= foldhi) { + foldlo += 'A' - 'a'; + foldhi += 'A' - 'a'; + builder.Mark(foldlo, foldhi); + } } // If this Inst is not the last Inst in its list AND the next Inst is // also a ByteRange AND the Insts have the same out, defer the merge. @@ -591,6 +593,9 @@ void Prog::Flatten() { flatmap[i->value()] = static_cast<int>(flat.size()); EmitList(i->index(), &rootmap, &flat, &reachable, &stk); flat.back().set_last(); + // We have the bounds of the "list", so this is the + // most convenient point at which to compute hints. + ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size())); } list_count_ = static_cast<int>(flatmap.size()); @@ -625,7 +630,17 @@ void Prog::Flatten() { // Finally, replace the old instructions with the new instructions. 
size_ = static_cast<int>(flat.size()); inst_ = PODArray<Inst>(size_); - memmove(inst_.data(), flat.data(), size_*sizeof(inst_[0])); + memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]); + + // Populate the list heads for BitState. + // 512 instructions limits the memory footprint to 1KiB. + if (size_ <= 512) { + list_heads_ = PODArray<uint16_t>(size_); + // 0xFF makes it more obvious if we try to look up a non-head. + memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]); + for (int i = 0; i < list_count_; ++i) + list_heads_[flatmap[i]] = i; + } } void Prog::MarkSuccessors(SparseArray<int>* rootmap, @@ -818,4 +833,89 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap, } } +// For each ByteRange instruction in [begin, end), computes a hint to execution +// engines: the delta to the next instruction (in flat) worth exploring iff the +// current instruction matched. +// +// Implements a coloring algorithm related to ByteMapBuilder, but in this case, +// colors are instructions and recoloring ranges precisely identifies conflicts +// between instructions. Iterating backwards over [begin, end) is guaranteed to +// identify the nearest conflict (if any) with only linear complexity. +void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) { + Bitmap256 splits; + int colors[256]; + + bool dirty = false; + for (int id = end; id >= begin; --id) { + if (id == end || + (*flat)[id].opcode() != kInstByteRange) { + if (dirty) { + dirty = false; + splits.Clear(); + } + splits.Set(255); + colors[255] = id; + // At this point, the [0-255] range is colored with id. + // Thus, hints cannot point beyond id; and if id == end, + // hints that would have pointed to id will be 0 instead. + continue; + } + dirty = true; + + // We recolor the [lo-hi] range with id. Note that first ratchets backwards + // from end to the nearest conflict (if any) during recoloring. 
+ int first = end; + auto Recolor = [&](int lo, int hi) { + // Like ByteMapBuilder, we split at lo-1 and at hi. + --lo; + + if (0 <= lo && !splits.Test(lo)) { + splits.Set(lo); + int next = splits.FindNextSetBit(lo+1); + colors[lo] = colors[next]; + } + if (!splits.Test(hi)) { + splits.Set(hi); + int next = splits.FindNextSetBit(hi+1); + colors[hi] = colors[next]; + } + + int c = lo+1; + while (c < 256) { + int next = splits.FindNextSetBit(c); + // Ratchet backwards... + first = std::min(first, colors[next]); + // Recolor with id - because it's the new nearest conflict! + colors[next] = id; + if (next == hi) + break; + c = next+1; + } + }; + + Inst* ip = &(*flat)[id]; + int lo = ip->lo(); + int hi = ip->hi(); + Recolor(lo, hi); + if (ip->foldcase() && lo <= 'z' && hi >= 'a') { + int foldlo = lo; + int foldhi = hi; + if (foldlo < 'a') + foldlo = 'a'; + if (foldhi > 'z') + foldhi = 'z'; + if (foldlo <= foldhi) { + foldlo += 'A' - 'a'; + foldhi += 'A' - 'a'; + Recolor(foldlo, foldhi); + } + } + + if (first != end) { + uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767)); + ip->hint_foldcase_ |= hint<<1; + } + } +} + } // namespace re2 @@ -86,7 +86,8 @@ class Prog { int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; } int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; } int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; } - int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return foldcase_; } + int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; } + int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; } int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; } EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; } @@ -100,13 +101,13 @@ class Prog { // Does this inst (an kInstByteRange) match c? 
inline bool Matches(int c) { DCHECK_EQ(opcode(), kInstByteRange); - if (foldcase_ && 'A' <= c && c <= 'Z') + if (foldcase() && 'A' <= c && c <= 'Z') c += 'a' - 'A'; return lo_ <= c && c <= hi_; } // Returns string representation for debugging. - string Dump(); + std::string Dump(); // Maximum instruction id. // (Must fit in out_opcode_. PatchList/last steal another bit.) @@ -129,25 +130,31 @@ class Prog { out_opcode_ = (out<<4) | (last()<<3) | opcode; } - uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode - union { // additional instruction arguments: - uint32_t out1_; // opcode == kInstAlt - // alternate next instruction - - int32_t cap_; // opcode == kInstCapture - // Index of capture register (holds text - // position recorded by capturing parentheses). - // For \n (the submatch for the nth parentheses), - // the left parenthesis captures into register 2*n - // and the right one captures into register 2*n+1. - - int32_t match_id_; // opcode == kInstMatch - // Match ID to identify this match (for re2::Set). - - struct { // opcode == kInstByteRange - uint8_t lo_; // byte range is lo_-hi_ inclusive - uint8_t hi_; // - uint8_t foldcase_; // convert A-Z to a-z before checking range. + uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode + union { // additional instruction arguments: + uint32_t out1_; // opcode == kInstAlt + // alternate next instruction + + int32_t cap_; // opcode == kInstCapture + // Index of capture register (holds text + // position recorded by capturing parentheses). + // For \n (the submatch for the nth parentheses), + // the left parenthesis captures into register 2*n + // and the right one captures into register 2*n+1. + + int32_t match_id_; // opcode == kInstMatch + // Match ID to identify this match (for re2::Set). 
+ + struct { // opcode == kInstByteRange + uint8_t lo_; // byte range is lo_-hi_ inclusive + uint8_t hi_; // + uint16_t hint_foldcase_; // 15 bits: hint, 1 (low) bit: foldcase + // hint to execution engines: the delta to the + // next instruction (in the current list) worth + // exploring iff this instruction matched; 0 + // means there are no remaining possibilities, + // which is most likely for character classes. + // foldcase: A-Z -> a-z before checking range. }; EmptyOp empty_; // opcode == kInstEmptyWidth @@ -199,6 +206,7 @@ class Prog { void set_reversed(bool reversed) { reversed_ = reversed; } int list_count() { return list_count_; } int inst_count(InstOp op) { return inst_count_[op]; } + uint16_t* list_heads() { return list_heads_.data(); } void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; } int64_t dfa_mem() { return dfa_mem_; } int flags() { return flags_; } @@ -214,9 +222,9 @@ class Prog { int first_byte(); // Returns string representation of program for debugging. - string Dump(); - string DumpUnanchored(); - string DumpByteMap(); + std::string Dump(); + std::string DumpUnanchored(); + std::string DumpByteMap(); // Returns the set of kEmpty flags that are in effect at // position p within context. @@ -305,7 +313,8 @@ class Prog { StringPiece* match, int nmatch); // Bit-state backtracking. Fast on small cases but uses memory - // proportional to the product of the program size and the text size. + // proportional to the product of the list count and the text size. + bool CanBitState() { return list_heads_.data() != NULL; } bool SearchBitState(const StringPiece& text, const StringPiece& context, Anchor anchor, MatchKind kind, StringPiece* match, int nmatch); @@ -337,7 +346,7 @@ class Prog { // do not compile down to infinite repetitions. // // Returns true on success, false on error. - bool PossibleMatchRange(string* min, string* max, int maxlen); + bool PossibleMatchRange(std::string* min, std::string* max, int maxlen); // EXPERIMENTAL! 
SUBJECT TO CHANGE! // Outputs the program fanout into the given sparse array. @@ -374,6 +383,9 @@ class Prog { std::vector<Inst>* flat, SparseSet* reachable, std::vector<int>* stk); + // Computes hints for ByteRange instructions in [begin, end). + void ComputeHints(std::vector<Inst>* flat, int begin, int end); + private: friend class Compiler; @@ -393,10 +405,12 @@ class Prog { int first_byte_; // required first byte for match, or -1 if none int flags_; // regexp parse flags - int list_count_; // count of lists (see above) - int inst_count_[kNumInst]; // count of instructions by opcode + int list_count_; // count of lists (see above) + int inst_count_[kNumInst]; // count of instructions by opcode + PODArray<uint16_t> list_heads_; // sparse array enumerating list heads + // not populated if size_ is overly large - PODArray<Inst> inst_; // pointer to instruction array + PODArray<Inst> inst_; // pointer to instruction array PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes int64_t dfa_mem_; // Maximum memory for DFAs. @@ -56,9 +56,9 @@ RE2::Options::Options(RE2::CannedOptions opt) // static empty objects for use as const references. // To avoid global constructors, allocated in RE2::Init(). -static const string* empty_string; -static const std::map<string, int>* empty_named_groups; -static const std::map<int, string>* empty_group_names; +static const std::string* empty_string; +static const std::map<std::string, int>* empty_named_groups; +static const std::map<int, std::string>* empty_group_names; // Converts from Regexp error code to RE2 error code. // Maybe some day they will diverge. 
In any event, this @@ -97,10 +97,10 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { return RE2::ErrorInternal; } -static string trunc(const StringPiece& pattern) { +static std::string trunc(const StringPiece& pattern) { if (pattern.size() < 100) - return string(pattern); - return string(pattern.substr(0, 100)) + "..."; + return std::string(pattern); + return std::string(pattern.substr(0, 100)) + "..."; } @@ -108,7 +108,7 @@ RE2::RE2(const char* pattern) { Init(pattern, DefaultOptions); } -RE2::RE2(const string& pattern) { +RE2::RE2(const std::string& pattern) { Init(pattern, DefaultOptions); } @@ -167,12 +167,12 @@ int RE2::Options::ParseFlags() const { void RE2::Init(const StringPiece& pattern, const Options& options) { static std::once_flag empty_once; std::call_once(empty_once, []() { - empty_string = new string; - empty_named_groups = new std::map<string, int>; - empty_group_names = new std::map<int, string>; + empty_string = new std::string; + empty_named_groups = new std::map<std::string, int>; + empty_group_names = new std::map<int, std::string>; }); - pattern_ = string(pattern); + pattern_ = std::string(pattern); options_.Copy(options); entire_regexp_ = NULL; suffix_regexp_ = NULL; @@ -194,9 +194,9 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': " << status.Text(); } - error_ = new string(status.Text()); + error_ = new std::string(status.Text()); error_code_ = RegexpErrorToRE2(status.code()); - error_arg_ = string(status.error_arg()); + error_arg_ = std::string(status.error_arg()); return; } @@ -213,7 +213,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { if (prog_ == NULL) { if (options_.log_errors()) LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'"; - error_ = new string("pattern too large - compile failed"); + error_ = new std::string("pattern too large - compile failed"); error_code_ = RE2::ErrorPatternTooLarge; 
return; } @@ -239,7 +239,8 @@ re2::Prog* RE2::ReverseProg() const { if (re->rprog_ == NULL) { if (re->options_.log_errors()) LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'"; - re->error_ = new string("pattern too large - reverse compile failed"); + re->error_ = + new std::string("pattern too large - reverse compile failed"); re->error_code_ = RE2::ErrorPatternTooLarge; } }, this); @@ -307,7 +308,7 @@ int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const { } // Returns named_groups_, computing it if needed. -const std::map<string, int>& RE2::NamedCapturingGroups() const { +const std::map<std::string, int>& RE2::NamedCapturingGroups() const { std::call_once(named_groups_once_, [](const RE2* re) { if (re->suffix_regexp_ != NULL) re->named_groups_ = re->suffix_regexp_->NamedCaptures(); @@ -318,7 +319,7 @@ const std::map<string, int>& RE2::NamedCapturingGroups() const { } // Returns group_names_, computing it if needed. -const std::map<int, string>& RE2::CapturingGroupNames() const { +const std::map<int, std::string>& RE2::CapturingGroupNames() const { std::call_once(group_names_once_, [](const RE2* re) { if (re->suffix_regexp_ != NULL) re->group_names_ = re->suffix_regexp_->CaptureNames(); @@ -362,7 +363,7 @@ bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, } } -bool RE2::Replace(string* str, +bool RE2::Replace(std::string* str, const RE2& re, const StringPiece& rewrite) { StringPiece vec[kVecSize]; @@ -372,7 +373,7 @@ bool RE2::Replace(string* str, if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) return false; - string s; + std::string s; if (!re.Rewrite(&s, rewrite, vec, nvec)) return false; @@ -382,7 +383,7 @@ bool RE2::Replace(string* str, return true; } -int RE2::GlobalReplace(string* str, +int RE2::GlobalReplace(std::string* str, const RE2& re, const StringPiece& rewrite) { StringPiece vec[kVecSize]; @@ -393,7 +394,7 @@ int RE2::GlobalReplace(string* str, const char* p = str->data(); const char* ep = p + 
str->size(); const char* lastend = NULL; - string out; + std::string out; int count = 0; #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION // Iterate just once when fuzzing. Otherwise, we easily get bogged down @@ -410,11 +411,10 @@ int RE2::GlobalReplace(string* str, if (vec[0].begin() == lastend && vec[0].size() == 0) { // Disallow empty match at end of last match: skip ahead. // - // fullrune() takes int, not size_t. However, it just looks + // fullrune() takes int, not ptrdiff_t. However, it just looks // at the leading byte and treats any length >= 4 the same. if (re.options().encoding() == RE2::Options::EncodingUTF8 && - fullrune(p, static_cast<int>(std::min(static_cast<ptrdiff_t>(4), - ep - p)))) { + fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) { // re is in UTF-8 mode and there is enough left of str // to allow us to advance by up to UTFmax bytes. Rune r; @@ -457,7 +457,7 @@ int RE2::GlobalReplace(string* str, bool RE2::Extract(const StringPiece& text, const RE2& re, const StringPiece& rewrite, - string* out) { + std::string* out) { StringPiece vec[kVecSize]; int nvec = 1 + MaxSubmatch(rewrite); if (nvec > arraysize(vec)) @@ -470,8 +470,8 @@ bool RE2::Extract(const StringPiece& text, return re.Rewrite(out, rewrite, vec, nvec); } -string RE2::QuoteMeta(const StringPiece& unquoted) { - string result; +std::string RE2::QuoteMeta(const StringPiece& unquoted) { + std::string result; result.reserve(unquoted.size() << 1); // Escape any ascii character not in [A-Za-z_0-9]. @@ -508,7 +508,8 @@ string RE2::QuoteMeta(const StringPiece& unquoted) { return result; } -bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { +bool RE2::PossibleMatchRange(std::string* min, std::string* max, + int maxlen) const { if (prog_ == NULL) return false; @@ -529,7 +530,7 @@ bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { } // Add to prefix min max using PossibleMatchRange on regexp. 
- string dmin, dmax; + std::string dmin, dmax; maxlen -= n; if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) { min->append(dmin); @@ -646,15 +647,13 @@ bool RE2::Match(const StringPiece& text, bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture); - // SearchBitState allocates a bit vector of size prog_->size() * text.size(). + // BitState allocates a bitmap of size prog_->list_count() * text.size(). // It also allocates a stack of 3-word structures which could potentially - // grow as large as prog_->size() * text.size() but in practice is much - // smaller. - // Conditions for using SearchBitState: - const int MaxBitStateProg = 500; // prog_->size() <= Max. - const int MaxBitStateVector = 256*1024; // bit vector size <= Max (bits) - bool can_bit_state = prog_->size() <= MaxBitStateProg; - size_t bit_state_text_max = MaxBitStateVector / prog_->size(); + // grow as large as prog_->list_count() * text.size(), but in practice is + // much smaller. + const int kMaxBitStateBitmapSize = 256*1024; // bitmap size <= max (bits) + bool can_bit_state = prog_->CanBitState(); + size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count(); bool dfa_failed = false; switch (re_anchor) { @@ -854,7 +853,8 @@ bool RE2::DoMatch(const StringPiece& text, // Checks that the rewrite string is well-formed with respect to this // regular expression. -bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const { +bool RE2::CheckRewriteString(const StringPiece& rewrite, + std::string* error) const { int max_token = -1; for (const char *s = rewrite.data(), *end = s + rewrite.size(); s < end; s++) { @@ -911,7 +911,7 @@ int RE2::MaxSubmatch(const StringPiece& rewrite) { // Append the "rewrite" string, with backslash subsitutions from "vec", // to string "out". 
-bool RE2::Rewrite(string* out, +bool RE2::Rewrite(std::string* out, const StringPiece& rewrite, const StringPiece* vec, int veclen) const { @@ -955,7 +955,7 @@ bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) { bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) { if (dest == NULL) return true; - reinterpret_cast<string*>(dest)->assign(str, n); + reinterpret_cast<std::string*>(dest)->assign(str, n); return true; } @@ -69,7 +69,7 @@ // // Example: extracts "ruby" into "s" and 1234 into "i" // int i; -// string s; +// std::string s; // CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); // // Example: fails because string cannot be stored in integer @@ -131,10 +131,10 @@ // which represents a sub-range of a real string. // // Example: read lines of the form "var = value" from a string. -// string contents = ...; // Fill string somehow +// std::string contents = ...; // Fill string somehow // StringPiece input(contents); // Wrap a StringPiece around it // -// string var; +// std::string var; // int value; // while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { // ...; @@ -205,9 +205,6 @@ class Regexp; namespace re2 { -// TODO(junyer): Get rid of this. -using std::string; - // Interface for regular expression matching. Also corresponds to a // pre-compiled regular expression. An "RE2" object is safe for // concurrent use by multiple threads. @@ -254,12 +251,12 @@ class RE2 { Quiet // do not log about regexp parse errors }; - // Need to have the const char* and const string& forms for implicit + // Need to have the const char* and const std::string& forms for implicit // conversions when passing string literals to FullMatch and PartialMatch. // Otherwise the StringPiece form would be sufficient. 
#ifndef SWIG RE2(const char* pattern); - RE2(const string& pattern); + RE2(const std::string& pattern); #endif RE2(const StringPiece& pattern); RE2(const StringPiece& pattern, const Options& options); @@ -271,11 +268,11 @@ class RE2 { // The string specification for this RE2. E.g. // RE2 re("ab*c?d+"); // re.pattern(); // "ab*c?d+" - const string& pattern() const { return pattern_; } + const std::string& pattern() const { return pattern_; } // If RE2 could not be created properly, returns an error string. // Else returns the empty string. - const string& error() const { return *error_; } + const std::string& error() const { return *error_; } // If RE2 could not be created properly, returns an error code. // Else returns RE2::NoError (== 0). @@ -283,7 +280,7 @@ class RE2 { // If RE2 could not be created properly, returns the offending // portion of the regexp. - const string& error_arg() const { return error_arg_; } + const std::string& error_arg() const { return error_arg_; } // Returns the program size, a very approximate measure of a regexp's "cost". // Larger numbers are more expensive than smaller numbers. @@ -341,12 +338,12 @@ class RE2 { // Matches "text" against "re". If pointer arguments are // supplied, copies matched sub-patterns into them. // - // You can pass in a "const char*" or a "string" for "text". - // You can pass in a "const char*" or a "string" or a "RE2" for "re". + // You can pass in a "const char*" or a "std::string" for "text". + // You can pass in a "const char*" or a "std::string" or a "RE2" for "re". 
// // The provided pointer arguments can be pointers to any scalar numeric // type, or one of: - // string (matched piece is copied to string) + // std::string (matched piece is copied to string) // StringPiece (StringPiece is mutated to point to matched piece) // T (where "bool T::ParseFrom(const char*, size_t)" exists) // (void*)NULL (the corresponding matched sub-pattern is not copied) @@ -402,21 +399,21 @@ class RE2 { // from the pattern. \0 in "rewrite" refers to the entire matching // text. E.g., // - // string s = "yabba dabba doo"; + // std::string s = "yabba dabba doo"; // CHECK(RE2::Replace(&s, "b+", "d")); // // will leave "s" containing "yada dabba doo" // // Returns true if the pattern matches and a replacement occurs, // false otherwise. - static bool Replace(string* str, + static bool Replace(std::string* str, const RE2& re, const StringPiece& rewrite); // Like Replace(), except replaces successive non-overlapping occurrences // of the pattern in the string with the rewrite. E.g. // - // string s = "yabba dabba doo"; + // std::string s = "yabba dabba doo"; // CHECK(RE2::GlobalReplace(&s, "b+", "d")); // // will leave "s" containing "yada dada doo" @@ -426,7 +423,7 @@ class RE2 { // replacing "ana" within "banana" makes only one replacement, not two. // // Returns the number of replacements made. - static int GlobalReplace(string* str, + static int GlobalReplace(std::string* str, const RE2& re, const StringPiece& rewrite); @@ -441,7 +438,7 @@ class RE2 { static bool Extract(const StringPiece& text, const RE2& re, const StringPiece& rewrite, - string* out); + std::string* out); // Escapes all potentially meaningful regexp characters in // 'unquoted'. The returned string, used as a regular expression, @@ -449,7 +446,7 @@ class RE2 { // 1.5-2.0? // may become: // 1\.5\-2\.0\? - static string QuoteMeta(const StringPiece& unquoted); + static std::string QuoteMeta(const StringPiece& unquoted); // Computes range for any strings matching regexp. 
The min and max can in // some cases be arbitrarily precise, so the caller gets to specify the @@ -465,7 +462,8 @@ class RE2 { // do not compile down to infinite repetitions. // // Returns true on success, false on error. - bool PossibleMatchRange(string* min, string* max, int maxlen) const; + bool PossibleMatchRange(std::string* min, std::string* max, + int maxlen) const; // Generic matching interface @@ -485,12 +483,12 @@ class RE2 { // The map records the index of the leftmost group // with the given name. // Only valid until the re is deleted. - const std::map<string, int>& NamedCapturingGroups() const; + const std::map<std::string, int>& NamedCapturingGroups() const; // Return a map from capturing indices to names. // The map has no entries for unnamed groups. // Only valid until the re is deleted. - const std::map<int, string>& CapturingGroupNames() const; + const std::map<int, std::string>& CapturingGroupNames() const; // General matching routine. // Match against text starting at offset startpos @@ -528,7 +526,8 @@ class RE2 { // '\' followed by anything other than a digit or '\'. // A true return value guarantees that Replace() and Extract() won't // fail because of a bad rewrite string. - bool CheckRewriteString(const StringPiece& rewrite, string* error) const; + bool CheckRewriteString(const StringPiece& rewrite, + std::string* error) const; // Returns the maximum submatch needed for the rewrite to be done by // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2. @@ -539,7 +538,7 @@ class RE2 { // Returns true on success. This method can fail because of a malformed // rewrite string. CheckRewriteString guarantees that the rewrite will // be sucessful. 
- bool Rewrite(string* out, + bool Rewrite(std::string* out, const StringPiece& rewrite, const StringPiece* vec, int veclen) const; @@ -737,9 +736,9 @@ class RE2 { re2::Prog* ReverseProg() const; - string pattern_; // string regular expression + std::string pattern_; // string regular expression Options options_; // option flags - string prefix_; // required prefix (before regexp_) + std::string prefix_; // required prefix (before regexp_) bool prefix_foldcase_; // prefix is ASCII case-insensitive re2::Regexp* entire_regexp_; // parsed regular expression re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed @@ -747,17 +746,17 @@ class RE2 { int num_captures_; // Number of capturing groups bool is_one_pass_; // can use prog_->SearchOnePass? - mutable re2::Prog* rprog_; // reverse program for regexp - mutable const string* error_; // Error indicator + mutable re2::Prog* rprog_; // reverse program for regexp + mutable const std::string* error_; // Error indicator // (or points to empty string) mutable ErrorCode error_code_; // Error code - mutable string error_arg_; // Fragment of regexp showing error + mutable std::string error_arg_; // Fragment of regexp showing error // Map from capture names to indices - mutable const std::map<string, int>* named_groups_; + mutable const std::map<std::string, int>* named_groups_; // Map from capture indices to names - mutable const std::map<int, string>* group_names_; + mutable const std::map<int, std::string>* group_names_; // Onces for lazy computations. 
mutable std::once_flag rprog_once_; @@ -804,7 +803,7 @@ class RE2::Arg { MAKE_PARSER(unsigned char, parse_uchar) MAKE_PARSER(float, parse_float) MAKE_PARSER(double, parse_double) - MAKE_PARSER(string, parse_string) + MAKE_PARSER(std::string, parse_string) MAKE_PARSER(StringPiece, parse_stringpiece) MAKE_PARSER(short, parse_short) diff --git a/re2/regexp.cc b/re2/regexp.cc index 7cfbbcb..7995ffc 100644 --- a/re2/regexp.cc +++ b/re2/regexp.cc @@ -510,16 +510,16 @@ static const char *kErrorStrings[] = { "invalid named capture group", }; -string RegexpStatus::CodeText(enum RegexpStatusCode code) { +std::string RegexpStatus::CodeText(enum RegexpStatusCode code) { if (code < 0 || code >= arraysize(kErrorStrings)) code = kRegexpInternalError; return kErrorStrings[code]; } -string RegexpStatus::Text() const { +std::string RegexpStatus::Text() const { if (error_arg_.empty()) return CodeText(code_); - string s; + std::string s; s.append(CodeText(code_)); s.append(": "); s.append(error_arg_.data(), error_arg_.size()); @@ -569,8 +569,8 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> { NamedCapturesWalker() : map_(NULL) {} ~NamedCapturesWalker() { delete map_; } - std::map<string, int>* TakeMap() { - std::map<string, int>* m = map_; + std::map<std::string, int>* TakeMap() { + std::map<std::string, int>* m = map_; map_ = NULL; return m; } @@ -579,7 +579,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> { if (re->op() == kRegexpCapture && re->name() != NULL) { // Allocate map once we find a name. if (map_ == NULL) - map_ = new std::map<string, int>; + map_ = new std::map<std::string, int>; // Record first occurrence of each name. 
// (The rule is that if you have the same name @@ -597,13 +597,13 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> { } private: - std::map<string, int>* map_; + std::map<std::string, int>* map_; NamedCapturesWalker(const NamedCapturesWalker&) = delete; NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete; }; -std::map<string, int>* Regexp::NamedCaptures() { +std::map<std::string, int>* Regexp::NamedCaptures() { NamedCapturesWalker w; w.Walk(this, 0); return w.TakeMap(); @@ -615,8 +615,8 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> { CaptureNamesWalker() : map_(NULL) {} ~CaptureNamesWalker() { delete map_; } - std::map<int, string>* TakeMap() { - std::map<int, string>* m = map_; + std::map<int, std::string>* TakeMap() { + std::map<int, std::string>* m = map_; map_ = NULL; return m; } @@ -625,7 +625,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> { if (re->op() == kRegexpCapture && re->name() != NULL) { // Allocate map once we find a name. if (map_ == NULL) - map_ = new std::map<int, string>; + map_ = new std::map<int, std::string>; (*map_)[re->cap()] = *re->name(); } @@ -639,13 +639,13 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> { } private: - std::map<int, string>* map_; + std::map<int, std::string>* map_; CaptureNamesWalker(const CaptureNamesWalker&) = delete; CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete; }; -std::map<int, string>* Regexp::CaptureNames() { +std::map<int, std::string>* Regexp::CaptureNames() { CaptureNamesWalker w; w.Walk(this, 0); return w.TakeMap(); @@ -655,7 +655,8 @@ std::map<int, string>* Regexp::CaptureNames() { // with a fixed string prefix. If so, returns the prefix and // the regexp that remains after the prefix. The prefix might // be ASCII case-insensitive. 
-bool Regexp::RequiredPrefix(string* prefix, bool* foldcase, Regexp** suffix) { +bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase, + Regexp** suffix) { // No need for a walker: the regexp must be of the form // 1. some number of ^ anchors // 2. a literal char or string diff --git a/re2/regexp.h b/re2/regexp.h index 2ca96cd..a5d85c8 100644 --- a/re2/regexp.h +++ b/re2/regexp.h @@ -194,7 +194,7 @@ class RegexpStatus { void set_code(RegexpStatusCode code) { code_ = code; } void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; } - void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; } + void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; } RegexpStatusCode code() const { return code_; } const StringPiece& error_arg() const { return error_arg_; } bool ok() const { return code() == kRegexpSuccess; } @@ -204,16 +204,16 @@ class RegexpStatus { // Returns text equivalent of code, e.g.: // "Bad character class" - static string CodeText(RegexpStatusCode code); + static std::string CodeText(RegexpStatusCode code); // Returns text describing error, e.g.: // "Bad character class: [z-a]" - string Text() const; + std::string Text() const; private: RegexpStatusCode code_; // Kind of error - StringPiece error_arg_; // Piece of regexp containing syntax error. - string* tmp_; // Temporary storage, possibly where error_arg_ is. + StringPiece error_arg_; // Piece of regexp containing syntax error. + std::string* tmp_; // Temporary storage, possibly where error_arg_ is. 
RegexpStatus(const RegexpStatus&) = delete; RegexpStatus& operator=(const RegexpStatus&) = delete; @@ -336,7 +336,7 @@ class Regexp { Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; } CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; } int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; } - const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; } + const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; } Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; } int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; } int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; } @@ -368,8 +368,7 @@ class Regexp { // string representation of the simplified form. Returns true on success. // Returns false and sets *status (if status != NULL) on parse error. static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags, - string* dst, - RegexpStatus* status); + std::string* dst, RegexpStatus* status); // Returns the number of capturing groups in the regexp. int NumCaptures(); @@ -378,16 +377,16 @@ class Regexp { // Returns a map from names to capturing group indices, // or NULL if the regexp contains no named capture groups. // The caller is responsible for deleting the map. - std::map<string, int>* NamedCaptures(); + std::map<std::string, int>* NamedCaptures(); // Returns a map from capturing group indices to capturing group // names or NULL if the regexp contains no named capture groups. The // caller is responsible for deleting the map. - std::map<int, string>* CaptureNames(); + std::map<int, std::string>* CaptureNames(); // Returns a string representation of the current regexp, // using as few parentheses as possible. - string ToString(); + std::string ToString(); // Convenience functions. They consume the passed reference, // so in many cases you should use, e.g., Plus(re->Incref(), flags). @@ -409,7 +408,7 @@ class Regexp { // Debugging function. 
Returns string format for regexp // that makes structure clear. Does NOT use regexp syntax. - string Dump(); + std::string Dump(); // Helper traversal class, defined fully in walker-inl.h. template<typename T> class Walker; @@ -438,7 +437,8 @@ class Regexp { // follows it. // Callers should expect *prefix, *foldcase and *suffix to be "zeroed" // regardless of the return value. - bool RequiredPrefix(string* prefix, bool* foldcase, Regexp** suffix); + bool RequiredPrefix(std::string* prefix, bool* foldcase, + Regexp** suffix); private: // Constructor allocates vectors as appropriate for operator. @@ -564,7 +564,7 @@ class Regexp { }; struct { // Capture int cap_; - string* name_; + std::string* name_; }; struct { // LiteralString int nrunes_; @@ -33,7 +33,7 @@ RE2::Set::~Set() { delete prog_; } -int RE2::Set::Add(const StringPiece& pattern, string* error) { +int RE2::Set::Add(const StringPiece& pattern, std::string* error) { if (compiled_) { LOG(DFATAL) << "RE2::Set::Add() called after compiling"; return -1; @@ -68,7 +68,7 @@ int RE2::Set::Add(const StringPiece& pattern, string* error) { sub[1] = m; re = re2::Regexp::Concat(sub, 2, pf); } - elem_.emplace_back(string(pattern), re); + elem_.emplace_back(std::string(pattern), re); return n; } @@ -42,7 +42,7 @@ class RE2::Set { // Indices are assigned in sequential order starting from 0. // Errors do not increment the index; if error is not NULL, *error will hold // the error message from the parser. - int Add(const StringPiece& pattern, string* error); + int Add(const StringPiece& pattern, std::string* error); // Compiles the set in preparation for matching. // Returns false if the compiler runs out of memory. 
@@ -62,7 +62,7 @@ class RE2::Set { ErrorInfo* error_info) const; private: - typedef std::pair<string, re2::Regexp*> Elem; + typedef std::pair<std::string, re2::Regexp*> Elem; RE2::Options options_; RE2::Anchor anchor_; diff --git a/re2/simplify.cc b/re2/simplify.cc index 7cc0419..8939678 100644 --- a/re2/simplify.cc +++ b/re2/simplify.cc @@ -21,8 +21,7 @@ namespace re2 { // string representation of the simplified form. Returns true on success. // Returns false and sets *error (if error != NULL) on error. bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags, - string* dst, - RegexpStatus* status) { + std::string* dst, RegexpStatus* status) { Regexp* re = Parse(src, flags, status); if (re == NULL) return false; diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc index d89d80f..2accba1 100644 --- a/re2/testing/compile_test.cc +++ b/re2/testing/compile_test.cc @@ -26,88 +26,88 @@ struct Test { static Test tests[] = { { "a", - "3. byte [61-61] -> 4\n" + "3. byte [61-61] 0 -> 4\n" "4. match! 0\n" }, { "ab", - "3. byte [61-61] -> 4\n" - "4. byte [62-62] -> 5\n" + "3. byte [61-61] 0 -> 4\n" + "4. byte [62-62] 0 -> 5\n" "5. match! 0\n" }, { "a|c", - "3+ byte [61-61] -> 5\n" - "4. byte [63-63] -> 5\n" + "3+ byte [61-61] 0 -> 5\n" + "4. byte [63-63] 0 -> 5\n" "5. match! 0\n" }, { "a|b", - "3. byte [61-62] -> 4\n" + "3. byte [61-62] 0 -> 4\n" "4. match! 0\n" }, { "[ab]", - "3. byte [61-62] -> 4\n" + "3. byte [61-62] 0 -> 4\n" "4. match! 0\n" }, { "a+", - "3. byte [61-61] -> 4\n" + "3. byte [61-61] 0 -> 4\n" "4+ nop -> 3\n" "5. match! 0\n" }, { "a+?", - "3. byte [61-61] -> 4\n" + "3. byte [61-61] 0 -> 4\n" "4+ match! 0\n" "5. nop -> 3\n" }, { "a*", - "3+ byte [61-61] -> 3\n" + "3+ byte [61-61] 1 -> 3\n" "4. match! 0\n" }, { "a*?", "3+ match! 0\n" - "4. byte [61-61] -> 3\n" }, + "4. byte [61-61] 0 -> 3\n" }, { "a?", - "3+ byte [61-61] -> 5\n" + "3+ byte [61-61] 1 -> 5\n" "4. nop -> 5\n" "5. match! 0\n" }, { "a??", "3+ nop -> 5\n" - "4. 
byte [61-61] -> 5\n" + "4. byte [61-61] 0 -> 5\n" "5. match! 0\n" }, { "a{4}", - "3. byte [61-61] -> 4\n" - "4. byte [61-61] -> 5\n" - "5. byte [61-61] -> 6\n" - "6. byte [61-61] -> 7\n" + "3. byte [61-61] 0 -> 4\n" + "4. byte [61-61] 0 -> 5\n" + "5. byte [61-61] 0 -> 6\n" + "6. byte [61-61] 0 -> 7\n" "7. match! 0\n" }, { "(a)", "3. capture 2 -> 4\n" - "4. byte [61-61] -> 5\n" + "4. byte [61-61] 0 -> 5\n" "5. capture 3 -> 6\n" "6. match! 0\n" }, { "(?:a)", - "3. byte [61-61] -> 4\n" + "3. byte [61-61] 0 -> 4\n" "4. match! 0\n" }, { "", "3. match! 0\n" }, { ".", - "3+ byte [00-09] -> 5\n" - "4. byte [0b-ff] -> 5\n" + "3+ byte [00-09] 0 -> 5\n" + "4. byte [0b-ff] 0 -> 5\n" "5. match! 0\n" }, { "[^ab]", - "3+ byte [00-09] -> 6\n" - "4+ byte [0b-60] -> 6\n" - "5. byte [63-ff] -> 6\n" + "3+ byte [00-09] 0 -> 6\n" + "4+ byte [0b-60] 0 -> 6\n" + "5. byte [63-ff] 0 -> 6\n" "6. match! 0\n" }, { "[Aa]", - "3. byte/i [61-61] -> 4\n" + "3. byte/i [61-61] 0 -> 4\n" "4. match! 0\n" }, { "\\C+", - "3. byte [00-ff] -> 4\n" + "3. byte [00-ff] 0 -> 4\n" "4+ altmatch -> 5 | 6\n" "5+ nop -> 3\n" "6. match! 0\n" }, { "\\C*", "3+ altmatch -> 4 | 5\n" - "4+ byte [00-ff] -> 3\n" + "4+ byte [00-ff] 1 -> 3\n" "5. match! 0\n" }, { "\\C?", - "3+ byte [00-ff] -> 5\n" + "3+ byte [00-ff] 1 -> 5\n" "4. nop -> 5\n" "5. match! 0\n" }, // Issue 20992936 { "[[-`]", - "3. byte [5b-60] -> 4\n" + "3. byte [5b-60] 0 -> 4\n" "4. match! 
0\n" }, }; @@ -129,7 +129,7 @@ TEST(TestRegexpCompileToProg, Simple) { continue; } ASSERT_TRUE(re->CompileToProg(1) == NULL); - string s = prog->Dump(); + std::string s = prog->Dump(); if (s != t.code) { LOG(ERROR) << "Incorrect compiled code for: " << t.regexp; LOG(ERROR) << "Want:\n" << t.code; @@ -143,7 +143,7 @@ TEST(TestRegexpCompileToProg, Simple) { } static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags, - string* bytemap) { + std::string* bytemap) { Regexp* re = Regexp::Parse(pattern, flags, NULL); EXPECT_TRUE(re != NULL); @@ -158,7 +158,7 @@ static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags, TEST(TestCompile, Latin1Ranges) { // The distinct byte ranges involved in the Latin-1 dot ([^\n]). - string bytemap; + std::string bytemap; DumpByteMap(".", Regexp::PerlX|Regexp::Latin1, &bytemap); EXPECT_EQ("[00-09] -> 0\n" @@ -168,7 +168,7 @@ TEST(TestCompile, Latin1Ranges) { } TEST(TestCompile, OtherByteMapTests) { - string bytemap; + std::string bytemap; // Test that "absent" ranges are mapped to the same byte class. DumpByteMap("[0-9A-Fa-f]+", Regexp::PerlX|Regexp::Latin1, &bytemap); @@ -207,7 +207,7 @@ TEST(TestCompile, UTF8Ranges) { // Once, erroneously split between 0x3f and 0x40 because it is // a 6-bit boundary. - string bytemap; + std::string bytemap; DumpByteMap(".", Regexp::PerlX, &bytemap); EXPECT_EQ("[00-09] -> 0\n" @@ -240,7 +240,7 @@ TEST(TestCompile, InsufficientMemory) { } static void Dump(StringPiece pattern, Regexp::ParseFlags flags, - string* forward, string* reverse) { + std::string* forward, std::string* reverse) { Regexp* re = Regexp::Parse(pattern, flags, NULL); EXPECT_TRUE(re != NULL); @@ -265,54 +265,54 @@ TEST(TestCompile, Bug26705922) { // Bug in the compiler caused inefficient bytecode to be generated for Unicode // groups: common suffixes were cached, but common prefixes were not factored. 
- string forward, reverse; + std::string forward, reverse; Dump("[\\x{10000}\\x{10010}]", Regexp::LikePerl, &forward, &reverse); - EXPECT_EQ("3. byte [f0-f0] -> 4\n" - "4. byte [90-90] -> 5\n" - "5. byte [80-80] -> 6\n" - "6+ byte [80-80] -> 8\n" - "7. byte [90-90] -> 8\n" + EXPECT_EQ("3. byte [f0-f0] 0 -> 4\n" + "4. byte [90-90] 0 -> 5\n" + "5. byte [80-80] 0 -> 6\n" + "6+ byte [80-80] 0 -> 8\n" + "7. byte [90-90] 0 -> 8\n" "8. match! 0\n", forward); - EXPECT_EQ("3+ byte [80-80] -> 5\n" - "4. byte [90-90] -> 5\n" - "5. byte [80-80] -> 6\n" - "6. byte [90-90] -> 7\n" - "7. byte [f0-f0] -> 8\n" + EXPECT_EQ("3+ byte [80-80] 0 -> 5\n" + "4. byte [90-90] 0 -> 5\n" + "5. byte [80-80] 0 -> 6\n" + "6. byte [90-90] 0 -> 7\n" + "7. byte [f0-f0] 0 -> 8\n" "8. match! 0\n", reverse); Dump("[\\x{8000}-\\x{10FFF}]", Regexp::LikePerl, &forward, &reverse); - EXPECT_EQ("3+ byte [e8-ef] -> 5\n" - "4. byte [f0-f0] -> 8\n" - "5. byte [80-bf] -> 6\n" - "6. byte [80-bf] -> 7\n" + EXPECT_EQ("3+ byte [e8-ef] 0 -> 5\n" + "4. byte [f0-f0] 0 -> 8\n" + "5. byte [80-bf] 0 -> 6\n" + "6. byte [80-bf] 0 -> 7\n" "7. match! 0\n" - "8. byte [90-90] -> 5\n", + "8. byte [90-90] 0 -> 5\n", forward); - EXPECT_EQ("3. byte [80-bf] -> 4\n" - "4. byte [80-bf] -> 5\n" - "5+ byte [e8-ef] -> 7\n" - "6. byte [90-90] -> 8\n" + EXPECT_EQ("3. byte [80-bf] 0 -> 4\n" + "4. byte [80-bf] 0 -> 5\n" + "5+ byte [e8-ef] 0 -> 7\n" + "6. byte [90-90] 0 -> 8\n" "7. match! 0\n" - "8. byte [f0-f0] -> 7\n", + "8. byte [f0-f0] 0 -> 7\n", reverse); Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse); - EXPECT_EQ("3. byte [80-bf] -> 4\n" - "4+ byte [c2-df] -> 7\n" - "5+ byte [a0-bf] -> 8\n" - "6. byte [80-bf] -> 9\n" + EXPECT_EQ("3. byte [80-bf] 0 -> 4\n" + "4+ byte [c2-df] 0 -> 7\n" + "5+ byte [a0-bf] 1 -> 8\n" + "6. byte [80-bf] 0 -> 9\n" "7. match! 0\n" - "8. byte [e0-e0] -> 7\n" - "9+ byte [e1-ef] -> 7\n" - "10+ byte [90-bf] -> 13\n" - "11+ byte [80-bf] -> 14\n" - "12. byte [80-8f] -> 15\n" - "13. 
byte [f0-f0] -> 7\n" - "14. byte [f1-f3] -> 7\n" - "15. byte [f4-f4] -> 7\n", + "8. byte [e0-e0] 0 -> 7\n" + "9+ byte [e1-ef] 0 -> 7\n" + "10+ byte [90-bf] 1 -> 13\n" + "11+ byte [80-bf] 1 -> 14\n" + "12. byte [80-8f] 0 -> 15\n" + "13. byte [f0-f0] 0 -> 7\n" + "14. byte [f1-f3] 0 -> 7\n" + "15. byte [f4-f4] 0 -> 7\n", reverse); } @@ -320,14 +320,14 @@ TEST(TestCompile, Bug35237384) { // Bug in the compiler caused inefficient bytecode to be generated for // nested nullable subexpressions. - string forward; + std::string forward; Dump("a**{3,}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL); - EXPECT_EQ("3+ byte [61-61] -> 3\n" + EXPECT_EQ("3+ byte [61-61] 1 -> 3\n" "4. nop -> 5\n" - "5+ byte [61-61] -> 5\n" + "5+ byte [61-61] 1 -> 5\n" "6. nop -> 7\n" - "7+ byte [61-61] -> 7\n" + "7+ byte [61-61] 1 -> 7\n" "8. match! 0\n", forward); @@ -335,17 +335,17 @@ TEST(TestCompile, Bug35237384) { EXPECT_EQ("3+ nop -> 6\n" "4+ nop -> 8\n" "5. nop -> 21\n" - "6+ byte [61-61] -> 6\n" + "6+ byte [61-61] 1 -> 6\n" "7. nop -> 3\n" - "8+ byte [62-62] -> 8\n" + "8+ byte [62-62] 1 -> 8\n" "9. nop -> 3\n" - "10+ byte [61-61] -> 10\n" + "10+ byte [61-61] 1 -> 10\n" "11. nop -> 21\n" - "12+ byte [62-62] -> 12\n" + "12+ byte [62-62] 1 -> 12\n" "13. nop -> 21\n" - "14+ byte [61-61] -> 14\n" + "14+ byte [61-61] 1 -> 14\n" "15. nop -> 18\n" - "16+ byte [62-62] -> 16\n" + "16+ byte [62-62] 1 -> 16\n" "17. nop -> 18\n" "18+ nop -> 14\n" "19+ nop -> 16\n" @@ -359,38 +359,38 @@ TEST(TestCompile, Bug35237384) { EXPECT_EQ("3+ nop -> 36\n" "4+ nop -> 31\n" "5. nop -> 33\n" - "6+ byte [00-09] -> 8\n" - "7. byte [0b-ff] -> 8\n" + "6+ byte [00-09] 0 -> 8\n" + "7. byte [0b-ff] 0 -> 8\n" "8+ nop -> 6\n" "9+ nop -> 29\n" "10. nop -> 28\n" - "11+ byte [00-09] -> 13\n" - "12. byte [0b-ff] -> 13\n" + "11+ byte [00-09] 0 -> 13\n" + "12. byte [0b-ff] 0 -> 13\n" "13+ nop -> 11\n" "14+ nop -> 26\n" "15. nop -> 28\n" - "16+ byte [00-09] -> 18\n" - "17. 
byte [0b-ff] -> 18\n" + "16+ byte [00-09] 0 -> 18\n" + "17. byte [0b-ff] 0 -> 18\n" "18+ nop -> 16\n" "19+ nop -> 36\n" "20. nop -> 33\n" - "21+ byte [00-09] -> 23\n" - "22. byte [0b-ff] -> 23\n" + "21+ byte [00-09] 0 -> 23\n" + "22. byte [0b-ff] 0 -> 23\n" "23+ nop -> 21\n" "24+ nop -> 31\n" "25. nop -> 33\n" "26+ nop -> 28\n" - "27. byte [53-53] -> 11\n" + "27. byte [53-53] 0 -> 11\n" "28. match! 0\n" "29+ nop -> 28\n" - "30. byte [53-53] -> 6\n" + "30. byte [53-53] 0 -> 6\n" "31+ nop -> 33\n" - "32. byte [53-53] -> 21\n" + "32. byte [53-53] 0 -> 21\n" "33+ nop -> 29\n" "34+ nop -> 26\n" "35. nop -> 28\n" "36+ nop -> 33\n" - "37. byte [53-53] -> 16\n", + "37. byte [53-53] 0 -> 16\n", forward); } diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc index eb44b4a..09d31f8 100644 --- a/re2/testing/dfa_test.cc +++ b/re2/testing/dfa_test.cc @@ -33,7 +33,7 @@ static void DoBuild(Prog* prog) { TEST(Multithreaded, BuildEntireDFA) { // Create regexp with 2^FLAGS_size states in DFA. - string s = "a"; + std::string s = "a"; for (int i = 0; i < FLAGS_size; i++) s += "[ab]"; s += "b"; @@ -116,7 +116,7 @@ TEST(SingleThreaded, BuildEntireDFA) { // DeBruijn string causes the DFA to need to create a new state at every // position in the input, never reusing any states until it gets to the // end of the string. This is the worst possible case for DFA execution. -static string DeBruijnString(int n) { +static std::string DeBruijnString(int n) { CHECK_LT(n, static_cast<int>(8*sizeof(int))); CHECK_GT(n, 0); @@ -124,7 +124,7 @@ static string DeBruijnString(int n) { for (int i = 0; i < 1<<n; i++) did[i] = false; - string s; + std::string s; for (int i = 0; i < n-1; i++) s.append("0"); int bits = 0; @@ -180,8 +180,8 @@ TEST(SingleThreaded, SearchDFA) { // The De Bruijn string for n ends with a 1 followed by n 0s in a row, // which is not a match for 0[01]{n}$. Adding one more 0 is a match. 
- string no_match = DeBruijnString(n); - string match = no_match + "0"; + std::string no_match = DeBruijnString(n); + std::string match = no_match + "0"; int64_t usage; int64_t peak_usage; @@ -243,8 +243,8 @@ TEST(Multithreaded, SearchDFA) { Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n), Regexp::LikePerl, NULL); ASSERT_TRUE(re != NULL); - string no_match = DeBruijnString(n); - string match = no_match + "0"; + std::string no_match = DeBruijnString(n); + std::string match = no_match + "0"; // Check that single-threaded code works. { @@ -356,7 +356,7 @@ TEST(DFA, Callback) { ASSERT_TRUE(re != NULL); Prog* prog = re->CompileToProg(0); ASSERT_TRUE(prog != NULL); - string dump; + std::string dump; prog->BuildEntireDFA(Prog::kLongestMatch, [&](const int* next, bool match) { ASSERT_TRUE(next != NULL); if (!dump.empty()) diff --git a/re2/testing/dump.cc b/re2/testing/dump.cc index b60bf24..743f7b5 100644 --- a/re2/testing/dump.cc +++ b/re2/testing/dump.cc @@ -57,7 +57,7 @@ static const char* kOpcodeNames[] = { // Create string representation of regexp with explicit structure. // Nothing pretty, just for testing. -static void DumpRegexpAppending(Regexp* re, string* s) { +static void DumpRegexpAppending(Regexp* re, std::string* s) { if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) { StringAppendF(s, "op%d", re->op()); } else { @@ -136,7 +136,7 @@ static void DumpRegexpAppending(Regexp* re, string* s) { DumpRegexpAppending(re->sub()[0], s); break; case kRegexpCharClass: { - string sep; + std::string sep; for (CharClass::iterator it = re->cc()->begin(); it != re->cc()->end(); ++it) { RuneRange rr = *it; @@ -153,8 +153,8 @@ static void DumpRegexpAppending(Regexp* re, string* s) { s->append("}"); } -string Regexp::Dump() { - string s; +std::string Regexp::Dump() { + std::string s; // Make sure being called from a unit test. 
if (FLAGS_test_tmpdir.empty()) { diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc index 29c5def..9ead27e 100644 --- a/re2/testing/exhaustive1_test.cc +++ b/re2/testing/exhaustive1_test.cc @@ -16,7 +16,7 @@ namespace re2 { // Test simple repetition operators TEST(Repetition, Simple) { - std::vector<string> ops = Split(" ", + std::vector<std::string> ops = Split(" ", "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} " "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} " "%s* %s+ %s? %s*? %s+? %s??"); @@ -28,7 +28,7 @@ TEST(Repetition, Simple) { // Test capturing parens -- (a) -- inside repetition operators TEST(Repetition, Capturing) { - std::vector<string> ops = Split(" ", + std::vector<std::string> ops = Split(" ", "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} " "%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} " "%s* %s+ %s? %s*? %s+? %s??"); @@ -36,7 +36,7 @@ TEST(Repetition, Capturing) { 7, Explode("ab"), "(?:%s)", ""); // This would be a great test, but it runs forever when PCRE is enabled. - if (FLAGS_regexp_engines.find("PCRE") == string::npos) + if (FLAGS_regexp_engines.find("PCRE") == std::string::npos) ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops, 50, Explode("a"), "(?:%s)", ""); } diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc index ba38a6e..ce4235b 100644 --- a/re2/testing/exhaustive2_test.cc +++ b/re2/testing/exhaustive2_test.cc @@ -24,8 +24,8 @@ TEST(EmptyString, Exhaustive) { // Test escaped versions of regexp syntax. TEST(Punctuation, Literals) { - std::vector<string> alphabet = Explode("()*+?{}[]\\^$."); - std::vector<string> escaped = alphabet; + std::vector<std::string> alphabet = Explode("()*+?{}[]\\^$."); + std::vector<std::string> escaped = alphabet; for (size_t i = 0; i < escaped.size(); i++) escaped[i] = "\\" + escaped[i]; ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(), @@ -63,7 +63,7 @@ TEST(LineEnds, Exhaustive) { // provides a mechanism, and RE2 could add new syntax if needed. 
// // TEST(Newlines, Exhaustive) { -// std::vector<string> empty_vector; +// std::vector<std::string> empty_vector; // ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"), // RegexpGenerator::EgrepOps(), // 4, Explode("a\n"), ""); diff --git a/re2/testing/exhaustive3_test.cc b/re2/testing/exhaustive3_test.cc index cf09e18..1fe46b6 100644 --- a/re2/testing/exhaustive3_test.cc +++ b/re2/testing/exhaustive3_test.cc @@ -17,7 +17,7 @@ namespace re2 { // Test simple character classes by themselves. TEST(CharacterClasses, Exhaustive) { - std::vector<string> atoms = Split(" ", + std::vector<std::string> atoms = Split(" ", "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), 5, Explode("ab"), "", ""); @@ -25,25 +25,25 @@ TEST(CharacterClasses, Exhaustive) { // Test simple character classes inside a___b (for example, a[a]b). TEST(CharacterClasses, ExhaustiveAB) { - std::vector<string> atoms = Split(" ", + std::vector<std::string> atoms = Split(" ", "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), 5, Explode("ab"), "a%sb", ""); } // Returns UTF8 for Rune r -static string UTF8(Rune r) { +static std::string UTF8(Rune r) { char buf[UTFmax+1]; buf[runetochar(buf, &r)] = 0; - return string(buf); + return std::string(buf); } // Returns a vector of "interesting" UTF8 characters. // Unicode is now too big to just return all of them, // so UTF8Characters return a set likely to be good test cases. -static const std::vector<string>& InterestingUTF8() { +static const std::vector<std::string>& InterestingUTF8() { static bool init; - static std::vector<string> v; + static std::vector<std::string> v; if (init) return v; @@ -70,12 +70,12 @@ static const std::vector<string>& InterestingUTF8() { // Test interesting UTF-8 characters against character classes. 
TEST(InterestingUTF8, SingleOps) { - std::vector<string> atoms = Split(" ", + std::vector<std::string> atoms = Split(" ", ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); - std::vector<string> ops; // no ops + std::vector<std::string> ops; // no ops ExhaustiveTest(1, 0, atoms, ops, 1, InterestingUTF8(), "", ""); } @@ -83,13 +83,13 @@ TEST(InterestingUTF8, SingleOps) { // Test interesting UTF-8 characters against character classes, // but wrap everything inside AB. TEST(InterestingUTF8, AB) { - std::vector<string> atoms = Split(" ", + std::vector<std::string> atoms = Split(" ", ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); - std::vector<string> ops; // no ops - std::vector<string> alpha = InterestingUTF8(); + std::vector<std::string> ops; // no ops + std::vector<std::string> alpha = InterestingUTF8(); for (size_t i = 0; i < alpha.size(); i++) alpha[i] = "a" + alpha[i] + "b"; ExhaustiveTest(1, 0, atoms, ops, diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc index 4f6335f..7e5dd14 100644 --- a/re2/testing/exhaustive_tester.cc +++ b/re2/testing/exhaustive_tester.cc @@ -73,9 +73,9 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc // Processes a single generated regexp. // Compiles it using Regexp interface and PCRE, and then // checks that NFA, DFA, and PCRE all return the same results. 
-void ExhaustiveTester::HandleRegexp(const string& const_regexp) { +void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) { regexps_++; - string regexp = const_regexp; + std::string regexp = const_regexp; if (!topwrapper_.empty()) regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str()); @@ -142,12 +142,12 @@ void ExhaustiveTester::HandleRegexp(const string& const_regexp) { // Runs an exhaustive test on the given parameters. void ExhaustiveTest(int maxatoms, int maxops, - const std::vector<string>& alphabet, - const std::vector<string>& ops, + const std::vector<std::string>& alphabet, + const std::vector<std::string>& ops, int maxstrlen, - const std::vector<string>& stralphabet, - const string& wrapper, - const string& topwrapper) { + const std::vector<std::string>& stralphabet, + const std::string& wrapper, + const std::string& topwrapper) { if (RE2_DEBUG_MODE) { if (maxatoms > 1) maxatoms--; @@ -169,9 +169,9 @@ void ExhaustiveTest(int maxatoms, int maxops, // Runs an exhaustive test using the given parameters and // the basic egrep operators. 
-void EgrepTest(int maxatoms, int maxops, const string& alphabet, - int maxstrlen, const string& stralphabet, - const string& wrapper) { +void EgrepTest(int maxatoms, int maxops, const std::string& alphabet, + int maxstrlen, const std::string& stralphabet, + const std::string& wrapper) { const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" }; for (int i = 0; i < arraysize(tops); i++) { diff --git a/re2/testing/exhaustive_tester.h b/re2/testing/exhaustive_tester.h index 769d8b5..3a14282 100644 --- a/re2/testing/exhaustive_tester.h +++ b/re2/testing/exhaustive_tester.h @@ -42,12 +42,12 @@ class ExhaustiveTester : public RegexpGenerator { public: ExhaustiveTester(int maxatoms, int maxops, - const std::vector<string>& alphabet, - const std::vector<string>& ops, + const std::vector<std::string>& alphabet, + const std::vector<std::string>& ops, int maxstrlen, - const std::vector<string>& stralphabet, - const string& wrapper, - const string& topwrapper) + const std::vector<std::string>& stralphabet, + const std::string& wrapper, + const std::string& topwrapper) : RegexpGenerator(maxatoms, maxops, alphabet, ops), strgen_(maxstrlen, stralphabet), wrapper_(wrapper), @@ -60,7 +60,7 @@ class ExhaustiveTester : public RegexpGenerator { int failures() { return failures_; } // Needed for RegexpGenerator interface. - void HandleRegexp(const string& regexp); + void HandleRegexp(const std::string& regexp); // Causes testing to generate random input strings. void RandomStrings(int32_t seed, int32_t count) { @@ -71,8 +71,8 @@ class ExhaustiveTester : public RegexpGenerator { private: StringGenerator strgen_; - string wrapper_; // Regexp wrapper - either empty or has one %s. - string topwrapper_; // Regexp top-level wrapper. + std::string wrapper_; // Regexp wrapper - either empty or has one %s. + std::string topwrapper_; // Regexp top-level wrapper. int regexps_; // Number of HandleRegexp calls int tests_; // Number of regexp tests. int failures_; // Number of tests failed. 
@@ -87,18 +87,18 @@ class ExhaustiveTester : public RegexpGenerator { // Runs an exhaustive test on the given parameters. void ExhaustiveTest(int maxatoms, int maxops, - const std::vector<string>& alphabet, - const std::vector<string>& ops, + const std::vector<std::string>& alphabet, + const std::vector<std::string>& ops, int maxstrlen, - const std::vector<string>& stralphabet, - const string& wrapper, - const string& topwrapper); + const std::vector<std::string>& stralphabet, + const std::string& wrapper, + const std::string& topwrapper); // Runs an exhaustive test using the given parameters and // the basic egrep operators. -void EgrepTest(int maxatoms, int maxops, const string& alphabet, - int maxstrlen, const string& stralphabet, - const string& wrapper); +void EgrepTest(int maxatoms, int maxops, const std::string& alphabet, + int maxstrlen, const std::string& stralphabet, + const std::string& wrapper); } // namespace re2 diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc index 867eac6..835ebcf 100644 --- a/re2/testing/filtered_re2_test.cc +++ b/re2/testing/filtered_re2_test.cc @@ -19,7 +19,7 @@ struct FilterTestVars { FilterTestVars() {} explicit FilterTestVars(int min_atom_len) : f(min_atom_len) {} - std::vector<string> atoms; + std::vector<std::string> atoms; std::vector<int> atom_indices; std::vector<int> matches; RE2::Options opts; @@ -157,7 +157,7 @@ bool CheckExpectedAtoms(const char* atoms[], int n, const char* testname, struct FilterTestVars* v) { - std::vector<string> expected; + std::vector<std::string> expected; for (int i = 0; i < n; i++) expected.push_back(atoms[i]); @@ -200,8 +200,8 @@ TEST(FilteredRE2Test, AtomTests) { EXPECT_EQ(0, nfail); } -void FindAtomIndices(const std::vector<string>& atoms, - const std::vector<string>& matched_atoms, +void FindAtomIndices(const std::vector<std::string>& atoms, + const std::vector<std::string>& matched_atoms, std::vector<int>* atom_indices) { atom_indices->clear(); for (size_t 
i = 0; i < matched_atoms.size(); i++) { @@ -220,13 +220,13 @@ TEST(FilteredRE2Test, MatchEmptyPattern) { // We are using the regexps used in one of the atom tests // for this test. Adding the EXPECT here to make sure // the index we use for the test is for the correct test. - EXPECT_EQ("CheckEmptyPattern", string(t->testname)); + EXPECT_EQ("CheckEmptyPattern", std::string(t->testname)); int nregexp; for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) if (t->regexps[nregexp] == NULL) break; AddRegexpsAndCompile(t->regexps, nregexp, &v); - string text = "0123"; + std::string text = "0123"; std::vector<int> atom_ids; std::vector<int> matching_regexps; EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids)); @@ -237,17 +237,17 @@ TEST(FilteredRE2Test, MatchTests) { AtomTest* t = &atom_tests[2]; // We are using the regexps used in one of the atom tests // for this test. - EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname)); + EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname)); int nregexp; for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) if (t->regexps[nregexp] == NULL) break; AddRegexpsAndCompile(t->regexps, nregexp, &v); - string text = "abc121212xyz"; + std::string text = "abc121212xyz"; // atoms = abc std::vector<int> atom_ids; - std::vector<string> atoms; + std::vector<std::string> atoms; atoms.push_back("abc"); FindAtomIndices(v.atoms, atoms, &atom_ids); std::vector<int> matching_regexps; diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc index d2b04fc..5cb3952 100644 --- a/re2/testing/parse_test.cc +++ b/re2/testing/parse_test.cc @@ -224,7 +224,7 @@ bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) { } void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags, - const string& title) { + const std::string& title) { Regexp** re = new Regexp*[ntests]; for (int i = 0; i < ntests; i++) { RegexpStatus status; @@ -235,14 +235,16 @@ void TestParse(const Test* tests, int ntests, Regexp::ParseFlags 
flags, re[i] = Regexp::Parse(tests[i].regexp, f, &status); ASSERT_TRUE(re[i] != NULL) << " " << tests[i].regexp << " " << status.Text(); - string s = re[i]->Dump(); - EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp - << "\nparse: " << string(tests[i].parse) << " s: " << s << " flag=" << f; + std::string s = re[i]->Dump(); + EXPECT_EQ(std::string(tests[i].parse), s) + << "Regexp: " << tests[i].regexp + << "\nparse: " << std::string(tests[i].parse) + << " s: " << s << " flag=" << f; } for (int i = 0; i < ntests; i++) { for (int j = 0; j < ntests; j++) { - EXPECT_EQ(string(tests[i].parse) == string(tests[j].parse), + EXPECT_EQ(std::string(tests[i].parse) == std::string(tests[j].parse), RegexpEqualTestingOnly(re[i], re[j])) << "Regexp: " << tests[i].regexp << " " << tests[j].regexp; } @@ -453,9 +455,12 @@ TEST(TestToString, EquivalentParse) { } Regexp* re = Regexp::Parse(tests[i].regexp, f, &status); ASSERT_TRUE(re != NULL) << " " << tests[i].regexp << " " << status.Text(); - string s = re->Dump(); - EXPECT_EQ(string(tests[i].parse), s) << " " << tests[i].regexp << " " << string(tests[i].parse) << " " << s; - string t = re->ToString(); + std::string s = re->Dump(); + EXPECT_EQ(std::string(tests[i].parse), s) + << "Regexp: " << tests[i].regexp + << "\nparse: " << std::string(tests[i].parse) + << " s: " << s << " flag=" << f; + std::string t = re->ToString(); if (t != tests[i].regexp) { // If ToString didn't return the original regexp, // it must have found one with fewer parens. @@ -468,8 +473,8 @@ TEST(TestToString, EquivalentParse) { // Test that if we parse the new regexp we get the same structure. 
Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status); ASSERT_TRUE(nre != NULL) << " reparse " << t << " " << status.Text(); - string ss = nre->Dump(); - string tt = nre->ToString(); + std::string ss = nre->Dump(); + std::string tt = nre->ToString(); if (s != ss || t != tt) LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t; EXPECT_EQ(s, ss); diff --git a/re2/testing/possible_match_test.cc b/re2/testing/possible_match_test.cc index f43a78b..438cb41 100644 --- a/re2/testing/possible_match_test.cc +++ b/re2/testing/possible_match_test.cc @@ -21,8 +21,8 @@ namespace re2 { // Test that C++ strings are compared as uint8s, not int8s. // PossibleMatchRange doesn't depend on this, but callers probably will. TEST(CplusplusStrings, EightBit) { - string s = "\x70"; - string t = "\xA0"; + std::string s = "\x70"; + std::string t = "\xA0"; EXPECT_LT(s, t); } @@ -110,7 +110,7 @@ TEST(PossibleMatchRange, HandWritten) { for (int i = 0; i < arraysize(tests); i++) { for (int j = 0; j < 2; j++) { const PrefixTest& t = tests[i]; - string min, max; + std::string min, max; if (j == 0) { LOG(INFO) << "Checking regexp=" << CEscape(t.regexp); Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); @@ -132,7 +132,7 @@ TEST(PossibleMatchRange, HandWritten) { // Test cases where PossibleMatchRange should return false. TEST(PossibleMatchRange, Failures) { - string min, max; + std::string min, max; // Fails because no room to write max. 
EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0)); @@ -172,10 +172,10 @@ class PossibleMatchTester : public RegexpGenerator { public: PossibleMatchTester(int maxatoms, int maxops, - const std::vector<string>& alphabet, - const std::vector<string>& ops, + const std::vector<std::string>& alphabet, + const std::vector<std::string>& ops, int maxstrlen, - const std::vector<string>& stralphabet) + const std::vector<std::string>& stralphabet) : RegexpGenerator(maxatoms, maxops, alphabet, ops), strgen_(maxstrlen, stralphabet), regexps_(0), tests_(0) { } @@ -184,7 +184,7 @@ class PossibleMatchTester : public RegexpGenerator { int tests() { return tests_; } // Needed for RegexpGenerator interface. - void HandleRegexp(const string& regexp); + void HandleRegexp(const std::string& regexp); private: StringGenerator strgen_; @@ -198,7 +198,7 @@ class PossibleMatchTester : public RegexpGenerator { // Processes a single generated regexp. // Checks that all accepted strings agree with the prefix range. -void PossibleMatchTester::HandleRegexp(const string& regexp) { +void PossibleMatchTester::HandleRegexp(const std::string& regexp) { regexps_++; VLOG(3) << CEscape(regexp); @@ -206,7 +206,7 @@ void PossibleMatchTester::HandleRegexp(const string& regexp) { RE2 re(regexp, RE2::Latin1); ASSERT_EQ(re.error(), ""); - string min, max; + std::string min, max; if(!re.PossibleMatchRange(&min, &max, 10)) { // There's no good max for "\\C*". Can't use strcmp // because sometimes it gets embedded in more diff --git a/re2/testing/random_test.cc b/re2/testing/random_test.cc index bd0842f..c0b1fe5 100644 --- a/re2/testing/random_test.cc +++ b/re2/testing/random_test.cc @@ -22,11 +22,11 @@ namespace re2 { // (Always uses the same random seeds for reproducibility. // Can give different seeds on command line.) 
static void RandomTest(int maxatoms, int maxops, - const std::vector<string>& alphabet, - const std::vector<string>& ops, + const std::vector<std::string>& alphabet, + const std::vector<std::string>& ops, int maxstrlen, - const std::vector<string>& stralphabet, - const string& wrapper) { + const std::vector<std::string>& stralphabet, + const std::string& wrapper) { // Limit to smaller test cases in debug mode, // because everything is so much slower. if (RE2_DEBUG_MODE) { @@ -79,7 +79,7 @@ TEST(Random, BigEgrepCaptures) { // character classes like \d. (Adding larger character classes would // make for too many possibilities.) TEST(Random, Complicated) { - std::vector<string> ops = Split(" ", + std::vector<std::string> ops = Split(" ", "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? " "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} " "%s{2} %s{2,} %s{3,4} %s{4,5}"); @@ -87,11 +87,11 @@ TEST(Random, Complicated) { // Use (?:\b) and (?:\B) instead of \b and \B, // because PCRE rejects \b* but accepts (?:\b)*. // Ditto ^ and $. - std::vector<string> atoms = Split(" ", + std::vector<std::string> atoms = Split(" ", ". 
(?:^) (?:$) \\a \\f \\n \\r \\t \\v " "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) " "a (a) b c - \\\\"); - std::vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a"); + std::vector<std::string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a"); RandomTest(10, 10, atoms, ops, 20, alphabet, ""); } diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc index cae956c..2d692a6 100644 --- a/re2/testing/re2_test.cc +++ b/re2/testing/re2_test.cc @@ -176,10 +176,10 @@ TEST(RE2, Replace) { }; for (const ReplaceTest* t = tests; t->original != NULL; t++) { - string one(t->original); + std::string one(t->original); ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite)); ASSERT_EQ(one, t->single); - string all(t->original); + std::string all(t->original); ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) << "Got: " << all; ASSERT_EQ(all, t->global); @@ -188,7 +188,7 @@ TEST(RE2, Replace) { static void TestCheckRewriteString(const char* regexp, const char* rewrite, bool expect_ok) { - string error; + std::string error; RE2 exp(regexp); bool actual_ok = exp.CheckRewriteString(rewrite, &error); EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error; @@ -211,7 +211,7 @@ TEST(CheckRewriteString, all) { } TEST(RE2, Extract) { - string s; + std::string s; ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s)); ASSERT_EQ(s, "kremvax!boris"); @@ -225,9 +225,9 @@ TEST(RE2, Extract) { TEST(RE2, Consume) { RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace - string word; + std::string word; - string s(" aaa b!@#$@#$cccc"); + std::string s(" aaa b!@#$@#$cccc"); StringPiece input(s); ASSERT_TRUE(RE2::Consume(&input, r, &word)); @@ -238,7 +238,7 @@ TEST(RE2, Consume) { } TEST(RE2, ConsumeN) { - const string s(" one two three 4"); + const std::string s(" one two three 4"); StringPiece input(s); RE2::Arg argv[2]; @@ -248,7 +248,7 @@ TEST(RE2, ConsumeN) { 
EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one". // 1 arg - string word; + std::string word; argv[0] = &word; EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1)); EXPECT_EQ("two", word); @@ -263,9 +263,9 @@ TEST(RE2, ConsumeN) { TEST(RE2, FindAndConsume) { RE2 r("(\\w+)"); // matches a word - string word; + std::string word; - string s(" aaa b!@#$@#$cccc"); + std::string s(" aaa b!@#$@#$cccc"); StringPiece input(s); ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word)); @@ -285,7 +285,7 @@ TEST(RE2, FindAndConsume) { } TEST(RE2, FindAndConsumeN) { - const string s(" one two three 4"); + const std::string s(" one two three 4"); StringPiece input(s); RE2::Arg argv[2]; @@ -295,7 +295,7 @@ TEST(RE2, FindAndConsumeN) { EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one". // 1 arg - string word; + std::string word; argv[0] = &word; EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1)); EXPECT_EQ("two", word); @@ -310,9 +310,9 @@ TEST(RE2, FindAndConsumeN) { TEST(RE2, MatchNumberPeculiarity) { RE2 r("(foo)|(bar)|(baz)"); - string word1; - string word2; - string word3; + std::string word1; + std::string word2; + std::string word3; ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3)); ASSERT_EQ(word1, "foo"); @@ -328,7 +328,7 @@ TEST(RE2, MatchNumberPeculiarity) { ASSERT_EQ(word3, "baz"); ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3)); - string a; + std::string a; ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a)); ASSERT_EQ(a, ""); } @@ -351,7 +351,7 @@ TEST(RE2, Match) { ASSERT_EQ(group[2], "chrisr"); ASSERT_EQ(group[3], "9000"); - string all, host; + std::string all, host; int port; ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); ASSERT_EQ(all, "chrisr:9000"); @@ -361,7 +361,7 @@ TEST(RE2, Match) { static void TestRecursion(int size, const char* pattern) { // Fill up a string repeating the pattern given - string domain; + std::string domain; 
domain.resize(size); size_t patlen = strlen(pattern); for (int i = 0; i < size; i++) { @@ -374,9 +374,9 @@ static void TestRecursion(int size, const char* pattern) { // A meta-quoted string, interpreted as a pattern, should always match // the original unquoted string. -static void TestQuoteMeta(const string& unquoted, +static void TestQuoteMeta(const std::string& unquoted, const RE2::Options& options = RE2::DefaultOptions) { - string quoted = RE2::QuoteMeta(unquoted); + std::string quoted = RE2::QuoteMeta(unquoted); RE2 re(quoted, options); EXPECT_TRUE(RE2::FullMatch(unquoted, re)) << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; @@ -385,9 +385,9 @@ static void TestQuoteMeta(const string& unquoted, // A meta-quoted string, interpreted as a pattern, should always match // the original unquoted string. static void NegativeTestQuoteMeta( - const string& unquoted, const string& should_not_match, + const std::string& unquoted, const std::string& should_not_match, const RE2::Options& options = RE2::DefaultOptions) { - string quoted = RE2::QuoteMeta(unquoted); + std::string quoted = RE2::QuoteMeta(unquoted); RE2 re(quoted, options); EXPECT_FALSE(RE2::FullMatch(should_not_match, re)) << "Unquoted='" << unquoted << "', quoted='" << quoted << "'."; @@ -440,7 +440,7 @@ TEST(QuoteMeta, UTF8) { } TEST(QuoteMeta, HasNull) { - string has_null; + std::string has_null; // string with one null character has_null += '\0'; @@ -543,14 +543,14 @@ TEST(Capture, NamedGroups) { { RE2 re("(hello world)"); ASSERT_EQ(re.NumberOfCapturingGroups(), 1); - const std::map<string, int>& m = re.NamedCapturingGroups(); + const std::map<std::string, int>& m = re.NamedCapturingGroups(); ASSERT_EQ(m.size(), 0); } { RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); ASSERT_EQ(re.NumberOfCapturingGroups(), 6); - const std::map<string, int>& m = re.NamedCapturingGroups(); + const std::map<std::string, int>& m = re.NamedCapturingGroups(); ASSERT_EQ(m.size(), 4); 
ASSERT_EQ(m.find("A")->second, 1); ASSERT_EQ(m.find("B")->second, 2); @@ -563,7 +563,7 @@ TEST(RE2, CapturedGroupTest) { RE2 re("directions from (?P<S>.*) to (?P<D>.*)"); int num_groups = re.NumberOfCapturingGroups(); EXPECT_EQ(2, num_groups); - string args[4]; + std::string args[4]; RE2::Arg arg0(&args[0]); RE2::Arg arg1(&args[1]); RE2::Arg arg2(&args[2]); @@ -572,7 +572,7 @@ TEST(RE2, CapturedGroupTest) { const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3}; EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose", re, matches, num_groups)); - const std::map<string, int>& named_groups = re.NamedCapturingGroups(); + const std::map<std::string, int>& named_groups = re.NamedCapturingGroups(); EXPECT_TRUE(named_groups.find("S") != named_groups.end()); EXPECT_TRUE(named_groups.find("D") != named_groups.end()); @@ -619,7 +619,7 @@ TEST(RE2, PartialMatchN) { EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1)); // Multi-arg - string s; + std::string s; argv[1] = &s; EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2)); EXPECT_EQ(42, i); @@ -662,10 +662,10 @@ TEST(RE2, FullMatchIntegerArg) { } TEST(RE2, FullMatchStringArg) { - string s; + std::string s; // String-arg ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &s)); - ASSERT_EQ(s, string("ell")); + ASSERT_EQ(s, std::string("ell")); } TEST(RE2, FullMatchStringPieceArg) { @@ -680,10 +680,10 @@ TEST(RE2, FullMatchStringPieceArg) { TEST(RE2, FullMatchMultiArg) { int i; - string s; + std::string s; // Multi-arg ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); - ASSERT_EQ(s, string("ruby")); + ASSERT_EQ(s, std::string("ruby")); ASSERT_EQ(i, 1234); } @@ -703,7 +703,7 @@ TEST(RE2, FullMatchN) { EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1)); // Multi-arg - string s; + std::string s; argv[1] = &s; EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2)); EXPECT_EQ(42, i); @@ -713,26 +713,26 @@ TEST(RE2, FullMatchN) { TEST(RE2, 
FullMatchIgnoredArg) { int i; - string s; + std::string s; // Old-school NULL should be ignored. ASSERT_TRUE( RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i)); - ASSERT_EQ(s, string("ruby")); + ASSERT_EQ(s, std::string("ruby")); ASSERT_EQ(i, 1234); // C++11 nullptr should also be ignored. ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &s, nullptr, &i)); - ASSERT_EQ(s, string("rubz")); + ASSERT_EQ(s, std::string("rubz")); ASSERT_EQ(i, 1235); } TEST(RE2, FullMatchTypedNullArg) { - string s; + std::string s; // Ignore non-void* NULL arg ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL)); - ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (string*)NULL)); + ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL)); ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL)); ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL)); ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL)); @@ -775,7 +775,7 @@ TEST(RE2, NULTerminated) { TEST(RE2, FullMatchTypeTests) { // Type tests - string zeros(1000, '0'); + std::string zeros(1000, '0'); { char c; ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c)); @@ -837,7 +837,7 @@ TEST(RE2, FullMatchTypeTests) { int64_t v; static const int64_t max = INT64_C(0x7fffffffffffffff); static const int64_t min = -max - 1; - string str; + std::string str; ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100); ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100); @@ -862,7 +862,7 @@ TEST(RE2, FullMatchTypeTests) { uint64_t v; int64_t v2; static const uint64_t max = UINT64_C(0xffffffffffffffff); - string str; + std::string str; ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100); ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100); @@ -877,7 +877,7 @@ TEST(RE2, FullMatchTypeTests) { } TEST(RE2, FloatingPointFullMatchTypes) { - string zeros(1000, '0'); + std::string zeros(1000, '0'); { float v; 
ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100); @@ -1056,7 +1056,7 @@ TEST(RE2, FullMatchArgCount) { TEST(RE2, Accessors) { // Check the pattern() accessor { - const string kPattern = "http://([^/]+)/.*"; + const std::string kPattern = "http://([^/]+)/.*"; const RE2 re(kPattern); ASSERT_EQ(kPattern, re.pattern()); } @@ -1094,13 +1094,13 @@ TEST(RE2, UTF8) { // Check that '.' matches one byte or UTF-8 character // according to the mode. - string s; + std::string s; RE2 re_test3("(.)", RE2::Latin1); ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s)); - ASSERT_EQ(s, string("\xe6")); + ASSERT_EQ(s, std::string("\xe6")); RE2 re_test4("(.)"); ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s)); - ASSERT_EQ(s, string("\xe6\x97\xa5")); + ASSERT_EQ(s, std::string("\xe6\x97\xa5")); // Check that string matches itself in either mode RE2 re_test5(utf8_string, RE2::Latin1); @@ -1121,7 +1121,7 @@ TEST(RE2, UngreedyUTF8) { { // This code always worked. const char* pattern = "\\w+X"; - const string target = "a aX"; + const std::string target = "a aX"; RE2 match_sentence(pattern, RE2::Latin1); RE2 match_sentence_re(pattern); @@ -1130,7 +1130,7 @@ TEST(RE2, UngreedyUTF8) { } { const char* pattern = "(?U)\\w+X"; - const string target = "a aX"; + const std::string target = "a aX"; RE2 match_sentence(pattern, RE2::Latin1); ASSERT_EQ(match_sentence.error(), ""); RE2 match_sentence_re(pattern); @@ -1185,7 +1185,7 @@ TEST(RE2, NoCrash) { { RE2 re(".{512}x", RE2::Quiet); ASSERT_TRUE(re.ok()); - string s; + std::string s; s.append(515, 'c'); s.append("x"); ASSERT_TRUE(RE2::PartialMatch(s, re)); @@ -1210,7 +1210,7 @@ TEST(RE2, BigCountedRepetition) { RE2 re(".{512}x", opt); ASSERT_TRUE(re.ok()); - string s; + std::string s; s.append(515, 'c'); s.append("x"); ASSERT_TRUE(RE2::PartialMatch(s, re)); @@ -1221,8 +1221,8 @@ TEST(RE2, DeepRecursion) { // segmentation violation due to stack overflow before pcre was // patched. // Again, a PCRE legacy test. 
RE2 doesn't recurse. - string comment("x*"); - string a(131072, 'a'); + std::string comment("x*"); + std::string a(131072, 'a'); comment += a; comment += "*x"; RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)"); @@ -1232,8 +1232,8 @@ TEST(RE2, DeepRecursion) { // Suggested by Josh Hyman. Failed when SearchOnePass was // not implementing case-folding. TEST(CaseInsensitive, MatchAndConsume) { - string result; - string text = "A fish named *Wanda*"; + std::string result; + std::string text = "A fish named *Wanda*"; StringPiece sp(text); EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result)); @@ -1243,7 +1243,7 @@ TEST(CaseInsensitive, MatchAndConsume) { // RE2 should permit implicit conversions from string, StringPiece, const char*, // and C string literals. TEST(RE2, ImplicitConversions) { - string re_string("."); + std::string re_string("."); StringPiece re_stringpiece("."); const char* re_cstring = "."; EXPECT_TRUE(RE2::PartialMatch("e", re_string)); @@ -1255,12 +1255,12 @@ TEST(RE2, ImplicitConversions) { // Bugs introduced by 8622304 TEST(RE2, CL8622304) { // reported by ingow - string dir; + std::string dir; EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails // reported by jacobsa - string key, val; + std::string key, val; EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true", "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?", &key, @@ -1364,8 +1364,8 @@ TEST(RE2, BitstateCaptureBug) { // C++ version of bug 609710. 
TEST(RE2, UnicodeClasses) { - const string str = "ABCDEFGHIèšæ°¸é‹’"; - string a, b, c; + const std::string str = "ABCDEFGHIèšæ°¸é‹’"; + std::string a, b, c; EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}")); EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}")); @@ -1490,7 +1490,7 @@ TEST(RE2, NullVsEmptyStringSubmatches) { TEST(RE2, Bug1816809) { RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))"); StringPiece piece("llx-3;llx4"); - string x; + std::string x; EXPECT_TRUE(RE2::Consume(&piece, re, &x)); } @@ -1507,8 +1507,8 @@ TEST(RE2, CapturingGroupNames) { // 12 3 45 6 7 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))"); EXPECT_TRUE(re.ok()); - const std::map<int, string>& have = re.CapturingGroupNames(); - std::map<int, string> want; + const std::map<int, std::string>& have = re.CapturingGroupNames(); + std::map<int, std::string> want; want[3] = "G2"; want[6] = "G2"; want[7] = "G1"; @@ -1582,7 +1582,7 @@ TEST(RE2, Bug18523943) { RE2 re((const char*)b, opt); ASSERT_TRUE(re.ok()); - string s1; + std::string s1; ASSERT_TRUE(RE2::PartialMatch((const char*)a, re, &s1)); } @@ -1606,7 +1606,7 @@ TEST(RE2, Bug26356109) { RE2 re("a\\C*?c|a\\C*?b"); ASSERT_TRUE(re.ok()); - string s = "abc"; + std::string s = "abc"; StringPiece m; ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1)); @@ -1620,7 +1620,7 @@ TEST(RE2, Issue104) { // RE2::GlobalReplace always advanced by one byte when the empty string was // matched, which would clobber any rune that is longer than one byte. 
- string s = "bc"; + std::string s = "bc"; ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d")); ASSERT_EQ("dbdcd", s); diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc index 8b82e0b..968fb86 100644 --- a/re2/testing/regexp_benchmark.cc +++ b/re2/testing/regexp_benchmark.cc @@ -34,6 +34,7 @@ void Test() { Prog* prog = re->CompileToProg(0); CHECK(prog); CHECK(prog->IsOnePass()); + CHECK(prog->CanBitState()); const char* text = "650-253-0001"; StringPiece sp[4]; CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); @@ -61,6 +62,7 @@ void MemoryUsage() { Prog* prog = re->CompileToProg(0); CHECK(prog); CHECK(prog->IsOnePass()); + CHECK(prog->CanBitState()); fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth()); mc.Reset(); @@ -139,7 +141,7 @@ ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2; // Generate random text that won't contain the search string, // to test worst-case search behavior. -void MakeText(string* text, int nbytes) { +void MakeText(std::string* text, int nbytes) { srand(1); text->resize(nbytes); for (int i = 0; i < nbytes; i++) { @@ -156,7 +158,7 @@ void MakeText(string* text, int nbytes) { // the text for regexp iters times. 
void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { StopBenchmarkTiming(); - string s; + std::string s; MakeText(&s, nbytes); BenchmarkMemoryUsage(); StartBenchmarkTiming(); @@ -261,10 +263,10 @@ BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs( void SearchBigFixed(int iters, int nbytes, SearchImpl* search) { StopBenchmarkTiming(); - string s; + std::string s; s.append(nbytes/2, 'x'); - string regexp = "^" + s + ".*$"; - string t; + std::string regexp = "^" + s + ".*$"; + std::string t; MakeText(&t, nbytes/2); s += t; BenchmarkMemoryUsage(); @@ -289,7 +291,7 @@ BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs void FindAndConsume(int iters, int nbytes) { StopBenchmarkTiming(); - string s; + std::string s; MakeText(&s, nbytes); s.append("Hello World"); StartBenchmarkTiming(); @@ -309,7 +311,7 @@ BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) { StopBenchmarkTiming(); - string s; + std::string s; MakeText(&s, nbytes); BenchmarkMemoryUsage(); StartBenchmarkTiming(); @@ -383,7 +385,7 @@ BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPU void SearchAltMatch(int iters, int nbytes, SearchImpl* search) { StopBenchmarkTiming(); - string s; + std::string s; MakeText(&s, nbytes); BenchmarkMemoryUsage(); StartBenchmarkTiming(); @@ -604,7 +606,7 @@ BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs()); void Parse1SplitBig1(int iters, void (*run)(int, const char*, const StringPiece&)) { - string s; + std::string s; s.append(100000, 'x'); s.append("650-253-0001"); BenchmarkMemoryUsage(); @@ -624,7 +626,7 @@ BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs()); void Parse1SplitBig2(int iters, void (*run)(int, const char*, const StringPiece&)) { - string s; + std::string s; s.append("650-253-"); s.append(100000, '0'); BenchmarkMemoryUsage(); @@ 
-643,7 +645,7 @@ BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs()); // Benchmark: measure time required to parse (but not execute) // a simple regular expression. -void ParseRegexp(int iters, const string& regexp) { +void ParseRegexp(int iters, const std::string& regexp) { for (int i = 0; i < iters; i++) { Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); CHECK(re); @@ -651,7 +653,7 @@ void ParseRegexp(int iters, const string& regexp) { } } -void SimplifyRegexp(int iters, const string& regexp) { +void SimplifyRegexp(int iters, const std::string& regexp) { for (int i = 0; i < iters; i++) { Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); CHECK(re); @@ -662,7 +664,7 @@ void SimplifyRegexp(int iters, const string& regexp) { } } -void NullWalkRegexp(int iters, const string& regexp) { +void NullWalkRegexp(int iters, const std::string& regexp) { Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); CHECK(re); for (int i = 0; i < iters; i++) { @@ -671,7 +673,7 @@ void NullWalkRegexp(int iters, const string& regexp) { re->Decref(); } -void SimplifyCompileRegexp(int iters, const string& regexp) { +void SimplifyCompileRegexp(int iters, const std::string& regexp) { for (int i = 0; i < iters; i++) { Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); CHECK(re); @@ -685,7 +687,7 @@ void SimplifyCompileRegexp(int iters, const string& regexp) { } } -void CompileRegexp(int iters, const string& regexp) { +void CompileRegexp(int iters, const std::string& regexp) { for (int i = 0; i < iters; i++) { Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); CHECK(re); @@ -696,7 +698,7 @@ void CompileRegexp(int iters, const string& regexp) { } } -void CompileToProg(int iters, const string& regexp) { +void CompileToProg(int iters, const std::string& regexp) { Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); CHECK(re); for (int i = 0; i < iters; i++) { @@ -707,7 +709,7 @@ void CompileToProg(int iters, const string& regexp) { 
re->Decref(); } -void CompileByteMap(int iters, const string& regexp) { +void CompileByteMap(int iters, const std::string& regexp) { Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); CHECK(re); Prog* prog = re->CompileToProg(0); @@ -719,21 +721,22 @@ void CompileByteMap(int iters, const string& regexp) { re->Decref(); } -void CompilePCRE(int iters, const string& regexp) { +void CompilePCRE(int iters, const std::string& regexp) { for (int i = 0; i < iters; i++) { PCRE re(regexp, PCRE::UTF8); CHECK_EQ(re.error(), ""); } } -void CompileRE2(int iters, const string& regexp) { +void CompileRE2(int iters, const std::string& regexp) { for (int i = 0; i < iters; i++) { RE2 re(regexp); CHECK_EQ(re.error(), ""); } } -void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) { +void RunBuild(int iters, const std::string& regexp, + void (*run)(int, const std::string&)) { run(iters, regexp); SetBenchmarkItemsProcessed(iters); } @@ -770,7 +773,7 @@ BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs()); // the text for regexp iters times. void SearchPhone(int iters, int nbytes, ParseImpl* search) { StopBenchmarkTiming(); - string s; + std::string s; MakeText(&s, nbytes); s.append("(650) 253-0001"); BenchmarkMemoryUsage(); @@ -799,7 +802,7 @@ TODO(rsc): Make this work again. // brute force method would generate a string of length n * 2^n, but this // generates a string of length n + 2^n - 1 called a De Bruijn cycle. // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17. 
-static string DeBruijnString(int n) { +static std::string DeBruijnString(int n) { CHECK_LT(n, 8*sizeof(int)); CHECK_GT(n, 0); @@ -807,7 +810,7 @@ static string DeBruijnString(int n) { for (int i = 0; i < 1<<n; i++) did[i] = false; - string s; + std::string s; for (int i = 0; i < n-1; i++) s.append("0"); int bits = 0; @@ -828,8 +831,8 @@ static string DeBruijnString(int n) { } void CacheFill(int iters, int n, SearchImpl *srch) { - string s = DeBruijnString(n+1); - string t; + std::string s = DeBruijnString(n+1); + std::string t; for (int i = n+1; i < 20; i++) { t = s + s; using std::swap; @@ -932,6 +935,7 @@ void SearchBitState(int iters, const char* regexp, const StringPiece& text, CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); + CHECK(prog->CanBitState()); CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0), expect_match); delete prog; @@ -1019,6 +1023,7 @@ void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); + CHECK(prog->CanBitState()); for (int i = 0; i < iters; i++) CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0), expect_match); @@ -1088,6 +1093,7 @@ void Parse3BitState(int iters, const char* regexp, const StringPiece& text) { CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); + CHECK(prog->CanBitState()); StringPiece sp[4]; // 4 because sp[0] is whole match. CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); delete prog; @@ -1158,6 +1164,7 @@ void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); + CHECK(prog->CanBitState()); StringPiece sp[4]; // 4 because sp[0] is whole match. 
for (int i = 0; i < iters; i++) CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); @@ -1233,6 +1240,7 @@ void Parse1BitState(int iters, const char* regexp, const StringPiece& text) { CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); + CHECK(prog->CanBitState()); StringPiece sp[2]; // 2 because sp[0] is whole match. CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); delete prog; @@ -1290,6 +1298,7 @@ void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text CHECK(re); Prog* prog = re->CompileToProg(0); CHECK(prog); + CHECK(prog->CanBitState()); StringPiece sp[2]; // 2 because sp[0] is whole match. for (int i = 0; i < iters; i++) CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); @@ -1403,7 +1412,7 @@ BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs()); #endif BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs()); -static string http_text = +static std::string http_text = "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf" "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1"; @@ -1428,7 +1437,7 @@ BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); #endif BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); -static string smallhttp_text = +static std::string smallhttp_text = "GET /abc HTTP/1.1"; void SmallHTTPPartialMatchPCRE(int n) { @@ -1496,7 +1505,7 @@ BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs()); void FullMatchPCRE(int iter, int n, const char *regexp) { StopBenchmarkTiming(); - string s; + std::string s; MakeText(&s, n); s += "ABCDEFGHIJ"; BenchmarkMemoryUsage(); @@ -1509,7 +1518,7 @@ void FullMatchPCRE(int iter, int n, const char *regexp) { void FullMatchRE2(int iter, int n, const char *regexp) { StopBenchmarkTiming(); - string s; + std::string s; MakeText(&s, n); s += "ABCDEFGHIJ"; BenchmarkMemoryUsage(); @@ -1548,8 +1557,8 @@ void PossibleMatchRangeCommon(int iter, const char* regexp) { 
StopBenchmarkTiming(); RE2 re(regexp); StartBenchmarkTiming(); - string min; - string max; + std::string min; + std::string max; const int kMaxLen = 16; for (int i = 0; i < iter; i++) { CHECK(re.PossibleMatchRange(&min, &max, kMaxLen)); diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc index c0f26fe..1e4d3da 100644 --- a/re2/testing/regexp_generator.cc +++ b/re2/testing/regexp_generator.cc @@ -38,7 +38,7 @@ namespace re2 { // Returns a vector of the egrep regexp operators. -const std::vector<string>& RegexpGenerator::EgrepOps() { +const std::vector<std::string>& RegexpGenerator::EgrepOps() { static const char *ops[] = { "%s%s", "%s|%s", @@ -47,13 +47,13 @@ const std::vector<string>& RegexpGenerator::EgrepOps() { "%s?", "%s\\C*", }; - static std::vector<string> v(ops, ops + arraysize(ops)); + static std::vector<std::string> v(ops, ops + arraysize(ops)); return v; } RegexpGenerator::RegexpGenerator(int maxatoms, int maxops, - const std::vector<string>& atoms, - const std::vector<string>& ops) + const std::vector<std::string>& atoms, + const std::vector<std::string>& ops) : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) { // Degenerate case. if (atoms_.empty()) @@ -65,7 +65,7 @@ RegexpGenerator::RegexpGenerator(int maxatoms, int maxops, // Generates all possible regular expressions (within the parameters), // calling HandleRegexp for each one. void RegexpGenerator::Generate() { - std::vector<string> postfix; + std::vector<std::string> postfix; GeneratePostfix(&postfix, 0, 0, 0); } @@ -74,13 +74,13 @@ void RegexpGenerator::GenerateRandom(int32_t seed, int n) { rng_.seed(seed); for (int i = 0; i < n; i++) { - std::vector<string> postfix; + std::vector<std::string> postfix; GenerateRandomPostfix(&postfix, 0, 0, 0); } } // Counts and returns the number of occurrences of "%s" in s. 
-static int CountArgs(const string& s) { +static int CountArgs(const std::string& s) { const char *p = s.c_str(); int n = 0; while ((p = strstr(p, "%s")) != NULL) { @@ -103,8 +103,8 @@ static int CountArgs(const string& s) { // // The initial call should be GeneratePostfix([empty vector], 0, 0, 0). // -void RegexpGenerator::GeneratePostfix(std::vector<string>* post, int nstk, - int ops, int atoms) { +void RegexpGenerator::GeneratePostfix(std::vector<std::string>* post, + int nstk, int ops, int atoms) { if (nstk == 1) RunPostfix(*post); @@ -126,7 +126,7 @@ void RegexpGenerator::GeneratePostfix(std::vector<string>* post, int nstk, // Add operators if there are enough arguments. if (ops < maxops_) { for (size_t i = 0; i < ops_.size(); i++) { - const string& fmt = ops_[i]; + const std::string& fmt = ops_[i]; int nargs = CountArgs(fmt); if (nargs <= nstk) { post->push_back(fmt); @@ -139,8 +139,8 @@ void RegexpGenerator::GeneratePostfix(std::vector<string>* post, int nstk, // Generates a random postfix command sequence. // Stops and returns true once a single sequence has been generated. -bool RegexpGenerator::GenerateRandomPostfix(std::vector<string>* post, int nstk, - int ops, int atoms) { +bool RegexpGenerator::GenerateRandomPostfix(std::vector<std::string>* post, + int nstk, int ops, int atoms) { std::uniform_int_distribution<int> random_stop(0, maxatoms_ - atoms); std::uniform_int_distribution<int> random_bit(0, 1); std::uniform_int_distribution<int> random_ops_index( @@ -163,7 +163,7 @@ bool RegexpGenerator::GenerateRandomPostfix(std::vector<string>* post, int nstk, // Add operators if there are enough arguments. 
if (ops < maxops_ && random_bit(rng_) == 0) { - const string& fmt = ops_[random_ops_index(rng_)]; + const std::string& fmt = ops_[random_ops_index(rng_)]; int nargs = CountArgs(fmt); if (nargs <= nstk) { post->push_back(fmt); @@ -189,8 +189,8 @@ bool RegexpGenerator::GenerateRandomPostfix(std::vector<string>* post, int nstk, // Interprets the postfix command sequence to create a regular expression // passed to HandleRegexp. The results of operators like %s|%s are wrapped // in (?: ) to avoid needing to maintain a precedence table. -void RegexpGenerator::RunPostfix(const std::vector<string>& post) { - std::stack<string> regexps; +void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) { + std::stack<std::string> regexps; for (size_t i = 0; i < post.size(); i++) { switch (CountArgs(post[i])) { default: @@ -199,15 +199,15 @@ void RegexpGenerator::RunPostfix(const std::vector<string>& post) { regexps.push(post[i]); break; case 1: { - string a = regexps.top(); + std::string a = regexps.top(); regexps.pop(); regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")"); break; } case 2: { - string b = regexps.top(); + std::string b = regexps.top(); regexps.pop(); - string a = regexps.top(); + std::string a = regexps.top(); regexps.pop(); regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) + @@ -238,14 +238,14 @@ void RegexpGenerator::RunPostfix(const std::vector<string>& post) { } // Split s into an vector of strings, one for each UTF-8 character. 
-std::vector<string> Explode(const StringPiece& s) { - std::vector<string> v; +std::vector<std::string> Explode(const StringPiece& s) { + std::vector<std::string> v; for (const char *q = s.begin(); q < s.end(); ) { const char* p = q; Rune r; q += chartorune(&r, q); - v.push_back(string(p, q - p)); + v.push_back(std::string(p, q - p)); } return v; @@ -253,8 +253,8 @@ std::vector<string> Explode(const StringPiece& s) { // Split string everywhere a substring is found, returning // vector of pieces. -std::vector<string> Split(const StringPiece& sep, const StringPiece& s) { - std::vector<string> v; +std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) { + std::vector<std::string> v; if (sep.size() == 0) return Explode(s); @@ -262,14 +262,14 @@ std::vector<string> Split(const StringPiece& sep, const StringPiece& s) { const char *p = s.begin(); for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) { if (StringPiece(q, sep.size()) == sep) { - v.push_back(string(p, q - p)); + v.push_back(std::string(p, q - p)); p = q + sep.size(); q = p - 1; // -1 for ++ in loop continue; } } if (p < s.end()) - v.push_back(string(p, s.end() - p)); + v.push_back(std::string(p, s.end() - p)); return v; } diff --git a/re2/testing/regexp_generator.h b/re2/testing/regexp_generator.h index b746399..7d72aff 100644 --- a/re2/testing/regexp_generator.h +++ b/re2/testing/regexp_generator.h @@ -29,8 +29,9 @@ namespace re2 { // class RegexpGenerator { public: - RegexpGenerator(int maxatoms, int maxops, const std::vector<string>& atoms, - const std::vector<string>& ops); + RegexpGenerator(int maxatoms, int maxops, + const std::vector<std::string>& atoms, + const std::vector<std::string>& ops); virtual ~RegexpGenerator() {} // Generates all the regular expressions, calling HandleRegexp(re) for each. @@ -40,22 +41,23 @@ class RegexpGenerator { void GenerateRandom(int32_t seed, int n); // Handles a regular expression. Must be provided by subclass. 
- virtual void HandleRegexp(const string& regexp) = 0; + virtual void HandleRegexp(const std::string& regexp) = 0; // The egrep regexp operators: * + ? | and concatenation. - static const std::vector<string>& EgrepOps(); + static const std::vector<std::string>& EgrepOps(); private: - void RunPostfix(const std::vector<string>& post); - void GeneratePostfix(std::vector<string>* post, int nstk, int ops, int lits); - bool GenerateRandomPostfix(std::vector<string>* post, int nstk, int ops, - int lits); - - int maxatoms_; // Maximum number of atoms allowed in expr. - int maxops_; // Maximum number of ops allowed in expr. - std::vector<string> atoms_; // Possible atoms. - std::vector<string> ops_; // Possible ops. - std::minstd_rand0 rng_; // Random number generator. + void RunPostfix(const std::vector<std::string>& post); + void GeneratePostfix(std::vector<std::string>* post, + int nstk, int ops, int lits); + bool GenerateRandomPostfix(std::vector<std::string>* post, + int nstk, int ops, int lits); + + int maxatoms_; // Maximum number of atoms allowed in expr. + int maxops_; // Maximum number of ops allowed in expr. + std::vector<std::string> atoms_; // Possible atoms. + std::vector<std::string> ops_; // Possible ops. + std::minstd_rand0 rng_; // Random number generator. RegexpGenerator(const RegexpGenerator&) = delete; RegexpGenerator& operator=(const RegexpGenerator&) = delete; @@ -64,11 +66,11 @@ class RegexpGenerator { // Helpers for preparing arguments to RegexpGenerator constructor. // Returns one string for each character in s. -std::vector<string> Explode(const StringPiece& s); +std::vector<std::string> Explode(const StringPiece& s); // Splits string everywhere sep is found, returning // vector of pieces. 
-std::vector<string> Split(const StringPiece& sep, const StringPiece& s); +std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s); } // namespace re2 diff --git a/re2/testing/regexp_test.cc b/re2/testing/regexp_test.cc index 7830322..f7e7e92 100644 --- a/re2/testing/regexp_test.cc +++ b/re2/testing/regexp_test.cc @@ -38,7 +38,7 @@ TEST(Regexp, BigConcat) { ASSERT_EQ(x->Ref(), 1 + static_cast<int>(v.size())) << x->Ref(); Regexp* re = Regexp::Concat(v.data(), static_cast<int>(v.size()), Regexp::NoParseFlags); - ASSERT_EQ(re->ToString(), string(v.size(), 'x')); + ASSERT_EQ(re->ToString(), std::string(v.size(), 'x')); re->Decref(); ASSERT_EQ(x->Ref(), 1) << x->Ref(); x->Decref(); @@ -51,11 +51,11 @@ TEST(Regexp, NamedCaptures) { "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status); EXPECT_TRUE(status.ok()); EXPECT_EQ(4, x->NumCaptures()); - const std::map<string, int>* have = x->NamedCaptures(); + const std::map<std::string, int>* have = x->NamedCaptures(); EXPECT_TRUE(have != NULL); EXPECT_EQ(2, have->size()); // there are only two named groups in // the regexp: 'g1' and 'g2'. 
- std::map<string, int> want; + std::map<std::string, int> want; want["g1"] = 1; want["g2"] = 3; EXPECT_EQ(want, *have); @@ -70,10 +70,10 @@ TEST(Regexp, CaptureNames) { "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status); EXPECT_TRUE(status.ok()); EXPECT_EQ(4, x->NumCaptures()); - const std::map<int, string>* have = x->CaptureNames(); + const std::map<int, std::string>* have = x->CaptureNames(); EXPECT_TRUE(have != NULL); EXPECT_EQ(3, have->size()); - std::map<int, string> want; + std::map<int, std::string> want; want[1] = "g1"; want[3] = "g2"; want[4] = "g1"; diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc index 3f18d9b..749c5ad 100644 --- a/re2/testing/required_prefix_test.cc +++ b/re2/testing/required_prefix_test.cc @@ -49,18 +49,18 @@ TEST(RequiredPrefix, SimpleTests) { Regexp* re = Regexp::Parse(t.regexp, flags, NULL); ASSERT_TRUE(re != NULL) << " " << t.regexp; - string p; + std::string p; bool f; Regexp* s; ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s)) << " " << t.regexp << " " << (j==0 ? "latin1" : "utf") << " " << re->Dump(); if (t.return_value) { - ASSERT_EQ(p, string(t.prefix)) + ASSERT_EQ(p, std::string(t.prefix)) << " " << t.regexp << " " << (j==0 ? "latin1" : "utf"); ASSERT_EQ(f, t.foldcase) << " " << t.regexp << " " << (j==0 ? "latin1" : "utf"); - ASSERT_EQ(s->ToString(), string(t.suffix)) + ASSERT_EQ(s->ToString(), std::string(t.suffix)) << " " << t.regexp << " " << (j==0 ? "latin1" : "utf"); s->Decref(); } diff --git a/re2/testing/search_test.cc b/re2/testing/search_test.cc index 8adef6c..43a3952 100644 --- a/re2/testing/search_test.cc +++ b/re2/testing/search_test.cc @@ -307,6 +307,7 @@ RegexpTest simple_tests[] = { // Former bugs. { "a\\C*|ba\\C", "baba" }, + { "\\w*I\\w*", "Inc." }, }; TEST(Regexp, SearchTests) { @@ -319,7 +320,7 @@ TEST(Regexp, SearchTests) { if (LOGGING) { // Build a dummy ExhaustiveTest call that will trigger just // this one test, so that we log the test case. 
- std::vector<string> atom, alpha, ops; + std::vector<std::string> atom, alpha, ops; atom.push_back(t.regexp); alpha.push_back(t.text); ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", ""); diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc index 5cdc11f..ad20ed7 100644 --- a/re2/testing/set_test.cc +++ b/re2/testing/set_test.cc @@ -204,7 +204,7 @@ TEST(Set, Prefix) { TEST(Set, OutOfMemory) { RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED); - string a(10000, 'a'); + std::string a(10000, 'a'); ASSERT_EQ(s.Add(a, NULL), 0); ASSERT_EQ(s.Compile(), true); diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc index feef200..030cc45 100644 --- a/re2/testing/string_generator.cc +++ b/re2/testing/string_generator.cc @@ -18,7 +18,7 @@ namespace re2 { StringGenerator::StringGenerator(int maxlen, - const std::vector<string>& alphabet) + const std::vector<std::string>& alphabet) : maxlen_(maxlen), alphabet_(alphabet), generate_null_(false), random_(false), nrandom_(0) { diff --git a/re2/testing/string_generator.h b/re2/testing/string_generator.h index 5a36617..6184176 100644 --- a/re2/testing/string_generator.h +++ b/re2/testing/string_generator.h @@ -21,7 +21,7 @@ namespace re2 { class StringGenerator { public: - StringGenerator(int maxlen, const std::vector<string>& alphabet); + StringGenerator(int maxlen, const std::vector<std::string>& alphabet); ~StringGenerator() {} const StringPiece& Next(); @@ -41,12 +41,12 @@ class StringGenerator { bool RandomDigits(); // Global state. - int maxlen_; // Maximum length string to generate. - std::vector<string> alphabet_; // Alphabet, one string per letter. + int maxlen_; // Maximum length string to generate. + std::vector<std::string> alphabet_; // Alphabet, one string per letter. // Iteration state. StringPiece sp_; // Last StringPiece returned by Next(). - string s_; // String data in last StringPiece returned by Next(). + std::string s_; // String data in last StringPiece returned by Next(). 
bool hasnext_; // Whether Next() can be called again. std::vector<int> digits_; // Alphabet indices for next string. bool generate_null_; // Whether to generate a NULL StringPiece next. diff --git a/re2/testing/string_generator_test.cc b/re2/testing/string_generator_test.cc index 2c040a3..d0f84f4 100644 --- a/re2/testing/string_generator_test.cc +++ b/re2/testing/string_generator_test.cc @@ -31,12 +31,12 @@ static int64_t IntegerPower(int i, int e) { // If all of these hold, the StringGenerator is behaving. // Assumes that the alphabet is sorted, so that the generated // strings can just be compared lexicographically. -static void RunTest(int len, const string& alphabet, bool donull) { +static void RunTest(int len, const std::string& alphabet, bool donull) { StringGenerator g(len, Explode(alphabet)); int n = 0; int last_l = -1; - string last_s; + std::string last_s; if (donull) { g.GenerateNULL(); @@ -47,7 +47,7 @@ static void RunTest(int len, const string& alphabet, bool donull) { } while (g.HasNext()) { - string s = string(g.Next()); + std::string s = std::string(g.Next()); n++; // Check that all characters in s appear in alphabet. diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc index c37aada..92b5972 100644 --- a/re2/testing/tester.cc +++ b/re2/testing/tester.cc @@ -66,7 +66,7 @@ static uint32_t Engines() { cached_engines = ~0; } else { for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) - if (FLAGS_regexp_engines.find(EngineName(i)) != string::npos) + if (FLAGS_regexp_engines.find(EngineName(i)) != std::string::npos) cached_engines |= 1<<i; } @@ -97,7 +97,8 @@ typedef TestInstance::Result Result; // Formats a single capture range s in text in the form (a,b) // where a and b are the starting and ending offsets of s in text. 
-static string FormatCapture(const StringPiece& text, const StringPiece& s) { +static std::string FormatCapture(const StringPiece& text, + const StringPiece& s) { if (s.begin() == NULL) return "(?,?)"; return StringPrintf("(%td,%td)", @@ -113,7 +114,7 @@ static bool NonASCII(const StringPiece& text) { } // Returns string representation of match kind. -static string FormatKind(Prog::MatchKind kind) { +static std::string FormatKind(Prog::MatchKind kind) { switch (kind) { case Prog::kFullMatch: return "full match"; @@ -128,7 +129,7 @@ static string FormatKind(Prog::MatchKind kind) { } // Returns string representation of anchor kind. -static string FormatAnchor(Prog::Anchor anchor) { +static std::string FormatAnchor(Prog::Anchor anchor) { switch (anchor) { case Prog::kAnchored: return "anchored"; @@ -140,7 +141,7 @@ static string FormatAnchor(Prog::Anchor anchor) { struct ParseMode { Regexp::ParseFlags parse_flags; - string desc; + std::string desc; }; static const Regexp::ParseFlags single_line = @@ -156,7 +157,7 @@ static ParseMode parse_modes[] = { { multi_line|Regexp::Latin1, "multiline, latin1" }, }; -static string FormatMode(Regexp::ParseFlags flags) { +static std::string FormatMode(Regexp::ParseFlags flags) { for (int i = 0; i < arraysize(parse_modes); i++) if (parse_modes[i].parse_flags == flags) return parse_modes[i].desc; @@ -220,7 +221,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind, } // Create re string that will be used for RE and RE2. - string re = string(regexp_str); + std::string re = std::string(regexp_str); // Accomodate flags. // Regexp::Latin1 will be accomodated below. 
if (!(flags & Regexp::OneLine)) @@ -364,8 +365,8 @@ void TestInstance::RunSearch(Engine type, case kEngineOnePass: if (prog_ == NULL || - anchor == Prog::kUnanchored || !prog_->IsOnePass() || + anchor == Prog::kUnanchored || nsubmatch > Prog::kMaxOnePassCapture) { result->skipped = true; break; @@ -376,7 +377,8 @@ void TestInstance::RunSearch(Engine type, break; case kEngineBitState: - if (prog_ == NULL) { + if (prog_ == NULL || + !prog_->CanBitState()) { result->skipped = true; break; } diff --git a/re2/tostring.cc b/re2/tostring.cc index 278c310..2d06551 100644 --- a/re2/tostring.cc +++ b/re2/tostring.cc @@ -28,7 +28,7 @@ enum { }; // Helper function. See description below. -static void AppendCCRange(string* t, Rune lo, Rune hi); +static void AppendCCRange(std::string* t, Rune lo, Rune hi); // Walker to generate string in s_. // The arg pointers are actually integers giving the @@ -36,7 +36,7 @@ static void AppendCCRange(string* t, Rune lo, Rune hi); // The child_args are always NULL. class ToStringWalker : public Regexp::Walker<int> { public: - explicit ToStringWalker(string* t) : t_(t) {} + explicit ToStringWalker(std::string* t) : t_(t) {} virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, @@ -46,14 +46,14 @@ class ToStringWalker : public Regexp::Walker<int> { } private: - string* t_; // The string the walker appends to. + std::string* t_; // The string the walker appends to. 
ToStringWalker(const ToStringWalker&) = delete; ToStringWalker& operator=(const ToStringWalker&) = delete; }; -string Regexp::ToString() { - string t; +std::string Regexp::ToString() { + std::string t; ToStringWalker w(&t); w.WalkExponential(this, PrecToplevel, 100000); if (w.stopped_early()) @@ -126,7 +126,7 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { return nprec; } -static void AppendLiteral(string *t, Rune r, bool foldcase) { +static void AppendLiteral(std::string *t, Rune r, bool foldcase) { if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { t->append(1, '\\'); t->append(1, static_cast<char>(r)); @@ -303,7 +303,7 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, } // Appends a rune for use in a character class to the string t. -static void AppendCCChar(string* t, Rune r) { +static void AppendCCChar(std::string* t, Rune r) { if (0x20 <= r && r <= 0x7E) { if (strchr("[]^-\\", r)) t->append("\\"); @@ -338,7 +338,7 @@ static void AppendCCChar(string* t, Rune r) { StringAppendF(t, "\\x{%x}", static_cast<int>(r)); } -static void AppendCCRange(string* t, Rune lo, Rune hi) { +static void AppendCCRange(std::string* t, Rune lo, Rune hi) { if (lo > hi) return; AppendCCChar(t, lo); diff --git a/util/flags.h b/util/flags.h index 5af1320..e0f1f42 100644 --- a/util/flags.h +++ b/util/flags.h @@ -20,10 +20,10 @@ #define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc) #define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32_t, name, deflt, desc) -#define DEFINE_string(name, deflt, desc) DEFINE_flag(string, name, deflt, desc) +#define DEFINE_string(name, deflt, desc) DEFINE_flag(std::string, name, deflt, desc) #define DECLARE_bool(name) DECLARE_flag(bool, name) #define DECLARE_int32(name) DECLARE_flag(int32_t, name) -#define DECLARE_string(name) DECLARE_flag(string, name) +#define DECLARE_string(name) DECLARE_flag(std::string, name) #endif // UTIL_FLAGS_H_ diff --git a/util/logging.h 
b/util/logging.h index c78f6c1..5b2217f 100644 --- a/util/logging.h +++ b/util/logging.h @@ -62,7 +62,7 @@ class LogMessage { } void Flush() { stream() << "\n"; - string s = str_.str(); + std::string s = str_.str(); size_t n = s.size(); if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc flushed_ = true; diff --git a/util/pcre.cc b/util/pcre.cc index 78de292..93ac90c 100644 --- a/util/pcre.cc +++ b/util/pcre.cc @@ -99,7 +99,7 @@ const PCRE::ConsumeFunctor PCRE::Consume = { }; const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { }; // If a regular expression has no error, its error_ field points here -static const string empty_string; +static const std::string empty_string; void PCRE::Init(const char* pattern, Option options, int match_limit, int stack_limit, bool report_errors) { @@ -114,7 +114,7 @@ void PCRE::Init(const char* pattern, Option options, int match_limit, re_partial_ = NULL; if (options & ~(EnabledCompileOptions | EnabledExecOptions)) { - error_ = new string("illegal regexp option"); + error_ = new std::string("illegal regexp option"); PCREPORT(ERROR) << "Error compiling '" << pattern << "': illegal regexp option"; } else { @@ -131,13 +131,13 @@ PCRE::PCRE(const char* pattern) { PCRE::PCRE(const char* pattern, Option option) { Init(pattern, option, 0, 0, true); } -PCRE::PCRE(const string& pattern) { +PCRE::PCRE(const std::string& pattern) { Init(pattern.c_str(), None, 0, 0, true); } -PCRE::PCRE(const string& pattern, Option option) { +PCRE::PCRE(const std::string& pattern, Option option) { Init(pattern.c_str(), option, 0, 0, true); } -PCRE::PCRE(const string& pattern, const PCRE_Options& re_option) { +PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) { Init(pattern.c_str(), re_option.option(), re_option.match_limit(), re_option.stack_limit(), re_option.report_errors()); } @@ -176,7 +176,7 @@ pcre* PCRE::Compile(Anchor anchor) { } else { // Tack a '\z' at the end of PCRE. 
Parenthesize it first so that // the '\z' applies to all top-level alternatives in the regexp. - string wrapped = "(?:"; // A non-counting grouping operator + std::string wrapped = "(?:"; // A non-counting grouping operator wrapped += pattern_; wrapped += ")\\z"; re = pcre_compile(wrapped.c_str(), @@ -184,7 +184,7 @@ pcre* PCRE::Compile(Anchor anchor) { &error, &eoffset, NULL); } if (re == NULL) { - if (error_ == &empty_string) error_ = new string(error); + if (error_ == &empty_string) error_ = new std::string(error); PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error; } return re; @@ -376,7 +376,7 @@ done: } } -bool PCRE::Replace(string *str, +bool PCRE::Replace(std::string *str, const PCRE& pattern, const StringPiece& rewrite) { int vec[kVecSize] = {}; @@ -384,7 +384,7 @@ bool PCRE::Replace(string *str, if (matches == 0) return false; - string s; + std::string s; if (!pattern.Rewrite(&s, rewrite, *str, vec, matches)) return false; @@ -394,12 +394,12 @@ bool PCRE::Replace(string *str, return true; } -int PCRE::GlobalReplace(string *str, +int PCRE::GlobalReplace(std::string *str, const PCRE& pattern, const StringPiece& rewrite) { int count = 0; int vec[kVecSize] = {}; - string out; + std::string out; size_t start = 0; bool last_match_was_empty_string = false; @@ -455,7 +455,7 @@ int PCRE::GlobalReplace(string *str, bool PCRE::Extract(const StringPiece &text, const PCRE& pattern, const StringPiece &rewrite, - string *out) { + std::string *out) { int vec[kVecSize] = {}; int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); if (matches == 0) @@ -464,8 +464,8 @@ bool PCRE::Extract(const StringPiece &text, return pattern.Rewrite(out, rewrite, text, vec, matches); } -string PCRE::QuoteMeta(const StringPiece& unquoted) { - string result; +std::string PCRE::QuoteMeta(const StringPiece& unquoted) { + std::string result; result.reserve(unquoted.size() << 1); // Escape any ascii character not in [A-Za-z_0-9]. 
@@ -669,7 +669,7 @@ bool PCRE::DoMatch(const StringPiece& text, return b; } -bool PCRE::Rewrite(string *out, const StringPiece &rewrite, +bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite, const StringPiece &text, int *vec, int veclen) const { int number_of_capturing_groups = NumberOfCapturingGroups(); for (const char *s = rewrite.data(), *end = s + rewrite.size(); @@ -705,7 +705,8 @@ bool PCRE::Rewrite(string *out, const StringPiece &rewrite, return true; } -bool PCRE::CheckRewriteString(const StringPiece& rewrite, string* error) const { +bool PCRE::CheckRewriteString(const StringPiece& rewrite, + std::string* error) const { int max_token = -1; for (const char *s = rewrite.data(), *end = s + rewrite.size(); s < end; s++) { @@ -769,7 +770,7 @@ bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) { bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) { if (dest == NULL) return true; - reinterpret_cast<string*>(dest)->assign(str, n); + reinterpret_cast<std::string*>(dest)->assign(str, n); return true; } diff --git a/util/pcre.h b/util/pcre.h index 10ec4f2..644dce6 100644 --- a/util/pcre.h +++ b/util/pcre.h @@ -67,7 +67,7 @@ // // Example: extracts "ruby" into "s" and 1234 into "i" // int i; -// string s; +// std::string s; // CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); // // Example: fails because string cannot be stored in integer @@ -124,10 +124,10 @@ // which represents a sub-range of a real string. // // Example: read lines of the form "var = value" from a string. -// string contents = ...; // Fill string somehow +// std::string contents = ...; // Fill string somehow // StringPiece input(contents); // Wrap a StringPiece around it // -// string var; +// std::string var; // int value; // while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { // ...; @@ -212,21 +212,21 @@ class PCRE { // pass in a string or a "const char*" wherever an "PCRE" is expected. 
PCRE(const char* pattern); PCRE(const char* pattern, Option option); - PCRE(const string& pattern); - PCRE(const string& pattern, Option option); + PCRE(const std::string& pattern); + PCRE(const std::string& pattern, Option option); PCRE(const char *pattern, const PCRE_Options& re_option); - PCRE(const string& pattern, const PCRE_Options& re_option); + PCRE(const std::string& pattern, const PCRE_Options& re_option); ~PCRE(); // The string specification for this PCRE. E.g. // PCRE re("ab*c?d+"); // re.pattern(); // "ab*c?d+" - const string& pattern() const { return pattern_; } + const std::string& pattern() const { return pattern_; } // If PCRE could not be created properly, returns an error string. // Else returns the empty string. - const string& error() const { return *error_; } + const std::string& error() const { return *error_; } // Whether the PCRE has hit a match limit during execution. // Not thread safe. Intended only for testing. @@ -241,12 +241,12 @@ class PCRE { // Matches "text" against "pattern". If pointer arguments are // supplied, copies matched sub-patterns into them. // - // You can pass in a "const char*" or a "string" for "text". - // You can pass in a "const char*" or a "string" or a "PCRE" for "pattern". + // You can pass in a "const char*" or a "std::string" for "text". + // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern". // // The provided pointer arguments can be pointers to any scalar numeric // type, or one of: - // string (matched piece is copied to string) + // std::string (matched piece is copied to string) // StringPiece (StringPiece is mutated to point to matched piece) // T (where "bool T::ParseFrom(const char*, size_t)" exists) // (void*)NULL (the corresponding matched sub-pattern is not copied) @@ -369,14 +369,14 @@ class PCRE { // from the pattern. \0 in "rewrite" refers to the entire matching // text. 
E.g., // - // string s = "yabba dabba doo"; + // std::string s = "yabba dabba doo"; // CHECK(PCRE::Replace(&s, "b+", "d")); // // will leave "s" containing "yada dabba doo" // // Returns true if the pattern matches and a replacement occurs, // false otherwise. - static bool Replace(string *str, + static bool Replace(std::string *str, const PCRE& pattern, const StringPiece& rewrite); @@ -384,13 +384,13 @@ class PCRE { // the string with the rewrite. Replacements are not subject to // re-matching. E.g., // - // string s = "yabba dabba doo"; + // std::string s = "yabba dabba doo"; // CHECK(PCRE::GlobalReplace(&s, "b+", "d")); // // will leave "s" containing "yada dada doo" // // Returns the number of replacements made. - static int GlobalReplace(string *str, + static int GlobalReplace(std::string *str, const PCRE& pattern, const StringPiece& rewrite); @@ -403,7 +403,7 @@ class PCRE { static bool Extract(const StringPiece &text, const PCRE& pattern, const StringPiece &rewrite, - string *out); + std::string *out); // Check that the given @p rewrite string is suitable for use with // this PCRE. It checks that: @@ -418,7 +418,8 @@ class PCRE { // @param error An error message is recorded here, iff we return false. // Otherwise, it is unchanged. // @return true, iff @p rewrite is suitable for use with the PCRE. - bool CheckRewriteString(const StringPiece& rewrite, string* error) const; + bool CheckRewriteString(const StringPiece& rewrite, + std::string* error) const; // Returns a copy of 'unquoted' with all potentially meaningful // regexp characters backslash-escaped. The returned string, used @@ -427,7 +428,7 @@ class PCRE { // 1.5-2.0? // becomes: // 1\.5\-2\.0\? 
- static string QuoteMeta(const StringPiece& unquoted); + static std::string QuoteMeta(const StringPiece& unquoted); /***** Generic matching interface (not so nice to use) *****/ @@ -473,7 +474,7 @@ class PCRE { // Append the "rewrite" string, with backslash subsitutions from "text" // and "vec", to string "out". - bool Rewrite(string *out, + bool Rewrite(std::string *out, const StringPiece &rewrite, const StringPiece &text, int *vec, @@ -491,15 +492,15 @@ class PCRE { // Compile the regexp for the specified anchoring mode pcre* Compile(Anchor anchor); - string pattern_; - Option options_; - pcre* re_full_; // For full matches - pcre* re_partial_; // For partial matches - const string* error_; // Error indicator (or empty string) - bool report_errors_; // Silences error logging if false - int match_limit_; // Limit on execution resources - int stack_limit_; // Limit on stack resources (bytes) - mutable int32_t hit_limit_; // Hit limit during execution (bool)? + std::string pattern_; + Option options_; + pcre* re_full_; // For full matches + pcre* re_partial_; // For partial matches + const std::string* error_; // Error indicator (or empty string) + bool report_errors_; // Silences error logging if false + int match_limit_; // Limit on execution resources + int stack_limit_; // Limit on stack resources (bytes) + mutable int32_t hit_limit_; // Hit limit during execution (bool) PCRE(const PCRE&) = delete; PCRE& operator=(const PCRE&) = delete; @@ -584,7 +585,7 @@ class PCRE::Arg { MAKE_PARSER(unsigned char, parse_uchar); MAKE_PARSER(float, parse_float); MAKE_PARSER(double, parse_double); - MAKE_PARSER(string, parse_string); + MAKE_PARSER(std::string, parse_string); MAKE_PARSER(StringPiece, parse_stringpiece); MAKE_PARSER(short, parse_short); diff --git a/util/strutil.cc b/util/strutil.cc index 8eabfa4..cc3b857 100644 --- a/util/strutil.cc +++ b/util/strutil.cc @@ -65,17 +65,17 @@ static size_t CEscapeString(const char* src, size_t src_len, // Copies 'src' to result, 
escaping dangerous characters using // C-style escape sequences. 'src' and 'dest' should not overlap. // ---------------------------------------------------------------------- -string CEscape(const StringPiece& src) { +std::string CEscape(const StringPiece& src) { const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion char* dest = new char[dest_len]; const size_t used = CEscapeString(src.data(), src.size(), dest, dest_len); - string s = string(dest, used); + std::string s = std::string(dest, used); delete[] dest; return s; } -void PrefixSuccessor(string* prefix) { +void PrefixSuccessor(std::string* prefix) { // We can increment the last character in the string and be done // unless that character is 255, in which case we have to erase the // last character and increment the previous character, unless that @@ -92,7 +92,7 @@ void PrefixSuccessor(string* prefix) { } } -static void StringAppendV(string* dst, const char* format, va_list ap) { +static void StringAppendV(std::string* dst, const char* format, va_list ap) { // First try with a small fixed size buffer char space[1024]; @@ -137,16 +137,16 @@ static void StringAppendV(string* dst, const char* format, va_list ap) { } } -string StringPrintf(const char* format, ...) { +std::string StringPrintf(const char* format, ...) { va_list ap; va_start(ap, format); - string result; + std::string result; StringAppendV(&result, format, ap); va_end(ap); return result; } -void SStringPrintf(string* dst, const char* format, ...) { +void SStringPrintf(std::string* dst, const char* format, ...) { va_list ap; va_start(ap, format); dst->clear(); @@ -154,7 +154,7 @@ void SStringPrintf(string* dst, const char* format, ...) { va_end(ap); } -void StringAppendF(string* dst, const char* format, ...) { +void StringAppendF(std::string* dst, const char* format, ...) 
{ va_list ap; va_start(ap, format); StringAppendV(dst, format, ap); diff --git a/util/strutil.h b/util/strutil.h index 2c3c104..b16981e 100644 --- a/util/strutil.h +++ b/util/strutil.h @@ -12,11 +12,11 @@ namespace re2 { -string CEscape(const StringPiece& src); -void PrefixSuccessor(string* prefix); -string StringPrintf(const char* format, ...); -void SStringPrintf(string* dst, const char* format, ...); -void StringAppendF(string* dst, const char* format, ...); +std::string CEscape(const StringPiece& src); +void PrefixSuccessor(std::string* prefix); +std::string StringPrintf(const char* format, ...); +void SStringPrintf(std::string* dst, const char* format, ...); +void StringAppendF(std::string* dst, const char* format, ...); } // namespace re2 diff --git a/util/util.h b/util/util.h index 33d100a..3f75794 100644 --- a/util/util.h +++ b/util/util.h @@ -5,10 +5,6 @@ #ifndef UTIL_UTIL_H_ #define UTIL_UTIL_H_ -// TODO(junyer): Get rid of this. -#include <string> -using std::string; - #define arraysize(array) (int)(sizeof(array)/sizeof((array)[0])) #ifndef ATTRIBUTE_NORETURN |