summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author DongHun Kwak <dh0128.kwak@samsung.com> 2019-09-25 15:36:14 +0900
committer DongHun Kwak <dh0128.kwak@samsung.com> 2019-09-25 15:36:14 +0900
commit 25519175010718a309b1616cfae68d75cdb1f046 (patch)
tree 04a3edb2dc219d9c1c84f51432f22745ffd42099
parent de99325ffe35c346d71d37e5f60b67e29313c1f2 (diff)
download re2-25519175010718a309b1616cfae68d75cdb1f046.tar.gz
re2-25519175010718a309b1616cfae68d75cdb1f046.tar.bz2
re2-25519175010718a309b1616cfae68d75cdb1f046.zip
Imported Upstream version 20190401 (ref: upstream/20190401)
-rw-r--r-- re2/bitmap256.h 5
-rw-r--r-- re2/bitstate.cc 205
-rw-r--r-- re2/compile.cc 5
-rw-r--r-- re2/dfa.cc 20
-rw-r--r-- re2/filtered_re2.cc 2
-rw-r--r-- re2/filtered_re2.h 2
-rw-r--r-- re2/fuzzing/re2_fuzzer.cc 7
-rw-r--r-- re2/nfa.cc 24
-rw-r--r-- re2/onepass.cc 2
-rw-r--r-- re2/parse.cc 8
-rw-r--r-- re2/prefilter.cc 50
-rw-r--r-- re2/prefilter.h 10
-rw-r--r-- re2/prefilter_tree.cc 19
-rw-r--r-- re2/prefilter_tree.h 10
-rw-r--r-- re2/prog.cc 136
-rw-r--r-- re2/prog.h 74
-rw-r--r-- re2/re2.cc 80
-rw-r--r-- re2/re2.h 65
-rw-r--r-- re2/regexp.cc 29
-rw-r--r-- re2/regexp.h 28
-rw-r--r-- re2/set.cc 4
-rw-r--r-- re2/set.h 4
-rw-r--r-- re2/simplify.cc 3
-rw-r--r-- re2/testing/compile_test.cc 180
-rw-r--r-- re2/testing/dfa_test.cc 16
-rw-r--r-- re2/testing/dump.cc 8
-rw-r--r-- re2/testing/exhaustive1_test.cc 6
-rw-r--r-- re2/testing/exhaustive2_test.cc 6
-rw-r--r-- re2/testing/exhaustive3_test.cc 22
-rw-r--r-- re2/testing/exhaustive_tester.cc 20
-rw-r--r-- re2/testing/exhaustive_tester.h 32
-rw-r--r-- re2/testing/filtered_re2_test.cc 18
-rw-r--r-- re2/testing/parse_test.cc 25
-rw-r--r-- re2/testing/possible_match_test.cc 20
-rw-r--r-- re2/testing/random_test.cc 14
-rw-r--r-- re2/testing/re2_test.cc 130
-rw-r--r-- re2/testing/regexp_benchmark.cc 71
-rw-r--r-- re2/testing/regexp_generator.cc 50
-rw-r--r-- re2/testing/regexp_generator.h 34
-rw-r--r-- re2/testing/regexp_test.cc 10
-rw-r--r-- re2/testing/required_prefix_test.cc 6
-rw-r--r-- re2/testing/search_test.cc 3
-rw-r--r-- re2/testing/set_test.cc 2
-rw-r--r-- re2/testing/string_generator.cc 2
-rw-r--r-- re2/testing/string_generator.h 8
-rw-r--r-- re2/testing/string_generator_test.cc 6
-rw-r--r-- re2/testing/tester.cc 20
-rw-r--r-- re2/tostring.cc 16
-rw-r--r-- util/flags.h 4
-rw-r--r-- util/logging.h 2
-rw-r--r-- util/pcre.cc 35
-rw-r--r-- util/pcre.h 59
-rw-r--r-- util/strutil.cc 16
-rw-r--r-- util/strutil.h 10
-rw-r--r-- util/util.h 4
55 files changed, 892 insertions, 755 deletions
diff --git a/re2/bitmap256.h b/re2/bitmap256.h
index 1abae99..f649b4c 100644
--- a/re2/bitmap256.h
+++ b/re2/bitmap256.h
@@ -19,6 +19,11 @@ namespace re2 {
class Bitmap256 {
public:
Bitmap256() {
+ Clear();
+ }
+
+ // Clears all of the bits.
+ void Clear() {
memset(words_, 0, sizeof words_);
}
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 6e1b44c..6f045b1 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -5,10 +5,10 @@
// Tested by search_test.cc, exhaustive_test.cc, tester.cc
// Prog::SearchBitState is a regular expression search with submatch
-// tracking for small regular expressions and texts. Like
-// testing/backtrack.cc, it allocates a bit vector with (length of
-// text) * (length of prog) bits, to make sure it never explores the
-// same (character position, instruction) state multiple times. This
+// tracking for small regular expressions and texts. Similarly to
+// testing/backtrack.cc, it allocates a bitmap with (count of
+// lists) * (length of prog) bits to make sure it never explores the
+// same (instruction list, character position) multiple times. This
// limits the search to run in time linear in the length of the text.
//
// Unlike testing/backtrack.cc, SearchBitState is not recursive
@@ -20,6 +20,7 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
+#include <limits>
#include <utility>
#include "util/logging.h"
@@ -31,7 +32,7 @@ namespace re2 {
struct Job {
int id;
- int arg;
+ int rle; // run length encoding
const char* p;
};
@@ -47,7 +48,7 @@ class BitState {
private:
inline bool ShouldVisit(int id, const char* p);
- void Push(int id, const char* p, int arg);
+ void Push(int id, const char* p);
void GrowStack();
bool TrySearch(int id, const char* p);
@@ -63,7 +64,7 @@ class BitState {
// Search state
static const int VisitedBits = 32;
- PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited
+ PODArray<uint32_t> visited_; // bitmap: (list ID, char*) pairs visited
PODArray<const char*> cap_; // capture registers
PODArray<Job> job_; // stack of text positions to explore
int njob_; // stack size
@@ -79,11 +80,12 @@ BitState::BitState(Prog* prog)
njob_(0) {
}
-// Should the search visit the pair ip, p?
+// Given id, which *must* be a list head, we can look up its list ID.
+// Then the question is: Should the search visit the (list ID, p) pair?
// If so, remember that it was visited so that the next time,
// we don't repeat the visit.
bool BitState::ShouldVisit(int id, const char* p) {
- int n = id * static_cast<int>(text_.size()+1) +
+ int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
static_cast<int>(p-text_.begin());
if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
return false;
@@ -98,8 +100,8 @@ void BitState::GrowStack() {
job_ = std::move(tmp);
}
-// Push the triple (id, p, arg) onto the stack, growing it if necessary.
-void BitState::Push(int id, const char* p, int arg) {
+// Push (id, p) onto the stack, growing it if necessary.
+void BitState::Push(int id, const char* p) {
if (njob_ >= job_.size()) {
GrowStack();
if (njob_ >= job_.size()) {
@@ -109,93 +111,81 @@ void BitState::Push(int id, const char* p, int arg) {
return;
}
}
- int op = prog_->inst(id)->opcode();
- if (op == kInstFail)
- return;
-
- // Only check ShouldVisit when arg == 0.
- // When arg > 0, we are continuing a previous visit.
- if (arg == 0 && !ShouldVisit(id, p))
- return;
-
- Job* j = &job_[njob_++];
- j->id = id;
- j->p = p;
- j->arg = arg;
+
+ // If id < 0, it's undoing a Capture,
+ // so we mustn't interfere with that.
+ if (id >= 0 && njob_ > 0) {
+ Job* top = &job_[njob_-1];
+ if (id == top->id &&
+ p == top->p + top->rle + 1 &&
+ top->rle < std::numeric_limits<int>::max()) {
+ ++top->rle;
+ return;
+ }
+ }
+
+ Job* top = &job_[njob_++];
+ top->id = id;
+ top->rle = 0;
+ top->p = p;
}
// Try a search from instruction id0 in state p0.
// Return whether it succeeded.
bool BitState::TrySearch(int id0, const char* p0) {
bool matched = false;
- bool inaltmatch = false;
const char* end = text_.end();
njob_ = 0;
- Push(id0, p0, 0);
+ // Push() no longer checks ShouldVisit(),
+ // so we must perform the check ourselves.
+ if (ShouldVisit(id0, p0))
+ Push(id0, p0);
while (njob_ > 0) {
// Pop job off stack.
--njob_;
int id = job_[njob_].id;
+ int& rle = job_[njob_].rle;
const char* p = job_[njob_].p;
- int arg = job_[njob_].arg;
-
- // Optimization: rather than push and pop,
- // code that is going to Push and continue
- // the loop simply updates ip, p, and arg
- // and jumps to CheckAndLoop. We have to
- // do the ShouldVisit check that Push
- // would have, but we avoid the stack
- // manipulation.
- if (0) {
- Next:
- // If the Match of a non-greedy AltMatch failed,
- // we stop ourselves from trying the ByteRange,
- // which would steer us off the short circuit.
- if (prog_->inst(id)->last() || inaltmatch)
- continue;
- id++;
-
- CheckAndLoop:
- if (!ShouldVisit(id, p))
- continue;
+
+ if (id < 0) {
+ // Undo the Capture.
+ cap_[prog_->inst(-id)->cap()] = p;
+ continue;
+ }
+
+ if (rle > 0) {
+ p += rle;
+ // Revivify job on stack.
+ --rle;
+ ++njob_;
}
- // Visit ip, p.
+ Loop:
+ // Visit id, p.
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
- LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg;
+ LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
return false;
case kInstFail:
- continue;
+ break;
case kInstAltMatch:
- switch (arg) {
- case 0:
- inaltmatch = true;
- Push(id, p, 1); // come back when we're done
-
- // One opcode is ByteRange; the other leads to Match
- // (possibly via Nop or Capture).
- if (ip->greedy(prog_)) {
- // out1 is the match
- Push(ip->out1(), p, 0);
- id = ip->out1();
- p = end;
- goto CheckAndLoop;
- }
- // out is the match - non-greedy
- Push(ip->out(), end, 0);
- id = ip->out();
- goto CheckAndLoop;
-
- case 1:
- inaltmatch = false;
- continue;
+ if (ip->greedy(prog_)) {
+ // out1 is the Match instruction.
+ id = ip->out1();
+ p = end;
+ goto Loop;
}
- LOG(DFATAL) << "Bad arg in kInstAltMatch: " << arg;
- continue;
+ if (longest_) {
+ // ip must be non-greedy...
+ // out is the Match instruction.
+ id = ip->out();
+ p = end;
+ goto Loop;
+ }
+ goto Next;
case kInstByteRange: {
int c = -1;
@@ -204,54 +194,50 @@ bool BitState::TrySearch(int id0, const char* p0) {
if (!ip->Matches(c))
goto Next;
- if (!ip->last())
- Push(id+1, p, 0); // try the next when we're done
+ if (ip->hint() != 0)
+ Push(id+ip->hint(), p); // try the next when we're done
id = ip->out();
p++;
goto CheckAndLoop;
}
case kInstCapture:
- switch (arg) {
- case 0:
- if (!ip->last())
- Push(id+1, p, 0); // try the next when we're done
-
- if (0 <= ip->cap() && ip->cap() < cap_.size()) {
- // Capture p to register, but save old value.
- Push(id, cap_[ip->cap()], 1); // come back when we're done
- cap_[ip->cap()] = p;
- }
-
- // Continue on.
- id = ip->out();
- goto CheckAndLoop;
-
- case 1:
- // Finished ip->out(); restore the old value.
- cap_[ip->cap()] = p;
- continue;
+ if (!ip->last())
+ Push(id+1, p); // try the next when we're done
+
+ if (0 <= ip->cap() && ip->cap() < cap_.size()) {
+ // Capture p to register, but save old value first.
+ Push(-id, cap_[ip->cap()]); // undo when we're done
+ cap_[ip->cap()] = p;
}
- LOG(DFATAL) << "Bad arg in kInstCapture: " << arg;
- continue;
+
+ id = ip->out();
+ goto CheckAndLoop;
case kInstEmptyWidth:
if (ip->empty() & ~Prog::EmptyFlags(context_, p))
goto Next;
if (!ip->last())
- Push(id+1, p, 0); // try the next when we're done
+ Push(id+1, p); // try the next when we're done
id = ip->out();
goto CheckAndLoop;
case kInstNop:
if (!ip->last())
- Push(id+1, p, 0); // try the next when we're done
+ Push(id+1, p); // try the next when we're done
id = ip->out();
- goto CheckAndLoop;
+
+ CheckAndLoop:
+ // Sanity check: id is the head of its list, which must
+ // be the case if id-1 is the last of *its* list. :)
+ DCHECK(id == 0 || prog_->inst(id-1)->last());
+ if (ShouldVisit(id, p))
+ goto Loop;
+ break;
case kInstMatch: {
- if (endmatch_ && p != text_.end())
+ if (endmatch_ && p != end)
goto Next;
// We found a match. If the caller doesn't care
@@ -277,11 +263,18 @@ bool BitState::TrySearch(int id0, const char* p0) {
return true;
// If we used the entire text, no longer match is possible.
- if (p == text_.end())
+ if (p == end)
return true;
// Otherwise, continue on in hope of a longer match.
- goto Next;
+ // Note the absence of the ShouldVisit() check here
+ // due to execution remaining in the same list.
+ Next:
+ if (!ip->last()) {
+ id++;
+ goto Loop;
+ }
+ break;
}
}
}
@@ -310,7 +303,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
submatch_[i] = StringPiece();
// Allocate scratch space.
- int nvisited = prog_->size() * static_cast<int>(text.size()+1);
+ int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
nvisited = (nvisited + VisitedBits-1) / VisitedBits;
visited_ = PODArray<uint32_t>(nvisited);
memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
@@ -321,8 +314,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
cap_ = PODArray<const char*>(ncap);
memset(cap_.data(), 0, ncap*sizeof cap_[0]);
- // When sizeof(Job) == 16, we start with a nice round 4KiB. :)
- job_ = PODArray<Job>(256);
+ // When sizeof(Job) == 16, we start with a nice round 1KiB. :)
+ job_ = PODArray<Job>(64);
// Anchored search must start at text.begin().
if (anchored_) {
diff --git a/re2/compile.cc b/re2/compile.cc
index 3f8e0cc..ab18cef 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -1202,7 +1202,10 @@ Prog* Compiler::Finish() {
if (max_mem_ <= 0) {
prog_->set_dfa_mem(1<<20);
} else {
- int64_t m = max_mem_ - sizeof(Prog) - prog_->size_*sizeof(Prog::Inst);
+ int64_t m = max_mem_ - sizeof(Prog);
+ m -= prog_->size_*sizeof(Prog::Inst); // account for inst_
+ if (prog_->CanBitState())
+ m -= prog_->size_*sizeof(uint16_t); // account for list_heads_
if (m < 0)
m = 0;
prog_->set_dfa_mem(m);
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 89b9b77..91292d4 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -106,7 +106,7 @@ class DFA {
// Computes min and max for matching strings. Won't return strings
// bigger than maxlen.
- bool PossibleMatchRange(string* min, string* max, int maxlen);
+ bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
// These data structures are logically private, but C++ makes it too
// difficult to mark them as such.
@@ -241,10 +241,10 @@ class DFA {
void AddToQueue(Workq* q, int id, uint32_t flag);
// For debugging, returns a text representation of State.
- static string DumpState(State* state);
+ static std::string DumpState(State* state);
// For debugging, returns a text representation of a Workq.
- static string DumpWorkq(Workq* q);
+ static std::string DumpWorkq(Workq* q);
// Search parameters
struct SearchParams {
@@ -505,8 +505,8 @@ DFA::~DFA() {
// Debugging printouts
// For debugging, returns a string representation of the work queue.
-string DFA::DumpWorkq(Workq* q) {
- string s;
+std::string DFA::DumpWorkq(Workq* q) {
+ std::string s;
const char* sep = "";
for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
if (q->is_mark(*it)) {
@@ -521,14 +521,14 @@ string DFA::DumpWorkq(Workq* q) {
}
// For debugging, returns a string representation of the state.
-string DFA::DumpState(State* state) {
+std::string DFA::DumpState(State* state) {
if (state == NULL)
return "_";
if (state == DeadState)
return "X";
if (state == FullMatchState)
return "*";
- string s;
+ std::string s;
const char* sep = "";
StringAppendF(&s, "(%p)", state);
for (int i = 0; i < state->ninst_; i++) {
@@ -1769,7 +1769,7 @@ bool DFA::Search(const StringPiece& text,
if (ExtraDebug) {
fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
- string(text).c_str(), anchored, want_earliest_match,
+ std::string(text).c_str(), anchored, want_earliest_match,
run_forward, kind_);
}
@@ -1995,7 +1995,7 @@ void Prog::TEST_dfa_should_bail_when_slow(bool b) {
// Computes min and max for matching string.
// Won't return strings bigger than maxlen.
-bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
+bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
if (!ok())
return false;
@@ -2132,7 +2132,7 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
}
// PossibleMatchRange for a Prog.
-bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) {
+bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Have to use dfa_longest_ to get all strings for full matches.
// For example, (a|aa) never matches aa in first-match mode.
return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc
index 12f638a..e5d8de5 100644
--- a/re2/filtered_re2.cc
+++ b/re2/filtered_re2.cc
@@ -49,7 +49,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
return code;
}
-void FilteredRE2::Compile(std::vector<string>* atoms) {
+void FilteredRE2::Compile(std::vector<std::string>* atoms) {
if (compiled_) {
LOG(ERROR) << "Compile called already.";
return;
diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h
index b1317cc..965b5c9 100644
--- a/re2/filtered_re2.h
+++ b/re2/filtered_re2.h
@@ -49,7 +49,7 @@ class FilteredRE2 {
// the search text should be lowercased first to find matching
// strings from the set of strings returned by Compile. Call after
// all Add calls are done.
- void Compile(std::vector<string>* strings_to_match);
+ void Compile(std::vector<std::string>* strings_to_match);
// Returns the index of the first matching regexp.
// Returns -1 on no match. Can be called prior to Compile.
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 5e5d324..2faebe0 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -13,7 +13,6 @@
#include "re2/re2.h"
using re2::StringPiece;
-using std::string;
// NOT static, NOT signed.
uint8_t dummy = 0;
@@ -87,12 +86,12 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
RE2::FindAndConsume(&sp, re, &d);
}
- string s = string(text);
+ std::string s = std::string(text);
RE2::Replace(&s, re, "");
- s = string(text); // Reset.
+ s = std::string(text); // Reset.
RE2::GlobalReplace(&s, re, "");
- string min, max;
+ std::string min, max;
re.PossibleMatchRange(&min, &max, /*maxlen=*/9);
// Exercise some other API functionality.
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 04d4c6f..e459b6f 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -105,7 +105,7 @@ class NFA {
const char* p);
// Returns text version of capture information, for debugging.
- string FormatCapture(const char** capture);
+ std::string FormatCapture(const char** capture);
inline void CopyCapture(const char** dst, const char** src);
@@ -288,15 +288,25 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
case kInstByteRange:
if (!ip->Matches(c))
goto Next;
- FALLTHROUGH_INTENDED;
- case kInstMatch:
// Save state; will pick up at next byte.
t = Incref(t0);
*tp = t;
if (ExtraDebug)
fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str());
+ if (ip->hint() == 0)
+ break;
+ a = {id+ip->hint(), NULL};
+ goto Loop;
+
+ case kInstMatch:
+ // Save state; will pick up at next byte.
+ t = Incref(t0);
+ *tp = t;
+ if (ExtraDebug)
+ fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str());
+
Next:
if (ip->last())
break;
@@ -415,9 +425,8 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
return 0;
}
-string NFA::FormatCapture(const char** capture) {
- string s;
-
+std::string NFA::FormatCapture(const char** capture) {
+ std::string s;
for (int i = 0; i < ncapture_; i+=2) {
if (capture[i] == NULL)
StringAppendF(&s, "(?,?)");
@@ -482,7 +491,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
if (ExtraDebug)
fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
- string(text).c_str(), string(context).c_str(), anchored, longest);
+ std::string(text).c_str(), std::string(context).c_str(), anchored,
+ longest);
// Set up search.
Threadq* runq = &q0_;
diff --git a/re2/onepass.cc b/re2/onepass.cc
index edd2c48..e04c56d 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -590,7 +590,7 @@ bool Prog::IsOnePass() {
if (nodebyid[i] != -1)
idmap[nodebyid[i]] = i;
- string dump;
+ std::string dump;
for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
int id = *it;
int nodeindex = nodebyid[id];
diff --git a/re2/parse.cc b/re2/parse.cc
index c8dea7e..f0a1387 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -610,7 +610,7 @@ bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {
Regexp* re = new Regexp(kLeftParen, flags_);
re->cap_ = ++ncap_;
if (name.data() != NULL)
- re->name_ = new string(name);
+ re->name_ = new std::string(name);
return PushRegexp(re);
}
@@ -1790,7 +1790,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Look up the group in the ICU Unicode data. Because ICU provides full
// Unicode properties support, this could be more than a lookup by name.
::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8(
- string("\\p{") + string(name) + string("}"));
+ std::string("\\p{") + std::string(name) + std::string("}"));
UErrorCode uerr = U_ZERO_ERROR;
::icu::UnicodeSet uset(ustr, uerr);
if (U_FAILURE(uerr)) {
@@ -2181,7 +2181,7 @@ BadPerlOp:
// into UTF8 encoding in string.
// Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is
// deprecated and because it rejects code points 0x80-0x9F.
-void ConvertLatin1ToUTF8(const StringPiece& latin1, string* utf) {
+void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) {
char buf[UTFmax];
utf->clear();
@@ -2208,7 +2208,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
// Convert regexp to UTF-8 (easier on the rest of the parser).
if (global_flags & Latin1) {
- string* tmp = new string;
+ std::string* tmp = new std::string;
ConvertLatin1ToUTF8(t, tmp);
status->set_tmp(tmp);
t = *tmp;
diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index b657357..4d6df8d 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc
@@ -21,8 +21,8 @@ namespace re2 {
static const bool ExtraDebug = false;
-typedef std::set<string>::iterator SSIter;
-typedef std::set<string>::const_iterator ConstSSIter;
+typedef std::set<std::string>::iterator SSIter;
+typedef std::set<std::string>::const_iterator ConstSSIter;
// Initializes a Prefilter, allocating subs_ as necessary.
Prefilter::Prefilter(Op op) {
@@ -140,7 +140,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
return AndOr(OR, a, b);
}
-static void SimplifyStringSet(std::set<string> *ss) {
+static void SimplifyStringSet(std::set<std::string> *ss) {
// Now make sure that the strings aren't redundant. For example, if
// we know "ab" is a required string, then it doesn't help at all to
// know that "abc" is also a required string, so delete "abc". This
@@ -155,13 +155,13 @@ static void SimplifyStringSet(std::set<string> *ss) {
// Increment j early so that we can erase the element it points to.
SSIter old_j = j;
++j;
- if (old_j->find(*i) != string::npos)
+ if (old_j->find(*i) != std::string::npos)
ss->erase(old_j);
}
}
}
-Prefilter* Prefilter::OrStrings(std::set<string>* ss) {
+Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
SimplifyStringSet(ss);
Prefilter* or_prefilter = NULL;
if (!ss->empty()) {
@@ -191,7 +191,7 @@ static Rune ToLowerRuneLatin1(Rune r) {
return r;
}
-Prefilter* Prefilter::FromString(const string& str) {
+Prefilter* Prefilter::FromString(const std::string& str) {
Prefilter* m = new Prefilter(Prefilter::ATOM);
m->atom_ = str;
return m;
@@ -221,19 +221,19 @@ class Prefilter::Info {
static Info* AnyMatch();
// Format Info as a string.
- string ToString();
+ std::string ToString();
// Caller takes ownership of the Prefilter.
Prefilter* TakeMatch();
- std::set<string>& exact() { return exact_; }
+ std::set<std::string>& exact() { return exact_; }
bool is_exact() const { return is_exact_; }
class Walker;
private:
- std::set<string> exact_;
+ std::set<std::string> exact_;
// When is_exact_ is true, the strings that match
// are placed in exact_. When it is no longer an exact
@@ -268,13 +268,11 @@ Prefilter* Prefilter::Info::TakeMatch() {
}
// Format a Info in string form.
-string Prefilter::Info::ToString() {
+std::string Prefilter::Info::ToString() {
if (is_exact_) {
int n = 0;
- string s;
- for (std::set<string>::iterator i = exact_.begin();
- i != exact_.end();
- ++i) {
+ std::string s;
+ for (SSIter i = exact_.begin(); i != exact_.end(); ++i) {
if (n++ > 0)
s += ",";
s += *i;
@@ -289,17 +287,17 @@ string Prefilter::Info::ToString() {
}
// Add the strings from src to dst.
-static void CopyIn(const std::set<string>& src,
- std::set<string>* dst) {
+static void CopyIn(const std::set<std::string>& src,
+ std::set<std::string>* dst) {
for (ConstSSIter i = src.begin(); i != src.end(); ++i)
dst->insert(*i);
}
// Add the cross-product of a and b to dst.
// (For each string i in a and j in b, add i+j.)
-static void CrossProduct(const std::set<string>& a,
- const std::set<string>& b,
- std::set<string>* dst) {
+static void CrossProduct(const std::set<std::string>& a,
+ const std::set<std::string>& b,
+ std::set<std::string>* dst) {
for (ConstSSIter i = a.begin(); i != a.end(); ++i)
for (ConstSSIter j = b.begin(); j != b.end(); ++j)
dst->insert(*i + *j);
@@ -390,15 +388,15 @@ Prefilter::Info* Prefilter::Info::Plus(Info *a) {
return ab;
}
-static string RuneToString(Rune r) {
+static std::string RuneToString(Rune r) {
char buf[UTFmax];
int n = runetochar(buf, &r);
- return string(buf, n);
+ return std::string(buf, n);
}
-static string RuneToStringLatin1(Rune r) {
+static std::string RuneToStringLatin1(Rune r) {
char c = r & 0xff;
- return string(&c, 1);
+ return std::string(&c, 1);
}
// Constructs Info for literal rune.
@@ -662,7 +660,7 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
return m;
}
-string Prefilter::DebugString() const {
+std::string Prefilter::DebugString() const {
switch (op_) {
default:
LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
@@ -674,7 +672,7 @@ string Prefilter::DebugString() const {
case ALL:
return "";
case AND: {
- string s = "";
+ std::string s = "";
for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += " ";
@@ -684,7 +682,7 @@ string Prefilter::DebugString() const {
return s;
}
case OR: {
- string s = "(";
+ std::string s = "(";
for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += "|";
diff --git a/re2/prefilter.h b/re2/prefilter.h
index ead09e1..4fedeb4 100644
--- a/re2/prefilter.h
+++ b/re2/prefilter.h
@@ -37,7 +37,7 @@ class Prefilter {
~Prefilter();
Op op() { return op_; }
- const string& atom() const { return atom_; }
+ const std::string& atom() const { return atom_; }
void set_unique_id(int id) { unique_id_ = id; }
int unique_id() const { return unique_id_; }
@@ -57,7 +57,7 @@ class Prefilter {
static Prefilter* FromRE2(const RE2* re2);
// Returns a readable debug string of the prefilter.
- string DebugString() const;
+ std::string DebugString() const;
private:
class Info;
@@ -75,9 +75,9 @@ class Prefilter {
static Prefilter* FromRegexp(Regexp* a);
- static Prefilter* FromString(const string& str);
+ static Prefilter* FromString(const std::string& str);
- static Prefilter* OrStrings(std::set<string>* ss);
+ static Prefilter* OrStrings(std::set<std::string>* ss);
static Info* BuildInfo(Regexp* re);
@@ -90,7 +90,7 @@ class Prefilter {
std::vector<Prefilter*>* subs_;
// Actual string to match in leaf node.
- string atom_;
+ std::string atom_;
// If different prefilters have the same string atom, or if they are
// structurally the same (e.g., OR of same atom strings) they are
diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
index a07de40..187e2ec 100644
--- a/re2/prefilter_tree.cc
+++ b/re2/prefilter_tree.cc
@@ -54,7 +54,7 @@ void PrefilterTree::Add(Prefilter* prefilter) {
prefilter_vec_.push_back(prefilter);
}
-void PrefilterTree::Compile(std::vector<string>* atom_vec) {
+void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
if (compiled_) {
LOG(DFATAL) << "Compile called already.";
return;
@@ -106,16 +106,16 @@ void PrefilterTree::Compile(std::vector<string>* atom_vec) {
}
Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
- string node_string = NodeString(node);
- std::map<string, Prefilter*>::iterator iter = nodes->find(node_string);
+ std::string node_string = NodeString(node);
+ std::map<std::string, Prefilter*>::iterator iter = nodes->find(node_string);
if (iter == nodes->end())
return NULL;
return (*iter).second;
}
-string PrefilterTree::NodeString(Prefilter* node) const {
+std::string PrefilterTree::NodeString(Prefilter* node) const {
// Adding the operation disambiguates AND/OR/atom nodes.
- string s = StringPrintf("%d", node->op()) + ":";
+ std::string s = StringPrintf("%d", node->op()) + ":";
if (node->op() == Prefilter::ATOM) {
s += node->atom();
} else {
@@ -166,7 +166,7 @@ bool PrefilterTree::KeepNode(Prefilter* node) const {
}
void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
- std::vector<string>* atom_vec) {
+ std::vector<std::string>* atom_vec) {
atom_vec->clear();
// Build vector of all filter nodes, sorted topologically
@@ -377,15 +377,14 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
LOG(ERROR) << it->first;
}
LOG(ERROR) << "Map:";
- for (std::map<string, Prefilter*>::const_iterator iter = nodes->begin();
+ for (std::map<std::string, Prefilter*>::const_iterator iter = nodes->begin();
iter != nodes->end(); ++iter)
LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
<< " Str: " << (*iter).first;
}
-string PrefilterTree::DebugNodeString(Prefilter* node) const {
- string node_string = "";
-
+std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
+ std::string node_string = "";
if (node->op() == Prefilter::ATOM) {
DCHECK(!node->atom().empty());
node_string += node->atom();
diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
index f81e134..10d6f7c 100644
--- a/re2/prefilter_tree.h
+++ b/re2/prefilter_tree.h
@@ -43,7 +43,7 @@ class PrefilterTree {
// The caller should use the returned set of strings to do string matching.
// Each time a string matches, the corresponding index then has to be
// and passed to RegexpsGivenStrings below.
- void Compile(std::vector<string>* atom_vec);
+ void Compile(std::vector<std::string>* atom_vec);
// Given the indices of the atoms that matched, returns the indexes
// of regexps that should be searched. The matched_atoms should
@@ -60,7 +60,7 @@ class PrefilterTree {
private:
typedef SparseArray<int> IntMap;
typedef std::map<int, int> StdIntMap;
- typedef std::map<string, Prefilter*> NodeMap;
+ typedef std::map<std::string, Prefilter*> NodeMap;
// Each unique node has a corresponding Entry that helps in
// passing the matching trigger information along the tree.
@@ -90,7 +90,7 @@ class PrefilterTree {
// This function assigns unique ids to various parts of the
// prefilter, by looking at if these nodes are already in the
// PrefilterTree.
- void AssignUniqueIds(NodeMap* nodes, std::vector<string>* atom_vec);
+ void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec);
// Given the matching atoms, find the regexps to be triggered.
void PropagateMatch(const std::vector<int>& atom_ids,
@@ -102,10 +102,10 @@ class PrefilterTree {
// A string that uniquely identifies the node. Assumes that the
// children of node has already been assigned unique ids.
- string NodeString(Prefilter* node) const;
+ std::string NodeString(Prefilter* node) const;
// Recursively constructs a readable prefilter string.
- string DebugNodeString(Prefilter* node) const;
+ std::string DebugNodeString(Prefilter* node) const;
// Used for debugging.
void PrintDebugInfo(NodeMap* nodes);
diff --git a/re2/prog.cc b/re2/prog.cc
index 9729aa4..9853d6d 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -34,7 +34,7 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
set_out_opcode(out, kInstByteRange);
lo_ = lo & 0xFF;
hi_ = hi & 0xFF;
- foldcase_ = foldcase & 0xFF;
+ hint_foldcase_ = foldcase&1;
}
void Prog::Inst::InitCapture(int cap, uint32_t out) {
@@ -65,7 +65,7 @@ void Prog::Inst::InitFail() {
set_opcode(kInstFail);
}
-string Prog::Inst::Dump() {
+std::string Prog::Inst::Dump() {
switch (opcode()) {
default:
return StringPrintf("opcode %d", static_cast<int>(opcode()));
@@ -77,9 +77,9 @@ string Prog::Inst::Dump() {
return StringPrintf("altmatch -> %d | %d", out(), out1_);
case kInstByteRange:
- return StringPrintf("byte%s [%02x-%02x] -> %d",
- foldcase_ ? "/i" : "",
- lo_, hi_, out());
+ return StringPrintf("byte%s [%02x-%02x] %d -> %d",
+ foldcase() ? "/i" : "",
+ lo_, hi_, hint(), out());
case kInstCapture:
return StringPrintf("capture %d -> %d", cap_, out());
@@ -129,8 +129,8 @@ static inline void AddToQueue(Workq* q, int id) {
q->insert(id);
}
-static string ProgToString(Prog* prog, Workq* q) {
- string s;
+static std::string ProgToString(Prog* prog, Workq* q) {
+ std::string s;
for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
int id = *i;
Prog::Inst* ip = prog->inst(id);
@@ -142,8 +142,8 @@ static string ProgToString(Prog* prog, Workq* q) {
return s;
}
-static string FlattenedProgToString(Prog* prog, int start) {
- string s;
+static std::string FlattenedProgToString(Prog* prog, int start) {
+ std::string s;
for (int id = start; id < prog->size(); id++) {
Prog::Inst* ip = prog->inst(id);
if (ip->last())
@@ -154,7 +154,7 @@ static string FlattenedProgToString(Prog* prog, int start) {
return s;
}
-string Prog::Dump() {
+std::string Prog::Dump() {
if (did_flatten_)
return FlattenedProgToString(this, start_);
@@ -163,7 +163,7 @@ string Prog::Dump() {
return ProgToString(this, &q);
}
-string Prog::DumpUnanchored() {
+std::string Prog::DumpUnanchored() {
if (did_flatten_)
return FlattenedProgToString(this, start_unanchored_);
@@ -172,8 +172,8 @@ string Prog::DumpUnanchored() {
return ProgToString(this, &q);
}
-string Prog::DumpByteMap() {
- string map;
+std::string Prog::DumpByteMap() {
+ std::string map;
for (int c = 0; c < 256; c++) {
int b = bytemap_[c];
int lo = c;
@@ -341,7 +341,6 @@ class ByteMapBuilder {
// This will avoid problems during the second phase,
// in which we assign byte classes numbered from 0.
splits_.Set(255);
- colors_.resize(256);
colors_[255] = 256;
nextcolor_ = 257;
}
@@ -354,7 +353,7 @@ class ByteMapBuilder {
int Recolor(int oldcolor);
Bitmap256 splits_;
- std::vector<int> colors_;
+ int colors_[256];
int nextcolor_;
std::vector<std::pair<int, int>> colormap_;
std::vector<std::pair<int, int>> ranges_;
@@ -468,8 +467,11 @@ void Prog::ComputeByteMap() {
foldlo = 'a';
if (foldhi > 'z')
foldhi = 'z';
- if (foldlo <= foldhi)
- builder.Mark(foldlo + 'A' - 'a', foldhi + 'A' - 'a');
+ if (foldlo <= foldhi) {
+ foldlo += 'A' - 'a';
+ foldhi += 'A' - 'a';
+ builder.Mark(foldlo, foldhi);
+ }
}
// If this Inst is not the last Inst in its list AND the next Inst is
// also a ByteRange AND the Insts have the same out, defer the merge.
@@ -591,6 +593,9 @@ void Prog::Flatten() {
flatmap[i->value()] = static_cast<int>(flat.size());
EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
flat.back().set_last();
+ // We have the bounds of the "list", so this is the
+ // most convenient point at which to compute hints.
+ ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
}
list_count_ = static_cast<int>(flatmap.size());
@@ -625,7 +630,17 @@ void Prog::Flatten() {
// Finally, replace the old instructions with the new instructions.
size_ = static_cast<int>(flat.size());
inst_ = PODArray<Inst>(size_);
- memmove(inst_.data(), flat.data(), size_*sizeof(inst_[0]));
+ memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
+
+ // Populate the list heads for BitState.
+ // 512 instructions limits the memory footprint to 1KiB.
+ if (size_ <= 512) {
+ list_heads_ = PODArray<uint16_t>(size_);
+ // 0xFF makes it more obvious if we try to look up a non-head.
+ memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]);
+ for (int i = 0; i < list_count_; ++i)
+ list_heads_[flatmap[i]] = i;
+ }
}
void Prog::MarkSuccessors(SparseArray<int>* rootmap,
@@ -818,4 +833,89 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
}
}
+// For each ByteRange instruction in [begin, end), computes a hint to execution
+// engines: the delta to the next instruction (in flat) worth exploring iff the
+// current instruction matched.
+//
+// Implements a coloring algorithm related to ByteMapBuilder, but in this case,
+// colors are instructions and recoloring ranges precisely identifies conflicts
+// between instructions. Iterating backwards over [begin, end) is guaranteed to
+// identify the nearest conflict (if any) with only linear complexity.
+void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
+ Bitmap256 splits;
+ int colors[256];
+
+ bool dirty = false;
+ for (int id = end; id >= begin; --id) {
+ if (id == end ||
+ (*flat)[id].opcode() != kInstByteRange) {
+ if (dirty) {
+ dirty = false;
+ splits.Clear();
+ }
+ splits.Set(255);
+ colors[255] = id;
+ // At this point, the [0-255] range is colored with id.
+ // Thus, hints cannot point beyond id; and if id == end,
+ // hints that would have pointed to id will be 0 instead.
+ continue;
+ }
+ dirty = true;
+
+ // We recolor the [lo-hi] range with id. Note that first ratchets backwards
+ // from end to the nearest conflict (if any) during recoloring.
+ int first = end;
+ auto Recolor = [&](int lo, int hi) {
+ // Like ByteMapBuilder, we split at lo-1 and at hi.
+ --lo;
+
+ if (0 <= lo && !splits.Test(lo)) {
+ splits.Set(lo);
+ int next = splits.FindNextSetBit(lo+1);
+ colors[lo] = colors[next];
+ }
+ if (!splits.Test(hi)) {
+ splits.Set(hi);
+ int next = splits.FindNextSetBit(hi+1);
+ colors[hi] = colors[next];
+ }
+
+ int c = lo+1;
+ while (c < 256) {
+ int next = splits.FindNextSetBit(c);
+ // Ratchet backwards...
+ first = std::min(first, colors[next]);
+ // Recolor with id - because it's the new nearest conflict!
+ colors[next] = id;
+ if (next == hi)
+ break;
+ c = next+1;
+ }
+ };
+
+ Inst* ip = &(*flat)[id];
+ int lo = ip->lo();
+ int hi = ip->hi();
+ Recolor(lo, hi);
+ if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
+ int foldlo = lo;
+ int foldhi = hi;
+ if (foldlo < 'a')
+ foldlo = 'a';
+ if (foldhi > 'z')
+ foldhi = 'z';
+ if (foldlo <= foldhi) {
+ foldlo += 'A' - 'a';
+ foldhi += 'A' - 'a';
+ Recolor(foldlo, foldhi);
+ }
+ }
+
+ if (first != end) {
+ uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767));
+ ip->hint_foldcase_ |= hint<<1;
+ }
+ }
+}
+
} // namespace re2
diff --git a/re2/prog.h b/re2/prog.h
index edac935..bacc411 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -86,7 +86,8 @@ class Prog {
int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
- int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return foldcase_; }
+ int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
+ int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
@@ -100,13 +101,13 @@ class Prog {
// Does this inst (a kInstByteRange) match c?
inline bool Matches(int c) {
DCHECK_EQ(opcode(), kInstByteRange);
- if (foldcase_ && 'A' <= c && c <= 'Z')
+ if (foldcase() && 'A' <= c && c <= 'Z')
c += 'a' - 'A';
return lo_ <= c && c <= hi_;
}
// Returns string representation for debugging.
- string Dump();
+ std::string Dump();
// Maximum instruction id.
// (Must fit in out_opcode_. PatchList/last steal another bit.)
@@ -129,25 +130,31 @@ class Prog {
out_opcode_ = (out<<4) | (last()<<3) | opcode;
}
- uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
- union { // additional instruction arguments:
- uint32_t out1_; // opcode == kInstAlt
- // alternate next instruction
-
- int32_t cap_; // opcode == kInstCapture
- // Index of capture register (holds text
- // position recorded by capturing parentheses).
- // For \n (the submatch for the nth parentheses),
- // the left parenthesis captures into register 2*n
- // and the right one captures into register 2*n+1.
-
- int32_t match_id_; // opcode == kInstMatch
- // Match ID to identify this match (for re2::Set).
-
- struct { // opcode == kInstByteRange
- uint8_t lo_; // byte range is lo_-hi_ inclusive
- uint8_t hi_; //
- uint8_t foldcase_; // convert A-Z to a-z before checking range.
+ uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
+ union { // additional instruction arguments:
+ uint32_t out1_; // opcode == kInstAlt
+ // alternate next instruction
+
+ int32_t cap_; // opcode == kInstCapture
+ // Index of capture register (holds text
+ // position recorded by capturing parentheses).
+ // For \n (the submatch for the nth parentheses),
+ // the left parenthesis captures into register 2*n
+ // and the right one captures into register 2*n+1.
+
+ int32_t match_id_; // opcode == kInstMatch
+ // Match ID to identify this match (for re2::Set).
+
+ struct { // opcode == kInstByteRange
+ uint8_t lo_; // byte range is lo_-hi_ inclusive
+ uint8_t hi_; //
+ uint16_t hint_foldcase_; // 15 bits: hint, 1 (low) bit: foldcase
+ // hint to execution engines: the delta to the
+ // next instruction (in the current list) worth
+ // exploring iff this instruction matched; 0
+ // means there are no remaining possibilities,
+ // which is most likely for character classes.
+ // foldcase: A-Z -> a-z before checking range.
};
EmptyOp empty_; // opcode == kInstEmptyWidth
@@ -199,6 +206,7 @@ class Prog {
void set_reversed(bool reversed) { reversed_ = reversed; }
int list_count() { return list_count_; }
int inst_count(InstOp op) { return inst_count_[op]; }
+ uint16_t* list_heads() { return list_heads_.data(); }
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
int64_t dfa_mem() { return dfa_mem_; }
int flags() { return flags_; }
@@ -214,9 +222,9 @@ class Prog {
int first_byte();
// Returns string representation of program for debugging.
- string Dump();
- string DumpUnanchored();
- string DumpByteMap();
+ std::string Dump();
+ std::string DumpUnanchored();
+ std::string DumpByteMap();
// Returns the set of kEmpty flags that are in effect at
// position p within context.
@@ -305,7 +313,8 @@ class Prog {
StringPiece* match, int nmatch);
// Bit-state backtracking. Fast on small cases but uses memory
- // proportional to the product of the program size and the text size.
+ // proportional to the product of the list count and the text size.
+ bool CanBitState() { return list_heads_.data() != NULL; }
bool SearchBitState(const StringPiece& text, const StringPiece& context,
Anchor anchor, MatchKind kind,
StringPiece* match, int nmatch);
@@ -337,7 +346,7 @@ class Prog {
// do not compile down to infinite repetitions.
//
// Returns true on success, false on error.
- bool PossibleMatchRange(string* min, string* max, int maxlen);
+ bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
// EXPERIMENTAL! SUBJECT TO CHANGE!
// Outputs the program fanout into the given sparse array.
@@ -374,6 +383,9 @@ class Prog {
std::vector<Inst>* flat,
SparseSet* reachable, std::vector<int>* stk);
+ // Computes hints for ByteRange instructions in [begin, end).
+ void ComputeHints(std::vector<Inst>* flat, int begin, int end);
+
private:
friend class Compiler;
@@ -393,10 +405,12 @@ class Prog {
int first_byte_; // required first byte for match, or -1 if none
int flags_; // regexp parse flags
- int list_count_; // count of lists (see above)
- int inst_count_[kNumInst]; // count of instructions by opcode
+ int list_count_; // count of lists (see above)
+ int inst_count_[kNumInst]; // count of instructions by opcode
+ PODArray<uint16_t> list_heads_; // sparse array enumerating list heads
+ // not populated if size_ is overly large
- PODArray<Inst> inst_; // pointer to instruction array
+ PODArray<Inst> inst_; // pointer to instruction array
PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes
int64_t dfa_mem_; // Maximum memory for DFAs.
diff --git a/re2/re2.cc b/re2/re2.cc
index 1529807..fe8eb34 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -56,9 +56,9 @@ RE2::Options::Options(RE2::CannedOptions opt)
// static empty objects for use as const references.
// To avoid global constructors, allocated in RE2::Init().
-static const string* empty_string;
-static const std::map<string, int>* empty_named_groups;
-static const std::map<int, string>* empty_group_names;
+static const std::string* empty_string;
+static const std::map<std::string, int>* empty_named_groups;
+static const std::map<int, std::string>* empty_group_names;
// Converts from Regexp error code to RE2 error code.
// Maybe some day they will diverge. In any event, this
@@ -97,10 +97,10 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
return RE2::ErrorInternal;
}
-static string trunc(const StringPiece& pattern) {
+static std::string trunc(const StringPiece& pattern) {
if (pattern.size() < 100)
- return string(pattern);
- return string(pattern.substr(0, 100)) + "...";
+ return std::string(pattern);
+ return std::string(pattern.substr(0, 100)) + "...";
}
@@ -108,7 +108,7 @@ RE2::RE2(const char* pattern) {
Init(pattern, DefaultOptions);
}
-RE2::RE2(const string& pattern) {
+RE2::RE2(const std::string& pattern) {
Init(pattern, DefaultOptions);
}
@@ -167,12 +167,12 @@ int RE2::Options::ParseFlags() const {
void RE2::Init(const StringPiece& pattern, const Options& options) {
static std::once_flag empty_once;
std::call_once(empty_once, []() {
- empty_string = new string;
- empty_named_groups = new std::map<string, int>;
- empty_group_names = new std::map<int, string>;
+ empty_string = new std::string;
+ empty_named_groups = new std::map<std::string, int>;
+ empty_group_names = new std::map<int, std::string>;
});
- pattern_ = string(pattern);
+ pattern_ = std::string(pattern);
options_.Copy(options);
entire_regexp_ = NULL;
suffix_regexp_ = NULL;
@@ -194,9 +194,9 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
<< status.Text();
}
- error_ = new string(status.Text());
+ error_ = new std::string(status.Text());
error_code_ = RegexpErrorToRE2(status.code());
- error_arg_ = string(status.error_arg());
+ error_arg_ = std::string(status.error_arg());
return;
}
@@ -213,7 +213,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
if (prog_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
- error_ = new string("pattern too large - compile failed");
+ error_ = new std::string("pattern too large - compile failed");
error_code_ = RE2::ErrorPatternTooLarge;
return;
}
@@ -239,7 +239,8 @@ re2::Prog* RE2::ReverseProg() const {
if (re->rprog_ == NULL) {
if (re->options_.log_errors())
LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
- re->error_ = new string("pattern too large - reverse compile failed");
+ re->error_ =
+ new std::string("pattern too large - reverse compile failed");
re->error_code_ = RE2::ErrorPatternTooLarge;
}
}, this);
@@ -307,7 +308,7 @@ int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
}
// Returns named_groups_, computing it if needed.
-const std::map<string, int>& RE2::NamedCapturingGroups() const {
+const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
std::call_once(named_groups_once_, [](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->named_groups_ = re->suffix_regexp_->NamedCaptures();
@@ -318,7 +319,7 @@ const std::map<string, int>& RE2::NamedCapturingGroups() const {
}
// Returns group_names_, computing it if needed.
-const std::map<int, string>& RE2::CapturingGroupNames() const {
+const std::map<int, std::string>& RE2::CapturingGroupNames() const {
std::call_once(group_names_once_, [](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->group_names_ = re->suffix_regexp_->CaptureNames();
@@ -362,7 +363,7 @@ bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
}
}
-bool RE2::Replace(string* str,
+bool RE2::Replace(std::string* str,
const RE2& re,
const StringPiece& rewrite) {
StringPiece vec[kVecSize];
@@ -372,7 +373,7 @@ bool RE2::Replace(string* str,
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
return false;
- string s;
+ std::string s;
if (!re.Rewrite(&s, rewrite, vec, nvec))
return false;
@@ -382,7 +383,7 @@ bool RE2::Replace(string* str,
return true;
}
-int RE2::GlobalReplace(string* str,
+int RE2::GlobalReplace(std::string* str,
const RE2& re,
const StringPiece& rewrite) {
StringPiece vec[kVecSize];
@@ -393,7 +394,7 @@ int RE2::GlobalReplace(string* str,
const char* p = str->data();
const char* ep = p + str->size();
const char* lastend = NULL;
- string out;
+ std::string out;
int count = 0;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
// Iterate just once when fuzzing. Otherwise, we easily get bogged down
@@ -410,11 +411,10 @@ int RE2::GlobalReplace(string* str,
if (vec[0].begin() == lastend && vec[0].size() == 0) {
// Disallow empty match at end of last match: skip ahead.
//
- // fullrune() takes int, not size_t. However, it just looks
+ // fullrune() takes int, not ptrdiff_t. However, it just looks
// at the leading byte and treats any length >= 4 the same.
if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
- fullrune(p, static_cast<int>(std::min(static_cast<ptrdiff_t>(4),
- ep - p)))) {
+ fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
// re is in UTF-8 mode and there is enough left of str
// to allow us to advance by up to UTFmax bytes.
Rune r;
@@ -457,7 +457,7 @@ int RE2::GlobalReplace(string* str,
bool RE2::Extract(const StringPiece& text,
const RE2& re,
const StringPiece& rewrite,
- string* out) {
+ std::string* out) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > arraysize(vec))
@@ -470,8 +470,8 @@ bool RE2::Extract(const StringPiece& text,
return re.Rewrite(out, rewrite, vec, nvec);
}
-string RE2::QuoteMeta(const StringPiece& unquoted) {
- string result;
+std::string RE2::QuoteMeta(const StringPiece& unquoted) {
+ std::string result;
result.reserve(unquoted.size() << 1);
// Escape any ascii character not in [A-Za-z_0-9].
@@ -508,7 +508,8 @@ string RE2::QuoteMeta(const StringPiece& unquoted) {
return result;
}
-bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
+bool RE2::PossibleMatchRange(std::string* min, std::string* max,
+ int maxlen) const {
if (prog_ == NULL)
return false;
@@ -529,7 +530,7 @@ bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
}
// Add to prefix min max using PossibleMatchRange on regexp.
- string dmin, dmax;
+ std::string dmin, dmax;
maxlen -= n;
if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
min->append(dmin);
@@ -646,15 +647,13 @@ bool RE2::Match(const StringPiece& text,
bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
- // SearchBitState allocates a bit vector of size prog_->size() * text.size().
+ // BitState allocates a bitmap of size prog_->list_count() * text.size().
// It also allocates a stack of 3-word structures which could potentially
- // grow as large as prog_->size() * text.size() but in practice is much
- // smaller.
- // Conditions for using SearchBitState:
- const int MaxBitStateProg = 500; // prog_->size() <= Max.
- const int MaxBitStateVector = 256*1024; // bit vector size <= Max (bits)
- bool can_bit_state = prog_->size() <= MaxBitStateProg;
- size_t bit_state_text_max = MaxBitStateVector / prog_->size();
+ // grow as large as prog_->list_count() * text.size(), but in practice is
+ // much smaller.
+ const int kMaxBitStateBitmapSize = 256*1024; // bitmap size <= max (bits)
+ bool can_bit_state = prog_->CanBitState();
+ size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
bool dfa_failed = false;
switch (re_anchor) {
@@ -854,7 +853,8 @@ bool RE2::DoMatch(const StringPiece& text,
// Checks that the rewrite string is well-formed with respect to this
// regular expression.
-bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
+bool RE2::CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const {
int max_token = -1;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
@@ -911,7 +911,7 @@ int RE2::MaxSubmatch(const StringPiece& rewrite) {
// Append the "rewrite" string, with backslash substitutions from "vec",
// to string "out".
-bool RE2::Rewrite(string* out,
+bool RE2::Rewrite(std::string* out,
const StringPiece& rewrite,
const StringPiece* vec,
int veclen) const {
@@ -955,7 +955,7 @@ bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
if (dest == NULL) return true;
- reinterpret_cast<string*>(dest)->assign(str, n);
+ reinterpret_cast<std::string*>(dest)->assign(str, n);
return true;
}
diff --git a/re2/re2.h b/re2/re2.h
index 216347d..299501e 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -69,7 +69,7 @@
//
// Example: extracts "ruby" into "s" and 1234 into "i"
// int i;
-// string s;
+// std::string s;
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
//
// Example: fails because string cannot be stored in integer
@@ -131,10 +131,10 @@
// which represents a sub-range of a real string.
//
// Example: read lines of the form "var = value" from a string.
-// string contents = ...; // Fill string somehow
+// std::string contents = ...; // Fill string somehow
// StringPiece input(contents); // Wrap a StringPiece around it
//
-// string var;
+// std::string var;
// int value;
// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
// ...;
@@ -205,9 +205,6 @@ class Regexp;
namespace re2 {
-// TODO(junyer): Get rid of this.
-using std::string;
-
// Interface for regular expression matching. Also corresponds to a
// pre-compiled regular expression. An "RE2" object is safe for
// concurrent use by multiple threads.
@@ -254,12 +251,12 @@ class RE2 {
Quiet // do not log about regexp parse errors
};
- // Need to have the const char* and const string& forms for implicit
+ // Need to have the const char* and const std::string& forms for implicit
// conversions when passing string literals to FullMatch and PartialMatch.
// Otherwise the StringPiece form would be sufficient.
#ifndef SWIG
RE2(const char* pattern);
- RE2(const string& pattern);
+ RE2(const std::string& pattern);
#endif
RE2(const StringPiece& pattern);
RE2(const StringPiece& pattern, const Options& options);
@@ -271,11 +268,11 @@ class RE2 {
// The string specification for this RE2. E.g.
// RE2 re("ab*c?d+");
// re.pattern(); // "ab*c?d+"
- const string& pattern() const { return pattern_; }
+ const std::string& pattern() const { return pattern_; }
// If RE2 could not be created properly, returns an error string.
// Else returns the empty string.
- const string& error() const { return *error_; }
+ const std::string& error() const { return *error_; }
// If RE2 could not be created properly, returns an error code.
// Else returns RE2::NoError (== 0).
@@ -283,7 +280,7 @@ class RE2 {
// If RE2 could not be created properly, returns the offending
// portion of the regexp.
- const string& error_arg() const { return error_arg_; }
+ const std::string& error_arg() const { return error_arg_; }
// Returns the program size, a very approximate measure of a regexp's "cost".
// Larger numbers are more expensive than smaller numbers.
@@ -341,12 +338,12 @@ class RE2 {
// Matches "text" against "re". If pointer arguments are
// supplied, copies matched sub-patterns into them.
//
- // You can pass in a "const char*" or a "string" for "text".
- // You can pass in a "const char*" or a "string" or a "RE2" for "re".
+ // You can pass in a "const char*" or a "std::string" for "text".
+ // You can pass in a "const char*" or a "std::string" or a "RE2" for "re".
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of:
- // string (matched piece is copied to string)
+ // std::string (matched piece is copied to string)
// StringPiece (StringPiece is mutated to point to matched piece)
// T (where "bool T::ParseFrom(const char*, size_t)" exists)
// (void*)NULL (the corresponding matched sub-pattern is not copied)
@@ -402,21 +399,21 @@ class RE2 {
// from the pattern. \0 in "rewrite" refers to the entire matching
// text. E.g.,
//
- // string s = "yabba dabba doo";
+ // std::string s = "yabba dabba doo";
// CHECK(RE2::Replace(&s, "b+", "d"));
//
// will leave "s" containing "yada dabba doo"
//
// Returns true if the pattern matches and a replacement occurs,
// false otherwise.
- static bool Replace(string* str,
+ static bool Replace(std::string* str,
const RE2& re,
const StringPiece& rewrite);
// Like Replace(), except replaces successive non-overlapping occurrences
// of the pattern in the string with the rewrite. E.g.
//
- // string s = "yabba dabba doo";
+ // std::string s = "yabba dabba doo";
// CHECK(RE2::GlobalReplace(&s, "b+", "d"));
//
// will leave "s" containing "yada dada doo"
@@ -426,7 +423,7 @@ class RE2 {
// replacing "ana" within "banana" makes only one replacement, not two.
//
// Returns the number of replacements made.
- static int GlobalReplace(string* str,
+ static int GlobalReplace(std::string* str,
const RE2& re,
const StringPiece& rewrite);
@@ -441,7 +438,7 @@ class RE2 {
static bool Extract(const StringPiece& text,
const RE2& re,
const StringPiece& rewrite,
- string* out);
+ std::string* out);
// Escapes all potentially meaningful regexp characters in
// 'unquoted'. The returned string, used as a regular expression,
@@ -449,7 +446,7 @@ class RE2 {
// 1.5-2.0?
// may become:
// 1\.5\-2\.0\?
- static string QuoteMeta(const StringPiece& unquoted);
+ static std::string QuoteMeta(const StringPiece& unquoted);
// Computes range for any strings matching regexp. The min and max can in
// some cases be arbitrarily precise, so the caller gets to specify the
@@ -465,7 +462,8 @@ class RE2 {
// do not compile down to infinite repetitions.
//
// Returns true on success, false on error.
- bool PossibleMatchRange(string* min, string* max, int maxlen) const;
+ bool PossibleMatchRange(std::string* min, std::string* max,
+ int maxlen) const;
// Generic matching interface
@@ -485,12 +483,12 @@ class RE2 {
// The map records the index of the leftmost group
// with the given name.
// Only valid until the re is deleted.
- const std::map<string, int>& NamedCapturingGroups() const;
+ const std::map<std::string, int>& NamedCapturingGroups() const;
// Return a map from capturing indices to names.
// The map has no entries for unnamed groups.
// Only valid until the re is deleted.
- const std::map<int, string>& CapturingGroupNames() const;
+ const std::map<int, std::string>& CapturingGroupNames() const;
// General matching routine.
// Match against text starting at offset startpos
@@ -528,7 +526,8 @@ class RE2 {
// '\' followed by anything other than a digit or '\'.
// A true return value guarantees that Replace() and Extract() won't
// fail because of a bad rewrite string.
- bool CheckRewriteString(const StringPiece& rewrite, string* error) const;
+ bool CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const;
// Returns the maximum submatch needed for the rewrite to be done by
// Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.
@@ -539,7 +538,7 @@ class RE2 {
// Returns true on success. This method can fail because of a malformed
// rewrite string. CheckRewriteString guarantees that the rewrite will
// be successful.
- bool Rewrite(string* out,
+ bool Rewrite(std::string* out,
const StringPiece& rewrite,
const StringPiece* vec,
int veclen) const;
@@ -737,9 +736,9 @@ class RE2 {
re2::Prog* ReverseProg() const;
- string pattern_; // string regular expression
+ std::string pattern_; // string regular expression
Options options_; // option flags
- string prefix_; // required prefix (before regexp_)
+ std::string prefix_; // required prefix (before regexp_)
bool prefix_foldcase_; // prefix is ASCII case-insensitive
re2::Regexp* entire_regexp_; // parsed regular expression
re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed
@@ -747,17 +746,17 @@ class RE2 {
int num_captures_; // Number of capturing groups
bool is_one_pass_; // can use prog_->SearchOnePass?
- mutable re2::Prog* rprog_; // reverse program for regexp
- mutable const string* error_; // Error indicator
+ mutable re2::Prog* rprog_; // reverse program for regexp
+ mutable const std::string* error_; // Error indicator
// (or points to empty string)
mutable ErrorCode error_code_; // Error code
- mutable string error_arg_; // Fragment of regexp showing error
+ mutable std::string error_arg_; // Fragment of regexp showing error
// Map from capture names to indices
- mutable const std::map<string, int>* named_groups_;
+ mutable const std::map<std::string, int>* named_groups_;
// Map from capture indices to names
- mutable const std::map<int, string>* group_names_;
+ mutable const std::map<int, std::string>* group_names_;
// Onces for lazy computations.
mutable std::once_flag rprog_once_;
@@ -804,7 +803,7 @@ class RE2::Arg {
MAKE_PARSER(unsigned char, parse_uchar)
MAKE_PARSER(float, parse_float)
MAKE_PARSER(double, parse_double)
- MAKE_PARSER(string, parse_string)
+ MAKE_PARSER(std::string, parse_string)
MAKE_PARSER(StringPiece, parse_stringpiece)
MAKE_PARSER(short, parse_short)
diff --git a/re2/regexp.cc b/re2/regexp.cc
index 7cfbbcb..7995ffc 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -510,16 +510,16 @@ static const char *kErrorStrings[] = {
"invalid named capture group",
};
-string RegexpStatus::CodeText(enum RegexpStatusCode code) {
+std::string RegexpStatus::CodeText(enum RegexpStatusCode code) {
if (code < 0 || code >= arraysize(kErrorStrings))
code = kRegexpInternalError;
return kErrorStrings[code];
}
-string RegexpStatus::Text() const {
+std::string RegexpStatus::Text() const {
if (error_arg_.empty())
return CodeText(code_);
- string s;
+ std::string s;
s.append(CodeText(code_));
s.append(": ");
s.append(error_arg_.data(), error_arg_.size());
@@ -569,8 +569,8 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
NamedCapturesWalker() : map_(NULL) {}
~NamedCapturesWalker() { delete map_; }
- std::map<string, int>* TakeMap() {
- std::map<string, int>* m = map_;
+ std::map<std::string, int>* TakeMap() {
+ std::map<std::string, int>* m = map_;
map_ = NULL;
return m;
}
@@ -579,7 +579,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
if (re->op() == kRegexpCapture && re->name() != NULL) {
// Allocate map once we find a name.
if (map_ == NULL)
- map_ = new std::map<string, int>;
+ map_ = new std::map<std::string, int>;
// Record first occurrence of each name.
// (The rule is that if you have the same name
@@ -597,13 +597,13 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
}
private:
- std::map<string, int>* map_;
+ std::map<std::string, int>* map_;
NamedCapturesWalker(const NamedCapturesWalker&) = delete;
NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete;
};
-std::map<string, int>* Regexp::NamedCaptures() {
+std::map<std::string, int>* Regexp::NamedCaptures() {
NamedCapturesWalker w;
w.Walk(this, 0);
return w.TakeMap();
@@ -615,8 +615,8 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
CaptureNamesWalker() : map_(NULL) {}
~CaptureNamesWalker() { delete map_; }
- std::map<int, string>* TakeMap() {
- std::map<int, string>* m = map_;
+ std::map<int, std::string>* TakeMap() {
+ std::map<int, std::string>* m = map_;
map_ = NULL;
return m;
}
@@ -625,7 +625,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
if (re->op() == kRegexpCapture && re->name() != NULL) {
// Allocate map once we find a name.
if (map_ == NULL)
- map_ = new std::map<int, string>;
+ map_ = new std::map<int, std::string>;
(*map_)[re->cap()] = *re->name();
}
@@ -639,13 +639,13 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
}
private:
- std::map<int, string>* map_;
+ std::map<int, std::string>* map_;
CaptureNamesWalker(const CaptureNamesWalker&) = delete;
CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete;
};
-std::map<int, string>* Regexp::CaptureNames() {
+std::map<int, std::string>* Regexp::CaptureNames() {
CaptureNamesWalker w;
w.Walk(this, 0);
return w.TakeMap();
@@ -655,7 +655,8 @@ std::map<int, string>* Regexp::CaptureNames() {
// with a fixed string prefix. If so, returns the prefix and
// the regexp that remains after the prefix. The prefix might
// be ASCII case-insensitive.
-bool Regexp::RequiredPrefix(string* prefix, bool* foldcase, Regexp** suffix) {
+bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
+ Regexp** suffix) {
// No need for a walker: the regexp must be of the form
// 1. some number of ^ anchors
// 2. a literal char or string
diff --git a/re2/regexp.h b/re2/regexp.h
index 2ca96cd..a5d85c8 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -194,7 +194,7 @@ class RegexpStatus {
void set_code(RegexpStatusCode code) { code_ = code; }
void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
- void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }
+ void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; }
RegexpStatusCode code() const { return code_; }
const StringPiece& error_arg() const { return error_arg_; }
bool ok() const { return code() == kRegexpSuccess; }
@@ -204,16 +204,16 @@ class RegexpStatus {
// Returns text equivalent of code, e.g.:
// "Bad character class"
- static string CodeText(RegexpStatusCode code);
+ static std::string CodeText(RegexpStatusCode code);
// Returns text describing error, e.g.:
// "Bad character class: [z-a]"
- string Text() const;
+ std::string Text() const;
private:
RegexpStatusCode code_; // Kind of error
- StringPiece error_arg_; // Piece of regexp containing syntax error.
- string* tmp_; // Temporary storage, possibly where error_arg_ is.
+ StringPiece error_arg_; // Piece of regexp containing syntax error.
+ std::string* tmp_; // Temporary storage, possibly where error_arg_ is.
RegexpStatus(const RegexpStatus&) = delete;
RegexpStatus& operator=(const RegexpStatus&) = delete;
@@ -336,7 +336,7 @@ class Regexp {
Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
- const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
+ const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
@@ -368,8 +368,7 @@ class Regexp {
// string representation of the simplified form. Returns true on success.
// Returns false and sets *status (if status != NULL) on parse error.
static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
- string* dst,
- RegexpStatus* status);
+ std::string* dst, RegexpStatus* status);
// Returns the number of capturing groups in the regexp.
int NumCaptures();
@@ -378,16 +377,16 @@ class Regexp {
// Returns a map from names to capturing group indices,
// or NULL if the regexp contains no named capture groups.
// The caller is responsible for deleting the map.
- std::map<string, int>* NamedCaptures();
+ std::map<std::string, int>* NamedCaptures();
// Returns a map from capturing group indices to capturing group
// names or NULL if the regexp contains no named capture groups. The
// caller is responsible for deleting the map.
- std::map<int, string>* CaptureNames();
+ std::map<int, std::string>* CaptureNames();
// Returns a string representation of the current regexp,
// using as few parentheses as possible.
- string ToString();
+ std::string ToString();
// Convenience functions. They consume the passed reference,
// so in many cases you should use, e.g., Plus(re->Incref(), flags).
@@ -409,7 +408,7 @@ class Regexp {
// Debugging function. Returns string format for regexp
// that makes structure clear. Does NOT use regexp syntax.
- string Dump();
+ std::string Dump();
// Helper traversal class, defined fully in walker-inl.h.
template<typename T> class Walker;
@@ -438,7 +437,8 @@ class Regexp {
// follows it.
// Callers should expect *prefix, *foldcase and *suffix to be "zeroed"
// regardless of the return value.
- bool RequiredPrefix(string* prefix, bool* foldcase, Regexp** suffix);
+ bool RequiredPrefix(std::string* prefix, bool* foldcase,
+ Regexp** suffix);
private:
// Constructor allocates vectors as appropriate for operator.
@@ -564,7 +564,7 @@ class Regexp {
};
struct { // Capture
int cap_;
- string* name_;
+ std::string* name_;
};
struct { // LiteralString
int nrunes_;
diff --git a/re2/set.cc b/re2/set.cc
index 2572d50..d4c34ad 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -33,7 +33,7 @@ RE2::Set::~Set() {
delete prog_;
}
-int RE2::Set::Add(const StringPiece& pattern, string* error) {
+int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
if (compiled_) {
LOG(DFATAL) << "RE2::Set::Add() called after compiling";
return -1;
@@ -68,7 +68,7 @@ int RE2::Set::Add(const StringPiece& pattern, string* error) {
sub[1] = m;
re = re2::Regexp::Concat(sub, 2, pf);
}
- elem_.emplace_back(string(pattern), re);
+ elem_.emplace_back(std::string(pattern), re);
return n;
}
diff --git a/re2/set.h b/re2/set.h
index a8c2caa..59733fd 100644
--- a/re2/set.h
+++ b/re2/set.h
@@ -42,7 +42,7 @@ class RE2::Set {
// Indices are assigned in sequential order starting from 0.
// Errors do not increment the index; if error is not NULL, *error will hold
// the error message from the parser.
- int Add(const StringPiece& pattern, string* error);
+ int Add(const StringPiece& pattern, std::string* error);
// Compiles the set in preparation for matching.
// Returns false if the compiler runs out of memory.
@@ -62,7 +62,7 @@ class RE2::Set {
ErrorInfo* error_info) const;
private:
- typedef std::pair<string, re2::Regexp*> Elem;
+ typedef std::pair<std::string, re2::Regexp*> Elem;
RE2::Options options_;
RE2::Anchor anchor_;
diff --git a/re2/simplify.cc b/re2/simplify.cc
index 7cc0419..8939678 100644
--- a/re2/simplify.cc
+++ b/re2/simplify.cc
@@ -21,8 +21,7 @@ namespace re2 {
// string representation of the simplified form. Returns true on success.
// Returns false and sets *error (if error != NULL) on error.
bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
- string* dst,
- RegexpStatus* status) {
+ std::string* dst, RegexpStatus* status) {
Regexp* re = Parse(src, flags, status);
if (re == NULL)
return false;
diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc
index d89d80f..2accba1 100644
--- a/re2/testing/compile_test.cc
+++ b/re2/testing/compile_test.cc
@@ -26,88 +26,88 @@ struct Test {
static Test tests[] = {
{ "a",
- "3. byte [61-61] -> 4\n"
+ "3. byte [61-61] 0 -> 4\n"
"4. match! 0\n" },
{ "ab",
- "3. byte [61-61] -> 4\n"
- "4. byte [62-62] -> 5\n"
+ "3. byte [61-61] 0 -> 4\n"
+ "4. byte [62-62] 0 -> 5\n"
"5. match! 0\n" },
{ "a|c",
- "3+ byte [61-61] -> 5\n"
- "4. byte [63-63] -> 5\n"
+ "3+ byte [61-61] 0 -> 5\n"
+ "4. byte [63-63] 0 -> 5\n"
"5. match! 0\n" },
{ "a|b",
- "3. byte [61-62] -> 4\n"
+ "3. byte [61-62] 0 -> 4\n"
"4. match! 0\n" },
{ "[ab]",
- "3. byte [61-62] -> 4\n"
+ "3. byte [61-62] 0 -> 4\n"
"4. match! 0\n" },
{ "a+",
- "3. byte [61-61] -> 4\n"
+ "3. byte [61-61] 0 -> 4\n"
"4+ nop -> 3\n"
"5. match! 0\n" },
{ "a+?",
- "3. byte [61-61] -> 4\n"
+ "3. byte [61-61] 0 -> 4\n"
"4+ match! 0\n"
"5. nop -> 3\n" },
{ "a*",
- "3+ byte [61-61] -> 3\n"
+ "3+ byte [61-61] 1 -> 3\n"
"4. match! 0\n" },
{ "a*?",
"3+ match! 0\n"
- "4. byte [61-61] -> 3\n" },
+ "4. byte [61-61] 0 -> 3\n" },
{ "a?",
- "3+ byte [61-61] -> 5\n"
+ "3+ byte [61-61] 1 -> 5\n"
"4. nop -> 5\n"
"5. match! 0\n" },
{ "a??",
"3+ nop -> 5\n"
- "4. byte [61-61] -> 5\n"
+ "4. byte [61-61] 0 -> 5\n"
"5. match! 0\n" },
{ "a{4}",
- "3. byte [61-61] -> 4\n"
- "4. byte [61-61] -> 5\n"
- "5. byte [61-61] -> 6\n"
- "6. byte [61-61] -> 7\n"
+ "3. byte [61-61] 0 -> 4\n"
+ "4. byte [61-61] 0 -> 5\n"
+ "5. byte [61-61] 0 -> 6\n"
+ "6. byte [61-61] 0 -> 7\n"
"7. match! 0\n" },
{ "(a)",
"3. capture 2 -> 4\n"
- "4. byte [61-61] -> 5\n"
+ "4. byte [61-61] 0 -> 5\n"
"5. capture 3 -> 6\n"
"6. match! 0\n" },
{ "(?:a)",
- "3. byte [61-61] -> 4\n"
+ "3. byte [61-61] 0 -> 4\n"
"4. match! 0\n" },
{ "",
"3. match! 0\n" },
{ ".",
- "3+ byte [00-09] -> 5\n"
- "4. byte [0b-ff] -> 5\n"
+ "3+ byte [00-09] 0 -> 5\n"
+ "4. byte [0b-ff] 0 -> 5\n"
"5. match! 0\n" },
{ "[^ab]",
- "3+ byte [00-09] -> 6\n"
- "4+ byte [0b-60] -> 6\n"
- "5. byte [63-ff] -> 6\n"
+ "3+ byte [00-09] 0 -> 6\n"
+ "4+ byte [0b-60] 0 -> 6\n"
+ "5. byte [63-ff] 0 -> 6\n"
"6. match! 0\n" },
{ "[Aa]",
- "3. byte/i [61-61] -> 4\n"
+ "3. byte/i [61-61] 0 -> 4\n"
"4. match! 0\n" },
{ "\\C+",
- "3. byte [00-ff] -> 4\n"
+ "3. byte [00-ff] 0 -> 4\n"
"4+ altmatch -> 5 | 6\n"
"5+ nop -> 3\n"
"6. match! 0\n" },
{ "\\C*",
"3+ altmatch -> 4 | 5\n"
- "4+ byte [00-ff] -> 3\n"
+ "4+ byte [00-ff] 1 -> 3\n"
"5. match! 0\n" },
{ "\\C?",
- "3+ byte [00-ff] -> 5\n"
+ "3+ byte [00-ff] 1 -> 5\n"
"4. nop -> 5\n"
"5. match! 0\n" },
// Issue 20992936
{ "[[-`]",
- "3. byte [5b-60] -> 4\n"
+ "3. byte [5b-60] 0 -> 4\n"
"4. match! 0\n" },
};
@@ -129,7 +129,7 @@ TEST(TestRegexpCompileToProg, Simple) {
continue;
}
ASSERT_TRUE(re->CompileToProg(1) == NULL);
- string s = prog->Dump();
+ std::string s = prog->Dump();
if (s != t.code) {
LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
LOG(ERROR) << "Want:\n" << t.code;
@@ -143,7 +143,7 @@ TEST(TestRegexpCompileToProg, Simple) {
}
static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
- string* bytemap) {
+ std::string* bytemap) {
Regexp* re = Regexp::Parse(pattern, flags, NULL);
EXPECT_TRUE(re != NULL);
@@ -158,7 +158,7 @@ static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
TEST(TestCompile, Latin1Ranges) {
// The distinct byte ranges involved in the Latin-1 dot ([^\n]).
- string bytemap;
+ std::string bytemap;
DumpByteMap(".", Regexp::PerlX|Regexp::Latin1, &bytemap);
EXPECT_EQ("[00-09] -> 0\n"
@@ -168,7 +168,7 @@ TEST(TestCompile, Latin1Ranges) {
}
TEST(TestCompile, OtherByteMapTests) {
- string bytemap;
+ std::string bytemap;
// Test that "absent" ranges are mapped to the same byte class.
DumpByteMap("[0-9A-Fa-f]+", Regexp::PerlX|Regexp::Latin1, &bytemap);
@@ -207,7 +207,7 @@ TEST(TestCompile, UTF8Ranges) {
// Once, erroneously split between 0x3f and 0x40 because it is
// a 6-bit boundary.
- string bytemap;
+ std::string bytemap;
DumpByteMap(".", Regexp::PerlX, &bytemap);
EXPECT_EQ("[00-09] -> 0\n"
@@ -240,7 +240,7 @@ TEST(TestCompile, InsufficientMemory) {
}
static void Dump(StringPiece pattern, Regexp::ParseFlags flags,
- string* forward, string* reverse) {
+ std::string* forward, std::string* reverse) {
Regexp* re = Regexp::Parse(pattern, flags, NULL);
EXPECT_TRUE(re != NULL);
@@ -265,54 +265,54 @@ TEST(TestCompile, Bug26705922) {
// Bug in the compiler caused inefficient bytecode to be generated for Unicode
// groups: common suffixes were cached, but common prefixes were not factored.
- string forward, reverse;
+ std::string forward, reverse;
Dump("[\\x{10000}\\x{10010}]", Regexp::LikePerl, &forward, &reverse);
- EXPECT_EQ("3. byte [f0-f0] -> 4\n"
- "4. byte [90-90] -> 5\n"
- "5. byte [80-80] -> 6\n"
- "6+ byte [80-80] -> 8\n"
- "7. byte [90-90] -> 8\n"
+ EXPECT_EQ("3. byte [f0-f0] 0 -> 4\n"
+ "4. byte [90-90] 0 -> 5\n"
+ "5. byte [80-80] 0 -> 6\n"
+ "6+ byte [80-80] 0 -> 8\n"
+ "7. byte [90-90] 0 -> 8\n"
"8. match! 0\n",
forward);
- EXPECT_EQ("3+ byte [80-80] -> 5\n"
- "4. byte [90-90] -> 5\n"
- "5. byte [80-80] -> 6\n"
- "6. byte [90-90] -> 7\n"
- "7. byte [f0-f0] -> 8\n"
+ EXPECT_EQ("3+ byte [80-80] 0 -> 5\n"
+ "4. byte [90-90] 0 -> 5\n"
+ "5. byte [80-80] 0 -> 6\n"
+ "6. byte [90-90] 0 -> 7\n"
+ "7. byte [f0-f0] 0 -> 8\n"
"8. match! 0\n",
reverse);
Dump("[\\x{8000}-\\x{10FFF}]", Regexp::LikePerl, &forward, &reverse);
- EXPECT_EQ("3+ byte [e8-ef] -> 5\n"
- "4. byte [f0-f0] -> 8\n"
- "5. byte [80-bf] -> 6\n"
- "6. byte [80-bf] -> 7\n"
+ EXPECT_EQ("3+ byte [e8-ef] 0 -> 5\n"
+ "4. byte [f0-f0] 0 -> 8\n"
+ "5. byte [80-bf] 0 -> 6\n"
+ "6. byte [80-bf] 0 -> 7\n"
"7. match! 0\n"
- "8. byte [90-90] -> 5\n",
+ "8. byte [90-90] 0 -> 5\n",
forward);
- EXPECT_EQ("3. byte [80-bf] -> 4\n"
- "4. byte [80-bf] -> 5\n"
- "5+ byte [e8-ef] -> 7\n"
- "6. byte [90-90] -> 8\n"
+ EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
+ "4. byte [80-bf] 0 -> 5\n"
+ "5+ byte [e8-ef] 0 -> 7\n"
+ "6. byte [90-90] 0 -> 8\n"
"7. match! 0\n"
- "8. byte [f0-f0] -> 7\n",
+ "8. byte [f0-f0] 0 -> 7\n",
reverse);
Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse);
- EXPECT_EQ("3. byte [80-bf] -> 4\n"
- "4+ byte [c2-df] -> 7\n"
- "5+ byte [a0-bf] -> 8\n"
- "6. byte [80-bf] -> 9\n"
+ EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
+ "4+ byte [c2-df] 0 -> 7\n"
+ "5+ byte [a0-bf] 1 -> 8\n"
+ "6. byte [80-bf] 0 -> 9\n"
"7. match! 0\n"
- "8. byte [e0-e0] -> 7\n"
- "9+ byte [e1-ef] -> 7\n"
- "10+ byte [90-bf] -> 13\n"
- "11+ byte [80-bf] -> 14\n"
- "12. byte [80-8f] -> 15\n"
- "13. byte [f0-f0] -> 7\n"
- "14. byte [f1-f3] -> 7\n"
- "15. byte [f4-f4] -> 7\n",
+ "8. byte [e0-e0] 0 -> 7\n"
+ "9+ byte [e1-ef] 0 -> 7\n"
+ "10+ byte [90-bf] 1 -> 13\n"
+ "11+ byte [80-bf] 1 -> 14\n"
+ "12. byte [80-8f] 0 -> 15\n"
+ "13. byte [f0-f0] 0 -> 7\n"
+ "14. byte [f1-f3] 0 -> 7\n"
+ "15. byte [f4-f4] 0 -> 7\n",
reverse);
}
@@ -320,14 +320,14 @@ TEST(TestCompile, Bug35237384) {
// Bug in the compiler caused inefficient bytecode to be generated for
// nested nullable subexpressions.
- string forward;
+ std::string forward;
Dump("a**{3,}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL);
- EXPECT_EQ("3+ byte [61-61] -> 3\n"
+ EXPECT_EQ("3+ byte [61-61] 1 -> 3\n"
"4. nop -> 5\n"
- "5+ byte [61-61] -> 5\n"
+ "5+ byte [61-61] 1 -> 5\n"
"6. nop -> 7\n"
- "7+ byte [61-61] -> 7\n"
+ "7+ byte [61-61] 1 -> 7\n"
"8. match! 0\n",
forward);
@@ -335,17 +335,17 @@ TEST(TestCompile, Bug35237384) {
EXPECT_EQ("3+ nop -> 6\n"
"4+ nop -> 8\n"
"5. nop -> 21\n"
- "6+ byte [61-61] -> 6\n"
+ "6+ byte [61-61] 1 -> 6\n"
"7. nop -> 3\n"
- "8+ byte [62-62] -> 8\n"
+ "8+ byte [62-62] 1 -> 8\n"
"9. nop -> 3\n"
- "10+ byte [61-61] -> 10\n"
+ "10+ byte [61-61] 1 -> 10\n"
"11. nop -> 21\n"
- "12+ byte [62-62] -> 12\n"
+ "12+ byte [62-62] 1 -> 12\n"
"13. nop -> 21\n"
- "14+ byte [61-61] -> 14\n"
+ "14+ byte [61-61] 1 -> 14\n"
"15. nop -> 18\n"
- "16+ byte [62-62] -> 16\n"
+ "16+ byte [62-62] 1 -> 16\n"
"17. nop -> 18\n"
"18+ nop -> 14\n"
"19+ nop -> 16\n"
@@ -359,38 +359,38 @@ TEST(TestCompile, Bug35237384) {
EXPECT_EQ("3+ nop -> 36\n"
"4+ nop -> 31\n"
"5. nop -> 33\n"
- "6+ byte [00-09] -> 8\n"
- "7. byte [0b-ff] -> 8\n"
+ "6+ byte [00-09] 0 -> 8\n"
+ "7. byte [0b-ff] 0 -> 8\n"
"8+ nop -> 6\n"
"9+ nop -> 29\n"
"10. nop -> 28\n"
- "11+ byte [00-09] -> 13\n"
- "12. byte [0b-ff] -> 13\n"
+ "11+ byte [00-09] 0 -> 13\n"
+ "12. byte [0b-ff] 0 -> 13\n"
"13+ nop -> 11\n"
"14+ nop -> 26\n"
"15. nop -> 28\n"
- "16+ byte [00-09] -> 18\n"
- "17. byte [0b-ff] -> 18\n"
+ "16+ byte [00-09] 0 -> 18\n"
+ "17. byte [0b-ff] 0 -> 18\n"
"18+ nop -> 16\n"
"19+ nop -> 36\n"
"20. nop -> 33\n"
- "21+ byte [00-09] -> 23\n"
- "22. byte [0b-ff] -> 23\n"
+ "21+ byte [00-09] 0 -> 23\n"
+ "22. byte [0b-ff] 0 -> 23\n"
"23+ nop -> 21\n"
"24+ nop -> 31\n"
"25. nop -> 33\n"
"26+ nop -> 28\n"
- "27. byte [53-53] -> 11\n"
+ "27. byte [53-53] 0 -> 11\n"
"28. match! 0\n"
"29+ nop -> 28\n"
- "30. byte [53-53] -> 6\n"
+ "30. byte [53-53] 0 -> 6\n"
"31+ nop -> 33\n"
- "32. byte [53-53] -> 21\n"
+ "32. byte [53-53] 0 -> 21\n"
"33+ nop -> 29\n"
"34+ nop -> 26\n"
"35. nop -> 28\n"
"36+ nop -> 33\n"
- "37. byte [53-53] -> 16\n",
+ "37. byte [53-53] 0 -> 16\n",
forward);
}
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index eb44b4a..09d31f8 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -33,7 +33,7 @@ static void DoBuild(Prog* prog) {
TEST(Multithreaded, BuildEntireDFA) {
// Create regexp with 2^FLAGS_size states in DFA.
- string s = "a";
+ std::string s = "a";
for (int i = 0; i < FLAGS_size; i++)
s += "[ab]";
s += "b";
@@ -116,7 +116,7 @@ TEST(SingleThreaded, BuildEntireDFA) {
// DeBruijn string causes the DFA to need to create a new state at every
// position in the input, never reusing any states until it gets to the
// end of the string. This is the worst possible case for DFA execution.
-static string DeBruijnString(int n) {
+static std::string DeBruijnString(int n) {
CHECK_LT(n, static_cast<int>(8*sizeof(int)));
CHECK_GT(n, 0);
@@ -124,7 +124,7 @@ static string DeBruijnString(int n) {
for (int i = 0; i < 1<<n; i++)
did[i] = false;
- string s;
+ std::string s;
for (int i = 0; i < n-1; i++)
s.append("0");
int bits = 0;
@@ -180,8 +180,8 @@ TEST(SingleThreaded, SearchDFA) {
// The De Bruijn string for n ends with a 1 followed by n 0s in a row,
// which is not a match for 0[01]{n}$. Adding one more 0 is a match.
- string no_match = DeBruijnString(n);
- string match = no_match + "0";
+ std::string no_match = DeBruijnString(n);
+ std::string match = no_match + "0";
int64_t usage;
int64_t peak_usage;
@@ -243,8 +243,8 @@ TEST(Multithreaded, SearchDFA) {
Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
- string no_match = DeBruijnString(n);
- string match = no_match + "0";
+ std::string no_match = DeBruijnString(n);
+ std::string match = no_match + "0";
// Check that single-threaded code works.
{
@@ -356,7 +356,7 @@ TEST(DFA, Callback) {
ASSERT_TRUE(re != NULL);
Prog* prog = re->CompileToProg(0);
ASSERT_TRUE(prog != NULL);
- string dump;
+ std::string dump;
prog->BuildEntireDFA(Prog::kLongestMatch, [&](const int* next, bool match) {
ASSERT_TRUE(next != NULL);
if (!dump.empty())
diff --git a/re2/testing/dump.cc b/re2/testing/dump.cc
index b60bf24..743f7b5 100644
--- a/re2/testing/dump.cc
+++ b/re2/testing/dump.cc
@@ -57,7 +57,7 @@ static const char* kOpcodeNames[] = {
// Create string representation of regexp with explicit structure.
// Nothing pretty, just for testing.
-static void DumpRegexpAppending(Regexp* re, string* s) {
+static void DumpRegexpAppending(Regexp* re, std::string* s) {
if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
StringAppendF(s, "op%d", re->op());
} else {
@@ -136,7 +136,7 @@ static void DumpRegexpAppending(Regexp* re, string* s) {
DumpRegexpAppending(re->sub()[0], s);
break;
case kRegexpCharClass: {
- string sep;
+ std::string sep;
for (CharClass::iterator it = re->cc()->begin();
it != re->cc()->end(); ++it) {
RuneRange rr = *it;
@@ -153,8 +153,8 @@ static void DumpRegexpAppending(Regexp* re, string* s) {
s->append("}");
}
-string Regexp::Dump() {
- string s;
+std::string Regexp::Dump() {
+ std::string s;
// Make sure being called from a unit test.
if (FLAGS_test_tmpdir.empty()) {
diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc
index 29c5def..9ead27e 100644
--- a/re2/testing/exhaustive1_test.cc
+++ b/re2/testing/exhaustive1_test.cc
@@ -16,7 +16,7 @@ namespace re2 {
// Test simple repetition operators
TEST(Repetition, Simple) {
- std::vector<string> ops = Split(" ",
+ std::vector<std::string> ops = Split(" ",
"%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
"%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
"%s* %s+ %s? %s*? %s+? %s??");
@@ -28,7 +28,7 @@ TEST(Repetition, Simple) {
// Test capturing parens -- (a) -- inside repetition operators
TEST(Repetition, Capturing) {
- std::vector<string> ops = Split(" ",
+ std::vector<std::string> ops = Split(" ",
"%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} "
"%s{1,2} %s{2} %s{2,} %s{3,4} %s{4,5} "
"%s* %s+ %s? %s*? %s+? %s??");
@@ -36,7 +36,7 @@ TEST(Repetition, Capturing) {
7, Explode("ab"), "(?:%s)", "");
// This would be a great test, but it runs forever when PCRE is enabled.
- if (FLAGS_regexp_engines.find("PCRE") == string::npos)
+ if (FLAGS_regexp_engines.find("PCRE") == std::string::npos)
ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
50, Explode("a"), "(?:%s)", "");
}
diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc
index ba38a6e..ce4235b 100644
--- a/re2/testing/exhaustive2_test.cc
+++ b/re2/testing/exhaustive2_test.cc
@@ -24,8 +24,8 @@ TEST(EmptyString, Exhaustive) {
// Test escaped versions of regexp syntax.
TEST(Punctuation, Literals) {
- std::vector<string> alphabet = Explode("()*+?{}[]\\^$.");
- std::vector<string> escaped = alphabet;
+ std::vector<std::string> alphabet = Explode("()*+?{}[]\\^$.");
+ std::vector<std::string> escaped = alphabet;
for (size_t i = 0; i < escaped.size(); i++)
escaped[i] = "\\" + escaped[i];
ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
@@ -63,7 +63,7 @@ TEST(LineEnds, Exhaustive) {
// provides a mechanism, and RE2 could add new syntax if needed.
//
// TEST(Newlines, Exhaustive) {
-// std::vector<string> empty_vector;
+// std::vector<std::string> empty_vector;
// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
// RegexpGenerator::EgrepOps(),
// 4, Explode("a\n"), "");
diff --git a/re2/testing/exhaustive3_test.cc b/re2/testing/exhaustive3_test.cc
index cf09e18..1fe46b6 100644
--- a/re2/testing/exhaustive3_test.cc
+++ b/re2/testing/exhaustive3_test.cc
@@ -17,7 +17,7 @@ namespace re2 {
// Test simple character classes by themselves.
TEST(CharacterClasses, Exhaustive) {
- std::vector<string> atoms = Split(" ",
+ std::vector<std::string> atoms = Split(" ",
"[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
5, Explode("ab"), "", "");
@@ -25,25 +25,25 @@ TEST(CharacterClasses, Exhaustive) {
// Test simple character classes inside a___b (for example, a[a]b).
TEST(CharacterClasses, ExhaustiveAB) {
- std::vector<string> atoms = Split(" ",
+ std::vector<std::string> atoms = Split(" ",
"[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
5, Explode("ab"), "a%sb", "");
}
// Returns UTF8 for Rune r
-static string UTF8(Rune r) {
+static std::string UTF8(Rune r) {
char buf[UTFmax+1];
buf[runetochar(buf, &r)] = 0;
- return string(buf);
+ return std::string(buf);
}
// Returns a vector of "interesting" UTF8 characters.
// Unicode is now too big to just return all of them,
// so UTF8Characters return a set likely to be good test cases.
-static const std::vector<string>& InterestingUTF8() {
+static const std::vector<std::string>& InterestingUTF8() {
static bool init;
- static std::vector<string> v;
+ static std::vector<std::string> v;
if (init)
return v;
@@ -70,12 +70,12 @@ static const std::vector<string>& InterestingUTF8() {
// Test interesting UTF-8 characters against character classes.
TEST(InterestingUTF8, SingleOps) {
- std::vector<string> atoms = Split(" ",
+ std::vector<std::string> atoms = Split(" ",
". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
"[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
"[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
"[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
- std::vector<string> ops; // no ops
+ std::vector<std::string> ops; // no ops
ExhaustiveTest(1, 0, atoms, ops,
1, InterestingUTF8(), "", "");
}
@@ -83,13 +83,13 @@ TEST(InterestingUTF8, SingleOps) {
// Test interesting UTF-8 characters against character classes,
// but wrap everything inside AB.
TEST(InterestingUTF8, AB) {
- std::vector<string> atoms = Split(" ",
+ std::vector<std::string> atoms = Split(" ",
". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
"[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
"[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
"[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
- std::vector<string> ops; // no ops
- std::vector<string> alpha = InterestingUTF8();
+ std::vector<std::string> ops; // no ops
+ std::vector<std::string> alpha = InterestingUTF8();
for (size_t i = 0; i < alpha.size(); i++)
alpha[i] = "a" + alpha[i] + "b";
ExhaustiveTest(1, 0, atoms, ops,
diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc
index 4f6335f..7e5dd14 100644
--- a/re2/testing/exhaustive_tester.cc
+++ b/re2/testing/exhaustive_tester.cc
@@ -73,9 +73,9 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
// Processes a single generated regexp.
// Compiles it using Regexp interface and PCRE, and then
// checks that NFA, DFA, and PCRE all return the same results.
-void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
+void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
regexps_++;
- string regexp = const_regexp;
+ std::string regexp = const_regexp;
if (!topwrapper_.empty())
regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
@@ -142,12 +142,12 @@ void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
// Runs an exhaustive test on the given parameters.
void ExhaustiveTest(int maxatoms, int maxops,
- const std::vector<string>& alphabet,
- const std::vector<string>& ops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
int maxstrlen,
- const std::vector<string>& stralphabet,
- const string& wrapper,
- const string& topwrapper) {
+ const std::vector<std::string>& stralphabet,
+ const std::string& wrapper,
+ const std::string& topwrapper) {
if (RE2_DEBUG_MODE) {
if (maxatoms > 1)
maxatoms--;
@@ -169,9 +169,9 @@ void ExhaustiveTest(int maxatoms, int maxops,
// Runs an exhaustive test using the given parameters and
// the basic egrep operators.
-void EgrepTest(int maxatoms, int maxops, const string& alphabet,
- int maxstrlen, const string& stralphabet,
- const string& wrapper) {
+void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
+ int maxstrlen, const std::string& stralphabet,
+ const std::string& wrapper) {
const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
for (int i = 0; i < arraysize(tops); i++) {
diff --git a/re2/testing/exhaustive_tester.h b/re2/testing/exhaustive_tester.h
index 769d8b5..3a14282 100644
--- a/re2/testing/exhaustive_tester.h
+++ b/re2/testing/exhaustive_tester.h
@@ -42,12 +42,12 @@ class ExhaustiveTester : public RegexpGenerator {
public:
ExhaustiveTester(int maxatoms,
int maxops,
- const std::vector<string>& alphabet,
- const std::vector<string>& ops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
int maxstrlen,
- const std::vector<string>& stralphabet,
- const string& wrapper,
- const string& topwrapper)
+ const std::vector<std::string>& stralphabet,
+ const std::string& wrapper,
+ const std::string& topwrapper)
: RegexpGenerator(maxatoms, maxops, alphabet, ops),
strgen_(maxstrlen, stralphabet),
wrapper_(wrapper),
@@ -60,7 +60,7 @@ class ExhaustiveTester : public RegexpGenerator {
int failures() { return failures_; }
// Needed for RegexpGenerator interface.
- void HandleRegexp(const string& regexp);
+ void HandleRegexp(const std::string& regexp);
// Causes testing to generate random input strings.
void RandomStrings(int32_t seed, int32_t count) {
@@ -71,8 +71,8 @@ class ExhaustiveTester : public RegexpGenerator {
private:
StringGenerator strgen_;
- string wrapper_; // Regexp wrapper - either empty or has one %s.
- string topwrapper_; // Regexp top-level wrapper.
+ std::string wrapper_; // Regexp wrapper - either empty or has one %s.
+ std::string topwrapper_; // Regexp top-level wrapper.
int regexps_; // Number of HandleRegexp calls
int tests_; // Number of regexp tests.
int failures_; // Number of tests failed.
@@ -87,18 +87,18 @@ class ExhaustiveTester : public RegexpGenerator {
// Runs an exhaustive test on the given parameters.
void ExhaustiveTest(int maxatoms, int maxops,
- const std::vector<string>& alphabet,
- const std::vector<string>& ops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
int maxstrlen,
- const std::vector<string>& stralphabet,
- const string& wrapper,
- const string& topwrapper);
+ const std::vector<std::string>& stralphabet,
+ const std::string& wrapper,
+ const std::string& topwrapper);
// Runs an exhaustive test using the given parameters and
// the basic egrep operators.
-void EgrepTest(int maxatoms, int maxops, const string& alphabet,
- int maxstrlen, const string& stralphabet,
- const string& wrapper);
+void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
+ int maxstrlen, const std::string& stralphabet,
+ const std::string& wrapper);
} // namespace re2
diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc
index 867eac6..835ebcf 100644
--- a/re2/testing/filtered_re2_test.cc
+++ b/re2/testing/filtered_re2_test.cc
@@ -19,7 +19,7 @@ struct FilterTestVars {
FilterTestVars() {}
explicit FilterTestVars(int min_atom_len) : f(min_atom_len) {}
- std::vector<string> atoms;
+ std::vector<std::string> atoms;
std::vector<int> atom_indices;
std::vector<int> matches;
RE2::Options opts;
@@ -157,7 +157,7 @@ bool CheckExpectedAtoms(const char* atoms[],
int n,
const char* testname,
struct FilterTestVars* v) {
- std::vector<string> expected;
+ std::vector<std::string> expected;
for (int i = 0; i < n; i++)
expected.push_back(atoms[i]);
@@ -200,8 +200,8 @@ TEST(FilteredRE2Test, AtomTests) {
EXPECT_EQ(0, nfail);
}
-void FindAtomIndices(const std::vector<string>& atoms,
- const std::vector<string>& matched_atoms,
+void FindAtomIndices(const std::vector<std::string>& atoms,
+ const std::vector<std::string>& matched_atoms,
std::vector<int>* atom_indices) {
atom_indices->clear();
for (size_t i = 0; i < matched_atoms.size(); i++) {
@@ -220,13 +220,13 @@ TEST(FilteredRE2Test, MatchEmptyPattern) {
// We are using the regexps used in one of the atom tests
// for this test. Adding the EXPECT here to make sure
// the index we use for the test is for the correct test.
- EXPECT_EQ("CheckEmptyPattern", string(t->testname));
+ EXPECT_EQ("CheckEmptyPattern", std::string(t->testname));
int nregexp;
for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
if (t->regexps[nregexp] == NULL)
break;
AddRegexpsAndCompile(t->regexps, nregexp, &v);
- string text = "0123";
+ std::string text = "0123";
std::vector<int> atom_ids;
std::vector<int> matching_regexps;
EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
@@ -237,17 +237,17 @@ TEST(FilteredRE2Test, MatchTests) {
AtomTest* t = &atom_tests[2];
// We are using the regexps used in one of the atom tests
// for this test.
- EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname));
+ EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname));
int nregexp;
for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
if (t->regexps[nregexp] == NULL)
break;
AddRegexpsAndCompile(t->regexps, nregexp, &v);
- string text = "abc121212xyz";
+ std::string text = "abc121212xyz";
// atoms = abc
std::vector<int> atom_ids;
- std::vector<string> atoms;
+ std::vector<std::string> atoms;
atoms.push_back("abc");
FindAtomIndices(v.atoms, atoms, &atom_ids);
std::vector<int> matching_regexps;
diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc
index d2b04fc..5cb3952 100644
--- a/re2/testing/parse_test.cc
+++ b/re2/testing/parse_test.cc
@@ -224,7 +224,7 @@ bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
}
void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
- const string& title) {
+ const std::string& title) {
Regexp** re = new Regexp*[ntests];
for (int i = 0; i < ntests; i++) {
RegexpStatus status;
@@ -235,14 +235,16 @@ void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
re[i] = Regexp::Parse(tests[i].regexp, f, &status);
ASSERT_TRUE(re[i] != NULL)
<< " " << tests[i].regexp << " " << status.Text();
- string s = re[i]->Dump();
- EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp
- << "\nparse: " << string(tests[i].parse) << " s: " << s << " flag=" << f;
+ std::string s = re[i]->Dump();
+ EXPECT_EQ(std::string(tests[i].parse), s)
+ << "Regexp: " << tests[i].regexp
+ << "\nparse: " << std::string(tests[i].parse)
+ << " s: " << s << " flag=" << f;
}
for (int i = 0; i < ntests; i++) {
for (int j = 0; j < ntests; j++) {
- EXPECT_EQ(string(tests[i].parse) == string(tests[j].parse),
+ EXPECT_EQ(std::string(tests[i].parse) == std::string(tests[j].parse),
RegexpEqualTestingOnly(re[i], re[j]))
<< "Regexp: " << tests[i].regexp << " " << tests[j].regexp;
}
@@ -453,9 +455,12 @@ TEST(TestToString, EquivalentParse) {
}
Regexp* re = Regexp::Parse(tests[i].regexp, f, &status);
ASSERT_TRUE(re != NULL) << " " << tests[i].regexp << " " << status.Text();
- string s = re->Dump();
- EXPECT_EQ(string(tests[i].parse), s) << " " << tests[i].regexp << " " << string(tests[i].parse) << " " << s;
- string t = re->ToString();
+ std::string s = re->Dump();
+ EXPECT_EQ(std::string(tests[i].parse), s)
+ << "Regexp: " << tests[i].regexp
+ << "\nparse: " << std::string(tests[i].parse)
+ << " s: " << s << " flag=" << f;
+ std::string t = re->ToString();
if (t != tests[i].regexp) {
// If ToString didn't return the original regexp,
// it must have found one with fewer parens.
@@ -468,8 +473,8 @@ TEST(TestToString, EquivalentParse) {
// Test that if we parse the new regexp we get the same structure.
Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);
ASSERT_TRUE(nre != NULL) << " reparse " << t << " " << status.Text();
- string ss = nre->Dump();
- string tt = nre->ToString();
+ std::string ss = nre->Dump();
+ std::string tt = nre->ToString();
if (s != ss || t != tt)
LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
EXPECT_EQ(s, ss);
diff --git a/re2/testing/possible_match_test.cc b/re2/testing/possible_match_test.cc
index f43a78b..438cb41 100644
--- a/re2/testing/possible_match_test.cc
+++ b/re2/testing/possible_match_test.cc
@@ -21,8 +21,8 @@ namespace re2 {
// Test that C++ strings are compared as uint8s, not int8s.
// PossibleMatchRange doesn't depend on this, but callers probably will.
TEST(CplusplusStrings, EightBit) {
- string s = "\x70";
- string t = "\xA0";
+ std::string s = "\x70";
+ std::string t = "\xA0";
EXPECT_LT(s, t);
}
@@ -110,7 +110,7 @@ TEST(PossibleMatchRange, HandWritten) {
for (int i = 0; i < arraysize(tests); i++) {
for (int j = 0; j < 2; j++) {
const PrefixTest& t = tests[i];
- string min, max;
+ std::string min, max;
if (j == 0) {
LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
@@ -132,7 +132,7 @@ TEST(PossibleMatchRange, HandWritten) {
// Test cases where PossibleMatchRange should return false.
TEST(PossibleMatchRange, Failures) {
- string min, max;
+ std::string min, max;
// Fails because no room to write max.
EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
@@ -172,10 +172,10 @@ class PossibleMatchTester : public RegexpGenerator {
public:
PossibleMatchTester(int maxatoms,
int maxops,
- const std::vector<string>& alphabet,
- const std::vector<string>& ops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
int maxstrlen,
- const std::vector<string>& stralphabet)
+ const std::vector<std::string>& stralphabet)
: RegexpGenerator(maxatoms, maxops, alphabet, ops),
strgen_(maxstrlen, stralphabet),
regexps_(0), tests_(0) { }
@@ -184,7 +184,7 @@ class PossibleMatchTester : public RegexpGenerator {
int tests() { return tests_; }
// Needed for RegexpGenerator interface.
- void HandleRegexp(const string& regexp);
+ void HandleRegexp(const std::string& regexp);
private:
StringGenerator strgen_;
@@ -198,7 +198,7 @@ class PossibleMatchTester : public RegexpGenerator {
// Processes a single generated regexp.
// Checks that all accepted strings agree with the prefix range.
-void PossibleMatchTester::HandleRegexp(const string& regexp) {
+void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
regexps_++;
VLOG(3) << CEscape(regexp);
@@ -206,7 +206,7 @@ void PossibleMatchTester::HandleRegexp(const string& regexp) {
RE2 re(regexp, RE2::Latin1);
ASSERT_EQ(re.error(), "");
- string min, max;
+ std::string min, max;
if(!re.PossibleMatchRange(&min, &max, 10)) {
// There's no good max for "\\C*". Can't use strcmp
// because sometimes it gets embedded in more
diff --git a/re2/testing/random_test.cc b/re2/testing/random_test.cc
index bd0842f..c0b1fe5 100644
--- a/re2/testing/random_test.cc
+++ b/re2/testing/random_test.cc
@@ -22,11 +22,11 @@ namespace re2 {
// (Always uses the same random seeds for reproducibility.
// Can give different seeds on command line.)
static void RandomTest(int maxatoms, int maxops,
- const std::vector<string>& alphabet,
- const std::vector<string>& ops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
int maxstrlen,
- const std::vector<string>& stralphabet,
- const string& wrapper) {
+ const std::vector<std::string>& stralphabet,
+ const std::string& wrapper) {
// Limit to smaller test cases in debug mode,
// because everything is so much slower.
if (RE2_DEBUG_MODE) {
@@ -79,7 +79,7 @@ TEST(Random, BigEgrepCaptures) {
// character classes like \d. (Adding larger character classes would
// make for too many possibilities.)
TEST(Random, Complicated) {
- std::vector<string> ops = Split(" ",
+ std::vector<std::string> ops = Split(" ",
"%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? "
"%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} "
"%s{2} %s{2,} %s{3,4} %s{4,5}");
@@ -87,11 +87,11 @@ TEST(Random, Complicated) {
// Use (?:\b) and (?:\B) instead of \b and \B,
// because PCRE rejects \b* but accepts (?:\b)*.
// Ditto ^ and $.
- std::vector<string> atoms = Split(" ",
+ std::vector<std::string> atoms = Split(" ",
". (?:^) (?:$) \\a \\f \\n \\r \\t \\v "
"\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) "
"a (a) b c - \\\\");
- std::vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a");
+ std::vector<std::string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a");
RandomTest(10, 10, atoms, ops, 20, alphabet, "");
}
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index cae956c..2d692a6 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -176,10 +176,10 @@ TEST(RE2, Replace) {
};
for (const ReplaceTest* t = tests; t->original != NULL; t++) {
- string one(t->original);
+ std::string one(t->original);
ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
ASSERT_EQ(one, t->single);
- string all(t->original);
+ std::string all(t->original);
ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
<< "Got: " << all;
ASSERT_EQ(all, t->global);
@@ -188,7 +188,7 @@ TEST(RE2, Replace) {
static void TestCheckRewriteString(const char* regexp, const char* rewrite,
bool expect_ok) {
- string error;
+ std::string error;
RE2 exp(regexp);
bool actual_ok = exp.CheckRewriteString(rewrite, &error);
EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
@@ -211,7 +211,7 @@ TEST(CheckRewriteString, all) {
}
TEST(RE2, Extract) {
- string s;
+ std::string s;
ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
ASSERT_EQ(s, "kremvax!boris");
@@ -225,9 +225,9 @@ TEST(RE2, Extract) {
TEST(RE2, Consume) {
RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
- string word;
+ std::string word;
- string s(" aaa b!@#$@#$cccc");
+ std::string s(" aaa b!@#$@#$cccc");
StringPiece input(s);
ASSERT_TRUE(RE2::Consume(&input, r, &word));
@@ -238,7 +238,7 @@ TEST(RE2, Consume) {
}
TEST(RE2, ConsumeN) {
- const string s(" one two three 4");
+ const std::string s(" one two three 4");
StringPiece input(s);
RE2::Arg argv[2];
@@ -248,7 +248,7 @@ TEST(RE2, ConsumeN) {
EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
// 1 arg
- string word;
+ std::string word;
argv[0] = &word;
EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
EXPECT_EQ("two", word);
@@ -263,9 +263,9 @@ TEST(RE2, ConsumeN) {
TEST(RE2, FindAndConsume) {
RE2 r("(\\w+)"); // matches a word
- string word;
+ std::string word;
- string s(" aaa b!@#$@#$cccc");
+ std::string s(" aaa b!@#$@#$cccc");
StringPiece input(s);
ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
@@ -285,7 +285,7 @@ TEST(RE2, FindAndConsume) {
}
TEST(RE2, FindAndConsumeN) {
- const string s(" one two three 4");
+ const std::string s(" one two three 4");
StringPiece input(s);
RE2::Arg argv[2];
@@ -295,7 +295,7 @@ TEST(RE2, FindAndConsumeN) {
EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
// 1 arg
- string word;
+ std::string word;
argv[0] = &word;
EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
EXPECT_EQ("two", word);
@@ -310,9 +310,9 @@ TEST(RE2, FindAndConsumeN) {
TEST(RE2, MatchNumberPeculiarity) {
RE2 r("(foo)|(bar)|(baz)");
- string word1;
- string word2;
- string word3;
+ std::string word1;
+ std::string word2;
+ std::string word3;
ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
ASSERT_EQ(word1, "foo");
@@ -328,7 +328,7 @@ TEST(RE2, MatchNumberPeculiarity) {
ASSERT_EQ(word3, "baz");
ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));
- string a;
+ std::string a;
ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a));
ASSERT_EQ(a, "");
}
@@ -351,7 +351,7 @@ TEST(RE2, Match) {
ASSERT_EQ(group[2], "chrisr");
ASSERT_EQ(group[3], "9000");
- string all, host;
+ std::string all, host;
int port;
ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
ASSERT_EQ(all, "chrisr:9000");
@@ -361,7 +361,7 @@ TEST(RE2, Match) {
static void TestRecursion(int size, const char* pattern) {
// Fill up a string repeating the pattern given
- string domain;
+ std::string domain;
domain.resize(size);
size_t patlen = strlen(pattern);
for (int i = 0; i < size; i++) {
@@ -374,9 +374,9 @@ static void TestRecursion(int size, const char* pattern) {
// A meta-quoted string, interpreted as a pattern, should always match
// the original unquoted string.
-static void TestQuoteMeta(const string& unquoted,
+static void TestQuoteMeta(const std::string& unquoted,
const RE2::Options& options = RE2::DefaultOptions) {
- string quoted = RE2::QuoteMeta(unquoted);
+ std::string quoted = RE2::QuoteMeta(unquoted);
RE2 re(quoted, options);
EXPECT_TRUE(RE2::FullMatch(unquoted, re))
<< "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
@@ -385,9 +385,9 @@ static void TestQuoteMeta(const string& unquoted,
// A meta-quoted string, interpreted as a pattern, should always match
// the original unquoted string.
static void NegativeTestQuoteMeta(
- const string& unquoted, const string& should_not_match,
+ const std::string& unquoted, const std::string& should_not_match,
const RE2::Options& options = RE2::DefaultOptions) {
- string quoted = RE2::QuoteMeta(unquoted);
+ std::string quoted = RE2::QuoteMeta(unquoted);
RE2 re(quoted, options);
EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
<< "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
@@ -440,7 +440,7 @@ TEST(QuoteMeta, UTF8) {
}
TEST(QuoteMeta, HasNull) {
- string has_null;
+ std::string has_null;
// string with one null character
has_null += '\0';
@@ -543,14 +543,14 @@ TEST(Capture, NamedGroups) {
{
RE2 re("(hello world)");
ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
- const std::map<string, int>& m = re.NamedCapturingGroups();
+ const std::map<std::string, int>& m = re.NamedCapturingGroups();
ASSERT_EQ(m.size(), 0);
}
{
RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
- const std::map<string, int>& m = re.NamedCapturingGroups();
+ const std::map<std::string, int>& m = re.NamedCapturingGroups();
ASSERT_EQ(m.size(), 4);
ASSERT_EQ(m.find("A")->second, 1);
ASSERT_EQ(m.find("B")->second, 2);
@@ -563,7 +563,7 @@ TEST(RE2, CapturedGroupTest) {
RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
int num_groups = re.NumberOfCapturingGroups();
EXPECT_EQ(2, num_groups);
- string args[4];
+ std::string args[4];
RE2::Arg arg0(&args[0]);
RE2::Arg arg1(&args[1]);
RE2::Arg arg2(&args[2]);
@@ -572,7 +572,7 @@ TEST(RE2, CapturedGroupTest) {
const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
re, matches, num_groups));
- const std::map<string, int>& named_groups = re.NamedCapturingGroups();
+ const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
EXPECT_TRUE(named_groups.find("S") != named_groups.end());
EXPECT_TRUE(named_groups.find("D") != named_groups.end());
@@ -619,7 +619,7 @@ TEST(RE2, PartialMatchN) {
EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
// Multi-arg
- string s;
+ std::string s;
argv[1] = &s;
EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
EXPECT_EQ(42, i);
@@ -662,10 +662,10 @@ TEST(RE2, FullMatchIntegerArg) {
}
TEST(RE2, FullMatchStringArg) {
- string s;
+ std::string s;
// String-arg
ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &s));
- ASSERT_EQ(s, string("ell"));
+ ASSERT_EQ(s, std::string("ell"));
}
TEST(RE2, FullMatchStringPieceArg) {
@@ -680,10 +680,10 @@ TEST(RE2, FullMatchStringPieceArg) {
TEST(RE2, FullMatchMultiArg) {
int i;
- string s;
+ std::string s;
// Multi-arg
ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
- ASSERT_EQ(s, string("ruby"));
+ ASSERT_EQ(s, std::string("ruby"));
ASSERT_EQ(i, 1234);
}
@@ -703,7 +703,7 @@ TEST(RE2, FullMatchN) {
EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
// Multi-arg
- string s;
+ std::string s;
argv[1] = &s;
EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
EXPECT_EQ(42, i);
@@ -713,26 +713,26 @@ TEST(RE2, FullMatchN) {
TEST(RE2, FullMatchIgnoredArg) {
int i;
- string s;
+ std::string s;
// Old-school NULL should be ignored.
ASSERT_TRUE(
RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
- ASSERT_EQ(s, string("ruby"));
+ ASSERT_EQ(s, std::string("ruby"));
ASSERT_EQ(i, 1234);
// C++11 nullptr should also be ignored.
ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &s, nullptr, &i));
- ASSERT_EQ(s, string("rubz"));
+ ASSERT_EQ(s, std::string("rubz"));
ASSERT_EQ(i, 1235);
}
TEST(RE2, FullMatchTypedNullArg) {
- string s;
+ std::string s;
// Ignore non-void* NULL arg
ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
- ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (string*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
@@ -775,7 +775,7 @@ TEST(RE2, NULTerminated) {
TEST(RE2, FullMatchTypeTests) {
// Type tests
- string zeros(1000, '0');
+ std::string zeros(1000, '0');
{
char c;
ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
@@ -837,7 +837,7 @@ TEST(RE2, FullMatchTypeTests) {
int64_t v;
static const int64_t max = INT64_C(0x7fffffffffffffff);
static const int64_t min = -max - 1;
- string str;
+ std::string str;
ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
@@ -862,7 +862,7 @@ TEST(RE2, FullMatchTypeTests) {
uint64_t v;
int64_t v2;
static const uint64_t max = UINT64_C(0xffffffffffffffff);
- string str;
+ std::string str;
ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
@@ -877,7 +877,7 @@ TEST(RE2, FullMatchTypeTests) {
}
TEST(RE2, FloatingPointFullMatchTypes) {
- string zeros(1000, '0');
+ std::string zeros(1000, '0');
{
float v;
ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
@@ -1056,7 +1056,7 @@ TEST(RE2, FullMatchArgCount) {
TEST(RE2, Accessors) {
// Check the pattern() accessor
{
- const string kPattern = "http://([^/]+)/.*";
+ const std::string kPattern = "http://([^/]+)/.*";
const RE2 re(kPattern);
ASSERT_EQ(kPattern, re.pattern());
}
@@ -1094,13 +1094,13 @@ TEST(RE2, UTF8) {
// Check that '.' matches one byte or UTF-8 character
// according to the mode.
- string s;
+ std::string s;
RE2 re_test3("(.)", RE2::Latin1);
ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
- ASSERT_EQ(s, string("\xe6"));
+ ASSERT_EQ(s, std::string("\xe6"));
RE2 re_test4("(.)");
ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
- ASSERT_EQ(s, string("\xe6\x97\xa5"));
+ ASSERT_EQ(s, std::string("\xe6\x97\xa5"));
// Check that string matches itself in either mode
RE2 re_test5(utf8_string, RE2::Latin1);
@@ -1121,7 +1121,7 @@ TEST(RE2, UngreedyUTF8) {
{
// This code always worked.
const char* pattern = "\\w+X";
- const string target = "a aX";
+ const std::string target = "a aX";
RE2 match_sentence(pattern, RE2::Latin1);
RE2 match_sentence_re(pattern);
@@ -1130,7 +1130,7 @@ TEST(RE2, UngreedyUTF8) {
}
{
const char* pattern = "(?U)\\w+X";
- const string target = "a aX";
+ const std::string target = "a aX";
RE2 match_sentence(pattern, RE2::Latin1);
ASSERT_EQ(match_sentence.error(), "");
RE2 match_sentence_re(pattern);
@@ -1185,7 +1185,7 @@ TEST(RE2, NoCrash) {
{
RE2 re(".{512}x", RE2::Quiet);
ASSERT_TRUE(re.ok());
- string s;
+ std::string s;
s.append(515, 'c');
s.append("x");
ASSERT_TRUE(RE2::PartialMatch(s, re));
@@ -1210,7 +1210,7 @@ TEST(RE2, BigCountedRepetition) {
RE2 re(".{512}x", opt);
ASSERT_TRUE(re.ok());
- string s;
+ std::string s;
s.append(515, 'c');
s.append("x");
ASSERT_TRUE(RE2::PartialMatch(s, re));
@@ -1221,8 +1221,8 @@ TEST(RE2, DeepRecursion) {
// segmentation violation due to stack overflow before pcre was
// patched.
// Again, a PCRE legacy test. RE2 doesn't recurse.
- string comment("x*");
- string a(131072, 'a');
+ std::string comment("x*");
+ std::string a(131072, 'a');
comment += a;
comment += "*x";
RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
@@ -1232,8 +1232,8 @@ TEST(RE2, DeepRecursion) {
// Suggested by Josh Hyman. Failed when SearchOnePass was
// not implementing case-folding.
TEST(CaseInsensitive, MatchAndConsume) {
- string result;
- string text = "A fish named *Wanda*";
+ std::string result;
+ std::string text = "A fish named *Wanda*";
StringPiece sp(text);
EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
@@ -1243,7 +1243,7 @@ TEST(CaseInsensitive, MatchAndConsume) {
// RE2 should permit implicit conversions from string, StringPiece, const char*,
// and C string literals.
TEST(RE2, ImplicitConversions) {
- string re_string(".");
+ std::string re_string(".");
StringPiece re_stringpiece(".");
const char* re_cstring = ".";
EXPECT_TRUE(RE2::PartialMatch("e", re_string));
@@ -1255,12 +1255,12 @@ TEST(RE2, ImplicitConversions) {
// Bugs introduced by 8622304
TEST(RE2, CL8622304) {
// reported by ingow
- string dir;
+ std::string dir;
EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
// reported by jacobsa
- string key, val;
+ std::string key, val;
EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
"(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
&key,
@@ -1364,8 +1364,8 @@ TEST(RE2, BitstateCaptureBug) {
// C++ version of bug 609710.
TEST(RE2, UnicodeClasses) {
- const string str = "ABCDEFGHI譚永鋒";
- string a, b, c;
+ const std::string str = "ABCDEFGHI譚永鋒";
+ std::string a, b, c;
EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
@@ -1490,7 +1490,7 @@ TEST(RE2, NullVsEmptyStringSubmatches) {
TEST(RE2, Bug1816809) {
RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
StringPiece piece("llx-3;llx4");
- string x;
+ std::string x;
EXPECT_TRUE(RE2::Consume(&piece, re, &x));
}
@@ -1507,8 +1507,8 @@ TEST(RE2, CapturingGroupNames) {
// 12 3 45 6 7
RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
EXPECT_TRUE(re.ok());
- const std::map<int, string>& have = re.CapturingGroupNames();
- std::map<int, string> want;
+ const std::map<int, std::string>& have = re.CapturingGroupNames();
+ std::map<int, std::string> want;
want[3] = "G2";
want[6] = "G2";
want[7] = "G1";
@@ -1582,7 +1582,7 @@ TEST(RE2, Bug18523943) {
RE2 re((const char*)b, opt);
ASSERT_TRUE(re.ok());
- string s1;
+ std::string s1;
ASSERT_TRUE(RE2::PartialMatch((const char*)a, re, &s1));
}
@@ -1606,7 +1606,7 @@ TEST(RE2, Bug26356109) {
RE2 re("a\\C*?c|a\\C*?b");
ASSERT_TRUE(re.ok());
- string s = "abc";
+ std::string s = "abc";
StringPiece m;
ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
@@ -1620,7 +1620,7 @@ TEST(RE2, Issue104) {
// RE2::GlobalReplace always advanced by one byte when the empty string was
// matched, which would clobber any rune that is longer than one byte.
- string s = "bc";
+ std::string s = "bc";
ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d"));
ASSERT_EQ("dbdcd", s);
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 8b82e0b..968fb86 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -34,6 +34,7 @@ void Test() {
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
+ CHECK(prog->CanBitState());
const char* text = "650-253-0001";
StringPiece sp[4];
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
@@ -61,6 +62,7 @@ void MemoryUsage() {
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
+ CHECK(prog->CanBitState());
fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
mc.Reset();
@@ -139,7 +141,7 @@ ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
// Generate random text that won't contain the search string,
// to test worst-case search behavior.
-void MakeText(string* text, int nbytes) {
+void MakeText(std::string* text, int nbytes) {
srand(1);
text->resize(nbytes);
for (int i = 0; i < nbytes; i++) {
@@ -156,7 +158,7 @@ void MakeText(string* text, int nbytes) {
// the text for regexp iters times.
void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
StopBenchmarkTiming();
- string s;
+ std::string s;
MakeText(&s, nbytes);
BenchmarkMemoryUsage();
StartBenchmarkTiming();
@@ -261,10 +263,10 @@ BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs(
void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
StopBenchmarkTiming();
- string s;
+ std::string s;
s.append(nbytes/2, 'x');
- string regexp = "^" + s + ".*$";
- string t;
+ std::string regexp = "^" + s + ".*$";
+ std::string t;
MakeText(&t, nbytes/2);
s += t;
BenchmarkMemoryUsage();
@@ -289,7 +291,7 @@ BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs
void FindAndConsume(int iters, int nbytes) {
StopBenchmarkTiming();
- string s;
+ std::string s;
MakeText(&s, nbytes);
s.append("Hello World");
StartBenchmarkTiming();
@@ -309,7 +311,7 @@ BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
StopBenchmarkTiming();
- string s;
+ std::string s;
MakeText(&s, nbytes);
BenchmarkMemoryUsage();
StartBenchmarkTiming();
@@ -383,7 +385,7 @@ BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPU
void SearchAltMatch(int iters, int nbytes, SearchImpl* search) {
StopBenchmarkTiming();
- string s;
+ std::string s;
MakeText(&s, nbytes);
BenchmarkMemoryUsage();
StartBenchmarkTiming();
@@ -604,7 +606,7 @@ BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
void Parse1SplitBig1(int iters,
void (*run)(int, const char*, const StringPiece&)) {
- string s;
+ std::string s;
s.append(100000, 'x');
s.append("650-253-0001");
BenchmarkMemoryUsage();
@@ -624,7 +626,7 @@ BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
void Parse1SplitBig2(int iters,
void (*run)(int, const char*, const StringPiece&)) {
- string s;
+ std::string s;
s.append("650-253-");
s.append(100000, '0');
BenchmarkMemoryUsage();
@@ -643,7 +645,7 @@ BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
// Benchmark: measure time required to parse (but not execute)
// a simple regular expression.
-void ParseRegexp(int iters, const string& regexp) {
+void ParseRegexp(int iters, const std::string& regexp) {
for (int i = 0; i < iters; i++) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
@@ -651,7 +653,7 @@ void ParseRegexp(int iters, const string& regexp) {
}
}
-void SimplifyRegexp(int iters, const string& regexp) {
+void SimplifyRegexp(int iters, const std::string& regexp) {
for (int i = 0; i < iters; i++) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
@@ -662,7 +664,7 @@ void SimplifyRegexp(int iters, const string& regexp) {
}
}
-void NullWalkRegexp(int iters, const string& regexp) {
+void NullWalkRegexp(int iters, const std::string& regexp) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
for (int i = 0; i < iters; i++) {
@@ -671,7 +673,7 @@ void NullWalkRegexp(int iters, const string& regexp) {
re->Decref();
}
-void SimplifyCompileRegexp(int iters, const string& regexp) {
+void SimplifyCompileRegexp(int iters, const std::string& regexp) {
for (int i = 0; i < iters; i++) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
@@ -685,7 +687,7 @@ void SimplifyCompileRegexp(int iters, const string& regexp) {
}
}
-void CompileRegexp(int iters, const string& regexp) {
+void CompileRegexp(int iters, const std::string& regexp) {
for (int i = 0; i < iters; i++) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
@@ -696,7 +698,7 @@ void CompileRegexp(int iters, const string& regexp) {
}
}
-void CompileToProg(int iters, const string& regexp) {
+void CompileToProg(int iters, const std::string& regexp) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
for (int i = 0; i < iters; i++) {
@@ -707,7 +709,7 @@ void CompileToProg(int iters, const string& regexp) {
re->Decref();
}
-void CompileByteMap(int iters, const string& regexp) {
+void CompileByteMap(int iters, const std::string& regexp) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
@@ -719,21 +721,22 @@ void CompileByteMap(int iters, const string& regexp) {
re->Decref();
}
-void CompilePCRE(int iters, const string& regexp) {
+void CompilePCRE(int iters, const std::string& regexp) {
for (int i = 0; i < iters; i++) {
PCRE re(regexp, PCRE::UTF8);
CHECK_EQ(re.error(), "");
}
}
-void CompileRE2(int iters, const string& regexp) {
+void CompileRE2(int iters, const std::string& regexp) {
for (int i = 0; i < iters; i++) {
RE2 re(regexp);
CHECK_EQ(re.error(), "");
}
}
-void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
+void RunBuild(int iters, const std::string& regexp,
+ void (*run)(int, const std::string&)) {
run(iters, regexp);
SetBenchmarkItemsProcessed(iters);
}
@@ -770,7 +773,7 @@ BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
// the text for regexp iters times.
void SearchPhone(int iters, int nbytes, ParseImpl* search) {
StopBenchmarkTiming();
- string s;
+ std::string s;
MakeText(&s, nbytes);
s.append("(650) 253-0001");
BenchmarkMemoryUsage();
@@ -799,7 +802,7 @@ TODO(rsc): Make this work again.
// brute force method would generate a string of length n * 2^n, but this
// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-static string DeBruijnString(int n) {
+static std::string DeBruijnString(int n) {
CHECK_LT(n, 8*sizeof(int));
CHECK_GT(n, 0);
@@ -807,7 +810,7 @@ static string DeBruijnString(int n) {
for (int i = 0; i < 1<<n; i++)
did[i] = false;
- string s;
+ std::string s;
for (int i = 0; i < n-1; i++)
s.append("0");
int bits = 0;
@@ -828,8 +831,8 @@ static string DeBruijnString(int n) {
}
void CacheFill(int iters, int n, SearchImpl *srch) {
- string s = DeBruijnString(n+1);
- string t;
+ std::string s = DeBruijnString(n+1);
+ std::string t;
for (int i = n+1; i < 20; i++) {
t = s + s;
using std::swap;
@@ -932,6 +935,7 @@ void SearchBitState(int iters, const char* regexp, const StringPiece& text,
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
+ CHECK(prog->CanBitState());
CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
delete prog;
@@ -1019,6 +1023,7 @@ void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
+ CHECK(prog->CanBitState());
for (int i = 0; i < iters; i++)
CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
expect_match);
@@ -1088,6 +1093,7 @@ void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
+ CHECK(prog->CanBitState());
StringPiece sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
delete prog;
@@ -1158,6 +1164,7 @@ void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
+ CHECK(prog->CanBitState());
StringPiece sp[4]; // 4 because sp[0] is whole match.
for (int i = 0; i < iters; i++)
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
@@ -1233,6 +1240,7 @@ void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
+ CHECK(prog->CanBitState());
StringPiece sp[2]; // 2 because sp[0] is whole match.
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
delete prog;
@@ -1290,6 +1298,7 @@ void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
+ CHECK(prog->CanBitState());
StringPiece sp[2]; // 2 because sp[0] is whole match.
for (int i = 0; i < iters; i++)
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
@@ -1403,7 +1412,7 @@ BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
#endif
BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
-static string http_text =
+static std::string http_text =
"GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
"alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
@@ -1428,7 +1437,7 @@ BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
#endif
BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
-static string smallhttp_text =
+static std::string smallhttp_text =
"GET /abc HTTP/1.1";
void SmallHTTPPartialMatchPCRE(int n) {
@@ -1496,7 +1505,7 @@ BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
void FullMatchPCRE(int iter, int n, const char *regexp) {
StopBenchmarkTiming();
- string s;
+ std::string s;
MakeText(&s, n);
s += "ABCDEFGHIJ";
BenchmarkMemoryUsage();
@@ -1509,7 +1518,7 @@ void FullMatchPCRE(int iter, int n, const char *regexp) {
void FullMatchRE2(int iter, int n, const char *regexp) {
StopBenchmarkTiming();
- string s;
+ std::string s;
MakeText(&s, n);
s += "ABCDEFGHIJ";
BenchmarkMemoryUsage();
@@ -1548,8 +1557,8 @@ void PossibleMatchRangeCommon(int iter, const char* regexp) {
StopBenchmarkTiming();
RE2 re(regexp);
StartBenchmarkTiming();
- string min;
- string max;
+ std::string min;
+ std::string max;
const int kMaxLen = 16;
for (int i = 0; i < iter; i++) {
CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc
index c0f26fe..1e4d3da 100644
--- a/re2/testing/regexp_generator.cc
+++ b/re2/testing/regexp_generator.cc
@@ -38,7 +38,7 @@
namespace re2 {
// Returns a vector of the egrep regexp operators.
-const std::vector<string>& RegexpGenerator::EgrepOps() {
+const std::vector<std::string>& RegexpGenerator::EgrepOps() {
static const char *ops[] = {
"%s%s",
"%s|%s",
@@ -47,13 +47,13 @@ const std::vector<string>& RegexpGenerator::EgrepOps() {
"%s?",
"%s\\C*",
};
- static std::vector<string> v(ops, ops + arraysize(ops));
+ static std::vector<std::string> v(ops, ops + arraysize(ops));
return v;
}
RegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
- const std::vector<string>& atoms,
- const std::vector<string>& ops)
+ const std::vector<std::string>& atoms,
+ const std::vector<std::string>& ops)
: maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) {
// Degenerate case.
if (atoms_.empty())
@@ -65,7 +65,7 @@ RegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
// Generates all possible regular expressions (within the parameters),
// calling HandleRegexp for each one.
void RegexpGenerator::Generate() {
- std::vector<string> postfix;
+ std::vector<std::string> postfix;
GeneratePostfix(&postfix, 0, 0, 0);
}
@@ -74,13 +74,13 @@ void RegexpGenerator::GenerateRandom(int32_t seed, int n) {
rng_.seed(seed);
for (int i = 0; i < n; i++) {
- std::vector<string> postfix;
+ std::vector<std::string> postfix;
GenerateRandomPostfix(&postfix, 0, 0, 0);
}
}
// Counts and returns the number of occurrences of "%s" in s.
-static int CountArgs(const string& s) {
+static int CountArgs(const std::string& s) {
const char *p = s.c_str();
int n = 0;
while ((p = strstr(p, "%s")) != NULL) {
@@ -103,8 +103,8 @@ static int CountArgs(const string& s) {
//
// The initial call should be GeneratePostfix([empty vector], 0, 0, 0).
//
-void RegexpGenerator::GeneratePostfix(std::vector<string>* post, int nstk,
- int ops, int atoms) {
+void RegexpGenerator::GeneratePostfix(std::vector<std::string>* post,
+ int nstk, int ops, int atoms) {
if (nstk == 1)
RunPostfix(*post);
@@ -126,7 +126,7 @@ void RegexpGenerator::GeneratePostfix(std::vector<string>* post, int nstk,
// Add operators if there are enough arguments.
if (ops < maxops_) {
for (size_t i = 0; i < ops_.size(); i++) {
- const string& fmt = ops_[i];
+ const std::string& fmt = ops_[i];
int nargs = CountArgs(fmt);
if (nargs <= nstk) {
post->push_back(fmt);
@@ -139,8 +139,8 @@ void RegexpGenerator::GeneratePostfix(std::vector<string>* post, int nstk,
// Generates a random postfix command sequence.
// Stops and returns true once a single sequence has been generated.
-bool RegexpGenerator::GenerateRandomPostfix(std::vector<string>* post, int nstk,
- int ops, int atoms) {
+bool RegexpGenerator::GenerateRandomPostfix(std::vector<std::string>* post,
+ int nstk, int ops, int atoms) {
std::uniform_int_distribution<int> random_stop(0, maxatoms_ - atoms);
std::uniform_int_distribution<int> random_bit(0, 1);
std::uniform_int_distribution<int> random_ops_index(
@@ -163,7 +163,7 @@ bool RegexpGenerator::GenerateRandomPostfix(std::vector<string>* post, int nstk,
// Add operators if there are enough arguments.
if (ops < maxops_ && random_bit(rng_) == 0) {
- const string& fmt = ops_[random_ops_index(rng_)];
+ const std::string& fmt = ops_[random_ops_index(rng_)];
int nargs = CountArgs(fmt);
if (nargs <= nstk) {
post->push_back(fmt);
@@ -189,8 +189,8 @@ bool RegexpGenerator::GenerateRandomPostfix(std::vector<string>* post, int nstk,
// Interprets the postfix command sequence to create a regular expression
// passed to HandleRegexp. The results of operators like %s|%s are wrapped
// in (?: ) to avoid needing to maintain a precedence table.
-void RegexpGenerator::RunPostfix(const std::vector<string>& post) {
- std::stack<string> regexps;
+void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
+ std::stack<std::string> regexps;
for (size_t i = 0; i < post.size(); i++) {
switch (CountArgs(post[i])) {
default:
@@ -199,15 +199,15 @@ void RegexpGenerator::RunPostfix(const std::vector<string>& post) {
regexps.push(post[i]);
break;
case 1: {
- string a = regexps.top();
+ std::string a = regexps.top();
regexps.pop();
regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")");
break;
}
case 2: {
- string b = regexps.top();
+ std::string b = regexps.top();
regexps.pop();
- string a = regexps.top();
+ std::string a = regexps.top();
regexps.pop();
regexps.push("(?:" +
StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) +
@@ -238,14 +238,14 @@ void RegexpGenerator::RunPostfix(const std::vector<string>& post) {
}
// Split s into an vector of strings, one for each UTF-8 character.
-std::vector<string> Explode(const StringPiece& s) {
- std::vector<string> v;
+std::vector<std::string> Explode(const StringPiece& s) {
+ std::vector<std::string> v;
for (const char *q = s.begin(); q < s.end(); ) {
const char* p = q;
Rune r;
q += chartorune(&r, q);
- v.push_back(string(p, q - p));
+ v.push_back(std::string(p, q - p));
}
return v;
@@ -253,8 +253,8 @@ std::vector<string> Explode(const StringPiece& s) {
// Split string everywhere a substring is found, returning
// vector of pieces.
-std::vector<string> Split(const StringPiece& sep, const StringPiece& s) {
- std::vector<string> v;
+std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
+ std::vector<std::string> v;
if (sep.size() == 0)
return Explode(s);
@@ -262,14 +262,14 @@ std::vector<string> Split(const StringPiece& sep, const StringPiece& s) {
const char *p = s.begin();
for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
if (StringPiece(q, sep.size()) == sep) {
- v.push_back(string(p, q - p));
+ v.push_back(std::string(p, q - p));
p = q + sep.size();
q = p - 1; // -1 for ++ in loop
continue;
}
}
if (p < s.end())
- v.push_back(string(p, s.end() - p));
+ v.push_back(std::string(p, s.end() - p));
return v;
}
diff --git a/re2/testing/regexp_generator.h b/re2/testing/regexp_generator.h
index b746399..7d72aff 100644
--- a/re2/testing/regexp_generator.h
+++ b/re2/testing/regexp_generator.h
@@ -29,8 +29,9 @@ namespace re2 {
//
class RegexpGenerator {
public:
- RegexpGenerator(int maxatoms, int maxops, const std::vector<string>& atoms,
- const std::vector<string>& ops);
+ RegexpGenerator(int maxatoms, int maxops,
+ const std::vector<std::string>& atoms,
+ const std::vector<std::string>& ops);
virtual ~RegexpGenerator() {}
// Generates all the regular expressions, calling HandleRegexp(re) for each.
@@ -40,22 +41,23 @@ class RegexpGenerator {
void GenerateRandom(int32_t seed, int n);
// Handles a regular expression. Must be provided by subclass.
- virtual void HandleRegexp(const string& regexp) = 0;
+ virtual void HandleRegexp(const std::string& regexp) = 0;
// The egrep regexp operators: * + ? | and concatenation.
- static const std::vector<string>& EgrepOps();
+ static const std::vector<std::string>& EgrepOps();
private:
- void RunPostfix(const std::vector<string>& post);
- void GeneratePostfix(std::vector<string>* post, int nstk, int ops, int lits);
- bool GenerateRandomPostfix(std::vector<string>* post, int nstk, int ops,
- int lits);
-
- int maxatoms_; // Maximum number of atoms allowed in expr.
- int maxops_; // Maximum number of ops allowed in expr.
- std::vector<string> atoms_; // Possible atoms.
- std::vector<string> ops_; // Possible ops.
- std::minstd_rand0 rng_; // Random number generator.
+ void RunPostfix(const std::vector<std::string>& post);
+ void GeneratePostfix(std::vector<std::string>* post,
+ int nstk, int ops, int lits);
+ bool GenerateRandomPostfix(std::vector<std::string>* post,
+ int nstk, int ops, int lits);
+
+ int maxatoms_; // Maximum number of atoms allowed in expr.
+ int maxops_; // Maximum number of ops allowed in expr.
+ std::vector<std::string> atoms_; // Possible atoms.
+ std::vector<std::string> ops_; // Possible ops.
+ std::minstd_rand0 rng_; // Random number generator.
RegexpGenerator(const RegexpGenerator&) = delete;
RegexpGenerator& operator=(const RegexpGenerator&) = delete;
@@ -64,11 +66,11 @@ class RegexpGenerator {
// Helpers for preparing arguments to RegexpGenerator constructor.
// Returns one string for each character in s.
-std::vector<string> Explode(const StringPiece& s);
+std::vector<std::string> Explode(const StringPiece& s);
// Splits string everywhere sep is found, returning
// vector of pieces.
-std::vector<string> Split(const StringPiece& sep, const StringPiece& s);
+std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s);
} // namespace re2
diff --git a/re2/testing/regexp_test.cc b/re2/testing/regexp_test.cc
index 7830322..f7e7e92 100644
--- a/re2/testing/regexp_test.cc
+++ b/re2/testing/regexp_test.cc
@@ -38,7 +38,7 @@ TEST(Regexp, BigConcat) {
ASSERT_EQ(x->Ref(), 1 + static_cast<int>(v.size())) << x->Ref();
Regexp* re = Regexp::Concat(v.data(), static_cast<int>(v.size()),
Regexp::NoParseFlags);
- ASSERT_EQ(re->ToString(), string(v.size(), 'x'));
+ ASSERT_EQ(re->ToString(), std::string(v.size(), 'x'));
re->Decref();
ASSERT_EQ(x->Ref(), 1) << x->Ref();
x->Decref();
@@ -51,11 +51,11 @@ TEST(Regexp, NamedCaptures) {
"(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
EXPECT_TRUE(status.ok());
EXPECT_EQ(4, x->NumCaptures());
- const std::map<string, int>* have = x->NamedCaptures();
+ const std::map<std::string, int>* have = x->NamedCaptures();
EXPECT_TRUE(have != NULL);
EXPECT_EQ(2, have->size()); // there are only two named groups in
// the regexp: 'g1' and 'g2'.
- std::map<string, int> want;
+ std::map<std::string, int> want;
want["g1"] = 1;
want["g2"] = 3;
EXPECT_EQ(want, *have);
@@ -70,10 +70,10 @@ TEST(Regexp, CaptureNames) {
"(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
EXPECT_TRUE(status.ok());
EXPECT_EQ(4, x->NumCaptures());
- const std::map<int, string>* have = x->CaptureNames();
+ const std::map<int, std::string>* have = x->CaptureNames();
EXPECT_TRUE(have != NULL);
EXPECT_EQ(3, have->size());
- std::map<int, string> want;
+ std::map<int, std::string> want;
want[1] = "g1";
want[3] = "g2";
want[4] = "g1";
diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc
index 3f18d9b..749c5ad 100644
--- a/re2/testing/required_prefix_test.cc
+++ b/re2/testing/required_prefix_test.cc
@@ -49,18 +49,18 @@ TEST(RequiredPrefix, SimpleTests) {
Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
ASSERT_TRUE(re != NULL) << " " << t.regexp;
- string p;
+ std::string p;
bool f;
Regexp* s;
ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
<< " " << t.regexp << " " << (j==0 ? "latin1" : "utf")
<< " " << re->Dump();
if (t.return_value) {
- ASSERT_EQ(p, string(t.prefix))
+ ASSERT_EQ(p, std::string(t.prefix))
<< " " << t.regexp << " " << (j==0 ? "latin1" : "utf");
ASSERT_EQ(f, t.foldcase)
<< " " << t.regexp << " " << (j==0 ? "latin1" : "utf");
- ASSERT_EQ(s->ToString(), string(t.suffix))
+ ASSERT_EQ(s->ToString(), std::string(t.suffix))
<< " " << t.regexp << " " << (j==0 ? "latin1" : "utf");
s->Decref();
}
diff --git a/re2/testing/search_test.cc b/re2/testing/search_test.cc
index 8adef6c..43a3952 100644
--- a/re2/testing/search_test.cc
+++ b/re2/testing/search_test.cc
@@ -307,6 +307,7 @@ RegexpTest simple_tests[] = {
// Former bugs.
{ "a\\C*|ba\\C", "baba" },
+ { "\\w*I\\w*", "Inc." },
};
TEST(Regexp, SearchTests) {
@@ -319,7 +320,7 @@ TEST(Regexp, SearchTests) {
if (LOGGING) {
// Build a dummy ExhaustiveTest call that will trigger just
// this one test, so that we log the test case.
- std::vector<string> atom, alpha, ops;
+ std::vector<std::string> atom, alpha, ops;
atom.push_back(t.regexp);
alpha.push_back(t.text);
ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", "");
diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc
index 5cdc11f..ad20ed7 100644
--- a/re2/testing/set_test.cc
+++ b/re2/testing/set_test.cc
@@ -204,7 +204,7 @@ TEST(Set, Prefix) {
TEST(Set, OutOfMemory) {
RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
- string a(10000, 'a');
+ std::string a(10000, 'a');
ASSERT_EQ(s.Add(a, NULL), 0);
ASSERT_EQ(s.Compile(), true);
diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc
index feef200..030cc45 100644
--- a/re2/testing/string_generator.cc
+++ b/re2/testing/string_generator.cc
@@ -18,7 +18,7 @@
namespace re2 {
StringGenerator::StringGenerator(int maxlen,
- const std::vector<string>& alphabet)
+ const std::vector<std::string>& alphabet)
: maxlen_(maxlen), alphabet_(alphabet),
generate_null_(false),
random_(false), nrandom_(0) {
diff --git a/re2/testing/string_generator.h b/re2/testing/string_generator.h
index 5a36617..6184176 100644
--- a/re2/testing/string_generator.h
+++ b/re2/testing/string_generator.h
@@ -21,7 +21,7 @@ namespace re2 {
class StringGenerator {
public:
- StringGenerator(int maxlen, const std::vector<string>& alphabet);
+ StringGenerator(int maxlen, const std::vector<std::string>& alphabet);
~StringGenerator() {}
const StringPiece& Next();
@@ -41,12 +41,12 @@ class StringGenerator {
bool RandomDigits();
// Global state.
- int maxlen_; // Maximum length string to generate.
- std::vector<string> alphabet_; // Alphabet, one string per letter.
+ int maxlen_; // Maximum length string to generate.
+ std::vector<std::string> alphabet_; // Alphabet, one string per letter.
// Iteration state.
StringPiece sp_; // Last StringPiece returned by Next().
- string s_; // String data in last StringPiece returned by Next().
+ std::string s_; // String data in last StringPiece returned by Next().
bool hasnext_; // Whether Next() can be called again.
std::vector<int> digits_; // Alphabet indices for next string.
bool generate_null_; // Whether to generate a NULL StringPiece next.
diff --git a/re2/testing/string_generator_test.cc b/re2/testing/string_generator_test.cc
index 2c040a3..d0f84f4 100644
--- a/re2/testing/string_generator_test.cc
+++ b/re2/testing/string_generator_test.cc
@@ -31,12 +31,12 @@ static int64_t IntegerPower(int i, int e) {
// If all of these hold, the StringGenerator is behaving.
// Assumes that the alphabet is sorted, so that the generated
// strings can just be compared lexicographically.
-static void RunTest(int len, const string& alphabet, bool donull) {
+static void RunTest(int len, const std::string& alphabet, bool donull) {
StringGenerator g(len, Explode(alphabet));
int n = 0;
int last_l = -1;
- string last_s;
+ std::string last_s;
if (donull) {
g.GenerateNULL();
@@ -47,7 +47,7 @@ static void RunTest(int len, const string& alphabet, bool donull) {
}
while (g.HasNext()) {
- string s = string(g.Next());
+ std::string s = std::string(g.Next());
n++;
// Check that all characters in s appear in alphabet.
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index c37aada..92b5972 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -66,7 +66,7 @@ static uint32_t Engines() {
cached_engines = ~0;
} else {
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
- if (FLAGS_regexp_engines.find(EngineName(i)) != string::npos)
+ if (FLAGS_regexp_engines.find(EngineName(i)) != std::string::npos)
cached_engines |= 1<<i;
}
@@ -97,7 +97,8 @@ typedef TestInstance::Result Result;
// Formats a single capture range s in text in the form (a,b)
// where a and b are the starting and ending offsets of s in text.
-static string FormatCapture(const StringPiece& text, const StringPiece& s) {
+static std::string FormatCapture(const StringPiece& text,
+ const StringPiece& s) {
if (s.begin() == NULL)
return "(?,?)";
return StringPrintf("(%td,%td)",
@@ -113,7 +114,7 @@ static bool NonASCII(const StringPiece& text) {
}
// Returns string representation of match kind.
-static string FormatKind(Prog::MatchKind kind) {
+static std::string FormatKind(Prog::MatchKind kind) {
switch (kind) {
case Prog::kFullMatch:
return "full match";
@@ -128,7 +129,7 @@ static string FormatKind(Prog::MatchKind kind) {
}
// Returns string representation of anchor kind.
-static string FormatAnchor(Prog::Anchor anchor) {
+static std::string FormatAnchor(Prog::Anchor anchor) {
switch (anchor) {
case Prog::kAnchored:
return "anchored";
@@ -140,7 +141,7 @@ static string FormatAnchor(Prog::Anchor anchor) {
struct ParseMode {
Regexp::ParseFlags parse_flags;
- string desc;
+ std::string desc;
};
static const Regexp::ParseFlags single_line =
@@ -156,7 +157,7 @@ static ParseMode parse_modes[] = {
{ multi_line|Regexp::Latin1, "multiline, latin1" },
};
-static string FormatMode(Regexp::ParseFlags flags) {
+static std::string FormatMode(Regexp::ParseFlags flags) {
for (int i = 0; i < arraysize(parse_modes); i++)
if (parse_modes[i].parse_flags == flags)
return parse_modes[i].desc;
@@ -220,7 +221,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
}
// Create re string that will be used for RE and RE2.
- string re = string(regexp_str);
+ std::string re = std::string(regexp_str);
// Accomodate flags.
// Regexp::Latin1 will be accomodated below.
if (!(flags & Regexp::OneLine))
@@ -364,8 +365,8 @@ void TestInstance::RunSearch(Engine type,
case kEngineOnePass:
if (prog_ == NULL ||
- anchor == Prog::kUnanchored ||
!prog_->IsOnePass() ||
+ anchor == Prog::kUnanchored ||
nsubmatch > Prog::kMaxOnePassCapture) {
result->skipped = true;
break;
@@ -376,7 +377,8 @@ void TestInstance::RunSearch(Engine type,
break;
case kEngineBitState:
- if (prog_ == NULL) {
+ if (prog_ == NULL ||
+ !prog_->CanBitState()) {
result->skipped = true;
break;
}
diff --git a/re2/tostring.cc b/re2/tostring.cc
index 278c310..2d06551 100644
--- a/re2/tostring.cc
+++ b/re2/tostring.cc
@@ -28,7 +28,7 @@ enum {
};
// Helper function. See description below.
-static void AppendCCRange(string* t, Rune lo, Rune hi);
+static void AppendCCRange(std::string* t, Rune lo, Rune hi);
// Walker to generate string in s_.
// The arg pointers are actually integers giving the
@@ -36,7 +36,7 @@ static void AppendCCRange(string* t, Rune lo, Rune hi);
// The child_args are always NULL.
class ToStringWalker : public Regexp::Walker<int> {
public:
- explicit ToStringWalker(string* t) : t_(t) {}
+ explicit ToStringWalker(std::string* t) : t_(t) {}
virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
@@ -46,14 +46,14 @@ class ToStringWalker : public Regexp::Walker<int> {
}
private:
- string* t_; // The string the walker appends to.
+ std::string* t_; // The string the walker appends to.
ToStringWalker(const ToStringWalker&) = delete;
ToStringWalker& operator=(const ToStringWalker&) = delete;
};
-string Regexp::ToString() {
- string t;
+std::string Regexp::ToString() {
+ std::string t;
ToStringWalker w(&t);
w.WalkExponential(this, PrecToplevel, 100000);
if (w.stopped_early())
@@ -126,7 +126,7 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
return nprec;
}
-static void AppendLiteral(string *t, Rune r, bool foldcase) {
+static void AppendLiteral(std::string *t, Rune r, bool foldcase) {
if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
t->append(1, '\\');
t->append(1, static_cast<char>(r));
@@ -303,7 +303,7 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
}
// Appends a rune for use in a character class to the string t.
-static void AppendCCChar(string* t, Rune r) {
+static void AppendCCChar(std::string* t, Rune r) {
if (0x20 <= r && r <= 0x7E) {
if (strchr("[]^-\\", r))
t->append("\\");
@@ -338,7 +338,7 @@ static void AppendCCChar(string* t, Rune r) {
StringAppendF(t, "\\x{%x}", static_cast<int>(r));
}
-static void AppendCCRange(string* t, Rune lo, Rune hi) {
+static void AppendCCRange(std::string* t, Rune lo, Rune hi) {
if (lo > hi)
return;
AppendCCChar(t, lo);
diff --git a/util/flags.h b/util/flags.h
index 5af1320..e0f1f42 100644
--- a/util/flags.h
+++ b/util/flags.h
@@ -20,10 +20,10 @@
#define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc)
#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32_t, name, deflt, desc)
-#define DEFINE_string(name, deflt, desc) DEFINE_flag(string, name, deflt, desc)
+#define DEFINE_string(name, deflt, desc) DEFINE_flag(std::string, name, deflt, desc)
#define DECLARE_bool(name) DECLARE_flag(bool, name)
#define DECLARE_int32(name) DECLARE_flag(int32_t, name)
-#define DECLARE_string(name) DECLARE_flag(string, name)
+#define DECLARE_string(name) DECLARE_flag(std::string, name)
#endif // UTIL_FLAGS_H_
diff --git a/util/logging.h b/util/logging.h
index c78f6c1..5b2217f 100644
--- a/util/logging.h
+++ b/util/logging.h
@@ -62,7 +62,7 @@ class LogMessage {
}
void Flush() {
stream() << "\n";
- string s = str_.str();
+ std::string s = str_.str();
size_t n = s.size();
if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
flushed_ = true;
diff --git a/util/pcre.cc b/util/pcre.cc
index 78de292..93ac90c 100644
--- a/util/pcre.cc
+++ b/util/pcre.cc
@@ -99,7 +99,7 @@ const PCRE::ConsumeFunctor PCRE::Consume = { };
const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
// If a regular expression has no error, its error_ field points here
-static const string empty_string;
+static const std::string empty_string;
void PCRE::Init(const char* pattern, Option options, int match_limit,
int stack_limit, bool report_errors) {
@@ -114,7 +114,7 @@ void PCRE::Init(const char* pattern, Option options, int match_limit,
re_partial_ = NULL;
if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
- error_ = new string("illegal regexp option");
+ error_ = new std::string("illegal regexp option");
PCREPORT(ERROR)
<< "Error compiling '" << pattern << "': illegal regexp option";
} else {
@@ -131,13 +131,13 @@ PCRE::PCRE(const char* pattern) {
PCRE::PCRE(const char* pattern, Option option) {
Init(pattern, option, 0, 0, true);
}
-PCRE::PCRE(const string& pattern) {
+PCRE::PCRE(const std::string& pattern) {
Init(pattern.c_str(), None, 0, 0, true);
}
-PCRE::PCRE(const string& pattern, Option option) {
+PCRE::PCRE(const std::string& pattern, Option option) {
Init(pattern.c_str(), option, 0, 0, true);
}
-PCRE::PCRE(const string& pattern, const PCRE_Options& re_option) {
+PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) {
Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
re_option.stack_limit(), re_option.report_errors());
}
@@ -176,7 +176,7 @@ pcre* PCRE::Compile(Anchor anchor) {
} else {
// Tack a '\z' at the end of PCRE. Parenthesize it first so that
// the '\z' applies to all top-level alternatives in the regexp.
- string wrapped = "(?:"; // A non-counting grouping operator
+ std::string wrapped = "(?:"; // A non-counting grouping operator
wrapped += pattern_;
wrapped += ")\\z";
re = pcre_compile(wrapped.c_str(),
@@ -184,7 +184,7 @@ pcre* PCRE::Compile(Anchor anchor) {
&error, &eoffset, NULL);
}
if (re == NULL) {
- if (error_ == &empty_string) error_ = new string(error);
+ if (error_ == &empty_string) error_ = new std::string(error);
PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
}
return re;
@@ -376,7 +376,7 @@ done:
}
}
-bool PCRE::Replace(string *str,
+bool PCRE::Replace(std::string *str,
const PCRE& pattern,
const StringPiece& rewrite) {
int vec[kVecSize] = {};
@@ -384,7 +384,7 @@ bool PCRE::Replace(string *str,
if (matches == 0)
return false;
- string s;
+ std::string s;
if (!pattern.Rewrite(&s, rewrite, *str, vec, matches))
return false;
@@ -394,12 +394,12 @@ bool PCRE::Replace(string *str,
return true;
}
-int PCRE::GlobalReplace(string *str,
+int PCRE::GlobalReplace(std::string *str,
const PCRE& pattern,
const StringPiece& rewrite) {
int count = 0;
int vec[kVecSize] = {};
- string out;
+ std::string out;
size_t start = 0;
bool last_match_was_empty_string = false;
@@ -455,7 +455,7 @@ int PCRE::GlobalReplace(string *str,
bool PCRE::Extract(const StringPiece &text,
const PCRE& pattern,
const StringPiece &rewrite,
- string *out) {
+ std::string *out) {
int vec[kVecSize] = {};
int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
if (matches == 0)
@@ -464,8 +464,8 @@ bool PCRE::Extract(const StringPiece &text,
return pattern.Rewrite(out, rewrite, text, vec, matches);
}
-string PCRE::QuoteMeta(const StringPiece& unquoted) {
- string result;
+std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
+ std::string result;
result.reserve(unquoted.size() << 1);
// Escape any ascii character not in [A-Za-z_0-9].
@@ -669,7 +669,7 @@ bool PCRE::DoMatch(const StringPiece& text,
return b;
}
-bool PCRE::Rewrite(string *out, const StringPiece &rewrite,
+bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
const StringPiece &text, int *vec, int veclen) const {
int number_of_capturing_groups = NumberOfCapturingGroups();
for (const char *s = rewrite.data(), *end = s + rewrite.size();
@@ -705,7 +705,8 @@ bool PCRE::Rewrite(string *out, const StringPiece &rewrite,
return true;
}
-bool PCRE::CheckRewriteString(const StringPiece& rewrite, string* error) const {
+bool PCRE::CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const {
int max_token = -1;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
@@ -769,7 +770,7 @@ bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) {
bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
if (dest == NULL) return true;
- reinterpret_cast<string*>(dest)->assign(str, n);
+ reinterpret_cast<std::string*>(dest)->assign(str, n);
return true;
}
diff --git a/util/pcre.h b/util/pcre.h
index 10ec4f2..644dce6 100644
--- a/util/pcre.h
+++ b/util/pcre.h
@@ -67,7 +67,7 @@
//
// Example: extracts "ruby" into "s" and 1234 into "i"
// int i;
-// string s;
+// std::string s;
// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
//
// Example: fails because string cannot be stored in integer
@@ -124,10 +124,10 @@
// which represents a sub-range of a real string.
//
// Example: read lines of the form "var = value" from a string.
-// string contents = ...; // Fill string somehow
+// std::string contents = ...; // Fill string somehow
// StringPiece input(contents); // Wrap a StringPiece around it
//
-// string var;
+// std::string var;
// int value;
// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
// ...;
@@ -212,21 +212,21 @@ class PCRE {
// pass in a string or a "const char*" wherever an "PCRE" is expected.
PCRE(const char* pattern);
PCRE(const char* pattern, Option option);
- PCRE(const string& pattern);
- PCRE(const string& pattern, Option option);
+ PCRE(const std::string& pattern);
+ PCRE(const std::string& pattern, Option option);
PCRE(const char *pattern, const PCRE_Options& re_option);
- PCRE(const string& pattern, const PCRE_Options& re_option);
+ PCRE(const std::string& pattern, const PCRE_Options& re_option);
~PCRE();
// The string specification for this PCRE. E.g.
// PCRE re("ab*c?d+");
// re.pattern(); // "ab*c?d+"
- const string& pattern() const { return pattern_; }
+ const std::string& pattern() const { return pattern_; }
// If PCRE could not be created properly, returns an error string.
// Else returns the empty string.
- const string& error() const { return *error_; }
+ const std::string& error() const { return *error_; }
// Whether the PCRE has hit a match limit during execution.
// Not thread safe. Intended only for testing.
@@ -241,12 +241,12 @@ class PCRE {
// Matches "text" against "pattern". If pointer arguments are
// supplied, copies matched sub-patterns into them.
//
- // You can pass in a "const char*" or a "string" for "text".
- // You can pass in a "const char*" or a "string" or a "PCRE" for "pattern".
+ // You can pass in a "const char*" or a "std::string" for "text".
+ // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of:
- // string (matched piece is copied to string)
+ // std::string (matched piece is copied to string)
// StringPiece (StringPiece is mutated to point to matched piece)
// T (where "bool T::ParseFrom(const char*, size_t)" exists)
// (void*)NULL (the corresponding matched sub-pattern is not copied)
@@ -369,14 +369,14 @@ class PCRE {
// from the pattern. \0 in "rewrite" refers to the entire matching
// text. E.g.,
//
- // string s = "yabba dabba doo";
+ // std::string s = "yabba dabba doo";
// CHECK(PCRE::Replace(&s, "b+", "d"));
//
// will leave "s" containing "yada dabba doo"
//
// Returns true if the pattern matches and a replacement occurs,
// false otherwise.
- static bool Replace(string *str,
+ static bool Replace(std::string *str,
const PCRE& pattern,
const StringPiece& rewrite);
@@ -384,13 +384,13 @@ class PCRE {
// the string with the rewrite. Replacements are not subject to
// re-matching. E.g.,
//
- // string s = "yabba dabba doo";
+ // std::string s = "yabba dabba doo";
// CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
//
// will leave "s" containing "yada dada doo"
//
// Returns the number of replacements made.
- static int GlobalReplace(string *str,
+ static int GlobalReplace(std::string *str,
const PCRE& pattern,
const StringPiece& rewrite);
@@ -403,7 +403,7 @@ class PCRE {
static bool Extract(const StringPiece &text,
const PCRE& pattern,
const StringPiece &rewrite,
- string *out);
+ std::string *out);
// Check that the given @p rewrite string is suitable for use with
// this PCRE. It checks that:
@@ -418,7 +418,8 @@ class PCRE {
// @param error An error message is recorded here, iff we return false.
// Otherwise, it is unchanged.
// @return true, iff @p rewrite is suitable for use with the PCRE.
- bool CheckRewriteString(const StringPiece& rewrite, string* error) const;
+ bool CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const;
// Returns a copy of 'unquoted' with all potentially meaningful
// regexp characters backslash-escaped. The returned string, used
@@ -427,7 +428,7 @@ class PCRE {
// 1.5-2.0?
// becomes:
// 1\.5\-2\.0\?
- static string QuoteMeta(const StringPiece& unquoted);
+ static std::string QuoteMeta(const StringPiece& unquoted);
/***** Generic matching interface (not so nice to use) *****/
@@ -473,7 +474,7 @@ class PCRE {
// Append the "rewrite" string, with backslash subsitutions from "text"
// and "vec", to string "out".
- bool Rewrite(string *out,
+ bool Rewrite(std::string *out,
const StringPiece &rewrite,
const StringPiece &text,
int *vec,
@@ -491,15 +492,15 @@ class PCRE {
// Compile the regexp for the specified anchoring mode
pcre* Compile(Anchor anchor);
- string pattern_;
- Option options_;
- pcre* re_full_; // For full matches
- pcre* re_partial_; // For partial matches
- const string* error_; // Error indicator (or empty string)
- bool report_errors_; // Silences error logging if false
- int match_limit_; // Limit on execution resources
- int stack_limit_; // Limit on stack resources (bytes)
- mutable int32_t hit_limit_; // Hit limit during execution (bool)?
+ std::string pattern_;
+ Option options_;
+ pcre* re_full_; // For full matches
+ pcre* re_partial_; // For partial matches
+ const std::string* error_; // Error indicator (or empty string)
+ bool report_errors_; // Silences error logging if false
+ int match_limit_; // Limit on execution resources
+ int stack_limit_; // Limit on stack resources (bytes)
+ mutable int32_t hit_limit_; // Hit limit during execution (bool)
PCRE(const PCRE&) = delete;
PCRE& operator=(const PCRE&) = delete;
@@ -584,7 +585,7 @@ class PCRE::Arg {
MAKE_PARSER(unsigned char, parse_uchar);
MAKE_PARSER(float, parse_float);
MAKE_PARSER(double, parse_double);
- MAKE_PARSER(string, parse_string);
+ MAKE_PARSER(std::string, parse_string);
MAKE_PARSER(StringPiece, parse_stringpiece);
MAKE_PARSER(short, parse_short);
diff --git a/util/strutil.cc b/util/strutil.cc
index 8eabfa4..cc3b857 100644
--- a/util/strutil.cc
+++ b/util/strutil.cc
@@ -65,17 +65,17 @@ static size_t CEscapeString(const char* src, size_t src_len,
// Copies 'src' to result, escaping dangerous characters using
// C-style escape sequences. 'src' and 'dest' should not overlap.
// ----------------------------------------------------------------------
-string CEscape(const StringPiece& src) {
+std::string CEscape(const StringPiece& src) {
const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
char* dest = new char[dest_len];
const size_t used = CEscapeString(src.data(), src.size(),
dest, dest_len);
- string s = string(dest, used);
+ std::string s = std::string(dest, used);
delete[] dest;
return s;
}
-void PrefixSuccessor(string* prefix) {
+void PrefixSuccessor(std::string* prefix) {
// We can increment the last character in the string and be done
// unless that character is 255, in which case we have to erase the
// last character and increment the previous character, unless that
@@ -92,7 +92,7 @@ void PrefixSuccessor(string* prefix) {
}
}
-static void StringAppendV(string* dst, const char* format, va_list ap) {
+static void StringAppendV(std::string* dst, const char* format, va_list ap) {
// First try with a small fixed size buffer
char space[1024];
@@ -137,16 +137,16 @@ static void StringAppendV(string* dst, const char* format, va_list ap) {
}
}
-string StringPrintf(const char* format, ...) {
+std::string StringPrintf(const char* format, ...) {
va_list ap;
va_start(ap, format);
- string result;
+ std::string result;
StringAppendV(&result, format, ap);
va_end(ap);
return result;
}
-void SStringPrintf(string* dst, const char* format, ...) {
+void SStringPrintf(std::string* dst, const char* format, ...) {
va_list ap;
va_start(ap, format);
dst->clear();
@@ -154,7 +154,7 @@ void SStringPrintf(string* dst, const char* format, ...) {
va_end(ap);
}
-void StringAppendF(string* dst, const char* format, ...) {
+void StringAppendF(std::string* dst, const char* format, ...) {
va_list ap;
va_start(ap, format);
StringAppendV(dst, format, ap);
diff --git a/util/strutil.h b/util/strutil.h
index 2c3c104..b16981e 100644
--- a/util/strutil.h
+++ b/util/strutil.h
@@ -12,11 +12,11 @@
namespace re2 {
-string CEscape(const StringPiece& src);
-void PrefixSuccessor(string* prefix);
-string StringPrintf(const char* format, ...);
-void SStringPrintf(string* dst, const char* format, ...);
-void StringAppendF(string* dst, const char* format, ...);
+std::string CEscape(const StringPiece& src);
+void PrefixSuccessor(std::string* prefix);
+std::string StringPrintf(const char* format, ...);
+void SStringPrintf(std::string* dst, const char* format, ...);
+void StringAppendF(std::string* dst, const char* format, ...);
} // namespace re2
diff --git a/util/util.h b/util/util.h
index 33d100a..3f75794 100644
--- a/util/util.h
+++ b/util/util.h
@@ -5,10 +5,6 @@
#ifndef UTIL_UTIL_H_
#define UTIL_UTIL_H_
-// TODO(junyer): Get rid of this.
-#include <string>
-using std::string;
-
#define arraysize(array) (int)(sizeof(array)/sizeof((array)[0]))
#ifndef ATTRIBUTE_NORETURN