summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: DongHun Kwak <dh0128.kwak@samsung.com> 2020-09-23 15:41:35 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2020-09-23 15:41:35 +0900
commit: c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11 (patch)
tree: f7ab832818b226eb03bcd9752a834ed9ee23f8c7
parent: f052642278b0c5ca48bdb9bbef1cafeaafc509c4 (diff)
download: re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.gz
download: re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.bz2
download: re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.zip
Imported Upstream version 20191201 (ref: upstream/20191201)
-rw-r--r-- re2/bitstate.cc 4
-rw-r--r-- re2/nfa.cc 23
-rw-r--r-- re2/parse.cc 62
-rw-r--r-- re2/re2.cc 4
-rw-r--r-- re2/testing/backtrack.cc 4
-rw-r--r-- re2/testing/regexp_generator.cc 2
-rw-r--r-- re2/testing/tester.cc 2
-rw-r--r-- re2/walker-inl.h 2
8 files changed, 64 insertions, 39 deletions
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 5cbc070..f15c1e4 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -342,6 +342,10 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
cap_[0] = p;
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
return true;
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by simply not continuing the loop.
+ if (p == NULL)
+ break;
}
return false;
}
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 3889f11..77fb5fb 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -382,10 +382,15 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
break;
case kInstMatch: {
- // Avoid invoking undefined behavior when p happens
- // to be null - and p-1 would be meaningless anyway.
- if (p == NULL)
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by storing p instead of p-1. (What would the latter even mean?!)
+ // This complements the special case in NFA::Search().
+ if (p == NULL) {
+ CopyCapture(match_, t->capture);
+ match_[1] = p;
+ matched_ = true;
break;
+ }
if (endmatch_ && p-1 != etext_)
break;
@@ -593,6 +598,18 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
fprintf(stderr, "dead\n");
break;
}
+
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by simply not continuing the loop.
+ // This complements the special case in NFA::Step().
+ if (p == NULL) {
+ (void)Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
+ DCHECK_EQ(runq->size(), 0);
+ using std::swap;
+ swap(nextq, runq);
+ nextq->clear();
+ break;
+ }
}
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
diff --git a/re2/parse.cc b/re2/parse.cc
index 11f8b3b..50dfdac 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -1323,14 +1323,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
// Parses a decimal integer, storing it in *np.
// Sets *s to span the remainder of the string.
static bool ParseInteger(StringPiece* s, int* np) {
- if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
+ if (s->empty() || !isdigit((*s)[0] & 0xFF))
return false;
// Disallow leading zeros.
if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
return false;
int n = 0;
int c;
- while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
+ while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
// Avoid overflow.
if (n >= 100000000)
return false;
@@ -1352,16 +1352,16 @@ static bool ParseInteger(StringPiece* s, int* np) {
// s must NOT be edited unless MaybeParseRepetition returns true.
static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
StringPiece s = *sp;
- if (s.size() == 0 || s[0] != '{')
+ if (s.empty() || s[0] != '{')
return false;
s.remove_prefix(1); // '{'
if (!ParseInteger(&s, lo))
return false;
- if (s.size() == 0)
+ if (s.empty())
return false;
if (s[0] == ',') {
s.remove_prefix(1); // ','
- if (s.size() == 0)
+ if (s.empty())
return false;
if (s[0] == '}') {
// {2,} means at least 2
@@ -1375,7 +1375,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
// {2} means exactly two
*hi = *lo;
}
- if (s.size() == 0 || s[0] != '}')
+ if (s.empty() || s[0] != '}')
return false;
s.remove_prefix(1); // '}'
*sp = s;
@@ -1416,7 +1416,7 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
StringPiece t = s;
Rune r;
- while (t.size() > 0) {
+ while (!t.empty()) {
if (StringPieceToRune(&r, &t, status) < 0)
return false;
}
@@ -1448,13 +1448,13 @@ static int UnHex(int c) {
static bool ParseEscape(StringPiece* s, Rune* rp,
RegexpStatus* status, int rune_max) {
const char* begin = s->data();
- if (s->size() < 1 || (*s)[0] != '\\') {
+ if (s->empty() || (*s)[0] != '\\') {
// Should not happen - caller always checks.
status->set_code(kRegexpInternalError);
status->set_error_arg(StringPiece());
return false;
}
- if (s->size() < 2) {
+ if (s->size() == 1) {
status->set_code(kRegexpTrailingBackslash);
status->set_error_arg(StringPiece());
return false;
@@ -1485,16 +1485,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
case '6':
case '7':
// Single non-zero octal digit is a backreference; not supported.
- if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7')
+ if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
goto BadEscape;
FALLTHROUGH_INTENDED;
case '0':
// consume up to three octal digits; already have one.
code = c - '0';
- if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') {
+ if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
code = code * 8 + c - '0';
s->remove_prefix(1); // digit
- if (s->size() > 0) {
+ if (!s->empty()) {
c = (*s)[0];
if ('0' <= c && c <= '7') {
code = code * 8 + c - '0';
@@ -1509,7 +1509,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// Hexadecimal escapes
case 'x':
- if (s->size() == 0)
+ if (s->empty())
goto BadEscape;
if (StringPieceToRune(&c, s, status) < 0)
return false;
@@ -1529,7 +1529,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
code = code * 16 + UnHex(c);
if (code > rune_max)
goto BadEscape;
- if (s->size() == 0)
+ if (s->empty())
goto BadEscape;
if (StringPieceToRune(&c, s, status) < 0)
return false;
@@ -1540,7 +1540,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
return true;
}
// Easy case: two hex digits.
- if (s->size() == 0)
+ if (s->empty())
goto BadEscape;
if (StringPieceToRune(&c1, s, status) < 0)
return false;
@@ -1771,7 +1771,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Chop seq where s now begins.
seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
- if (name.size() > 0 && name[0] == '^') {
+ if (!name.empty() && name[0] == '^') {
sign = -sign;
name.remove_prefix(1); // '^'
}
@@ -1858,7 +1858,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
const StringPiece& whole_class,
RegexpStatus* status) {
- if (s->size() == 0) {
+ if (s->empty()) {
status->set_code(kRegexpMissingBracket);
status->set_error_arg(whole_class);
return false;
@@ -1866,7 +1866,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
// Allow regular escape sequences even though
// many need not be escaped in this context.
- if (s->size() >= 1 && (*s)[0] == '\\')
+ if ((*s)[0] == '\\')
return ParseEscape(s, rp, status, rune_max_);
// Otherwise take the next rune.
@@ -1908,7 +1908,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
Regexp** out_re,
RegexpStatus* status) {
StringPiece whole_class = *s;
- if (s->size() == 0 || (*s)[0] != '[') {
+ if (s->empty() || (*s)[0] != '[') {
// Caller checked this.
status->set_code(kRegexpInternalError);
status->set_error_arg(StringPiece());
@@ -1918,7 +1918,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
re->ccb_ = new CharClassBuilder;
s->remove_prefix(1); // '['
- if (s->size() > 0 && (*s)[0] == '^') {
+ if (!s->empty() && (*s)[0] == '^') {
s->remove_prefix(1); // '^'
negated = true;
if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
@@ -1928,7 +1928,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
}
}
bool first = true; // ] is okay as first char in class
- while (s->size() > 0 && ((*s)[0] != ']' || first)) {
+ while (!s->empty() && ((*s)[0] != ']' || first)) {
// - is only okay unescaped as first or last in class.
// Except that Perl allows - anywhere.
if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
@@ -1996,7 +1996,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
// in the flags.
re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
}
- if (s->size() == 0) {
+ if (s->empty()) {
status->set_code(kRegexpMissingBracket);
status->set_error_arg(whole_class);
re->Decref();
@@ -2016,7 +2016,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
// Python rejects names starting with digits.
// We don't enforce either of those.
static bool IsValidCaptureName(const StringPiece& name) {
- if (name.size() == 0)
+ if (name.empty())
return false;
for (size_t i = 0; i < name.size(); i++) {
int c = name[i];
@@ -2099,7 +2099,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
int nflags = flags_;
Rune c;
for (bool done = false; !done; ) {
- if (t.size() == 0)
+ if (t.empty())
goto BadPerlOp;
if (StringPieceToRune(&c, &t, status_) < 0)
return false;
@@ -2217,7 +2217,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (global_flags & Literal) {
// Special parse loop for literal string.
- while (t.size() > 0) {
+ while (!t.empty()) {
Rune r;
if (StringPieceToRune(&r, &t, status) < 0)
return NULL;
@@ -2228,7 +2228,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
}
StringPiece lastunary = StringPiece();
- while (t.size() > 0) {
+ while (!t.empty()) {
StringPiece isunary = StringPiece();
switch (t[0]) {
default: {
@@ -2312,11 +2312,11 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
bool nongreedy = false;
t.remove_prefix(1); // '*' or '+' or '?'
if (ps.flags() & PerlX) {
- if (t.size() > 0 && t[0] == '?') {
+ if (!t.empty() && t[0] == '?') {
nongreedy = true;
t.remove_prefix(1); // '?'
}
- if (lastunary.size() > 0) {
+ if (!lastunary.empty()) {
// In Perl it is not allowed to stack repetition operators:
// a** is a syntax error, not a double-star.
// (and a++ means something else entirely, which we don't support!)
@@ -2347,11 +2347,11 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
}
bool nongreedy = false;
if (ps.flags() & PerlX) {
- if (t.size() > 0 && t[0] == '?') {
+ if (!t.empty() && t[0] == '?') {
nongreedy = true;
t.remove_prefix(1); // '?'
}
- if (lastunary.size() > 0) {
+ if (!lastunary.empty()) {
// Not allowed to stack repetition operators.
status->set_code(kRegexpRepeatOp);
status->set_error_arg(StringPiece(
@@ -2405,7 +2405,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (t[1] == 'Q') { // \Q ... \E: the ... is always literals
t.remove_prefix(2); // '\\', 'Q'
- while (t.size() > 0) {
+ while (!t.empty()) {
if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
t.remove_prefix(2); // '\\', 'E'
break;
diff --git a/re2/re2.cc b/re2/re2.cc
index 5156fe5..a8dd24b 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -408,7 +408,7 @@ int RE2::GlobalReplace(std::string* str,
break;
if (p < vec[0].data())
out.append(p, vec[0].data() - p);
- if (vec[0].data() == lastend && vec[0].size() == 0) {
+ if (vec[0].data() == lastend && vec[0].empty()) {
// Disallow empty match at end of last match: skip ahead.
//
// fullrune() takes int, not ptrdiff_t. However, it just looks
@@ -934,7 +934,7 @@ bool RE2::Rewrite(std::string* out,
return false;
}
StringPiece snip = vec[n];
- if (snip.size() > 0)
+ if (!snip.empty())
out->append(snip.data(), snip.size());
} else if (c == '\\') {
out->push_back('\\');
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index 6cde42d..1e888da 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -148,6 +148,10 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
cap_[0] = p;
if (Visit(prog_->start(), p)) // Match must be leftmost; done.
return true;
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by simply not continuing the loop.
+ if (p == NULL)
+ break;
}
return false;
}
diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc
index d156c88..3eeda25 100644
--- a/re2/testing/regexp_generator.cc
+++ b/re2/testing/regexp_generator.cc
@@ -256,7 +256,7 @@ std::vector<std::string> Explode(const StringPiece& s) {
std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
std::vector<std::string> v;
- if (sep.size() == 0)
+ if (sep.empty())
return Explode(s);
const char *p = s.data();
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index 86eec53..67d262c 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -645,7 +645,7 @@ static Prog::Anchor anchors[] = {
bool Tester::TestInput(const StringPiece& text) {
bool okay = TestInputInContext(text, text);
- if (text.size() > 0) {
+ if (!text.empty()) {
StringPiece sp;
sp = text;
sp.remove_prefix(1);
diff --git a/re2/walker-inl.h b/re2/walker-inl.h
index 032b8ac..310be54 100644
--- a/re2/walker-inl.h
+++ b/re2/walker-inl.h
@@ -150,7 +150,7 @@ template<typename T> void Regexp::Walker<T>::Reset() {
if (stack_ && stack_->size() > 0) {
LOG(DFATAL) << "Stack not empty.";
while (stack_->size() > 0) {
- delete stack_->top().child_args;
+ delete[] stack_->top().child_args;
stack_->pop();
}
}