Imported Upstream version 20191201upstream/20191201

author: DongHun Kwak <dh0128.kwak@samsung.com> 2020-09-23 15:41:35 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2020-09-23 15:41:35 +0900
commit: c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11 (patch)
tree: f7ab832818b226eb03bcd9752a834ed9ee23f8c7
parent: f052642278b0c5ca48bdb9bbef1cafeaafc509c4 (diff)
download: re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.gz
re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.bz2
re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.zip
8 files changed, 64 insertions, 39 deletions
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 5cbc070..f15c1e4 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -342,6 +342,10 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
     cap_[0] = p;
     if (TrySearch(prog_->start(), p))  // Match must be leftmost; done.
       return true;
+    // Avoid invoking undefined behavior (arithmetic on a null pointer)
+    // by simply not continuing the loop.
+    if (p == NULL)
+      break;
   }
   return false;
 }
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 3889f11..77fb5fb 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -382,10 +382,15 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
         break;
 
       case kInstMatch: {
-        // Avoid invoking undefined behavior when p happens
-        // to be null - and p-1 would be meaningless anyway.
-        if (p == NULL)
+        // Avoid invoking undefined behavior (arithmetic on a null pointer)
+        // by storing p instead of p-1. (What would the latter even mean?!)
+        // This complements the special case in NFA::Search().
+        if (p == NULL) {
+          CopyCapture(match_, t->capture);
+          match_[1] = p;
+          matched_ = true;
           break;
+        }
 
         if (endmatch_ && p-1 != etext_)
           break;
@@ -593,6 +598,18 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
         fprintf(stderr, "dead\n");
       break;
     }
+
+    // Avoid invoking undefined behavior (arithmetic on a null pointer)
+    // by simply not continuing the loop.
+    // This complements the special case in NFA::Step().
+    if (p == NULL) {
+      (void)Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
+      DCHECK_EQ(runq->size(), 0);
+      using std::swap;
+      swap(nextq, runq);
+      nextq->clear();
+      break;
+    }
   }
 
   for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
diff --git a/re2/parse.cc b/re2/parse.cc
index 11f8b3b..50dfdac 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -1323,14 +1323,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
 // Parses a decimal integer, storing it in *np.
 // Sets *s to span the remainder of the string.
 static bool ParseInteger(StringPiece* s, int* np) {
-  if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
+  if (s->empty() || !isdigit((*s)[0] & 0xFF))
     return false;
   // Disallow leading zeros.
   if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
     return false;
   int n = 0;
   int c;
-  while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
+  while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
     // Avoid overflow.
     if (n >= 100000000)
       return false;
@@ -1352,16 +1352,16 @@ static bool ParseInteger(StringPiece* s, int* np) {
 // s must NOT be edited unless MaybeParseRepetition returns true.
 static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
   StringPiece s = *sp;
-  if (s.size() == 0 || s[0] != '{')
+  if (s.empty() || s[0] != '{')
     return false;
   s.remove_prefix(1);  // '{'
   if (!ParseInteger(&s, lo))
     return false;
-  if (s.size() == 0)
+  if (s.empty())
     return false;
   if (s[0] == ',') {
     s.remove_prefix(1);  // ','
-    if (s.size() == 0)
+    if (s.empty())
       return false;
     if (s[0] == '}') {
       // {2,} means at least 2
@@ -1375,7 +1375,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
     // {2} means exactly two
     *hi = *lo;
   }
-  if (s.size() == 0 || s[0] != '}')
+  if (s.empty() || s[0] != '}')
     return false;
   s.remove_prefix(1);  // '}'
   *sp = s;
@@ -1416,7 +1416,7 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
 static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
   StringPiece t = s;
   Rune r;
-  while (t.size() > 0) {
+  while (!t.empty()) {
     if (StringPieceToRune(&r, &t, status) < 0)
       return false;
   }
@@ -1448,13 +1448,13 @@ static int UnHex(int c) {
 static bool ParseEscape(StringPiece* s, Rune* rp,
                         RegexpStatus* status, int rune_max) {
   const char* begin = s->data();
-  if (s->size() < 1 || (*s)[0] != '\\') {
+  if (s->empty() || (*s)[0] != '\\') {
     // Should not happen - caller always checks.
     status->set_code(kRegexpInternalError);
     status->set_error_arg(StringPiece());
     return false;
   }
-  if (s->size() < 2) {
+  if (s->size() == 1) {
     status->set_code(kRegexpTrailingBackslash);
     status->set_error_arg(StringPiece());
     return false;
@@ -1485,16 +1485,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
     case '6':
     case '7':
       // Single non-zero octal digit is a backreference; not supported.
-      if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7')
+      if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
         goto BadEscape;
       FALLTHROUGH_INTENDED;
     case '0':
       // consume up to three octal digits; already have one.
       code = c - '0';
-      if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') {
+      if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
         code = code * 8 + c - '0';
         s->remove_prefix(1);  // digit
-        if (s->size() > 0) {
+        if (!s->empty()) {
           c = (*s)[0];
           if ('0' <= c && c <= '7') {
             code = code * 8 + c - '0';
@@ -1509,7 +1509,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
 
     // Hexadecimal escapes
     case 'x':
-      if (s->size() == 0)
+      if (s->empty())
         goto BadEscape;
       if (StringPieceToRune(&c, s, status) < 0)
         return false;
@@ -1529,7 +1529,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
           code = code * 16 + UnHex(c);
           if (code > rune_max)
             goto BadEscape;
-          if (s->size() == 0)
+          if (s->empty())
             goto BadEscape;
           if (StringPieceToRune(&c, s, status) < 0)
             return false;
@@ -1540,7 +1540,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
         return true;
       }
       // Easy case: two hex digits.
-      if (s->size() == 0)
+      if (s->empty())
         goto BadEscape;
       if (StringPieceToRune(&c1, s, status) < 0)
         return false;
@@ -1771,7 +1771,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
   // Chop seq where s now begins.
   seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
 
-  if (name.size() > 0 && name[0] == '^') {
+  if (!name.empty() && name[0] == '^') {
     sign = -sign;
     name.remove_prefix(1);  // '^'
   }
@@ -1858,7 +1858,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
 bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
                                           const StringPiece& whole_class,
                                           RegexpStatus* status) {
-  if (s->size() == 0) {
+  if (s->empty()) {
     status->set_code(kRegexpMissingBracket);
     status->set_error_arg(whole_class);
     return false;
@@ -1866,7 +1866,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
 
   // Allow regular escape sequences even though
   // many need not be escaped in this context.
-  if (s->size() >= 1 && (*s)[0] == '\\')
+  if ((*s)[0] == '\\')
     return ParseEscape(s, rp, status, rune_max_);
 
   // Otherwise take the next rune.
@@ -1908,7 +1908,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
                                         Regexp** out_re,
                                         RegexpStatus* status) {
   StringPiece whole_class = *s;
-  if (s->size() == 0 || (*s)[0] != '[') {
+  if (s->empty() || (*s)[0] != '[') {
     // Caller checked this.
     status->set_code(kRegexpInternalError);
     status->set_error_arg(StringPiece());
@@ -1918,7 +1918,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
   Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
   re->ccb_ = new CharClassBuilder;
   s->remove_prefix(1);  // '['
-  if (s->size() > 0 && (*s)[0] == '^') {
+  if (!s->empty() && (*s)[0] == '^') {
     s->remove_prefix(1);  // '^'
     negated = true;
     if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
@@ -1928,7 +1928,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
     }
   }
   bool first = true;  // ] is okay as first char in class
-  while (s->size() > 0 && ((*s)[0] != ']' || first)) {
+  while (!s->empty() && ((*s)[0] != ']' || first)) {
     // - is only okay unescaped as first or last in class.
     // Except that Perl allows - anywhere.
     if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
@@ -1996,7 +1996,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
     // in the flags.
     re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
   }
-  if (s->size() == 0) {
+  if (s->empty()) {
     status->set_code(kRegexpMissingBracket);
     status->set_error_arg(whole_class);
     re->Decref();
@@ -2016,7 +2016,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
 // Python rejects names starting with digits.
 // We don't enforce either of those.
 static bool IsValidCaptureName(const StringPiece& name) {
-  if (name.size() == 0)
+  if (name.empty())
     return false;
   for (size_t i = 0; i < name.size(); i++) {
     int c = name[i];
@@ -2099,7 +2099,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
   int nflags = flags_;
   Rune c;
   for (bool done = false; !done; ) {
-    if (t.size() == 0)
+    if (t.empty())
       goto BadPerlOp;
     if (StringPieceToRune(&c, &t, status_) < 0)
       return false;
@@ -2217,7 +2217,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
 
   if (global_flags & Literal) {
     // Special parse loop for literal string.
-    while (t.size() > 0) {
+    while (!t.empty()) {
       Rune r;
       if (StringPieceToRune(&r, &t, status) < 0)
         return NULL;
@@ -2228,7 +2228,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
   }
 
   StringPiece lastunary = StringPiece();
-  while (t.size() > 0) {
+  while (!t.empty()) {
     StringPiece isunary = StringPiece();
     switch (t[0]) {
       default: {
@@ -2312,11 +2312,11 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
         bool nongreedy = false;
         t.remove_prefix(1);  // '*' or '+' or '?'
         if (ps.flags() & PerlX) {
-          if (t.size() > 0 && t[0] == '?') {
+          if (!t.empty() && t[0] == '?') {
             nongreedy = true;
             t.remove_prefix(1);  // '?'
           }
-          if (lastunary.size() > 0) {
+          if (!lastunary.empty()) {
             // In Perl it is not allowed to stack repetition operators:
             //   a** is a syntax error, not a double-star.
             // (and a++ means something else entirely, which we don't support!)
@@ -2347,11 +2347,11 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
         }
         bool nongreedy = false;
         if (ps.flags() & PerlX) {
-          if (t.size() > 0 && t[0] == '?') {
+          if (!t.empty() && t[0] == '?') {
             nongreedy = true;
             t.remove_prefix(1);  // '?'
           }
-          if (lastunary.size() > 0) {
+          if (!lastunary.empty()) {
             // Not allowed to stack repetition operators.
             status->set_code(kRegexpRepeatOp);
             status->set_error_arg(StringPiece(
@@ -2405,7 +2405,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
 
           if (t[1] == 'Q') {  // \Q ... \E: the ... is always literals
             t.remove_prefix(2);  // '\\', 'Q'
-            while (t.size() > 0) {
+            while (!t.empty()) {
               if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
                 t.remove_prefix(2);  // '\\', 'E'
                 break;
diff --git a/re2/re2.cc b/re2/re2.cc
index 5156fe5..a8dd24b 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -408,7 +408,7 @@ int RE2::GlobalReplace(std::string* str,
       break;
     if (p < vec[0].data())
       out.append(p, vec[0].data() - p);
-    if (vec[0].data() == lastend && vec[0].size() == 0) {
+    if (vec[0].data() == lastend && vec[0].empty()) {
       // Disallow empty match at end of last match: skip ahead.
       //
       // fullrune() takes int, not ptrdiff_t. However, it just looks
@@ -934,7 +934,7 @@ bool RE2::Rewrite(std::string* out,
         return false;
       }
       StringPiece snip = vec[n];
-      if (snip.size() > 0)
+      if (!snip.empty())
         out->append(snip.data(), snip.size());
     } else if (c == '\\') {
       out->push_back('\\');
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index 6cde42d..1e888da 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -148,6 +148,10 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
     cap_[0] = p;
     if (Visit(prog_->start(), p))  // Match must be leftmost; done.
       return true;
+    // Avoid invoking undefined behavior (arithmetic on a null pointer)
+    // by simply not continuing the loop.
+    if (p == NULL)
+      break;
   }
   return false;
 }
diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc
index d156c88..3eeda25 100644
--- a/re2/testing/regexp_generator.cc
+++ b/re2/testing/regexp_generator.cc
@@ -256,7 +256,7 @@ std::vector<std::string> Explode(const StringPiece& s) {
 std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
   std::vector<std::string> v;
 
-  if (sep.size() == 0)
+  if (sep.empty())
     return Explode(s);
 
   const char *p = s.data();
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index 86eec53..67d262c 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -645,7 +645,7 @@ static Prog::Anchor anchors[] = {
 
 bool Tester::TestInput(const StringPiece& text) {
   bool okay = TestInputInContext(text, text);
-  if (text.size() > 0) {
+  if (!text.empty()) {
     StringPiece sp;
     sp = text;
     sp.remove_prefix(1);
diff --git a/re2/walker-inl.h b/re2/walker-inl.h
index 032b8ac..310be54 100644
--- a/re2/walker-inl.h
+++ b/re2/walker-inl.h
@@ -150,7 +150,7 @@ template<typename T> void Regexp::Walker<T>::Reset() {
   if (stack_ && stack_->size() > 0) {
     LOG(DFATAL) << "Stack not empty.";
     while (stack_->size() > 0) {
-      delete stack_->top().child_args;
+      delete[] stack_->top().child_args;
       stack_->pop();
     }
   }
author	DongHun Kwak <dh0128.kwak@samsung.com>	2020-09-23 15:41:35 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2020-09-23 15:41:35 +0900
commit	c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11 (patch)
tree	f7ab832818b226eb03bcd9752a834ed9ee23f8c7
parent	f052642278b0c5ca48bdb9bbef1cafeaafc509c4 (diff)
download	re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.gz re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.bz2 re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.zip