diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2020-09-23 15:41:35 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2020-09-23 15:41:35 +0900 |
commit | c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11 (patch) | |
tree | f7ab832818b226eb03bcd9752a834ed9ee23f8c7 | |
parent | f052642278b0c5ca48bdb9bbef1cafeaafc509c4 (diff) | |
download | re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.gz re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.tar.bz2 re2-c07c8060012e4e99ad7fb2b1cd3c00b0e0b43a11.zip |
Imported Upstream version 20191201 (ref: upstream/20191201)
-rw-r--r-- | re2/bitstate.cc | 4 | ||||
-rw-r--r-- | re2/nfa.cc | 23 | ||||
-rw-r--r-- | re2/parse.cc | 62 | ||||
-rw-r--r-- | re2/re2.cc | 4 | ||||
-rw-r--r-- | re2/testing/backtrack.cc | 4 | ||||
-rw-r--r-- | re2/testing/regexp_generator.cc | 2 | ||||
-rw-r--r-- | re2/testing/tester.cc | 2 | ||||
-rw-r--r-- | re2/walker-inl.h | 2 |
8 files changed, 64 insertions, 39 deletions
diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 5cbc070..f15c1e4 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -342,6 +342,10 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, cap_[0] = p; if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. return true; + // Avoid invoking undefined behavior (arithmetic on a null pointer) + // by simply not continuing the loop. + if (p == NULL) + break; } return false; } @@ -382,10 +382,15 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, break; case kInstMatch: { - // Avoid invoking undefined behavior when p happens - // to be null - and p-1 would be meaningless anyway. - if (p == NULL) + // Avoid invoking undefined behavior (arithmetic on a null pointer) + // by storing p instead of p-1. (What would the latter even mean?!) + // This complements the special case in NFA::Search(). + if (p == NULL) { + CopyCapture(match_, t->capture); + match_[1] = p; + matched_ = true; break; + } if (endmatch_ && p-1 != etext_) break; @@ -593,6 +598,18 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, fprintf(stderr, "dead\n"); break; } + + // Avoid invoking undefined behavior (arithmetic on a null pointer) + // by simply not continuing the loop. + // This complements the special case in NFA::Step(). + if (p == NULL) { + (void)Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p); + DCHECK_EQ(runq->size(), 0); + using std::swap; + swap(nextq, runq); + nextq->clear(); + break; + } } for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) diff --git a/re2/parse.cc b/re2/parse.cc index 11f8b3b..50dfdac 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -1323,14 +1323,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) { // Parses a decimal integer, storing it in *np. // Sets *s to span the remainder of the string. 
static bool ParseInteger(StringPiece* s, int* np) { - if (s->size() == 0 || !isdigit((*s)[0] & 0xFF)) + if (s->empty() || !isdigit((*s)[0] & 0xFF)) return false; // Disallow leading zeros. if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF)) return false; int n = 0; int c; - while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) { + while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) { // Avoid overflow. if (n >= 100000000) return false; @@ -1352,16 +1352,16 @@ static bool ParseInteger(StringPiece* s, int* np) { // s must NOT be edited unless MaybeParseRepetition returns true. static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) { StringPiece s = *sp; - if (s.size() == 0 || s[0] != '{') + if (s.empty() || s[0] != '{') return false; s.remove_prefix(1); // '{' if (!ParseInteger(&s, lo)) return false; - if (s.size() == 0) + if (s.empty()) return false; if (s[0] == ',') { s.remove_prefix(1); // ',' - if (s.size() == 0) + if (s.empty()) return false; if (s[0] == '}') { // {2,} means at least 2 @@ -1375,7 +1375,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) { // {2} means exactly two *hi = *lo; } - if (s.size() == 0 || s[0] != '}') + if (s.empty() || s[0] != '}') return false; s.remove_prefix(1); // '}' *sp = s; @@ -1416,7 +1416,7 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) { StringPiece t = s; Rune r; - while (t.size() > 0) { + while (!t.empty()) { if (StringPieceToRune(&r, &t, status) < 0) return false; } @@ -1448,13 +1448,13 @@ static int UnHex(int c) { static bool ParseEscape(StringPiece* s, Rune* rp, RegexpStatus* status, int rune_max) { const char* begin = s->data(); - if (s->size() < 1 || (*s)[0] != '\\') { + if (s->empty() || (*s)[0] != '\\') { // Should not happen - caller always checks. 
status->set_code(kRegexpInternalError); status->set_error_arg(StringPiece()); return false; } - if (s->size() < 2) { + if (s->size() == 1) { status->set_code(kRegexpTrailingBackslash); status->set_error_arg(StringPiece()); return false; @@ -1485,16 +1485,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp, case '6': case '7': // Single non-zero octal digit is a backreference; not supported. - if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7') + if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7') goto BadEscape; FALLTHROUGH_INTENDED; case '0': // consume up to three octal digits; already have one. code = c - '0'; - if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') { + if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') { code = code * 8 + c - '0'; s->remove_prefix(1); // digit - if (s->size() > 0) { + if (!s->empty()) { c = (*s)[0]; if ('0' <= c && c <= '7') { code = code * 8 + c - '0'; @@ -1509,7 +1509,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp, // Hexadecimal escapes case 'x': - if (s->size() == 0) + if (s->empty()) goto BadEscape; if (StringPieceToRune(&c, s, status) < 0) return false; @@ -1529,7 +1529,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp, code = code * 16 + UnHex(c); if (code > rune_max) goto BadEscape; - if (s->size() == 0) + if (s->empty()) goto BadEscape; if (StringPieceToRune(&c, s, status) < 0) return false; @@ -1540,7 +1540,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp, return true; } // Easy case: two hex digits. - if (s->size() == 0) + if (s->empty()) goto BadEscape; if (StringPieceToRune(&c1, s, status) < 0) return false; @@ -1771,7 +1771,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, // Chop seq where s now begins. 
seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data())); - if (name.size() > 0 && name[0] == '^') { + if (!name.empty() && name[0] == '^') { sign = -sign; name.remove_prefix(1); // '^' } @@ -1858,7 +1858,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags, bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp, const StringPiece& whole_class, RegexpStatus* status) { - if (s->size() == 0) { + if (s->empty()) { status->set_code(kRegexpMissingBracket); status->set_error_arg(whole_class); return false; @@ -1866,7 +1866,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp, // Allow regular escape sequences even though // many need not be escaped in this context. - if (s->size() >= 1 && (*s)[0] == '\\') + if ((*s)[0] == '\\') return ParseEscape(s, rp, status, rune_max_); // Otherwise take the next rune. @@ -1908,7 +1908,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, Regexp** out_re, RegexpStatus* status) { StringPiece whole_class = *s; - if (s->size() == 0 || (*s)[0] != '[') { + if (s->empty() || (*s)[0] != '[') { // Caller checked this. status->set_code(kRegexpInternalError); status->set_error_arg(StringPiece()); @@ -1918,7 +1918,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); re->ccb_ = new CharClassBuilder; s->remove_prefix(1); // '[' - if (s->size() > 0 && (*s)[0] == '^') { + if (!s->empty() && (*s)[0] == '^') { s->remove_prefix(1); // '^' negated = true; if (!(flags_ & ClassNL) || (flags_ & NeverNL)) { @@ -1928,7 +1928,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, } } bool first = true; // ] is okay as first char in class - while (s->size() > 0 && ((*s)[0] != ']' || first)) { + while (!s->empty() && ((*s)[0] != ']' || first)) { // - is only okay unescaped as first or last in class. // Except that Perl allows - anywhere. 
if ((*s)[0] == '-' && !first && !(flags_&PerlX) && @@ -1996,7 +1996,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, // in the flags. re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL); } - if (s->size() == 0) { + if (s->empty()) { status->set_code(kRegexpMissingBracket); status->set_error_arg(whole_class); re->Decref(); @@ -2016,7 +2016,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, // Python rejects names starting with digits. // We don't enforce either of those. static bool IsValidCaptureName(const StringPiece& name) { - if (name.size() == 0) + if (name.empty()) return false; for (size_t i = 0; i < name.size(); i++) { int c = name[i]; @@ -2099,7 +2099,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { int nflags = flags_; Rune c; for (bool done = false; !done; ) { - if (t.size() == 0) + if (t.empty()) goto BadPerlOp; if (StringPieceToRune(&c, &t, status_) < 0) return false; @@ -2217,7 +2217,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, if (global_flags & Literal) { // Special parse loop for literal string. - while (t.size() > 0) { + while (!t.empty()) { Rune r; if (StringPieceToRune(&r, &t, status) < 0) return NULL; @@ -2228,7 +2228,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, } StringPiece lastunary = StringPiece(); - while (t.size() > 0) { + while (!t.empty()) { StringPiece isunary = StringPiece(); switch (t[0]) { default: { @@ -2312,11 +2312,11 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, bool nongreedy = false; t.remove_prefix(1); // '*' or '+' or '?' if (ps.flags() & PerlX) { - if (t.size() > 0 && t[0] == '?') { + if (!t.empty() && t[0] == '?') { nongreedy = true; t.remove_prefix(1); // '?' } - if (lastunary.size() > 0) { + if (!lastunary.empty()) { // In Perl it is not allowed to stack repetition operators: // a** is a syntax error, not a double-star. // (and a++ means something else entirely, which we don't support!) 
@@ -2347,11 +2347,11 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, } bool nongreedy = false; if (ps.flags() & PerlX) { - if (t.size() > 0 && t[0] == '?') { + if (!t.empty() && t[0] == '?') { nongreedy = true; t.remove_prefix(1); // '?' } - if (lastunary.size() > 0) { + if (!lastunary.empty()) { // Not allowed to stack repetition operators. status->set_code(kRegexpRepeatOp); status->set_error_arg(StringPiece( @@ -2405,7 +2405,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, if (t[1] == 'Q') { // \Q ... \E: the ... is always literals t.remove_prefix(2); // '\\', 'Q' - while (t.size() > 0) { + while (!t.empty()) { if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') { t.remove_prefix(2); // '\\', 'E' break; @@ -408,7 +408,7 @@ int RE2::GlobalReplace(std::string* str, break; if (p < vec[0].data()) out.append(p, vec[0].data() - p); - if (vec[0].data() == lastend && vec[0].size() == 0) { + if (vec[0].data() == lastend && vec[0].empty()) { // Disallow empty match at end of last match: skip ahead. // // fullrune() takes int, not ptrdiff_t. However, it just looks @@ -934,7 +934,7 @@ bool RE2::Rewrite(std::string* out, return false; } StringPiece snip = vec[n]; - if (snip.size() > 0) + if (!snip.empty()) out->append(snip.data(), snip.size()); } else if (c == '\\') { out->push_back('\\'); diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc index 6cde42d..1e888da 100644 --- a/re2/testing/backtrack.cc +++ b/re2/testing/backtrack.cc @@ -148,6 +148,10 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context, cap_[0] = p; if (Visit(prog_->start(), p)) // Match must be leftmost; done. return true; + // Avoid invoking undefined behavior (arithmetic on a null pointer) + // by simply not continuing the loop. 
+ if (p == NULL) + break; } return false; } diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc index d156c88..3eeda25 100644 --- a/re2/testing/regexp_generator.cc +++ b/re2/testing/regexp_generator.cc @@ -256,7 +256,7 @@ std::vector<std::string> Explode(const StringPiece& s) { std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) { std::vector<std::string> v; - if (sep.size() == 0) + if (sep.empty()) return Explode(s); const char *p = s.data(); diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc index 86eec53..67d262c 100644 --- a/re2/testing/tester.cc +++ b/re2/testing/tester.cc @@ -645,7 +645,7 @@ static Prog::Anchor anchors[] = { bool Tester::TestInput(const StringPiece& text) { bool okay = TestInputInContext(text, text); - if (text.size() > 0) { + if (!text.empty()) { StringPiece sp; sp = text; sp.remove_prefix(1); diff --git a/re2/walker-inl.h b/re2/walker-inl.h index 032b8ac..310be54 100644 --- a/re2/walker-inl.h +++ b/re2/walker-inl.h @@ -150,7 +150,7 @@ template<typename T> void Regexp::Walker<T>::Reset() { if (stack_ && stack_->size() > 0) { LOG(DFATAL) << "Stack not empty."; while (stack_->size() > 0) { - delete stack_->top().child_args; + delete[] stack_->top().child_args; stack_->pop(); } } |