Imported Upstream version 20180101upstream/20180101

Change-Id: I927f049290b4c0f3c9ffc5da1d2deda29d583f8a Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
author: DongHun Kwak <dh0128.kwak@samsung.com> 2018-02-20 15:37:38 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2018-02-20 15:37:38 +0900
commit: a3a5ffe62bc766304e422d5ce04d22638cf6eacd (patch)
tree: 2c260ddbe400507216779578de476cfef3736225
parent: 4214b4091520393b9c06dca78eb304cfbcee2c04 (diff)
download: re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.tar.gz
re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.tar.bz2
re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.zip
9 files changed, 239 insertions, 132 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 884873b..f87c4ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,8 +22,8 @@ option(RE2_BUILD_TESTING "enable testing for RE2" ON)
 set(EXTRA_TARGET_LINK_LIBRARIES)
 
 if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-  if(MSVC_VERSION LESS 1800)
-    message(FATAL_ERROR "you need Visual Studio 2013 or later")
+  if(MSVC_VERSION LESS 1900)
+    message(FATAL_ERROR "you need Visual Studio 2015 or later")
   endif()
   if(BUILD_SHARED_LIBS)
     # See http://www.kitware.com/blog/home/post/939 for details.
@@ -33,6 +33,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
   # CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX,
   # so we disable various warnings that aren't particularly helpful.
   add_compile_options(/wd4100 /wd4201 /wd4456 /wd4457 /wd4702 /wd4815)
+  # Without a byte order mark (BOM), Visual Studio assumes that the source
+  # file is encoded using the current user code page, so we specify UTF-8.
+  add_compile_options(/utf-8)
 elseif(CYGWIN OR MINGW)
   # See https://stackoverflow.com/questions/38139631 for details.
   add_compile_options(-std=gnu++11)
diff --git a/kokoro/windows-cmake.bat b/kokoro/windows-cmake.bat
index b6d2f6f..6b04b65 100755
--- a/kokoro/windows-cmake.bat
+++ b/kokoro/windows-cmake.bat
@@ -1,10 +1,10 @@
 CD git/re2                                                            || EXIT /B 1
 
-cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 12 2013" -A x64 .   || EXIT /B 1
+cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 14 2015" -A x64 .   || EXIT /B 1
 cmake --build . --config Debug --clean-first                          || EXIT /B 1
 ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random         || EXIT /B 1
 
-cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 12 2013" -A x64 . || EXIT /B 1
+cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 14 2015" -A x64 . || EXIT /B 1
 cmake --build . --config Release --clean-first                        || EXIT /B 1
 ctest -C Release --output-on-failure -E dfa^|exhaustive^|random       || EXIT /B 1
 
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 2720a5e..bc81fea 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -126,7 +126,7 @@ class DFA {
                         // into this state, along with kFlagMatch if this
                         // is a matching state.
 
-// Work around the bug affecting flexible array members in GCC 6.1 and 6.2.
+// Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
 // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
     std::atomic<State*> next_[0];   // Outgoing arrows from State,
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index e0aa96a..d3cd145 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -56,7 +56,7 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
 
 // Entry point for libFuzzer.
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
-  if (size == 0 || size > 1024)
+  if (size == 0 || size > 512)
     return 0;
 
   // Crudely limit the use of \p and \P.
diff --git a/re2/parse.cc b/re2/parse.cc
index 2b46e3e..3374345 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -23,6 +23,7 @@
 #include <algorithm>
 #include <map>
 #include <string>
+#include <vector>
 
 #include "util/util.h"
 #include "util/logging.h"
@@ -865,59 +866,180 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
   }
 }
 
+// In the context of factoring alternations, a Splice is: a factored prefix or
+// merged character class computed by one iteration of one round of factoring;
+// the span of subexpressions of the alternation to be "spliced" (i.e. removed
+// and replaced); and, for a factored prefix, the number of suffixes after any
+// factoring that might have subsequently been performed on them. For a merged
+// character class, there are no suffixes, of course, so the field is ignored.
+struct Splice {
+  Splice(Regexp* prefix, Regexp** sub, int nsub)
+      : prefix(prefix),
+        sub(sub),
+        nsub(nsub),
+        nsuffix(-1) {}
+
+  Regexp* prefix;
+  Regexp** sub;
+  int nsub;
+  int nsuffix;
+};
+
+// Named so because it is used to implement an explicit stack, a Frame is: the
+// span of subexpressions of the alternation to be factored; the current round
+// of factoring; any Splices computed; and, for a factored prefix, an iterator
+// to the next Splice to be factored (i.e. in another Frame) because suffixes.
+struct Frame {
+  Frame(Regexp** sub, int nsub)
+      : sub(sub),
+        nsub(nsub),
+        round(0) {}
+
+  Regexp** sub;
+  int nsub;
+  int round;
+  std::vector<Splice> splices;
+  std::vector<Splice>::iterator spliceiter;
+};
+
+// Bundled into a class for friend access to Regexp without needing to declare
+// (or define) Splice in regexp.h.
+class FactorAlternationImpl {
+ public:
+  static void Round1(Regexp** sub, int nsub,
+                     Regexp::ParseFlags flags,
+                     std::vector<Splice>* splices);
+  static void Round2(Regexp** sub, int nsub,
+                     Regexp::ParseFlags flags,
+                     std::vector<Splice>* splices);
+  static void Round3(Regexp** sub, int nsub,
+                     Regexp::ParseFlags flags,
+                     std::vector<Splice>* splices);
+};
+
 // Factors common prefixes from alternation.
 // For example,
 //     ABC|ABD|AEF|BCX|BCY
 // simplifies to
 //     A(B(C|D)|EF)|BC(X|Y)
-// which the normal parse state routines will further simplify to
+// and thence to
 //     A(B[CD]|EF)|BC[XY]
 //
 // Rewrites sub to contain simplified list to alternate and returns
 // the new length of sub.  Adjusts reference counts accordingly
 // (incoming sub[i] decremented, outgoing sub[i] incremented).
+int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
+  std::vector<Frame> stk;
+  stk.emplace_back(sub, nsub);
+
+  for (;;) {
+    auto& sub = stk.back().sub;
+    auto& nsub = stk.back().nsub;
+    auto& round = stk.back().round;
+    auto& splices = stk.back().splices;
+    auto& spliceiter = stk.back().spliceiter;
+
+    if (splices.empty()) {
+      // Advance to the next round of factoring. Note that this covers
+      // the initialised state: when splices is empty and round is 0.
+      round++;
+    } else if (spliceiter != splices.end()) {
+      // We have at least one more Splice to factor. Recurse logically.
+      stk.emplace_back(spliceiter->sub, spliceiter->nsub);
+      continue;
+    } else {
+      // We have no more Splices to factor. Apply them.
+      auto iter = splices.begin();
+      int out = 0;
+      for (int i = 0; i < nsub; ) {
+        // Copy until we reach where the next Splice begins.
+        while (sub + i < iter->sub)
+          sub[out++] = sub[i++];
+        switch (round) {
+          case 1:
+          case 2: {
+            // Assemble the Splice prefix and the suffixes.
+            Regexp* re[2];
+            re[0] = iter->prefix;
+            re[1] = Regexp::AlternateNoFactor(iter->sub, iter->nsuffix, flags);
+            sub[out++] = Regexp::Concat(re, 2, flags);
+            i += iter->nsub;
+            break;
+          }
+          case 3:
+            // Just use the Splice prefix.
+            sub[out++] = iter->prefix;
+            i += iter->nsub;
+            break;
+          default:
+            LOG(DFATAL) << "unknown round: " << round;
+            break;
+        }
+        // If we are done, copy until the end of sub.
+        if (++iter == splices.end()) {
+          while (i < nsub)
+            sub[out++] = sub[i++];
+        }
+      }
+      splices.clear();
+      nsub = out;
+      // Advance to the next round of factoring.
+      round++;
+    }
 
-// It's too much of a pain to write this code with an explicit stack,
-// so instead we let the caller specify a maximum depth and
-// don't simplify beyond that.  There are around 15 words of local
-// variables and parameters in the frame, so allowing 8 levels
-// on a 64-bit machine is still less than a kilobyte of stack and
-// probably enough benefit for practical uses.
-const int kFactorAlternationMaxDepth = 8;
-
-int Regexp::FactorAlternation(
-    Regexp** sub, int n,
-    Regexp::ParseFlags altflags) {
-  return FactorAlternationRecursive(sub, n, altflags,
-                                    kFactorAlternationMaxDepth);
-}
-
-int Regexp::FactorAlternationRecursive(
-    Regexp** sub, int n,
-    Regexp::ParseFlags altflags,
-    int maxdepth) {
+    switch (round) {
+      case 1:
+        FactorAlternationImpl::Round1(sub, nsub, flags, &splices);
+        break;
+      case 2:
+        FactorAlternationImpl::Round2(sub, nsub, flags, &splices);
+        break;
+      case 3:
+        FactorAlternationImpl::Round3(sub, nsub, flags, &splices);
+        break;
+      case 4:
+        if (stk.size() == 1) {
+          // We are at the top of the stack. Just return.
+          return nsub;
+        } else {
+          // Pop the stack and set the number of suffixes.
+          // (Note that references will be invalidated!)
+          int nsuffix = nsub;
+          stk.pop_back();
+          stk.back().spliceiter->nsuffix = nsuffix;
+          ++stk.back().spliceiter;
+          continue;
+        }
+      default:
+        LOG(DFATAL) << "unknown round: " << round;
+        break;
+    }
 
-  if (maxdepth <= 0)
-    return n;
+    // Set spliceiter depending on whether we have Splices to factor.
+    if (splices.empty() || round == 3) {
+      spliceiter = splices.end();
+    } else {
+      spliceiter = splices.begin();
+    }
+  }
+}
 
+void FactorAlternationImpl::Round1(Regexp** sub, int nsub,
+                                   Regexp::ParseFlags flags,
+                                   std::vector<Splice>* splices) {
   // Round 1: Factor out common literal prefixes.
-  Rune *rune = NULL;
+  int start = 0;
+  Rune* rune = NULL;
   int nrune = 0;
   Regexp::ParseFlags runeflags = Regexp::NoParseFlags;
-  int start = 0;
-  int out = 0;
-  for (int i = 0; i <= n; i++) {
-    // Invariant: what was in sub[0:start] has been Decref'ed
-    // and that space has been reused for sub[0:out] (out <= start).
-    //
-    // Invariant: sub[start:i] consists of regexps that all begin
-    // with the string rune[0:nrune].
-
+  for (int i = 0; i <= nsub; i++) {
+    // Invariant: sub[start:i] consists of regexps that all
+    // begin with rune[0:nrune].
     Rune* rune_i = NULL;
     int nrune_i = 0;
     Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags;
-    if (i < n) {
-      rune_i = LeadingString(sub[i], &nrune_i, &runeflags_i);
+    if (i < nsub) {
+      rune_i = Regexp::LeadingString(sub[i], &nrune_i, &runeflags_i);
       if (runeflags_i == runeflags) {
         int same = 0;
         while (same < nrune && same < nrune_i && rune[same] == rune_i[same])
@@ -931,37 +1053,32 @@ int Regexp::FactorAlternationRecursive(
     }
 
     // Found end of a run with common leading literal string:
-    // sub[start:i] all begin with rune[0:nrune] but sub[i]
-    // does not even begin with rune[0].
-    //
-    // Factor out common string and append factored expression to sub[0:out].
+    // sub[start:i] all begin with rune[0:nrune],
+    // but sub[i] does not even begin with rune[0].
     if (i == start) {
       // Nothing to do - first iteration.
     } else if (i == start+1) {
       // Just one: don't bother factoring.
-      sub[out++] = sub[start];
     } else {
-      // Construct factored form: prefix(suffix1|suffix2|...)
-      Regexp* x[2];  // x[0] = prefix, x[1] = suffix1|suffix2|...
-      x[0] = LiteralString(rune, nrune, runeflags);
+      Regexp* prefix = Regexp::LiteralString(rune, nrune, runeflags);
       for (int j = start; j < i; j++)
-        RemoveLeadingString(sub[j], nrune);
-      int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
-                                          maxdepth - 1);
-      x[1] = AlternateNoFactor(sub + start, nn, altflags);
-      sub[out++] = Concat(x, 2, altflags);
+        Regexp::RemoveLeadingString(sub[j], nrune);
+      splices->emplace_back(prefix, sub + start, i - start);
     }
 
-    // Prepare for next round (if there is one).
-    if (i < n) {
+    // Prepare for next iteration (if there is one).
+    if (i < nsub) {
       start = i;
       rune = rune_i;
       nrune = nrune_i;
       runeflags = runeflags_i;
     }
   }
-  n = out;
+}
 
+void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
+                                   Regexp::ParseFlags flags,
+                                   std::vector<Splice>* splices) {
   // Round 2: Factor out common simple prefixes,
   // just the first piece of each concatenation.
   // This will be good enough a lot of the time.
@@ -970,19 +1087,15 @@ int Regexp::FactorAlternationRecursive(
   // are not safe to factor because that collapses their
   // distinct paths through the automaton, which affects
   // correctness in some cases.
-  start = 0;
-  out = 0;
+  int start = 0;
   Regexp* first = NULL;
-  for (int i = 0; i <= n; i++) {
-    // Invariant: what was in sub[0:start] has been Decref'ed
-    // and that space has been reused for sub[0:out] (out <= start).
-    //
-    // Invariant: sub[start:i] consists of regexps that all begin with first.
-
+  for (int i = 0; i <= nsub; i++) {
+    // Invariant: sub[start:i] consists of regexps that all
+    // begin with first.
     Regexp* first_i = NULL;
-    if (i < n) {
-      first_i = LeadingRegexp(sub[i]);
-      if (first != NULL && Regexp::Equal(first, first_i) &&
+    if (i < nsub) {
+      first_i = Regexp::LeadingRegexp(sub[i]);
+      if (first != NULL &&
           // first must be an empty-width op
           // OR a char class, any char or any byte
           // OR a fixed repeat of a literal, char class, any char or any byte.
@@ -1000,63 +1113,61 @@ int Regexp::FactorAlternationRecursive(
             (first->sub()[0]->op() == kRegexpLiteral ||
              first->sub()[0]->op() == kRegexpCharClass ||
              first->sub()[0]->op() == kRegexpAnyChar ||
-             first->sub()[0]->op() == kRegexpAnyByte)))) {
+             first->sub()[0]->op() == kRegexpAnyByte))) &&
+          Regexp::Equal(first, first_i))
         continue;
-      }
     }
 
     // Found end of a run with common leading regexp:
-    // sub[start:i] all begin with first but sub[i] does not.
-    //
-    // Factor out common regexp and append factored expression to sub[0:out].
+    // sub[start:i] all begin with first,
+    // but sub[i] does not.
     if (i == start) {
       // Nothing to do - first iteration.
     } else if (i == start+1) {
       // Just one: don't bother factoring.
-      sub[out++] = sub[start];
     } else {
-      // Construct factored form: prefix(suffix1|suffix2|...)
-      Regexp* x[2];  // x[0] = prefix, x[1] = suffix1|suffix2|...
-      x[0] = first->Incref();
+      Regexp* prefix = first->Incref();
       for (int j = start; j < i; j++)
-        sub[j] = RemoveLeadingRegexp(sub[j]);
-      int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
-                                   maxdepth - 1);
-      x[1] = AlternateNoFactor(sub + start, nn, altflags);
-      sub[out++] = Concat(x, 2, altflags);
+        sub[j] = Regexp::RemoveLeadingRegexp(sub[j]);
+      splices->emplace_back(prefix, sub + start, i - start);
     }
 
-    // Prepare for next round (if there is one).
-    if (i < n) {
+    // Prepare for next iteration (if there is one).
+    if (i < nsub) {
       start = i;
       first = first_i;
     }
   }
-  n = out;
-
-  // Round 3: Collapse runs of single literals into character classes.
-  start = 0;
-  out = 0;
-  for (int i = 0; i <= n; i++) {
-    // Invariant: what was in sub[0:start] has been Decref'ed
-    // and that space has been reused for sub[0:out] (out <= start).
-    //
-    // Invariant: sub[start:i] consists of regexps that are either
-    // literal runes or character classes.
+}
 
-    if (i < n &&
-        (sub[i]->op() == kRegexpLiteral ||
-         sub[i]->op() == kRegexpCharClass))
-      continue;
+void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
+                                   Regexp::ParseFlags flags,
+                                   std::vector<Splice>* splices) {
+  // Round 3: Merge runs of literals and/or character classes.
+  int start = 0;
+  Regexp* first = NULL;
+  for (int i = 0; i <= nsub; i++) {
+    // Invariant: sub[start:i] consists of regexps that all
+    // are either literals (i.e. runes) or character classes.
+    Regexp* first_i = NULL;
+    if (i < nsub) {
+      first_i = sub[i];
+      if (first != NULL &&
+          (first->op() == kRegexpLiteral ||
+           first->op() == kRegexpCharClass) &&
+          (first_i->op() == kRegexpLiteral ||
+           first_i->op() == kRegexpCharClass))
+        continue;
+    }
 
-    // sub[i] is not a char or char class;
-    // emit char class for sub[start:i]...
+    // Found end of a run of Literal/CharClass:
+    // sub[start:i] all are either one or the other,
+    // but sub[i] is not.
     if (i == start) {
-      // Nothing to do.
+      // Nothing to do - first iteration.
     } else if (i == start+1) {
-      sub[out++] = sub[start];
+      // Just one: don't bother factoring.
     } else {
-      // Make new char class.
       CharClassBuilder ccb;
       for (int j = start; j < i; j++) {
         Regexp* re = sub[j];
@@ -1072,31 +1183,16 @@ int Regexp::FactorAlternationRecursive(
         }
         re->Decref();
       }
-      sub[out++] = NewCharClass(ccb.GetCharClass(), altflags);
+      Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags);
+      splices->emplace_back(re, sub + start, i - start);
     }
 
-    // ... and then emit sub[i].
-    if (i < n)
-      sub[out++] = sub[i];
-    start = i+1;
-  }
-  n = out;
-
-  // Round 4: Collapse runs of empty matches into single empty match.
-  start = 0;
-  out = 0;
-  for (int i = 0; i < n; i++) {
-    if (i + 1 < n &&
-        sub[i]->op() == kRegexpEmptyMatch &&
-        sub[i+1]->op() == kRegexpEmptyMatch) {
-      sub[i]->Decref();
-      continue;
+    // Prepare for next iteration (if there is one).
+    if (i < nsub) {
+      start = i;
+      first = first_i;
     }
-    sub[out++] = sub[i];
   }
-  n = out;
-
-  return n;
 }
 
 // Collapse the regexps on top of the stack, down to the
@@ -1294,8 +1390,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
 static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
   // fullrune() takes int, not size_t. However, it just looks
   // at the leading byte and treats any length >= 4 the same.
-  if (fullrune(sp->data(), static_cast<int>(std::min(static_cast<size_t>(4),
-                                                     sp->size())))) {
+  if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) {
     int n = chartorune(r, sp->data());
     // Some copies of chartorune have a bug that accepts
     // encodings of values in (10FFFF, 1FFFFF] as valid.
diff --git a/re2/regexp.h b/re2/regexp.h
index fcc7c0f..2ca96cd 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -494,8 +494,7 @@ class Regexp {
   // Simplifies an alternation of literal strings by factoring out
   // common prefixes.
   static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
-  static int FactorAlternationRecursive(Regexp** sub, int nsub,
-                                        ParseFlags flags, int maxdepth);
+  friend class FactorAlternationImpl;
 
   // Is a == b?  Only efficient on regexps that have not been through
   // Simplify yet - the expansion of a kRegexpRepeat will make this
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 1db7769..7f0feea 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -79,7 +79,7 @@ TEST(SingleThreaded, BuildEntireDFA) {
   CHECK(re);
 
   for (int i = 17; i < 24; i++) {
-    int64_t limit = 1<<i;
+    int64_t limit = int64_t{1}<<i;
     int64_t usage;
     //int64_t progusage, dfamem;
     {
@@ -120,7 +120,7 @@ static string DeBruijnString(int n) {
   CHECK_LT(n, static_cast<int>(8*sizeof(int)));
   CHECK_GT(n, 0);
 
-  std::vector<bool> did(1<<n);
+  std::vector<bool> did(size_t{1}<<n);
   for (int i = 0; i < 1<<n; i++)
     did[i] = false;
 
diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc
index 1e6929d..b039e13 100644
--- a/re2/testing/parse_test.cc
+++ b/re2/testing/parse_test.cc
@@ -53,7 +53,7 @@ static Test tests[] = {
   { "a{2,3}?", "nrep{2,3 lit{a}}" },
   { "a{2,}?", "nrep{2,-1 lit{a}}" },
   { "", "emp{}" },
-  { "|", "emp{}" },  // alt{emp{}emp{}} but got factored
+  { "|", "alt{emp{}emp{}}" },
   { "|x|", "alt{emp{}lit{x}emp{}}" },
   { ".", "dot{}" },
   { "^", "bol{}" },
@@ -336,6 +336,16 @@ Test prefix_tests[] = {
     "cat{lit{a}alt{cat{nstar{byte{}}lit{c}}cat{nstar{byte{}}lit{b}}}}" },
   { "^/a/bc|^/a/de",
     "cat{bol{}cat{str{/a/}alt{str{bc}str{de}}}}" },
+  // In the past, factoring was limited to kFactorAlternationMaxDepth (8).
+  { "a|aa|aaa|aaaa|aaaaa|aaaaaa|aaaaaaa|aaaaaaaa|aaaaaaaaa|aaaaaaaaaa",
+    "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+    "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+    "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+    "lit{a}}}}}}}}}}}}}}}}}}}" },
+  { "a|aardvark|aardvarks|abaci|aback|abacus|abacuses|abaft|abalone|abalones",
+    "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
+    "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
+    "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
 };
 
 // Test that prefix factoring works.
diff --git a/re2/testing/simplify_test.cc b/re2/testing/simplify_test.cc
index 6bdb2af..43b9656 100644
--- a/re2/testing/simplify_test.cc
+++ b/re2/testing/simplify_test.cc
@@ -125,7 +125,7 @@ static Test tests[] = {
   // explicit (?:) in place of non-parenthesized empty strings,
   // to make them easier to spot for other parsers.
   { "(a|b|)", "([a-b]|(?:))" },
-  { "(|)", "()" },
+  { "(|)", "((?:)|(?:))" },
   { "a()", "a()" },
   { "(()|())", "(()|())" },
   { "(a|)", "(a|(?:))" },
author	DongHun Kwak <dh0128.kwak@samsung.com>	2018-02-20 15:37:38 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2018-02-20 15:37:38 +0900
commit	a3a5ffe62bc766304e422d5ce04d22638cf6eacd (patch)
tree	2c260ddbe400507216779578de476cfef3736225
parent	4214b4091520393b9c06dca78eb304cfbcee2c04 (diff)
download	re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.tar.gz re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.tar.bz2 re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.zip