summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2018-02-20 15:37:38 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2018-02-20 15:37:38 +0900
commita3a5ffe62bc766304e422d5ce04d22638cf6eacd (patch)
tree2c260ddbe400507216779578de476cfef3736225
parent4214b4091520393b9c06dca78eb304cfbcee2c04 (diff)
downloadre2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.tar.gz
re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.tar.bz2
re2-a3a5ffe62bc766304e422d5ce04d22638cf6eacd.zip
Imported Upstream version 20180101upstream/20180101
Change-Id: I927f049290b4c0f3c9ffc5da1d2deda29d583f8a Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r--CMakeLists.txt7
-rwxr-xr-xkokoro/windows-cmake.bat4
-rw-r--r--re2/dfa.cc2
-rw-r--r--re2/fuzzing/re2_fuzzer.cc2
-rw-r--r--re2/parse.cc335
-rw-r--r--re2/regexp.h3
-rw-r--r--re2/testing/dfa_test.cc4
-rw-r--r--re2/testing/parse_test.cc12
-rw-r--r--re2/testing/simplify_test.cc2
9 files changed, 239 insertions, 132 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 884873b..f87c4ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,8 +22,8 @@ option(RE2_BUILD_TESTING "enable testing for RE2" ON)
set(EXTRA_TARGET_LINK_LIBRARIES)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- if(MSVC_VERSION LESS 1800)
- message(FATAL_ERROR "you need Visual Studio 2013 or later")
+ if(MSVC_VERSION LESS 1900)
+ message(FATAL_ERROR "you need Visual Studio 2015 or later")
endif()
if(BUILD_SHARED_LIBS)
# See http://www.kitware.com/blog/home/post/939 for details.
@@ -33,6 +33,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
# CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX,
# so we disable various warnings that aren't particularly helpful.
add_compile_options(/wd4100 /wd4201 /wd4456 /wd4457 /wd4702 /wd4815)
+ # Without a byte order mark (BOM), Visual Studio assumes that the source
+ # file is encoded using the current user code page, so we specify UTF-8.
+ add_compile_options(/utf-8)
elseif(CYGWIN OR MINGW)
# See https://stackoverflow.com/questions/38139631 for details.
add_compile_options(-std=gnu++11)
diff --git a/kokoro/windows-cmake.bat b/kokoro/windows-cmake.bat
index b6d2f6f..6b04b65 100755
--- a/kokoro/windows-cmake.bat
+++ b/kokoro/windows-cmake.bat
@@ -1,10 +1,10 @@
CD git/re2 || EXIT /B 1
-cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 12 2013" -A x64 . || EXIT /B 1
+cmake -D CMAKE_BUILD_TYPE=Debug -G "Visual Studio 14 2015" -A x64 . || EXIT /B 1
cmake --build . --config Debug --clean-first || EXIT /B 1
ctest -C Debug --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1
-cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 12 2013" -A x64 . || EXIT /B 1
+cmake -D CMAKE_BUILD_TYPE=Release -G "Visual Studio 14 2015" -A x64 . || EXIT /B 1
cmake --build . --config Release --clean-first || EXIT /B 1
ctest -C Release --output-on-failure -E dfa^|exhaustive^|random || EXIT /B 1
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 2720a5e..bc81fea 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -126,7 +126,7 @@ class DFA {
// into this state, along with kFlagMatch if this
// is a matching state.
-// Work around the bug affecting flexible array members in GCC 6.1 and 6.2.
+// Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
std::atomic<State*> next_[0]; // Outgoing arrows from State,
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index e0aa96a..d3cd145 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -56,7 +56,7 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
// Entry point for libFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- if (size == 0 || size > 1024)
+ if (size == 0 || size > 512)
return 0;
// Crudely limit the use of \p and \P.
diff --git a/re2/parse.cc b/re2/parse.cc
index 2b46e3e..3374345 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -23,6 +23,7 @@
#include <algorithm>
#include <map>
#include <string>
+#include <vector>
#include "util/util.h"
#include "util/logging.h"
@@ -865,59 +866,180 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
}
}
+// In the context of factoring alternations, a Splice is: a factored prefix or
+// merged character class computed by one iteration of one round of factoring;
+// the span of subexpressions of the alternation to be "spliced" (i.e. removed
+// and replaced); and, for a factored prefix, the number of suffixes after any
+// factoring that might have subsequently been performed on them. For a merged
+// character class, there are no suffixes, of course, so the field is ignored.
+struct Splice {
+ Splice(Regexp* prefix, Regexp** sub, int nsub)
+ : prefix(prefix),
+ sub(sub),
+ nsub(nsub),
+ nsuffix(-1) {}
+
+ Regexp* prefix;
+ Regexp** sub;
+ int nsub;
+ int nsuffix;
+};
+
+// Named so because it is used to implement an explicit stack, a Frame is: the
+// span of subexpressions of the alternation to be factored; the current round
+// of factoring; any Splices computed; and, for a factored prefix, an iterator
+// to the next Splice to be factored (i.e. in another Frame) because suffixes.
+struct Frame {
+ Frame(Regexp** sub, int nsub)
+ : sub(sub),
+ nsub(nsub),
+ round(0) {}
+
+ Regexp** sub;
+ int nsub;
+ int round;
+ std::vector<Splice> splices;
+ std::vector<Splice>::iterator spliceiter;
+};
+
+// Bundled into a class for friend access to Regexp without needing to declare
+// (or define) Splice in regexp.h.
+class FactorAlternationImpl {
+ public:
+ static void Round1(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices);
+ static void Round2(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices);
+ static void Round3(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices);
+};
+
// Factors common prefixes from alternation.
// For example,
// ABC|ABD|AEF|BCX|BCY
// simplifies to
// A(B(C|D)|EF)|BC(X|Y)
-// which the normal parse state routines will further simplify to
+// and thence to
// A(B[CD]|EF)|BC[XY]
//
// Rewrites sub to contain simplified list to alternate and returns
// the new length of sub. Adjusts reference counts accordingly
// (incoming sub[i] decremented, outgoing sub[i] incremented).
+int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
+ std::vector<Frame> stk;
+ stk.emplace_back(sub, nsub);
+
+ for (;;) {
+ auto& sub = stk.back().sub;
+ auto& nsub = stk.back().nsub;
+ auto& round = stk.back().round;
+ auto& splices = stk.back().splices;
+ auto& spliceiter = stk.back().spliceiter;
+
+ if (splices.empty()) {
+ // Advance to the next round of factoring. Note that this covers
+ // the initialised state: when splices is empty and round is 0.
+ round++;
+ } else if (spliceiter != splices.end()) {
+ // We have at least one more Splice to factor. Recurse logically.
+ stk.emplace_back(spliceiter->sub, spliceiter->nsub);
+ continue;
+ } else {
+ // We have no more Splices to factor. Apply them.
+ auto iter = splices.begin();
+ int out = 0;
+ for (int i = 0; i < nsub; ) {
+ // Copy until we reach where the next Splice begins.
+ while (sub + i < iter->sub)
+ sub[out++] = sub[i++];
+ switch (round) {
+ case 1:
+ case 2: {
+ // Assemble the Splice prefix and the suffixes.
+ Regexp* re[2];
+ re[0] = iter->prefix;
+ re[1] = Regexp::AlternateNoFactor(iter->sub, iter->nsuffix, flags);
+ sub[out++] = Regexp::Concat(re, 2, flags);
+ i += iter->nsub;
+ break;
+ }
+ case 3:
+ // Just use the Splice prefix.
+ sub[out++] = iter->prefix;
+ i += iter->nsub;
+ break;
+ default:
+ LOG(DFATAL) << "unknown round: " << round;
+ break;
+ }
+ // If we are done, copy until the end of sub.
+ if (++iter == splices.end()) {
+ while (i < nsub)
+ sub[out++] = sub[i++];
+ }
+ }
+ splices.clear();
+ nsub = out;
+ // Advance to the next round of factoring.
+ round++;
+ }
-// It's too much of a pain to write this code with an explicit stack,
-// so instead we let the caller specify a maximum depth and
-// don't simplify beyond that. There are around 15 words of local
-// variables and parameters in the frame, so allowing 8 levels
-// on a 64-bit machine is still less than a kilobyte of stack and
-// probably enough benefit for practical uses.
-const int kFactorAlternationMaxDepth = 8;
-
-int Regexp::FactorAlternation(
- Regexp** sub, int n,
- Regexp::ParseFlags altflags) {
- return FactorAlternationRecursive(sub, n, altflags,
- kFactorAlternationMaxDepth);
-}
-
-int Regexp::FactorAlternationRecursive(
- Regexp** sub, int n,
- Regexp::ParseFlags altflags,
- int maxdepth) {
+ switch (round) {
+ case 1:
+ FactorAlternationImpl::Round1(sub, nsub, flags, &splices);
+ break;
+ case 2:
+ FactorAlternationImpl::Round2(sub, nsub, flags, &splices);
+ break;
+ case 3:
+ FactorAlternationImpl::Round3(sub, nsub, flags, &splices);
+ break;
+ case 4:
+ if (stk.size() == 1) {
+ // We are at the top of the stack. Just return.
+ return nsub;
+ } else {
+ // Pop the stack and set the number of suffixes.
+ // (Note that references will be invalidated!)
+ int nsuffix = nsub;
+ stk.pop_back();
+ stk.back().spliceiter->nsuffix = nsuffix;
+ ++stk.back().spliceiter;
+ continue;
+ }
+ default:
+ LOG(DFATAL) << "unknown round: " << round;
+ break;
+ }
- if (maxdepth <= 0)
- return n;
+ // Set spliceiter depending on whether we have Splices to factor.
+ if (splices.empty() || round == 3) {
+ spliceiter = splices.end();
+ } else {
+ spliceiter = splices.begin();
+ }
+ }
+}
+void FactorAlternationImpl::Round1(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices) {
// Round 1: Factor out common literal prefixes.
- Rune *rune = NULL;
+ int start = 0;
+ Rune* rune = NULL;
int nrune = 0;
Regexp::ParseFlags runeflags = Regexp::NoParseFlags;
- int start = 0;
- int out = 0;
- for (int i = 0; i <= n; i++) {
- // Invariant: what was in sub[0:start] has been Decref'ed
- // and that space has been reused for sub[0:out] (out <= start).
- //
- // Invariant: sub[start:i] consists of regexps that all begin
- // with the string rune[0:nrune].
-
+ for (int i = 0; i <= nsub; i++) {
+ // Invariant: sub[start:i] consists of regexps that all
+ // begin with rune[0:nrune].
Rune* rune_i = NULL;
int nrune_i = 0;
Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags;
- if (i < n) {
- rune_i = LeadingString(sub[i], &nrune_i, &runeflags_i);
+ if (i < nsub) {
+ rune_i = Regexp::LeadingString(sub[i], &nrune_i, &runeflags_i);
if (runeflags_i == runeflags) {
int same = 0;
while (same < nrune && same < nrune_i && rune[same] == rune_i[same])
@@ -931,37 +1053,32 @@ int Regexp::FactorAlternationRecursive(
}
// Found end of a run with common leading literal string:
- // sub[start:i] all begin with rune[0:nrune] but sub[i]
- // does not even begin with rune[0].
- //
- // Factor out common string and append factored expression to sub[0:out].
+ // sub[start:i] all begin with rune[0:nrune],
+ // but sub[i] does not even begin with rune[0].
if (i == start) {
// Nothing to do - first iteration.
} else if (i == start+1) {
// Just one: don't bother factoring.
- sub[out++] = sub[start];
} else {
- // Construct factored form: prefix(suffix1|suffix2|...)
- Regexp* x[2]; // x[0] = prefix, x[1] = suffix1|suffix2|...
- x[0] = LiteralString(rune, nrune, runeflags);
+ Regexp* prefix = Regexp::LiteralString(rune, nrune, runeflags);
for (int j = start; j < i; j++)
- RemoveLeadingString(sub[j], nrune);
- int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
- maxdepth - 1);
- x[1] = AlternateNoFactor(sub + start, nn, altflags);
- sub[out++] = Concat(x, 2, altflags);
+ Regexp::RemoveLeadingString(sub[j], nrune);
+ splices->emplace_back(prefix, sub + start, i - start);
}
- // Prepare for next round (if there is one).
- if (i < n) {
+ // Prepare for next iteration (if there is one).
+ if (i < nsub) {
start = i;
rune = rune_i;
nrune = nrune_i;
runeflags = runeflags_i;
}
}
- n = out;
+}
+void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices) {
// Round 2: Factor out common simple prefixes,
// just the first piece of each concatenation.
// This will be good enough a lot of the time.
@@ -970,19 +1087,15 @@ int Regexp::FactorAlternationRecursive(
// are not safe to factor because that collapses their
// distinct paths through the automaton, which affects
// correctness in some cases.
- start = 0;
- out = 0;
+ int start = 0;
Regexp* first = NULL;
- for (int i = 0; i <= n; i++) {
- // Invariant: what was in sub[0:start] has been Decref'ed
- // and that space has been reused for sub[0:out] (out <= start).
- //
- // Invariant: sub[start:i] consists of regexps that all begin with first.
-
+ for (int i = 0; i <= nsub; i++) {
+ // Invariant: sub[start:i] consists of regexps that all
+ // begin with first.
Regexp* first_i = NULL;
- if (i < n) {
- first_i = LeadingRegexp(sub[i]);
- if (first != NULL && Regexp::Equal(first, first_i) &&
+ if (i < nsub) {
+ first_i = Regexp::LeadingRegexp(sub[i]);
+ if (first != NULL &&
// first must be an empty-width op
// OR a char class, any char or any byte
// OR a fixed repeat of a literal, char class, any char or any byte.
@@ -1000,63 +1113,61 @@ int Regexp::FactorAlternationRecursive(
(first->sub()[0]->op() == kRegexpLiteral ||
first->sub()[0]->op() == kRegexpCharClass ||
first->sub()[0]->op() == kRegexpAnyChar ||
- first->sub()[0]->op() == kRegexpAnyByte)))) {
+ first->sub()[0]->op() == kRegexpAnyByte))) &&
+ Regexp::Equal(first, first_i))
continue;
- }
}
// Found end of a run with common leading regexp:
- // sub[start:i] all begin with first but sub[i] does not.
- //
- // Factor out common regexp and append factored expression to sub[0:out].
+ // sub[start:i] all begin with first,
+ // but sub[i] does not.
if (i == start) {
// Nothing to do - first iteration.
} else if (i == start+1) {
// Just one: don't bother factoring.
- sub[out++] = sub[start];
} else {
- // Construct factored form: prefix(suffix1|suffix2|...)
- Regexp* x[2]; // x[0] = prefix, x[1] = suffix1|suffix2|...
- x[0] = first->Incref();
+ Regexp* prefix = first->Incref();
for (int j = start; j < i; j++)
- sub[j] = RemoveLeadingRegexp(sub[j]);
- int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
- maxdepth - 1);
- x[1] = AlternateNoFactor(sub + start, nn, altflags);
- sub[out++] = Concat(x, 2, altflags);
+ sub[j] = Regexp::RemoveLeadingRegexp(sub[j]);
+ splices->emplace_back(prefix, sub + start, i - start);
}
- // Prepare for next round (if there is one).
- if (i < n) {
+ // Prepare for next iteration (if there is one).
+ if (i < nsub) {
start = i;
first = first_i;
}
}
- n = out;
-
- // Round 3: Collapse runs of single literals into character classes.
- start = 0;
- out = 0;
- for (int i = 0; i <= n; i++) {
- // Invariant: what was in sub[0:start] has been Decref'ed
- // and that space has been reused for sub[0:out] (out <= start).
- //
- // Invariant: sub[start:i] consists of regexps that are either
- // literal runes or character classes.
+}
- if (i < n &&
- (sub[i]->op() == kRegexpLiteral ||
- sub[i]->op() == kRegexpCharClass))
- continue;
+void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices) {
+ // Round 3: Merge runs of literals and/or character classes.
+ int start = 0;
+ Regexp* first = NULL;
+ for (int i = 0; i <= nsub; i++) {
+ // Invariant: sub[start:i] consists of regexps that all
+ // are either literals (i.e. runes) or character classes.
+ Regexp* first_i = NULL;
+ if (i < nsub) {
+ first_i = sub[i];
+ if (first != NULL &&
+ (first->op() == kRegexpLiteral ||
+ first->op() == kRegexpCharClass) &&
+ (first_i->op() == kRegexpLiteral ||
+ first_i->op() == kRegexpCharClass))
+ continue;
+ }
- // sub[i] is not a char or char class;
- // emit char class for sub[start:i]...
+ // Found end of a run of Literal/CharClass:
+ // sub[start:i] all are either one or the other,
+ // but sub[i] is not.
if (i == start) {
- // Nothing to do.
+ // Nothing to do - first iteration.
} else if (i == start+1) {
- sub[out++] = sub[start];
+ // Just one: don't bother factoring.
} else {
- // Make new char class.
CharClassBuilder ccb;
for (int j = start; j < i; j++) {
Regexp* re = sub[j];
@@ -1072,31 +1183,16 @@ int Regexp::FactorAlternationRecursive(
}
re->Decref();
}
- sub[out++] = NewCharClass(ccb.GetCharClass(), altflags);
+ Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags);
+ splices->emplace_back(re, sub + start, i - start);
}
- // ... and then emit sub[i].
- if (i < n)
- sub[out++] = sub[i];
- start = i+1;
- }
- n = out;
-
- // Round 4: Collapse runs of empty matches into single empty match.
- start = 0;
- out = 0;
- for (int i = 0; i < n; i++) {
- if (i + 1 < n &&
- sub[i]->op() == kRegexpEmptyMatch &&
- sub[i+1]->op() == kRegexpEmptyMatch) {
- sub[i]->Decref();
- continue;
+ // Prepare for next iteration (if there is one).
+ if (i < nsub) {
+ start = i;
+ first = first_i;
}
- sub[out++] = sub[i];
}
- n = out;
-
- return n;
}
// Collapse the regexps on top of the stack, down to the
@@ -1294,8 +1390,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
// fullrune() takes int, not size_t. However, it just looks
// at the leading byte and treats any length >= 4 the same.
- if (fullrune(sp->data(), static_cast<int>(std::min(static_cast<size_t>(4),
- sp->size())))) {
+ if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) {
int n = chartorune(r, sp->data());
// Some copies of chartorune have a bug that accepts
// encodings of values in (10FFFF, 1FFFFF] as valid.
diff --git a/re2/regexp.h b/re2/regexp.h
index fcc7c0f..2ca96cd 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -494,8 +494,7 @@ class Regexp {
// Simplifies an alternation of literal strings by factoring out
// common prefixes.
static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
- static int FactorAlternationRecursive(Regexp** sub, int nsub,
- ParseFlags flags, int maxdepth);
+ friend class FactorAlternationImpl;
// Is a == b? Only efficient on regexps that have not been through
// Simplify yet - the expansion of a kRegexpRepeat will make this
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 1db7769..7f0feea 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -79,7 +79,7 @@ TEST(SingleThreaded, BuildEntireDFA) {
CHECK(re);
for (int i = 17; i < 24; i++) {
- int64_t limit = 1<<i;
+ int64_t limit = int64_t{1}<<i;
int64_t usage;
//int64_t progusage, dfamem;
{
@@ -120,7 +120,7 @@ static string DeBruijnString(int n) {
CHECK_LT(n, static_cast<int>(8*sizeof(int)));
CHECK_GT(n, 0);
- std::vector<bool> did(1<<n);
+ std::vector<bool> did(size_t{1}<<n);
for (int i = 0; i < 1<<n; i++)
did[i] = false;
diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc
index 1e6929d..b039e13 100644
--- a/re2/testing/parse_test.cc
+++ b/re2/testing/parse_test.cc
@@ -53,7 +53,7 @@ static Test tests[] = {
{ "a{2,3}?", "nrep{2,3 lit{a}}" },
{ "a{2,}?", "nrep{2,-1 lit{a}}" },
{ "", "emp{}" },
- { "|", "emp{}" }, // alt{emp{}emp{}} but got factored
+ { "|", "alt{emp{}emp{}}" },
{ "|x|", "alt{emp{}lit{x}emp{}}" },
{ ".", "dot{}" },
{ "^", "bol{}" },
@@ -336,6 +336,16 @@ Test prefix_tests[] = {
"cat{lit{a}alt{cat{nstar{byte{}}lit{c}}cat{nstar{byte{}}lit{b}}}}" },
{ "^/a/bc|^/a/de",
"cat{bol{}cat{str{/a/}alt{str{bc}str{de}}}}" },
+ // In the past, factoring was limited to kFactorAlternationMaxDepth (8).
+ { "a|aa|aaa|aaaa|aaaaa|aaaaaa|aaaaaaa|aaaaaaaa|aaaaaaaaa|aaaaaaaaaa",
+ "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+ "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+ "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+ "lit{a}}}}}}}}}}}}}}}}}}}" },
+ { "a|aardvark|aardvarks|abaci|aback|abacus|abacuses|abaft|abalone|abalones",
+ "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
+ "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
+ "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
};
// Test that prefix factoring works.
diff --git a/re2/testing/simplify_test.cc b/re2/testing/simplify_test.cc
index 6bdb2af..43b9656 100644
--- a/re2/testing/simplify_test.cc
+++ b/re2/testing/simplify_test.cc
@@ -125,7 +125,7 @@ static Test tests[] = {
// explicit (?:) in place of non-parenthesized empty strings,
// to make them easier to spot for other parsers.
{ "(a|b|)", "([a-b]|(?:))" },
- { "(|)", "()" },
+ { "(|)", "((?:)|(?:))" },
{ "a()", "a()" },
{ "(()|())", "(()|())" },
{ "(a|)", "(a|(?:))" },