diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2019-09-25 15:37:02 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2019-09-25 15:37:02 +0900 |
commit | 036d6d0c2f3d30b6e783bd6c82ab27b704f90888 (patch) | |
tree | 4c749a361166b564edaa18f2bec15c38eac7c5f5 | |
parent | 6e7d66a40428088282c43d15e7ecf9aa66078096 (diff) | |
download | re2-036d6d0c2f3d30b6e783bd6c82ab27b704f90888.tar.gz re2-036d6d0c2f3d30b6e783bd6c82ab27b704f90888.tar.bz2 re2-036d6d0c2f3d30b6e783bd6c82ab27b704f90888.zip |
Imported Upstream version 20190901upstream/20190901
-rw-r--r-- | .travis.yml | 61 | ||||
-rw-r--r-- | BUILD | 124 | ||||
-rw-r--r-- | CMakeLists.txt | 18 | ||||
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | WORKSPACE | 9 | ||||
-rwxr-xr-x | kokoro/bazel.sh | 12 | ||||
-rwxr-xr-x | kokoro/cmake.sh | 2 | ||||
-rw-r--r-- | kokoro/ubuntu-cmake.cfg | 1 | ||||
-rwxr-xr-x | kokoro/ubuntu-cmake.sh | 4 | ||||
-rw-r--r-- | re2.pc | 2 | ||||
-rw-r--r-- | re2/bitstate.cc | 21 | ||||
-rw-r--r-- | re2/dfa.cc | 26 | ||||
-rw-r--r-- | re2/nfa.cc | 31 | ||||
-rw-r--r-- | re2/onepass.cc | 6 | ||||
-rw-r--r-- | re2/parse.cc | 31 | ||||
-rw-r--r-- | re2/prog.cc | 12 | ||||
-rw-r--r-- | re2/re2.cc | 12 | ||||
-rw-r--r-- | re2/testing/backtrack.cc | 21 | ||||
-rw-r--r-- | re2/testing/exhaustive_tester.cc | 2 | ||||
-rw-r--r-- | re2/testing/regexp_generator.cc | 10 | ||||
-rw-r--r-- | re2/testing/tester.cc | 8 | ||||
-rw-r--r-- | re2_test.bzl | 12 |
22 files changed, 240 insertions, 191 deletions
diff --git a/.travis.yml b/.travis.yml index f89c96d..71561ea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: cpp sudo: false -dist: trusty +dist: xenial script: - make - make test @@ -76,37 +76,8 @@ matrix: apt: sources: - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.5 - packages: - - clang-3.5 - env: - - MATRIX_EVAL="CC=clang-3.5 CXX=clang++-3.5" - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.6 - packages: - - clang-3.6 - env: - - MATRIX_EVAL="CC=clang-3.6 CXX=clang++-3.6" - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.7 - packages: - - clang-3.7 - env: - - MATRIX_EVAL="CC=clang-3.7 CXX=clang++-3.7" - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.8 + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-3.8 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' packages: - clang-3.8 env: @@ -116,7 +87,8 @@ matrix: apt: sources: - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.9 + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-3.9 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' packages: - clang-3.9 env: @@ -126,7 +98,8 @@ matrix: apt: sources: - ubuntu-toolchain-r-test - - llvm-toolchain-trusty-4.0 + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-4.0 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' packages: - clang-4.0 env: @@ -136,7 +109,8 @@ matrix: apt: sources: - ubuntu-toolchain-r-test - - llvm-toolchain-trusty-5.0 + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' packages: - clang-5.0 env: @@ -146,7 +120,7 @@ matrix: apt: sources: - ubuntu-toolchain-r-test - - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-6.0 main' + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-6.0 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' packages: - clang-6.0 @@ -157,7 +131,7 @@ matrix: apt: sources: - ubuntu-toolchain-r-test - - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main' + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' packages: - clang-7 @@ -168,12 +142,23 @@ matrix: apt: sources: - ubuntu-toolchain-r-test - - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-8 main' + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-8 main' key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' packages: - clang-8 env: - MATRIX_EVAL="CC=clang-8 CXX=clang++-8" + - os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - sourceline: 'deb https://apt.llvm.org/xenial/ llvm-toolchain-xenial-9 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' + packages: + - clang-9 + env: + - MATRIX_EVAL="CC=clang-9 CXX=clang++-9" before_install: - eval "${MATRIX_EVAL}" @@ -23,6 +23,8 @@ config_setting( values = {"cpu": "x64_windows_msvc"}, ) +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") + cc_library( name = "re2", srcs = [ @@ -122,106 +124,144 @@ cc_library( deps = [":testing"], ) -load(":re2_test.bzl", "re2_test") - -re2_test( - "charclass_test", +cc_test( + name = "charclass_test", size = "small", + srcs = ["re2/testing/charclass_test.cc"], + deps = [":test"], ) -re2_test( - "compile_test", +cc_test( + name = "compile_test", size = "small", + srcs = ["re2/testing/compile_test.cc"], + deps = [":test"], ) -re2_test( - "filtered_re2_test", +cc_test( + name = "filtered_re2_test", size = "small", + srcs = ["re2/testing/filtered_re2_test.cc"], + deps = [":test"], ) -re2_test( - "mimics_pcre_test", +cc_test( + name = "mimics_pcre_test", size = "small", + srcs = ["re2/testing/mimics_pcre_test.cc"], + deps = [":test"], ) -re2_test( - "parse_test", +cc_test( + name = "parse_test", size = "small", + srcs = ["re2/testing/parse_test.cc"], + deps = [":test"], ) -re2_test( - "possible_match_test", +cc_test( + name = "possible_match_test", size = "small", + srcs = ["re2/testing/possible_match_test.cc"], + deps = [":test"], ) -re2_test( - "re2_arg_test", +cc_test( + name = "re2_arg_test", size = "small", + srcs = ["re2/testing/re2_arg_test.cc"], + deps = [":test"], ) -re2_test( - "re2_test", +cc_test( + name = "re2_test", size = "small", + srcs = ["re2/testing/re2_test.cc"], + deps = [":test"], ) -re2_test( - "regexp_test", +cc_test( + name = "regexp_test", size = "small", + srcs = ["re2/testing/regexp_test.cc"], + deps = [":test"], ) -re2_test( - "required_prefix_test", +cc_test( + name = "required_prefix_test", size = "small", + srcs = ["re2/testing/required_prefix_test.cc"], + deps = [":test"], ) -re2_test( - "search_test", +cc_test( + name = "search_test", size = "small", + srcs = ["re2/testing/search_test.cc"], + deps = [":test"], ) -re2_test( - "set_test", +cc_test( + name = "set_test", size = "small", + srcs = ["re2/testing/set_test.cc"], + deps = [":test"], ) -re2_test( - "simplify_test", +cc_test( + name = "simplify_test", size = "small", + srcs = ["re2/testing/simplify_test.cc"], + deps = [":test"], ) -re2_test( - "string_generator_test", +cc_test( + name = "string_generator_test", size = "small", + srcs = ["re2/testing/string_generator_test.cc"], + deps = [":test"], ) -re2_test( - "dfa_test", +cc_test( + name = "dfa_test", size = "large", + srcs = ["re2/testing/dfa_test.cc"], + deps = [":test"], ) -re2_test( - "exhaustive1_test", +cc_test( + name = "exhaustive1_test", size = "large", + srcs = ["re2/testing/exhaustive1_test.cc"], + deps = [":test"], ) -re2_test( - "exhaustive2_test", +cc_test( + name = "exhaustive2_test", size = "large", + srcs = ["re2/testing/exhaustive2_test.cc"], + deps = [":test"], ) -re2_test( - "exhaustive3_test", +cc_test( + name = "exhaustive3_test", size = "large", + srcs = ["re2/testing/exhaustive3_test.cc"], + deps = [":test"], ) -re2_test( - "exhaustive_test", +cc_test( + name = "exhaustive_test", size = "large", + srcs = ["re2/testing/exhaustive_test.cc"], + deps = [":test"], ) -re2_test( - "random_test", +cc_test( + name = "random_test", size = "large", + srcs = ["re2/testing/random_test.cc"], + deps = [":test"], ) cc_library( diff --git a/CMakeLists.txt b/CMakeLists.txt index 639c715..5c980f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -# Old enough to support Ubuntu Trusty. -cmake_minimum_required(VERSION 2.8.12) +# Old enough to support Ubuntu Xenial. +cmake_minimum_required(VERSION 3.5.1) if(POLICY CMP0048) cmake_policy(SET CMP0048 NEW) @@ -11,6 +11,7 @@ endif() project(RE2 CXX) include(CTest) +include(GNUInstallDirs) option(BUILD_SHARED_LIBS "build shared libraries" OFF) option(USEPCRE "use PCRE in tests and benchmarks" OFF) @@ -27,7 +28,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") endif() if(BUILD_SHARED_LIBS) # See http://www.kitware.com/blog/home/post/939 for details. - cmake_minimum_required(VERSION 3.4) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) endif() # CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX, @@ -147,6 +147,12 @@ set(RE2_HEADERS re2/stringpiece.h ) -install(FILES ${RE2_HEADERS} DESTINATION include/re2) -install(TARGETS re2 EXPORT re2Config ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin INCLUDES DESTINATION include) -install(EXPORT re2Config DESTINATION lib/cmake/re2 NAMESPACE re2::) +install(FILES ${RE2_HEADERS} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/re2) +install(TARGETS re2 EXPORT re2Config + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install(EXPORT re2Config + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2 NAMESPACE re2::) @@ -276,10 +276,8 @@ install: obj/libre2.a obj/so/libre2.$(SOEXT) ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER) ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT) $(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc - $(SED_INPLACE) -e "s#@prefix@#${prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc - $(SED_INPLACE) -e "s#@exec_prefix@#${exec_prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc - $(SED_INPLACE) -e "s#@includedir@#${includedir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc - $(SED_INPLACE) -e "s#@libdir@#${libdir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc + $(SED_INPLACE) -e "s#@includedir@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc + $(SED_INPLACE) -e "s#@libdir@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc testinstall: static-testinstall shared-testinstall @echo @@ -3,4 +3,13 @@ # license that can be found in the LICENSE file. # Bazel (http://bazel.io/) WORKSPACE file for RE2. + workspace(name = "com_googlesource_code_re2") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_cc", + strip_prefix = "rules_cc-master", + urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"], +) diff --git a/kokoro/bazel.sh b/kokoro/bazel.sh index 95aee2e..75edc02 100755 --- a/kokoro/bazel.sh +++ b/kokoro/bazel.sh @@ -3,6 +3,18 @@ set -eux cd git/re2 +case "${KOKORO_JOB_NAME}" in + */windows-*) + choco upgrade bazel -y -i + # Pin to Visual Studio 2015, which is the minimum that we support. + export BAZEL_VC='C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC' + ;; + *) + # Use the script provided by Kokoro. + use_bazel.sh latest + ;; +esac + bazel clean bazel build --compilation_mode=dbg -- //:all bazel test --compilation_mode=dbg --test_output=errors -- //:all \ diff --git a/kokoro/cmake.sh b/kokoro/cmake.sh index 999fbfe..7e355f8 100755 --- a/kokoro/cmake.sh +++ b/kokoro/cmake.sh @@ -5,6 +5,8 @@ cd git/re2 case "${KOKORO_JOB_NAME}" in */windows-*) + export PATH+=':/cygdrive/c/Program Files/CMake/bin' + # Pin to Visual Studio 2015, which is the minimum that we support. CMAKE_G_A_FLAGS=('-G' 'Visual Studio 14 2015' '-A' 'x64') ;; *) diff --git a/kokoro/ubuntu-cmake.cfg b/kokoro/ubuntu-cmake.cfg new file mode 100644 index 0000000..63d4f91 --- /dev/null +++ b/kokoro/ubuntu-cmake.cfg @@ -0,0 +1 @@ +build_file: "re2/kokoro/ubuntu-cmake.sh" diff --git a/kokoro/ubuntu-cmake.sh b/kokoro/ubuntu-cmake.sh new file mode 100755 index 0000000..ef4b7dc --- /dev/null +++ b/kokoro/ubuntu-cmake.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -eux +bash git/re2/kokoro/cmake.sh +exit $? @@ -1,5 +1,3 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ includedir=@includedir@ libdir=@libdir@ diff --git a/re2/bitstate.cc b/re2/bitstate.cc index 6f045b1..317b26f 100644 --- a/re2/bitstate.cc +++ b/re2/bitstate.cc @@ -86,7 +86,7 @@ BitState::BitState(Prog* prog) // we don't repeat the visit. bool BitState::ShouldVisit(int id, const char* p) { int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) + - static_cast<int>(p-text_.begin()); + static_cast<int>(p-text_.data()); if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1)))) return false; visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1)); @@ -134,7 +134,7 @@ void BitState::Push(int id, const char* p) { // Return whether it succeeded. bool BitState::TrySearch(int id0, const char* p0) { bool matched = false; - const char* end = text_.end(); + const char* end = text_.data() + text_.size(); njob_ = 0; // Push() no longer checks ShouldVisit(), // so we must perform the check ourselves. @@ -251,7 +251,7 @@ bool BitState::TrySearch(int id0, const char* p0) { matched = true; cap_[1] = p; if (submatch_[0].data() == NULL || - (longest_ && p > submatch_[0].end())) { + (longest_ && p > submatch_[0].data() + submatch_[0].size())) { for (int i = 0; i < nsubmatch_; i++) submatch_[i] = StringPiece(cap_[2 * i], @@ -288,7 +288,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, // Search parameters. text_ = text; context_ = context; - if (context_.begin() == NULL) + if (context_.data() == NULL) context_ = text; if (prog_->anchor_start() && context_.begin() != text.begin()) return false; @@ -319,8 +319,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, // Anchored search must start at text.begin(). if (anchored_) { - cap_[0] = text.begin(); - return TrySearch(prog_->start(), text.begin()); + cap_[0] = text.data(); + return TrySearch(prog_->start(), text.data()); } // Unanchored search, starting from each possible text position. @@ -329,13 +329,14 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, // This looks like it's quadratic in the size of the text, // but we are not clearing visited_ between calls to TrySearch, // so no work is duplicated and it ends up still being linear. - for (const char* p = text.begin(); p <= text.end(); p++) { + for (const char* p = text.data(); p <= text.data() + text.size(); p++) { // Try to use memchr to find the first byte quickly. int fb = prog_->first_byte(); - if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) { - p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p)); + if (fb >= 0 && p < text.data() + text.size() && (p[0] & 0xFF) != fb) { + p = reinterpret_cast<const char*>( + memchr(p, fb, text.data() + text.size() - p)); if (p == NULL) - p = text.end(); + p = text.data() + text.size(); } cap_[0] = p; @@ -1138,8 +1138,8 @@ DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) { mu_->ReaderLock(); } -// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations -// does not support lock upgrade. +// This function is marked as NO_THREAD_SAFETY_ANALYSIS because +// the annotations don't support lock upgrade. void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS { if (!writing_) { mu_->ReaderUnlock(); @@ -1328,10 +1328,11 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, bool want_earliest_match, bool run_forward) { State* start = params->start; - const uint8_t* bp = BytePtr(params->text.begin()); // start of text - const uint8_t* p = bp; // text scanning point - const uint8_t* ep = BytePtr(params->text.end()); // end of text - const uint8_t* resetp = NULL; // p at last cache reset + const uint8_t* bp = BytePtr(params->text.data()); // start of text + const uint8_t* p = bp; // text scanning point + const uint8_t* ep = BytePtr(params->text.data() + + params->text.size()); // end of text + const uint8_t* resetp = NULL; // p at last cache reset if (!run_forward) { using std::swap; swap(p, ep); @@ -1798,9 +1799,9 @@ bool DFA::Search(const StringPiece& text, return false; if (params.start == FullMatchState) { if (run_forward == want_earliest_match) - *epp = text.begin(); + *epp = text.data(); else - *epp = text.end(); + *epp = text.data() + text.size(); return true; } if (ExtraDebug) @@ -1863,7 +1864,7 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, *failed = false; StringPiece context = const_context; - if (context.begin() == NULL) + if (context.data() == NULL) context = text; bool carat = anchor_start(); bool dollar = anchor_end(); @@ -1910,7 +1911,7 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, return false; if (!matched) return false; - if (endmatch && ep != (reversed_ ? text.begin() : text.end())) + if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size())) return false; // If caller cares, record the boundary of the match. @@ -1918,10 +1919,11 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, // as the beginning. if (match0) { if (reversed_) - *match0 = StringPiece(ep, static_cast<size_t>(text.end() - ep)); + *match0 = + StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep)); else *match0 = - StringPiece(text.begin(), static_cast<size_t>(ep - text.begin())); + StringPiece(text.data(), static_cast<size_t>(ep - text.data())); } return true; } @@ -448,7 +448,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, return false; StringPiece context = const_context; - if (context.begin() == NULL) + if (context.data() == NULL) context = text; // Sanity check: make sure that text lies within context. @@ -465,7 +465,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, if (prog_->anchor_end()) { longest = true; endmatch_ = true; - etext_ = text.end(); } if (nsubmatch < 0) { @@ -488,7 +487,9 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, matched_ = false; // For debugging prints. - btext_ = context.begin(); + btext_ = context.data(); + // For convenience. + etext_ = text.data() + text.size(); if (ExtraDebug) fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", @@ -503,14 +504,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, memset(&match_[0], 0, ncapture_*sizeof match_[0]); // Loop over the text, stepping the machine. - for (const char* p = text.begin();; p++) { + for (const char* p = text.data();; p++) { if (ExtraDebug) { int c = 0; - if (p == context.begin()) + if (p == btext_) c = '^'; - else if (p > text.end()) + else if (p > etext_) c = '$'; - else if (p < text.end()) + else if (p < etext_) c = p[0] & 0xFF; fprintf(stderr, "%c:", c); @@ -524,14 +525,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, } // This is a no-op the first time around the loop because runq is empty. - int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p); + int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p); DCHECK_EQ(runq->size(), 0); using std::swap; swap(nextq, runq); nextq->clear(); if (id != 0) { // We're done: full match ahead. - p = text.end(); + p = etext_; for (;;) { Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { @@ -559,30 +560,30 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, break; } - if (p > text.end()) + if (p > etext_) break; // Start a new thread if there have not been any matches. // (No point in starting a new thread if there have been // matches, since it would be to the right of the match // we already found.) - if (!matched_ && (!anchored || p == text.begin())) { + if (!matched_ && (!anchored || p == text.data())) { // If there's a required first byte for an unanchored search // and we're not in the middle of any possible matches, // use memchr to search for the byte quickly. int fb = prog_->first_byte(); if (!anchored && runq->size() == 0 && - fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) { - p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p)); + fb >= 0 && p < etext_ && (p[0] & 0xFF) != fb) { + p = reinterpret_cast<const char*>(memchr(p, fb, etext_ - p)); if (p == NULL) { - p = text.end(); + p = etext_; } } Thread* t = AllocThread(); CopyCapture(t->capture, match_); t->capture[0] = p; - AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p, + AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p, t); Decref(t); } diff --git a/re2/onepass.cc b/re2/onepass.cc index d615893..7a774ce 100644 --- a/re2/onepass.cc +++ b/re2/onepass.cc @@ -235,7 +235,7 @@ bool Prog::SearchOnePass(const StringPiece& text, matchcap[i] = NULL; StringPiece context = const_context; - if (context.begin() == NULL) + if (context.data() == NULL) context = text; if (anchor_start() && context.begin() != text.begin()) return false; @@ -249,8 +249,8 @@ bool Prog::SearchOnePass(const StringPiece& text, // start() is always mapped to the zeroth OneState. OneState* state = IndexToNode(nodes, statesize, 0); uint8_t* bytemap = bytemap_; - const char* bp = text.begin(); - const char* ep = text.end(); + const char* bp = text.data(); + const char* ep = text.data() + text.size(); const char* p; bool matched = false; matchcap[0] = bp; diff --git a/re2/parse.cc b/re2/parse.cc index 93b922a..03b53c7 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -1447,7 +1447,7 @@ static int UnHex(int c) { // Sets *rp to the named character. static bool ParseEscape(StringPiece* s, Rune* rp, RegexpStatus* status, int rune_max) { - const char* begin = s->begin(); + const char* begin = s->data(); if (s->size() < 1 || (*s)[0] != '\\') { // Should not happen - caller always checks. status->set_code(kRegexpInternalError); @@ -1590,7 +1590,7 @@ BadEscape: // Unrecognized escape sequence. status->set_code(kRegexpBadEscape); status->set_error_arg( - StringPiece(begin, static_cast<size_t>(s->begin() - begin))); + StringPiece(begin, static_cast<size_t>(s->data() - begin))); return false; } @@ -1710,7 +1710,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl return NULL; // Could use StringPieceToRune, but there aren't // any non-ASCII Perl group names. - StringPiece name(s->begin(), 2); + StringPiece name(s->data(), 2); const UGroup *g = LookupPerlGroup(name); if (g == NULL) return NULL; @@ -1750,8 +1750,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, return kParseError; if (c != '{') { // Name is the bit of string we just skipped over for c. - const char* p = seq.begin() + 2; - name = StringPiece(p, static_cast<size_t>(s->begin() - p)); + const char* p = seq.data() + 2; + name = StringPiece(p, static_cast<size_t>(s->data() - p)); } else { // Name is in braces. Look for closing } size_t end = s->find('}', 0); @@ -1762,14 +1762,14 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, status->set_error_arg(seq); return kParseError; } - name = StringPiece(s->begin(), end); // without '}' + name = StringPiece(s->data(), end); // without '}' s->remove_prefix(end + 1); // with '}' if (!IsValidUTF8(name, status)) return kParseError; } // Chop seq where s now begins. - seq = StringPiece(seq.begin(), static_cast<size_t>(s->begin() - seq.begin())); + seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data())); if (name.size() > 0 && name[0] == '^') { sign = -sign; @@ -2074,8 +2074,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { } // t is "P<name>...", t[end] == '>' - StringPiece capture(t.begin()-2, end+3); // "(?P<name>" - StringPiece name(t.begin()+2, end-2); // "name" + StringPiece capture(t.data()-2, end+3); // "(?P<name>" + StringPiece name(t.data()+2, end-2); // "name" if (!IsValidUTF8(name, status_)) return false; if (!IsValidCaptureName(name)) { @@ -2089,7 +2089,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { return false; } - s->remove_prefix(static_cast<size_t>(capture.end() - s->begin())); + s->remove_prefix( + static_cast<size_t>(capture.data() + capture.size() - s->data())); return true; } @@ -2173,7 +2174,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { BadPerlOp: status_->set_code(kRegexpBadPerlOp); status_->set_error_arg( - StringPiece(s->begin(), static_cast<size_t>(t.begin() - s->begin()))); + StringPiece(s->data(), static_cast<size_t>(t.data() - s->data()))); return false; } @@ -2321,8 +2322,8 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, // (and a++ means something else entirely, which we don't support!) status->set_code(kRegexpRepeatOp); status->set_error_arg(StringPiece( - lastunary.begin(), - static_cast<size_t>(t.begin() - lastunary.begin()))); + lastunary.data(), + static_cast<size_t>(t.data() - lastunary.data()))); return NULL; } } @@ -2354,8 +2355,8 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, // Not allowed to stack repetition operators. status->set_code(kRegexpRepeatOp); status->set_error_arg(StringPiece( - lastunary.begin(), - static_cast<size_t>(t.begin() - lastunary.begin()))); + lastunary.data(), + static_cast<size_t>(t.data() - lastunary.data()))); return NULL; } } diff --git a/re2/prog.cc b/re2/prog.cc index 5155943..cc35917 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -288,24 +288,24 @@ uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) { int flags = 0; // ^ and \A - if (p == text.begin()) + if (p == text.data()) flags |= kEmptyBeginText | kEmptyBeginLine; else if (p[-1] == '\n') flags |= kEmptyBeginLine; // $ and \z - if (p == text.end()) + if (p == text.data() + text.size()) flags |= kEmptyEndText | kEmptyEndLine; - else if (p < text.end() && p[0] == '\n') + else if (p < text.data() + text.size() && p[0] == '\n') flags |= kEmptyEndLine; // \b and \B - if (p == text.begin() && p == text.end()) { + if (p == text.data() && p == text.data() + text.size()) { // no word boundary here - } else if (p == text.begin()) { + } else if (p == text.data()) { if (IsWordChar(p[0])) flags |= kEmptyWordBoundary; - } else if (p == text.end()) { + } else if (p == text.data() + text.size()) { if (IsWordChar(p[-1])) flags |= kEmptyWordBoundary; } else { @@ -377,8 +377,8 @@ bool RE2::Replace(std::string* str, if (!re.Rewrite(&s, rewrite, vec, nvec)) return false; - assert(vec[0].begin() >= str->data()); - assert(vec[0].end() <= str->data()+str->size()); + assert(vec[0].data() >= str->data()); + assert(vec[0].data() + vec[0].size() <= str->data() + str->size()); str->replace(vec[0].data() - str->data(), vec[0].size(), s); return true; } @@ -406,9 +406,9 @@ int RE2::GlobalReplace(std::string* str, if (!re.Match(*str, static_cast<size_t>(p - str->data()), str->size(), UNANCHORED, vec, nvec)) break; - if (p < vec[0].begin()) - out.append(p, vec[0].begin() - p); - if (vec[0].begin() == lastend && vec[0].size() == 0) { + if (p < vec[0].data()) + out.append(p, vec[0].data() - p); + if (vec[0].data() == lastend && vec[0].size() == 0) { // Disallow empty match at end of last match: skip ahead. // // fullrune() takes int, not ptrdiff_t. However, it just looks @@ -439,7 +439,7 @@ int RE2::GlobalReplace(std::string* str, continue; } re.Rewrite(&out, rewrite, vec, nvec); - p = vec[0].end(); + p = vec[0].data() + vec[0].size(); lastend = p; count++; } diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc index ae9fd82..6cde42d 100644 --- a/re2/testing/backtrack.cc +++ b/re2/testing/backtrack.cc @@ -105,7 +105,7 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context, StringPiece* submatch, int nsubmatch) { text_ = text; context_ = context; - if (context_.begin() == NULL) + if (context_.data() == NULL) context_ = text; if (prog_->anchor_start() && text.begin() > context_.begin()) return false; @@ -137,14 +137,14 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context, // Anchored search must start at text.begin(). if (anchored_) { - cap_[0] = text.begin(); - return Visit(prog_->start(), text.begin()); + cap_[0] = text.data(); + return Visit(prog_->start(), text.data()); } // Unanchored search, starting from each possible text position. // Notice that we have to try the empty string at the end of // the text, so the loop condition is p <= text.end(), not p < text.end(). - for (const char* p = text.begin(); p <= text.end(); p++) { + for (const char* p = text.data(); p <= text.data() + text.size(); p++) { cap_[0] = p; if (Visit(prog_->start(), p)) // Match must be leftmost; done. return true; @@ -158,8 +158,8 @@ bool Backtracker::Visit(int id, const char* p) { // Check bitmap. If we've already explored from here, // either it didn't match or it did but we're hoping for a better match. // Either way, don't go down that road again. - CHECK(p <= text_.end()); - size_t n = id*(text_.size()+1) + (p - text_.begin()); + CHECK(p <= text_.data() + text_.size()); + size_t n = id*(text_.size()+1) + (p - text_.data()); CHECK_LT(n/32, nvisited_); if (visited_[n/32] & (1 << (n&31))) return false; @@ -182,7 +182,7 @@ bool Backtracker::Try(int id, const char* p) { // Pick out byte at current position. If at end of string, // have to explore in hope of finishing a match. Use impossible byte -1. int c = -1; - if (p < text_.end()) + if (p < text_.data() + text_.size()) c = *p & 0xFF; Prog::Inst* ip = prog_->inst(id); @@ -224,11 +224,12 @@ bool Backtracker::Try(int id, const char* p) { case kInstMatch: // We found a match. If it's the best so far, record the // parameters in the caller's submatch_ array. - if (endmatch_ && p != context_.end()) + if (endmatch_ && p != context_.data() + context_.size()) return false; cap_[1] = p; - if (submatch_[0].data() == NULL || // First match so far ... - (longest_ && p > submatch_[0].end())) { // ... or better match + if (submatch_[0].data() == NULL || + (longest_ && p > submatch_[0].data() + submatch_[0].size())) { + // First match so far - or better match. for (int i = 0; i < nsubmatch_; i++) submatch_[i] = StringPiece( cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i])); diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc index 47950ba..cadd2b4 100644 --- a/re2/testing/exhaustive_tester.cc +++ b/re2/testing/exhaustive_tester.cc @@ -62,7 +62,7 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc for (int i = 0; i < n; i++) { if (i > 0) printf(" "); - if (m[i].begin() == NULL) + if (m[i].data() == NULL) printf("-"); else printf("%td-%td", diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc index 1e4d3da..d156c88 100644 --- a/re2/testing/regexp_generator.cc +++ b/re2/testing/regexp_generator.cc @@ -241,7 +241,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) { std::vector<std::string> Explode(const StringPiece& s) { std::vector<std::string> v; - for (const char *q = s.begin(); q < s.end(); ) { + for (const char *q = s.data(); q < s.data() + s.size(); ) { const char* p = q; Rune r; q += chartorune(&r, q); @@ -259,8 +259,8 @@ std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) { if (sep.size() == 0) return Explode(s); - const char *p = s.begin(); - for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) { + const char *p = s.data(); + for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) { if (StringPiece(q, sep.size()) == sep) { v.push_back(std::string(p, q - p)); p = q + sep.size(); @@ -268,8 +268,8 @@ std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) { continue; } } - if (p < s.end()) - v.push_back(std::string(p, s.end() - p)); + if (p < s.data() + s.size()) + v.push_back(std::string(p, s.data() + s.size() - p)); return v; } diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc index d676d9a..f08d717 100644 --- a/re2/testing/tester.cc +++ b/re2/testing/tester.cc @@ -99,7 +99,7 @@ typedef TestInstance::Result Result; // where a and b are the starting and ending offsets of s in text. static std::string FormatCapture(const StringPiece& text, const StringPiece& s) { - if (s.begin() == NULL) + if (s.data() == NULL) return "(?,?)"; return StringPrintf("(%td,%td)", s.begin() - text.begin(), s.end() - text.begin()); @@ -489,7 +489,7 @@ static bool ResultOkay(const Result& r, const Result& correct) { return false; if (r.have_submatch || r.have_submatch0) { for (int i = 0; i < kMaxSubmatch; i++) { - if (correct.submatch[i].begin() != r.submatch[i].begin() || + if (correct.submatch[i].data() != r.submatch[i].data() || correct.submatch[i].size() != r.submatch[i].size()) return false; if (!r.have_submatch) @@ -555,8 +555,8 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context, } } for (int i = 0; i < 1+num_captures_; i++) { - if (r.submatch[i].begin() != correct.submatch[i].begin() || - r.submatch[i].end() != correct.submatch[i].end()) { + if (r.submatch[i].data() != correct.submatch[i].data() || + r.submatch[i].size() != correct.submatch[i].size()) { LOG(INFO) << StringPrintf(" $%d: should be %s is %s", i, diff --git a/re2_test.bzl b/re2_test.bzl deleted file mode 100644 index c0eb654..0000000 --- a/re2_test.bzl +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2009 The RE2 Authors. All Rights Reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# Defines a Bazel macro that instantiates a native cc_test rule for an RE2 test. -def re2_test(name, deps=[], size="medium"): - native.cc_test( - name=name, - srcs=["re2/testing/%s.cc" % (name)], - deps=[":test"] + deps, - size=size, - ) |