summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author DongHun Kwak <dh0128.kwak@samsung.com> 2018-12-05 10:35:59 +0900
committer DongHun Kwak <dh0128.kwak@samsung.com> 2018-12-05 10:36:02 +0900
commit 41cf41822489f66bc95b5e0572f5eee10a524274 (patch)
tree bdb583bd391fe4c93fb2c1264b8eac24ca706434
parent e8eecdb736ef26dfe61b1d6c48d6b637acfb1bdf (diff)
download re2-41cf41822489f66bc95b5e0572f5eee10a524274.tar.gz
re2-41cf41822489f66bc95b5e0572f5eee10a524274.tar.bz2
re2-41cf41822489f66bc95b5e0572f5eee10a524274.zip
Imported Upstream version 20181001 (upstream/20181001)
Change-Id: Ib09ca152823a59e8772d52faec140a0bc8c99051
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r-- .travis.yml 11
-rw-r--r-- re2/fuzzing/re2_fuzzer.cc 18
-rw-r--r-- re2/re2.h 5
3 files changed, 28 insertions, 6 deletions
diff --git a/.travis.yml b/.travis.yml
index 456083c..674cda5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -143,6 +143,17 @@ matrix:
- clang-6.0
env:
- MATRIX_EVAL="CC=clang-6.0 CXX=clang++-6.0"
+ - os: linux
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main'
+ key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
+ packages:
+ - clang-7
+ env:
+ - MATRIX_EVAL="CC=clang-7 CXX=clang++-7"
before_install:
- eval "${MATRIX_EVAL}"
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 5d2820b..3ce4d1b 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -56,21 +56,31 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
// Entry point for libFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- if (size == 0 || size > 512)
+ if (size == 0 || size > 999)
return 0;
- // Crudely limit the use of \p and \P.
+ // Crudely limit the use of ., \p and \P.
// Otherwise, we will waste time on inputs that have long runs of Unicode
// character classes. The fuzzer has shown itself to be easily capable of
// generating such patterns that fall within the other limits, but result
// in timeouts nonetheless. The marginal cost is high - even more so when
// counted repetition is involved - whereas the marginal benefit is zero.
+ int dot = 0;
int backslash_p = 0;
for (size_t i = 0; i < size; i++) {
- if (data[i] == '\\' && i+1 < size && (data[i+1] == 'p' || data[i+1] == 'P'))
+ if (data[i] == '.')
+ dot++;
+ if (data[i] != '\\')
+ continue;
+ i++;
+ if (i >= size)
+ break;
+ if (data[i] == 'p' || data[i] == 'P')
backslash_p++;
}
- if (backslash_p > 10)
+ if (dot > 99)
+ return 0;
+ if (backslash_p > 1)
return 0;
// The one-at-a-time hash by Bob Jenkins.
diff --git a/re2/re2.h b/re2/re2.h
index 37ab8f1..2a012e1 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -549,8 +549,9 @@ class RE2 {
// with (?i) unless in posix_syntax mode)
//
// The following options are only consulted when posix_syntax == true.
- // (When posix_syntax == false these features are always enabled and
- // cannot be turned off.)
+ // When posix_syntax == false, these features are always enabled and
+ // cannot be turned off; to perform multi-line matching in that case,
+ // begin the regexp with (?m).
// perl_classes (false) allow Perl's \d \s \w \D \S \W
// word_boundary (false) allow Perl's \b \B (word boundary and not)
// one_line (false) ^ and $ only match beginning and end of text