diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2018-12-05 10:35:59 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2018-12-05 10:36:02 +0900 |
commit | 41cf41822489f66bc95b5e0572f5eee10a524274 (patch) | |
tree | bdb583bd391fe4c93fb2c1264b8eac24ca706434 | |
parent | e8eecdb736ef26dfe61b1d6c48d6b637acfb1bdf (diff) | |
download | re2-41cf41822489f66bc95b5e0572f5eee10a524274.tar.gz re2-41cf41822489f66bc95b5e0572f5eee10a524274.tar.bz2 re2-41cf41822489f66bc95b5e0572f5eee10a524274.zip |
Imported Upstream version 20181001 (upstream/20181001)
Change-Id: Ib09ca152823a59e8772d52faec140a0bc8c99051
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r-- | .travis.yml | 11 | ||||
-rw-r--r-- | re2/fuzzing/re2_fuzzer.cc | 18 | ||||
-rw-r--r-- | re2/re2.h | 5 |
3 files changed, 28 insertions, 6 deletions
diff --git a/.travis.yml b/.travis.yml index 456083c..674cda5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -143,6 +143,17 @@ matrix: - clang-6.0 env: - MATRIX_EVAL="CC=clang-6.0 CXX=clang++-6.0" + - os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' + packages: + - clang-7 + env: + - MATRIX_EVAL="CC=clang-7 CXX=clang++-7" before_install: - eval "${MATRIX_EVAL}" diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc index 5d2820b..3ce4d1b 100644 --- a/re2/fuzzing/re2_fuzzer.cc +++ b/re2/fuzzing/re2_fuzzer.cc @@ -56,21 +56,31 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) { // Entry point for libFuzzer. extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - if (size == 0 || size > 512) + if (size == 0 || size > 999) return 0; - // Crudely limit the use of \p and \P. + // Crudely limit the use of ., \p and \P. // Otherwise, we will waste time on inputs that have long runs of Unicode // character classes. The fuzzer has shown itself to be easily capable of // generating such patterns that fall within the other limits, but result // in timeouts nonetheless. The marginal cost is high - even more so when // counted repetition is involved - whereas the marginal benefit is zero. + int dot = 0; int backslash_p = 0; for (size_t i = 0; i < size; i++) { - if (data[i] == '\\' && i+1 < size && (data[i+1] == 'p' || data[i+1] == 'P')) + if (data[i] == '.') + dot++; + if (data[i] != '\\') + continue; + i++; + if (i >= size) + break; + if (data[i] == 'p' || data[i] == 'P') backslash_p++; } - if (backslash_p > 10) + if (dot > 99) + return 0; + if (backslash_p > 1) return 0; // The one-at-a-time hash by Bob Jenkins. @@ -549,8 +549,9 @@ class RE2 { // with (?i) unless in posix_syntax mode) // // The following options are only consulted when posix_syntax == true. 
- // (When posix_syntax == false these features are always enabled and - // cannot be turned off.) + // When posix_syntax == false, these features are always enabled and + // cannot be turned off; to perform multi-line matching in that case, + // begin the regexp with (?m). // perl_classes (false) allow Perl's \d \s \w \D \S \W // word_boundary (false) allow Perl's \b \B (word boundary and not) // one_line (false) ^ and $ only match beginning and end of text |