diff options
-rw-r--r-- | .travis.yml | 11 | ||||
-rw-r--r-- | re2/fuzzing/re2_fuzzer.cc | 18 | ||||
-rw-r--r-- | re2/re2.h | 5 |
3 files changed, 28 insertions, 6 deletions
diff --git a/.travis.yml b/.travis.yml index 456083c..674cda5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -143,6 +143,17 @@ matrix: - clang-6.0 env: - MATRIX_EVAL="CC=clang-6.0 CXX=clang++-6.0" + - os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' + packages: + - clang-7 + env: + - MATRIX_EVAL="CC=clang-7 CXX=clang++-7" before_install: - eval "${MATRIX_EVAL}" diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc index 5d2820b..3ce4d1b 100644 --- a/re2/fuzzing/re2_fuzzer.cc +++ b/re2/fuzzing/re2_fuzzer.cc @@ -56,21 +56,31 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) { // Entry point for libFuzzer. extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - if (size == 0 || size > 512) + if (size == 0 || size > 999) return 0; - // Crudely limit the use of \p and \P. + // Crudely limit the use of ., \p and \P. // Otherwise, we will waste time on inputs that have long runs of Unicode // character classes. The fuzzer has shown itself to be easily capable of // generating such patterns that fall within the other limits, but result // in timeouts nonetheless. The marginal cost is high - even more so when // counted repetition is involved - whereas the marginal benefit is zero. + int dot = 0; int backslash_p = 0; for (size_t i = 0; i < size; i++) { - if (data[i] == '\\' && i+1 < size && (data[i+1] == 'p' || data[i+1] == 'P')) + if (data[i] == '.') + dot++; + if (data[i] != '\\') + continue; + i++; + if (i >= size) + break; + if (data[i] == 'p' || data[i] == 'P') backslash_p++; } - if (backslash_p > 10) + if (dot > 99) + return 0; + if (backslash_p > 1) return 0; // The one-at-a-time hash by Bob Jenkins. @@ -549,8 +549,9 @@ class RE2 { // with (?i) unless in posix_syntax mode) // // The following options are only consulted when posix_syntax == true. - // (When posix_syntax == false these features are always enabled and - // cannot be turned off.) + // When posix_syntax == false, these features are always enabled and + // cannot be turned off; to perform multi-line matching in that case, + // begin the regexp with (?m). // perl_classes (false) allow Perl's \d \s \w \D \S \W // word_boundary (false) allow Perl's \b \B (word boundary and not) // one_line (false) ^ and $ only match beginning and end of text |