summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2016-11-21 16:56:53 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2016-11-21 16:56:53 +0900
commit485249b5a02cf59571cde61f83d10a6a9ec36b3d (patch)
tree1866e7ca3fe6c30538b6bbe29a81a5836f53678d
parentb1560c7299051a0740ae99189cfbded5bde01a4d (diff)
downloadre2-485249b5a02cf59571cde61f83d10a6a9ec36b3d.tar.gz
re2-485249b5a02cf59571cde61f83d10a6a9ec36b3d.tar.bz2
re2-485249b5a02cf59571cde61f83d10a6a9ec36b3d.zip
Imported Upstream version 20160201upstream/20160201
Change-Id: I9008d23e0e80414f725fdd41819d9856e8a041cc Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
-rw-r--r--BUILD22
-rw-r--r--CMakeLists.txt1
-rw-r--r--Makefile25
-rw-r--r--WORKSPACE5
-rw-r--r--doc/syntax.txt230
-rw-r--r--re2.pc2
-rw-r--r--re2/bitstate.cc5
-rw-r--r--re2/compile.cc224
-rw-r--r--re2/dfa.cc10
-rw-r--r--re2/filtered_re2.cc2
-rw-r--r--re2/filtered_re2.h2
-rw-r--r--re2/nfa.cc3
-rw-r--r--re2/onepass.cc3
-rw-r--r--re2/parse.cc70
-rw-r--r--re2/prefilter.cc2
-rw-r--r--re2/prefilter_tree.cc21
-rw-r--r--re2/prog.h2
-rw-r--r--re2/re2.cc43
-rw-r--r--re2/re2.h7
-rw-r--r--re2/regexp.cc5
-rw-r--r--re2/regexp.h2
-rw-r--r--re2/set.cc4
-rw-r--r--re2/simplify.cc8
-rw-r--r--re2/stringpiece.cc23
-rw-r--r--re2/stringpiece.h12
-rw-r--r--re2/testing/backtrack.cc7
-rw-r--r--re2/testing/compile_test.cc86
-rw-r--r--re2/testing/dfa_test.cc23
-rw-r--r--re2/testing/exhaustive2_test.cc2
-rw-r--r--re2/testing/exhaustive3_test.cc2
-rw-r--r--re2/testing/filtered_re2_test.cc10
-rw-r--r--re2/testing/parse_test.cc12
-rw-r--r--re2/testing/re2_arg_test.cc2
-rw-r--r--re2/testing/re2_test.cc81
-rw-r--r--re2/testing/regexp_benchmark.cc16
-rw-r--r--re2/testing/regexp_generator.cc6
-rw-r--r--re2/testing/regexp_test.cc3
-rw-r--r--re2/testing/string_generator.cc7
-rw-r--r--re2/testing/tester.cc11
-rw-r--r--re2/tostring.cc6
-rw-r--r--re2/unicode.py2
-rw-r--r--re2/unicode_casefold.cc58
-rw-r--r--re2/unicode_groups.cc1069
-rw-r--r--re2_test.bzl5
-rw-r--r--util/atomicops.h44
-rw-r--r--util/benchmark.cc4
-rw-r--r--util/logging.h9
-rw-r--r--util/mutex.h4
-rw-r--r--util/pcre.cc27
-rw-r--r--util/sparse_array.h30
-rw-r--r--util/sparse_set.h27
-rw-r--r--util/strutil.cc2
-rw-r--r--util/test.cc12
-rw-r--r--util/test.h9
-rw-r--r--util/util.h5
-rw-r--r--util/valgrind.cc14
56 files changed, 1665 insertions, 663 deletions
diff --git a/BUILD b/BUILD
index 96c59a1..2fb0d22 100644
--- a/BUILD
+++ b/BUILD
@@ -6,16 +6,6 @@
licenses(["notice"])
-# stringpiece is a standalone library so that it can be used without pulling in
-# all of the other parts of RE2.
-cc_library(
- name = "stringpiece",
- srcs = ["re2/stringpiece.cc"],
- hdrs = ["re2/stringpiece.h"],
- includes = ["."],
- visibility = ["//visibility:public"],
-)
-
cc_library(
name = "re2",
srcs = [
@@ -39,6 +29,7 @@ cc_library(
"re2/regexp.h",
"re2/set.cc",
"re2/simplify.cc",
+ "re2/stringpiece.cc",
"re2/tostring.cc",
"re2/unicode_casefold.cc",
"re2/unicode_casefold.h",
@@ -65,14 +56,13 @@ cc_library(
"re2/filtered_re2.h",
"re2/re2.h",
"re2/set.h",
+ "re2/stringpiece.h",
"re2/variadic_function.h",
],
+ copts = ["-pthread"],
includes = ["."],
linkopts = ["-pthread"],
visibility = ["//visibility:public"],
- deps = [
- ":stringpiece",
- ],
)
cc_library(
@@ -102,9 +92,7 @@ cc_library(
"util/thread.h",
],
includes = ["."],
- deps = [
- ":re2",
- ],
+ deps = [":re2"],
)
load("re2_test", "re2_test")
@@ -129,6 +117,6 @@ re2_test("exhaustive1_test")
re2_test("exhaustive2_test")
re2_test("exhaustive3_test")
re2_test("exhaustive_test")
-re2_test("random_test")
+re2_test("random_test", size="large")
# TODO: Add support for regexp_benchmark.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c6c6060..1c980df 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ if(WIN32)
add_definitions(-DUNICODE -D_UNICODE -DSTRICT -DNOMINMAX)
set(THREADING threadwin)
else()
+ add_definitions(-pthread)
set(THREADING thread)
list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread)
endif()
diff --git a/Makefile b/Makefile
index 43a0eed..fb5d5dc 100644
--- a/Makefile
+++ b/Makefile
@@ -8,8 +8,8 @@
# LDPCRE=-L/usr/local/lib -lpcre
CXX?=g++
-CXXFLAGS?=-Wall -O3 -g -pthread # can override
-RE2_CXXFLAGS?=-Wsign-compare -c -I. $(CCPCRE) # required
+CXXFLAGS?=-O3 -g -pthread # can override
+RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCPCRE) # required
LDFLAGS?=-pthread
AR?=ar
ARFLAGS?=rsc
@@ -158,7 +158,7 @@ BIGTESTS=\
obj/test/random_test\
SOFILES=$(patsubst obj/%,obj/so/%,$(OFILES))
-STESTOFILES=$(patsubst obj/%,obj/so/%,$(TESTOFILES))
+# We use TESTOFILES for testing the shared lib, only it is built differently.
STESTS=$(patsubst obj/%,obj/so/%,$(TESTS))
SBIGTESTS=$(patsubst obj/%,obj/so/%,$(BIGTESTS))
@@ -169,15 +169,15 @@ DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
obj/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
- $(CXX) -o $@ $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc
+ $(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc
obj/dbg/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
- $(CXX) -o $@ -fPIC $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) $*.cc
+ $(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) $*.cc
obj/so/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
- $(CXX) -o $@ -fPIC $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc
+ $(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(CXXFLAGS) $(RE2_CXXFLAGS) -DNDEBUG $*.cc
obj/libre2.a: $(OFILES)
@mkdir -p obj
@@ -192,17 +192,18 @@ obj/so/libre2.$(SOEXT): $(SOFILES)
$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES)
ln -sf libre2.$(SOEXTVER) $@
-obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
- @mkdir -p obj/test
- $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(LDFLAGS) $(LDPCRE)
-
obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
@mkdir -p obj/dbg/test
$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(LDFLAGS) $(LDPCRE)
-obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/so/re2/testing/%.o $(STESTOFILES) obj/so/util/test.o
+obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
+ @mkdir -p obj/test
+ $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(LDFLAGS) $(LDPCRE)
+
+# Test the shared lib, falling back to the static lib for private symbols
+obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
@mkdir -p obj/so/test
- $(CXX) -o $@ obj/so/re2/testing/$*.o $(STESTOFILES) obj/so/util/test.o -Lobj/so -lre2 obj/libre2.a $(LDFLAGS) $(LDPCRE)
+ $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(LDFLAGS) $(LDPCRE)
obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
@mkdir -p obj/test
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..393f5e6
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,5 @@
+# Copyright 2009 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Bazel (http://bazel.io/) WORKSPACE file for RE2.
diff --git a/doc/syntax.txt b/doc/syntax.txt
index e9c6ff4..09b7e88 100644
--- a/doc/syntax.txt
+++ b/doc/syntax.txt
@@ -230,105 +230,137 @@ Zp paragraph separator
Zs space separator
Unicode character class names--scripts:
-Arabic Arabic
-Armenian Armenian
-Balinese Balinese
-Bamum Bamum
-Batak Batak
-Bengali Bengali
-Bopomofo Bopomofo
-Brahmi Brahmi
-Braille Braille
-Buginese Buginese
-Buhid Buhid
-Canadian_Aboriginal Canadian Aboriginal
-Carian Carian
-Chakma Chakma
-Cham Cham
-Cherokee Cherokee
-Common characters not specific to one script
-Coptic Coptic
-Cuneiform Cuneiform
-Cypriot Cypriot
-Cyrillic Cyrillic
-Deseret Deseret
-Devanagari Devanagari
-Egyptian_Hieroglyphs Egyptian Hieroglyphs
-Ethiopic Ethiopic
-Georgian Georgian
-Glagolitic Glagolitic
-Gothic Gothic
-Greek Greek
-Gujarati Gujarati
-Gurmukhi Gurmukhi
-Han Han
-Hangul Hangul
-Hanunoo Hanunoo
-Hebrew Hebrew
-Hiragana Hiragana
-Imperial_Aramaic Imperial Aramaic
-Inherited inherit script from previous character
-Inscriptional_Pahlavi Inscriptional Pahlavi
-Inscriptional_Parthian Inscriptional Parthian
-Javanese Javanese
-Kaithi Kaithi
-Kannada Kannada
-Katakana Katakana
-Kayah_Li Kayah Li
-Kharoshthi Kharoshthi
-Khmer Khmer
-Lao Lao
-Latin Latin
-Lepcha Lepcha
-Limbu Limbu
-Linear_B Linear B
-Lycian Lycian
-Lydian Lydian
-Malayalam Malayalam
-Mandaic Mandaic
-Meetei_Mayek Meetei Mayek
-Meroitic_Cursive Meroitic Cursive
-Meroitic_Hieroglyphs Meroitic Hieroglyphs
-Miao Miao
-Mongolian Mongolian
-Myanmar Myanmar
-New_Tai_Lue New Tai Lue (aka Simplified Tai Lue)
-Nko Nko
-Ogham Ogham
-Ol_Chiki Ol Chiki
-Old_Italic Old Italic
-Old_Persian Old Persian
-Old_South_Arabian Old South Arabian
-Old_Turkic Old Turkic
-Oriya Oriya
-Osmanya Osmanya
-Phags_Pa 'Phags Pa
-Phoenician Phoenician
-Rejang Rejang
-Runic Runic
-Saurashtra Saurashtra
-Sharada Sharada
-Shavian Shavian
-Sinhala Sinhala
-Sora_Sompeng Sora Sompeng
-Sundanese Sundanese
-Syloti_Nagri Syloti Nagri
-Syriac Syriac
-Tagalog Tagalog
-Tagbanwa Tagbanwa
-Tai_Le Tai Le
-Tai_Tham Tai Tham
-Tai_Viet Tai Viet
-Takri Takri
-Tamil Tamil
-Telugu Telugu
-Thaana Thaana
-Thai Thai
-Tibetan Tibetan
-Tifinagh Tifinagh
-Ugaritic Ugaritic
-Vai Vai
-Yi Yi
+Ahom
+Anatolian_Hieroglyphs
+Arabic
+Armenian
+Avestan
+Balinese
+Bamum
+Bassa_Vah
+Batak
+Bengali
+Bopomofo
+Brahmi
+Braille
+Buginese
+Buhid
+Canadian_Aboriginal
+Carian
+Caucasian_Albanian
+Chakma
+Cham
+Cherokee
+Common
+Coptic
+Cuneiform
+Cypriot
+Cyrillic
+Deseret
+Devanagari
+Duployan
+Egyptian_Hieroglyphs
+Elbasan
+Ethiopic
+Georgian
+Glagolitic
+Gothic
+Grantha
+Greek
+Gujarati
+Gurmukhi
+Han
+Hangul
+Hanunoo
+Hatran
+Hebrew
+Hiragana
+Imperial_Aramaic
+Inherited
+Inscriptional_Pahlavi
+Inscriptional_Parthian
+Javanese
+Kaithi
+Kannada
+Katakana
+Kayah_Li
+Kharoshthi
+Khmer
+Khojki
+Khudawadi
+Lao
+Latin
+Lepcha
+Limbu
+Linear_A
+Linear_B
+Lisu
+Lycian
+Lydian
+Mahajani
+Malayalam
+Mandaic
+Manichaean
+Meetei_Mayek
+Mende_Kikakui
+Meroitic_Cursive
+Meroitic_Hieroglyphs
+Miao
+Modi
+Mongolian
+Mro
+Multani
+Myanmar
+Nabataean
+New_Tai_Lue
+Nko
+Ogham
+Ol_Chiki
+Old_Hungarian
+Old_Italic
+Old_North_Arabian
+Old_Permic
+Old_Persian
+Old_South_Arabian
+Old_Turkic
+Oriya
+Osmanya
+Pahawh_Hmong
+Palmyrene
+Pau_Cin_Hau
+Phags_Pa
+Phoenician
+Psalter_Pahlavi
+Rejang
+Runic
+Samaritan
+Saurashtra
+Sharada
+Shavian
+Siddham
+SignWriting
+Sinhala
+Sora_Sompeng
+Sundanese
+Syloti_Nagri
+Syriac
+Tagalog
+Tagbanwa
+Tai_Le
+Tai_Tham
+Tai_Viet
+Takri
+Tamil
+Telugu
+Thaana
+Thai
+Tibetan
+Tifinagh
+Tirhuta
+Ugaritic
+Vai
+Warang_Citi
+Yi
Vim character classes:
\i identifier character NOT SUPPORTED vim
diff --git a/re2.pc b/re2.pc
index 9e90cda..82832c3 100644
--- a/re2.pc
+++ b/re2.pc
@@ -6,5 +6,5 @@ libdir=${exec_prefix}/lib
Name: re2
Description: RE2 is a fast, safe, thread-friendly regular expression engine.
Version: 0.0.0
-Cflags: -I${includedir}
+Cflags: -I${includedir} -pthread
Libs: -L${libdir} -lre2 -pthread
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 8ced6ea..5740daa 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -94,7 +94,7 @@ BitState::~BitState() {
// If so, remember that it was visited so that the next time,
// we don't repeat the visit.
bool BitState::ShouldVisit(int id, const char* p) {
- uint n = id * (text_.size() + 1) + (p - text_.begin());
+ size_t n = id * (text_.size() + 1) + (p - text_.begin());
if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
return false;
visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
@@ -272,7 +272,8 @@ bool BitState::TrySearch(int id0, const char* p0) {
if (submatch_[0].data() == NULL ||
(longest_ && p > submatch_[0].end())) {
for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
+ submatch_[i].set(cap_[2*i],
+ static_cast<int>(cap_[2*i+1] - cap_[2*i]));
}
// If going for first match, we're done.
diff --git a/re2/compile.cc b/re2/compile.cc
index e5d6088..9882fef 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -113,7 +113,7 @@ struct Frag {
// Input encodings.
enum Encoding {
kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
- kEncodingLatin1, // Latin1 (0-FF)
+ kEncodingLatin1, // Latin-1 (0-FF)
};
class Compiler : public Regexp::Walker<Frag> {
@@ -193,12 +193,28 @@ class Compiler : public Regexp::Walker<Frag> {
void Add_80_10ffff();
// New suffix that matches the byte range lo-hi, then goes to next.
- int RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next);
int UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next);
+ int CachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next);
+
+ // Returns true iff the suffix is cached.
+ bool IsCachedRuneByteSuffix(int id);
// Adds a suffix to alternation.
void AddSuffix(int id);
+ // Adds a suffix to the trie starting from the given root node.
+ // Returns zero iff allocating an instruction fails. Otherwise, returns
+ // the current root node, which might be different from what was given.
+ int AddSuffixRecursive(int root, int id);
+
+ // Finds the trie node for the given suffix. Returns a Frag in order to
+ // distinguish between pointing at the root node directly (end.p == 0)
+ // and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively).
+ Frag FindByteRange(int root, int id);
+
+ // Compares two ByteRanges and returns true iff they are equal.
+ bool ByteRangeEqual(int id1, int id2);
+
// Returns the alternation of all the added suffixes.
Frag EndRange();
@@ -496,21 +512,17 @@ int Compiler::UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase,
return f.begin;
}
-int Compiler::RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next) {
- // In Latin1 mode, there's no point in caching.
- // In forward UTF-8 mode, only need to cache continuation bytes.
- if (encoding_ == kEncodingLatin1 ||
- (encoding_ == kEncodingUTF8 &&
- !reversed_ &&
- !(0x80 <= lo && hi <= 0xbf))) {
- return UncachedRuneByteSuffix(lo, hi, foldcase, next);
- }
+static uint64 MakeRuneCacheKey(uint8 lo, uint8 hi, bool foldcase, int next) {
+ return (uint64)next << 17 |
+ (uint64)lo << 9 |
+ (uint64)hi << 1 |
+ (uint64)foldcase;
+}
- uint64 key = (uint64)next << 17 |
- (uint64)lo << 9 |
- (uint64)hi << 1 |
- (uint64)foldcase;
- map<uint64, int>::iterator it = rune_cache_.find(key);
+int Compiler::CachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase,
+ int next) {
+ uint64 key = MakeRuneCacheKey(lo, hi, foldcase, next);
+ map<uint64, int>::const_iterator it = rune_cache_.find(key);
if (it != rune_cache_.end())
return it->second;
int id = UncachedRuneByteSuffix(lo, hi, foldcase, next);
@@ -518,12 +530,28 @@ int Compiler::RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next) {
return id;
}
+bool Compiler::IsCachedRuneByteSuffix(int id) {
+ uint8 lo = inst_[id].lo_;
+ uint8 hi = inst_[id].hi_;
+ bool foldcase = inst_[id].foldcase() != 0;
+ int next = inst_[id].out();
+
+ uint64 key = MakeRuneCacheKey(lo, hi, foldcase, next);
+ return rune_cache_.find(key) != rune_cache_.end();
+}
+
void Compiler::AddSuffix(int id) {
if (rune_range_.begin == 0) {
rune_range_.begin = id;
return;
}
+ if (encoding_ == kEncodingUTF8) {
+ // Build a trie in order to reduce fanout.
+ rune_range_.begin = AddSuffixRecursive(rune_range_.begin, id);
+ return;
+ }
+
int alt = AllocInst(1);
if (alt < 0) {
rune_range_.begin = 0;
@@ -533,6 +561,105 @@ void Compiler::AddSuffix(int id) {
rune_range_.begin = alt;
}
+int Compiler::AddSuffixRecursive(int root, int id) {
+ DCHECK(inst_[root].opcode() == kInstAlt ||
+ inst_[root].opcode() == kInstByteRange);
+
+ Frag f = FindByteRange(root, id);
+ if (IsNoMatch(f)) {
+ int alt = AllocInst(1);
+ if (alt < 0)
+ return 0;
+ inst_[alt].InitAlt(root, id);
+ return alt;
+ }
+
+ int br;
+ if (f.end.p == 0)
+ br = root;
+ else if (f.end.p&1)
+ br = inst_[f.begin].out1();
+ else
+ br = inst_[f.begin].out();
+
+ if (IsCachedRuneByteSuffix(br)) {
+ // We can't fiddle with cached suffixes, so make a clone of the head.
+ int byterange = AllocInst(1);
+ if (byterange < 0)
+ return 0;
+ inst_[byterange].InitByteRange(inst_[br].lo(), inst_[br].hi(),
+ inst_[br].foldcase(), inst_[br].out());
+
+ // Ensure that the parent points to the clone, not to the original.
+ // Note that this could leave the head unreachable except via the cache.
+ br = byterange;
+ if (f.end.p == 0)
+ root = br;
+ else if (f.end.p&1)
+ inst_[f.begin].out1_ = br;
+ else
+ inst_[f.begin].set_out(br);
+ }
+
+ // We just saved one ByteRange instruction. :)
+ prog_->byte_inst_count_--;
+
+ int out = inst_[id].out();
+ if (!IsCachedRuneByteSuffix(id)) {
+ // The head should be the instruction most recently allocated, so free it
+ // instead of leaving it unreachable.
+ DCHECK_EQ(id, inst_len_-1);
+ inst_[id].out_opcode_ = 0;
+ inst_[id].out1_ = 0;
+ inst_len_--;
+ }
+
+ out = AddSuffixRecursive(inst_[br].out(), out);
+ if (out == 0)
+ return 0;
+
+ inst_[br].set_out(out);
+ return root;
+}
+
+bool Compiler::ByteRangeEqual(int id1, int id2) {
+ return inst_[id1].lo() == inst_[id2].lo() &&
+ inst_[id1].hi() == inst_[id2].hi() &&
+ inst_[id1].foldcase() == inst_[id2].foldcase();
+}
+
+Frag Compiler::FindByteRange(int root, int id) {
+ if (inst_[root].opcode() == kInstByteRange) {
+ if (ByteRangeEqual(root, id))
+ return Frag(root, nullPatchList);
+ else
+ return NoMatch();
+ }
+
+ while (inst_[root].opcode() == kInstAlt) {
+ int out1 = inst_[root].out1();
+ if (ByteRangeEqual(out1, id))
+ return Frag(root, PatchList::Mk((root << 1) | 1));
+
+ // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't
+ // what we're looking for, then we can stop immediately. Unfortunately, we
+ // can't short-circuit the search in reverse mode.
+ if (!reversed_)
+ return NoMatch();
+
+ int out = inst_[root].out();
+ if (inst_[out].opcode() == kInstAlt)
+ root = out;
+ else if (ByteRangeEqual(out, id))
+ return Frag(root, PatchList::Mk(root << 1));
+ else
+ return NoMatch();
+ }
+
+ LOG(DFATAL) << "should never happen";
+ return NoMatch();
+}
+
Frag Compiler::EndRange() {
return rune_range_;
}
@@ -556,13 +683,13 @@ void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) {
}
void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
- // Latin1 is easy: runes *are* bytes.
+ // Latin-1 is easy: runes *are* bytes.
if (lo > hi || lo > 0xFF)
return;
if (hi > 0xFF)
hi = 0xFF;
- AddSuffix(RuneByteSuffix(static_cast<uint8>(lo), static_cast<uint8>(hi),
- foldcase, 0));
+ AddSuffix(UncachedRuneByteSuffix(static_cast<uint8>(lo),
+ static_cast<uint8>(hi), foldcase, 0));
}
// Table describing how to make a UTF-8 matching machine
@@ -633,8 +760,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
// ASCII range is always a special case.
if (hi < Runeself) {
- AddSuffix(RuneByteSuffix(static_cast<uint8>(lo), static_cast<uint8>(hi),
- foldcase, 0));
+ AddSuffix(UncachedRuneByteSuffix(static_cast<uint8>(lo),
+ static_cast<uint8>(hi), foldcase, 0));
return;
}
@@ -662,13 +789,49 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
(void)m; // USED(m)
DCHECK_EQ(n, m);
+ // The logic below encodes this thinking:
+ //
+ // 1. When we have built the whole suffix, we know that it cannot
+ // possibly be a suffix of anything longer: in forward mode, nothing
+ // else can occur before the leading byte; in reverse mode, nothing
+ // else can occur after the last continuation byte or else the leading
+ // byte would have to change. Thus, there is no benefit to caching
+ // the first byte of the suffix whereas there is a cost involved in
+ // cloning it if it begins a common prefix, which is fairly likely.
+ //
+ // 2. Conversely, the last byte of the suffix cannot possibly be a
+ // prefix of anything because next == 0, so we will never want to
+ // clone it, but it is fairly likely to be a common suffix. Perhaps
+ // more so in reverse mode than in forward mode because the former is
+ // "converging" towards lower entropy, but caching is still worthwhile
+ // for the latter in cases such as 80-BF.
+ //
+ // 3. Handling the bytes between the first and the last is less
+ // straightforward and, again, the approach depends on whether we are
+ // "converging" towards lower entropy: in forward mode, a single byte
+ // is unlikely to be part of a common suffix whereas a byte range
+ // is more likely so; in reverse mode, a byte range is unlikely to
+ // be part of a common suffix whereas a single byte is more likely
+ // so. The same benefit versus cost argument applies here.
int id = 0;
if (reversed_) {
- for (int i = 0; i < n; i++)
- id = RuneByteSuffix(ulo[i], uhi[i], false, id);
+ for (int i = 0; i < n; i++) {
+ // In reverse UTF-8 mode: cache the leading byte; don't cache the last
+ // continuation byte; cache anything else iff it's a single byte (XX-XX).
+ if (i == 0 || (ulo[i] == uhi[i] && i != n-1))
+ id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ else
+ id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ }
} else {
- for (int i = n-1; i >= 0; i--)
- id = RuneByteSuffix(ulo[i], uhi[i], false, id);
+ for (int i = n-1; i >= 0; i--) {
+ // In forward UTF-8 mode: don't cache the leading byte; cache the last
+ // continuation byte; cache anything else iff it's a byte range (XX-YY).
+ if (i == n-1 || (ulo[i] < uhi[i] && i != 0))
+ id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ else
+ id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ }
}
AddSuffix(id);
}
@@ -762,16 +925,16 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
}
case kRegexpStar:
- return Star(child_frags[0], re->parse_flags()&Regexp::NonGreedy);
+ return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
case kRegexpPlus:
- return Plus(child_frags[0], re->parse_flags()&Regexp::NonGreedy);
+ return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
case kRegexpQuest:
- return Quest(child_frags[0], re->parse_flags()&Regexp::NonGreedy);
+ return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
case kRegexpLiteral:
- return Literal(re->rune(), re->parse_flags()&Regexp::FoldCase);
+ return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0);
case kRegexpLiteralString: {
// Concatenation of literals.
@@ -779,7 +942,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
return Nop();
Frag f;
for (int i = 0; i < re->nrunes(); i++) {
- Frag f1 = Literal(re->runes()[i], re->parse_flags()&Regexp::FoldCase);
+ Frag f1 = Literal(re->runes()[i],
+ (re->parse_flags()&Regexp::FoldCase) != 0);
if (i == 0)
f = f1;
else
diff --git a/re2/dfa.cc b/re2/dfa.cc
index b102175..1f54b9f 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -94,7 +94,7 @@ class DFA {
// States, linked by the next_ pointers. If in state s and reading
// byte c, the next state should be s->next_[c].
struct State {
- inline bool IsMatch() const { return flag_ & kFlagMatch; }
+ inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
void SaveMatch(vector<int>* v);
int* inst_; // Instruction pointers in the state.
@@ -1015,7 +1015,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
// The state flag kFlagLastWord says whether the last
// byte processed was a word character. Use that info to
// insert empty-width (non-)word boundaries.
- bool islastword = state->flag_ & kFlagLastWord;
+ bool islastword = (state->flag_ & kFlagLastWord) != 0;
bool isword = (c != kByteEndText && Prog::IsWordChar(static_cast<uint8>(c)));
if (isword == islastword)
beforeflag |= kEmptyNonWordBoundary;
@@ -1901,9 +1901,9 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
// as the beginning.
if (match0) {
if (reversed_)
- *match0 = StringPiece(ep, text.end() - ep);
+ match0->set(ep, static_cast<int>(text.end() - ep));
else
- *match0 = StringPiece(text.begin(), ep - text.begin());
+ match0->set(text.begin(), static_cast<int>(ep - text.begin()));
}
return true;
}
@@ -1939,7 +1939,7 @@ int DFA::BuildAllStates() {
}
}
- return q.size();
+ return static_cast<int>(q.size());
}
// Build out all states in DFA for kind. Returns number of states.
diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc
index 1ca46d4..5dd65d5 100644
--- a/re2/filtered_re2.cc
+++ b/re2/filtered_re2.cc
@@ -33,7 +33,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
}
delete re;
} else {
- *id = re2_vec_.size();
+ *id = static_cast<int>(re2_vec_.size());
re2_vec_.push_back(re);
}
diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h
index 0f161e2..f4b2be4 100644
--- a/re2/filtered_re2.h
+++ b/re2/filtered_re2.h
@@ -76,7 +76,7 @@ class FilteredRE2 {
vector<int>* potential_regexps) const;
// The number of regexps added.
- int NumRegexps() const { return re2_vec_.size(); }
+ int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
private:
diff --git a/re2/nfa.cc b/re2/nfa.cc
index 3ca275e..bc8996c 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -608,7 +608,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
if (matched_) {
for (int i = 0; i < nsubmatch; i++)
- submatch[i].set(match_[2*i], match_[2*i+1] - match_[2*i]);
+ submatch[i].set(match_[2*i],
+ static_cast<int>(match_[2*i+1] - match_[2*i]));
if (Debug)
fprintf(stderr, "match (%d,%d)\n",
static_cast<int>(match_[0] - btext_),
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 2404617..73acdc8 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -331,7 +331,8 @@ done:
if (!matched)
return false;
for (int i = 0; i < nmatch; i++)
- match[i].set(matchcap[2*i], matchcap[2*i+1] - matchcap[2*i]);
+ match[i].set(matchcap[2*i],
+ static_cast<int>(matchcap[2*i+1] - matchcap[2*i]));
return true;
}
diff --git a/re2/parse.cc b/re2/parse.cc
index 3c15bbd..f51e589 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -919,9 +919,14 @@ int Regexp::FactorAlternationRecursive(
}
n = out;
- // Round 2: Factor out common complex prefixes,
- // just the first piece of each concatenation,
- // whatever it is. This is good enough a lot of the time.
+ // Round 2: Factor out common simple prefixes,
+ // just the first piece of each concatenation.
+ // This will be good enough a lot of the time.
+ //
+ // Complex subexpressions (e.g. involving quantifiers)
+ // are not safe to factor because that collapses their
+ // distinct paths through the automaton, which affects
+ // correctness in some cases.
start = 0;
out = 0;
Regexp* first = NULL;
@@ -934,7 +939,25 @@ int Regexp::FactorAlternationRecursive(
Regexp* first_i = NULL;
if (i < n) {
first_i = LeadingRegexp(sub[i]);
- if (first != NULL && Regexp::Equal(first, first_i)) {
+ if (first != NULL && Regexp::Equal(first, first_i) &&
+ // first must be an empty-width op
+ // OR a char class, any char or any byte
+ // OR a fixed repeat of a literal, char class, any char or any byte.
+ (first->op() == kRegexpBeginLine ||
+ first->op() == kRegexpEndLine ||
+ first->op() == kRegexpWordBoundary ||
+ first->op() == kRegexpNoWordBoundary ||
+ first->op() == kRegexpBeginText ||
+ first->op() == kRegexpEndText ||
+ first->op() == kRegexpCharClass ||
+ first->op() == kRegexpAnyChar ||
+ first->op() == kRegexpAnyByte ||
+ (first->op() == kRegexpRepeat &&
+ first->min() == first->max() &&
+ (first->sub()[0]->op() == kRegexpLiteral ||
+ first->sub()[0]->op() == kRegexpCharClass ||
+ first->sub()[0]->op() == kRegexpAnyChar ||
+ first->sub()[0]->op() == kRegexpAnyByte)))) {
continue;
}
}
@@ -1427,7 +1450,8 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
BadEscape:
// Unrecognized escape sequence.
status->set_code(kRegexpBadEscape);
- status->set_error_arg(StringPiece(begin, s->data() - begin));
+ status->set_error_arg(
+ StringPiece(begin, static_cast<int>(s->data() - begin)));
return false;
}
@@ -1586,7 +1610,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
if (c != '{') {
// Name is the bit of string we just skipped over for c.
const char* p = seq.begin() + 2;
- name = StringPiece(p, s->begin() - p);
+ name = StringPiece(p, static_cast<int>(s->begin() - p));
} else {
// Name is in braces. Look for closing }
size_t end = s->find('}', 0);
@@ -1597,14 +1621,14 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
status->set_error_arg(seq);
return kParseError;
}
- name = StringPiece(s->begin(), end); // without '}'
- s->remove_prefix(end + 1); // with '}'
+ name = StringPiece(s->begin(), static_cast<int>(end)); // without '}'
+ s->remove_prefix(static_cast<int>(end) + 1); // with '}'
if (!IsValidUTF8(name, status))
return kParseError;
}
// Chop seq where s now begins.
- seq = StringPiece(seq.begin(), s->begin() - seq.begin());
+ seq = StringPiece(seq.begin(), static_cast<int>(s->begin() - seq.begin()));
// Look up group
if (name.size() > 0 && name[0] == '^') {
@@ -1645,7 +1669,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
// Got it. Check that it's valid.
q += 2;
- StringPiece name(p, q-p);
+ StringPiece name(p, static_cast<int>(q-p));
const UGroup *g = LookupPosixGroup(name);
if (g == NULL) {
@@ -1699,7 +1723,8 @@ bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr,
return false;
if (rr->hi < rr->lo) {
status->set_code(kRegexpBadCharRange);
- status->set_error_arg(StringPiece(os.data(), s->data() - os.data()));
+ status->set_error_arg(
+ StringPiece(os.data(), static_cast<int>(s->data() - os.data())));
return false;
}
} else {
@@ -1881,8 +1906,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
}
// t is "P<name>...", t[end] == '>'
- StringPiece capture(t.begin()-2, end+3); // "(?P<name>"
- StringPiece name(t.begin()+2, end-2); // "name"
+ StringPiece capture(t.begin()-2, static_cast<int>(end)+3); // "(?P<name>"
+ StringPiece name(t.begin()+2, static_cast<int>(end)-2); // "name"
if (!IsValidUTF8(name, status_))
return false;
if (!IsValidCaptureName(name)) {
@@ -1896,7 +1921,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
return false;
}
- s->remove_prefix(capture.end() - s->begin());
+ s->remove_prefix(static_cast<int>(capture.end() - s->begin()));
return true;
}
@@ -1979,7 +2004,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
BadPerlOp:
status_->set_code(kRegexpBadPerlOp);
- status_->set_error_arg(StringPiece(s->begin(), t.begin() - s->begin()));
+ status_->set_error_arg(
+ StringPiece(s->begin(), static_cast<int>(t.begin() - s->begin())));
return false;
}
@@ -2126,12 +2152,13 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
// a** is a syntax error, not a double-star.
// (and a++ means something else entirely, which we don't support!)
status->set_code(kRegexpRepeatOp);
- status->set_error_arg(StringPiece(lastunary.begin(),
- t.begin() - lastunary.begin()));
+ status->set_error_arg(
+ StringPiece(lastunary.begin(),
+ static_cast<int>(t.begin() - lastunary.begin())));
return NULL;
}
}
- opstr.set(opstr.data(), t.data() - opstr.data());
+ opstr.set(opstr.data(), static_cast<int>(t.data() - opstr.data()));
if (!ps.PushRepeatOp(op, opstr, nongreedy))
return NULL;
isunary = opstr;
@@ -2157,12 +2184,13 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (lastunary.size() > 0) {
// Not allowed to stack repetition operators.
status->set_code(kRegexpRepeatOp);
- status->set_error_arg(StringPiece(lastunary.begin(),
- t.begin() - lastunary.begin()));
+ status->set_error_arg(
+ StringPiece(lastunary.begin(),
+ static_cast<int>(t.begin() - lastunary.begin())));
return NULL;
}
}
- opstr.set(opstr.data(), t.data() - opstr.data());
+ opstr.set(opstr.data(), static_cast<int>(t.data() - opstr.data()));
if (!ps.PushRepetition(lo, hi, opstr, nongreedy))
return NULL;
isunary = opstr;
diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index 4a25a43..45e43c9 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc
@@ -503,7 +503,7 @@ Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
}
- bool latin1 = re->parse_flags() & Regexp::Latin1;
+ bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
Prefilter::Info::Walker w(latin1);
Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
index 89e114c..be9b584 100644
--- a/re2/prefilter_tree.cc
+++ b/re2/prefilter_tree.cc
@@ -168,7 +168,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
for (size_t i = 0; i < prefilter_vec_.size(); i++) {
Prefilter* f = prefilter_vec_[i];
if (f == NULL)
- unfiltered_.push_back(i);
+ unfiltered_.push_back(static_cast<int>(i));
// We push NULL also on to v, so that we maintain the
// mapping of index==regexpid for level=0 prefilter nodes.
@@ -189,7 +189,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
// Identify unique nodes.
int unique_id = 0;
- for (int i = v.size() - 1; i >= 0; i--) {
+ for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Prefilter *node = v[i];
if (node == NULL)
continue;
@@ -211,7 +211,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
entries_.resize(node_map_.size());
// Create parent StdIntMap for the entries.
- for (int i = v.size() - 1; i >= 0; i--) {
+ for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Prefilter* prefilter = v[i];
if (prefilter == NULL)
continue;
@@ -224,7 +224,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
}
// Fill the entries.
- for (int i = v.size() - 1; i >= 0; i--) {
+ for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Prefilter* prefilter = v[i];
if (prefilter == NULL)
continue;
@@ -263,8 +263,9 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
(*child_entry->parents)[prefilter->unique_id()] = 1;
}
}
- entry->propagate_up_at_count =
- prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
+ entry->propagate_up_at_count = prefilter->op() == Prefilter::AND
+ ? static_cast<int>(uniq_child.size())
+ : 1;
break;
}
@@ -290,10 +291,10 @@ void PrefilterTree::RegexpsGivenStrings(
if (!compiled_) {
LOG(WARNING) << "Compile() not called";
for (size_t i = 0; i < prefilter_vec_.size(); ++i)
- regexps->push_back(i);
+ regexps->push_back(static_cast<int>(i));
} else {
if (!prefilter_vec_.empty()) {
- IntMap regexps_map(prefilter_vec_.size());
+ IntMap regexps_map(static_cast<int>(prefilter_vec_.size()));
vector<int> matched_atom_ids;
for (size_t j = 0; j < matched_atoms.size(); j++) {
matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
@@ -313,8 +314,8 @@ void PrefilterTree::RegexpsGivenStrings(
void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
IntMap* regexps) const {
- IntMap count(entries_.size());
- IntMap work(entries_.size());
+ IntMap count(static_cast<int>(entries_.size()));
+ IntMap work(static_cast<int>(entries_.size()));
for (size_t i = 0; i < atom_ids.size(); i++)
work.set(atom_ids[i], 1);
for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
diff --git a/re2/prog.h b/re2/prog.h
index 3e6be8f..8c5b2c4 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -96,7 +96,7 @@ class Prog {
void InitFail();
// Getters
- int id(Prog* p) { return this - p->inst_; }
+ int id(Prog* p) { return static_cast<int>(this - p->inst_); }
InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
int out() { return out_opcode_>>3; }
int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
diff --git a/re2/re2.cc b/re2/re2.cc
index 4a28b23..b3e582f 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -12,7 +12,6 @@
#include <stdio.h>
#include <string>
#include <errno.h>
-#include "util/atomicops.h"
#include "util/util.h"
#include "util/flags.h"
#include "util/sparse_array.h"
@@ -289,8 +288,19 @@ int RE2::ProgramFanout(map<int, int>* histogram) const {
return histogram->rbegin()->first;
}
+// Returns num_captures_, computing it if needed, or -1 if the
+// regexp wasn't valid on construction.
+int RE2::NumberOfCapturingGroups() const {
+ MutexLock l(mutex_);
+ if (suffix_regexp_ == NULL)
+ return -1;
+ if (num_captures_ == -1)
+ num_captures_ = suffix_regexp_->NumCaptures();
+ return num_captures_;
+}
+
// Returns named_groups_, computing it if needed.
-const map<string, int>& RE2::NamedCapturingGroups() const {
+const map<string, int>& RE2::NamedCapturingGroups() const {
MutexLock l(mutex_);
if (!ok())
return *empty_named_groups;
@@ -303,7 +313,7 @@ const map<string, int>& RE2::NamedCapturingGroups() const {
}
// Returns group_names_, computing it if needed.
-const map<int, string>& RE2::CapturingGroupNames() const {
+const map<int, string>& RE2::CapturingGroupNames() const {
MutexLock l(mutex_);
if (!ok())
return *empty_group_names;
@@ -375,7 +385,7 @@ bool RE2::Replace(string *str,
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > arraysize(vec))
return false;
- if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
+ if (!re.Match(*str, 0, static_cast<int>(str->size()), UNANCHORED, vec, nvec))
return false;
string s;
@@ -402,7 +412,8 @@ int RE2::GlobalReplace(string *str,
string out;
int count = 0;
while (p <= ep) {
- if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec))
+ if (!re.Match(*str, static_cast<int>(p - str->data()),
+ static_cast<int>(str->size()), UNANCHORED, vec, nvec))
break;
if (p < vec[0].begin())
out.append(p, vec[0].begin() - p);
@@ -486,7 +497,7 @@ bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
if (prog_ == NULL)
return false;
- int n = prefix_.size();
+ int n = static_cast<int>(prefix_.size());
if (n > maxlen)
n = maxlen;
@@ -598,7 +609,7 @@ bool RE2::Match(const StringPiece& text,
if (!prefix_.empty()) {
if (startpos != 0)
return false;
- prefixlen = prefix_.size();
+ prefixlen = static_cast<int>(prefix_.size());
if (prefixlen > subtext.size())
return false;
if (prefix_foldcase_) {
@@ -839,8 +850,8 @@ bool RE2::DoMatch(const StringPiece& text,
return false;
}
- if(consumed != NULL)
- *consumed = vec[0].end() - text.begin();
+ if (consumed != NULL)
+ *consumed = static_cast<int>(vec[0].end() - text.begin());
if (n == 0 || args == NULL) {
// We are not interested in results
@@ -907,20 +918,6 @@ bool RE2::Rewrite(string *out, const StringPiece &rewrite,
return true;
}
-// Return the number of capturing subpatterns, or -1 if the
-// regexp wasn't valid on construction.
-int RE2::NumberOfCapturingGroups() const {
- if (suffix_regexp_ == NULL)
- return -1;
- int n;
- ATOMIC_LOAD_RELAXED(n, &num_captures_);
- if (n == -1) {
- n = suffix_regexp_->NumCaptures();
- ATOMIC_STORE_RELAXED(&num_captures_, n);
- }
- return n;
-}
-
// Checks that the rewrite string is well-formed with respect to this
// regular expression.
bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
diff --git a/re2/re2.h b/re2/re2.h
index bad75bb..5b955d0 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -447,7 +447,6 @@ class RE2 {
// does not count: if the regexp is "(a)(b)", returns 2.
int NumberOfCapturingGroups() const;
-
// Return a map from names to capturing indices.
// The map records the index of the leftmost group
// with the given name.
@@ -466,8 +465,8 @@ class RE2 {
// On a successful match, fills in match[] (up to nmatch entries)
// with information about submatches.
// I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true,
- // setting match[0] = "barbaz", match[1] = NULL, match[2] = "bar",
- // match[3] = NULL, ..., up to match[nmatch-1] = NULL.
+ // setting match[0] = "barbaz", match[1].data() = NULL, match[2] = "bar",
+ // match[3].data() = NULL, ..., up to match[nmatch-1].data() = NULL.
//
// Don't ask for more match information than you will use:
// runs much faster with nmatch == 1 than nmatch > 1, and
@@ -478,7 +477,7 @@ class RE2 {
// Passing text == StringPiece(NULL, 0) will be handled like any other
// empty string, but note that on return, it will not be possible to tell
// whether submatch i matched the empty string or did not match:
- // either way, match[i] == NULL.
+ // either way, match[i].data() == NULL.
bool Match(const StringPiece& text,
int startpos,
int endpos,
diff --git a/re2/regexp.cc b/re2/regexp.cc
index d3aa1f0..99e72e5 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -453,10 +453,11 @@ bool Regexp::Equal(Regexp* a, Regexp* b) {
continue;
}
- int n = stk.size();
+ size_t n = stk.size();
if (n == 0)
break;
+ DCHECK_GE(n, 2);
a = stk[n-2];
b = stk[n-1];
stk.resize(n-2);
@@ -677,7 +678,7 @@ bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {
}
break;
}
- *foldcase = (sub[i]->parse_flags() & FoldCase);
+ *foldcase = (sub[i]->parse_flags() & FoldCase) != 0;
i++;
// The rest.
diff --git a/re2/regexp.h b/re2/regexp.h
index b49ce0d..5f222b7 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -313,7 +313,7 @@ class Regexp {
// Get. No set, Regexps are logically immutable once created.
RegexpOp op() { return static_cast<RegexpOp>(op_); }
int nsub() { return nsub_; }
- bool simple() { return simple_; }
+ bool simple() { return simple_ != 0; }
enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
int Ref(); // For testing.
diff --git a/re2/set.cc b/re2/set.cc
index 5363ef7..a1a84ba 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -45,7 +45,7 @@ int RE2::Set::Add(const StringPiece& pattern, string* error) {
}
// Concatenate with match index and push on vector.
- int n = re_.size();
+ int n = static_cast<int>(re_.size());
re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
if (re->op() == kRegexpConcat) {
int nsub = re->nsub();
@@ -76,7 +76,7 @@ bool RE2::Set::Compile() {
Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
options_.ParseFlags());
re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(re_.data()),
- re_.size(), pf);
+ static_cast<int>(re_.size()), pf);
re_.clear();
re2::Regexp* sre = re->Simplify();
re->Decref();
diff --git a/re2/simplify.cc b/re2/simplify.cc
index d14483f..ecc60e7 100644
--- a/re2/simplify.cc
+++ b/re2/simplify.cc
@@ -61,7 +61,7 @@ bool Regexp::ComputeSimple() {
// These are simple as long as the subpieces are simple.
subs = sub();
for (int i = 0; i < nsub_; i++)
- if (!subs[i]->simple_)
+ if (!subs[i]->simple())
return false;
return true;
case kRegexpCharClass:
@@ -71,12 +71,12 @@ bool Regexp::ComputeSimple() {
return !cc_->empty() && !cc_->full();
case kRegexpCapture:
subs = sub();
- return subs[0]->simple_;
+ return subs[0]->simple();
case kRegexpStar:
case kRegexpPlus:
case kRegexpQuest:
subs = sub();
- if (!subs[0]->simple_)
+ if (!subs[0]->simple())
return false;
switch (subs[0]->op_) {
case kRegexpStar:
@@ -438,7 +438,7 @@ Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
}
Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
- if (re->simple_) {
+ if (re->simple()) {
*stop = true;
return re->Incref();
}
diff --git a/re2/stringpiece.cc b/re2/stringpiece.cc
index c243527..00f478a 100644
--- a/re2/stringpiece.cc
+++ b/re2/stringpiece.cc
@@ -37,17 +37,19 @@ void StringPiece::AppendToString(string* target) const {
target->append(ptr_, length_);
}
-int StringPiece::copy(char* buf, size_type n, size_type pos) const {
- int ret = min(length_ - pos, n);
+StringPiece::size_type StringPiece::copy(char* buf, size_type n,
+ size_type pos) const {
+ size_type ret = min(length_ - pos, n);
memcpy(buf, ptr_ + pos, ret);
return ret;
}
bool StringPiece::contains(StringPiece s) const {
- return (size_t)find(s, 0) != npos;
+ return find(s, 0) != npos;
}
-int StringPiece::find(const StringPiece& s, size_type pos) const {
+StringPiece::size_type StringPiece::find(const StringPiece& s,
+ size_type pos) const {
if (length_ < 0 || pos > static_cast<size_type>(length_))
return npos;
@@ -57,7 +59,7 @@ int StringPiece::find(const StringPiece& s, size_type pos) const {
return xpos + s.length_ <= static_cast<size_type>(length_) ? xpos : npos;
}
-int StringPiece::find(char c, size_type pos) const {
+StringPiece::size_type StringPiece::find(char c, size_type pos) const {
if (length_ <= 0 || pos >= static_cast<size_type>(length_)) {
return npos;
}
@@ -65,9 +67,10 @@ int StringPiece::find(char c, size_type pos) const {
return result != ptr_ + length_ ? result - ptr_ : npos;
}
-int StringPiece::rfind(const StringPiece& s, size_type pos) const {
+StringPiece::size_type StringPiece::rfind(const StringPiece& s,
+ size_type pos) const {
if (length_ < s.length_) return npos;
- const size_t ulen = length_;
+ const size_type ulen = length_;
if (s.length_ == 0) return min(ulen, pos);
const char* last = ptr_ + min(ulen - s.length_, pos) + s.length_;
@@ -75,9 +78,9 @@ int StringPiece::rfind(const StringPiece& s, size_type pos) const {
return result != last ? result - ptr_ : npos;
}
-int StringPiece::rfind(char c, size_type pos) const {
+StringPiece::size_type StringPiece::rfind(char c, size_type pos) const {
if (length_ <= 0) return npos;
- for (int i = min(pos, static_cast<size_type>(length_ - 1));
+ for (int i = static_cast<int>(min(pos, static_cast<size_type>(length_ - 1)));
i >= 0; --i) {
if (ptr_[i] == c) {
return i;
@@ -89,7 +92,7 @@ int StringPiece::rfind(char c, size_type pos) const {
StringPiece StringPiece::substr(size_type pos, size_type n) const {
if (pos > static_cast<size_type>(length_)) pos = static_cast<size_type>(length_);
if (n > length_ - pos) n = length_ - pos;
- return StringPiece(ptr_ + pos, n);
+ return StringPiece(ptr_ + pos, static_cast<int>(n));
}
const StringPiece::size_type StringPiece::npos = size_type(-1);
diff --git a/re2/stringpiece.h b/re2/stringpiece.h
index bc8bf40..1479d1a 100644
--- a/re2/stringpiece.h
+++ b/re2/stringpiece.h
@@ -137,17 +137,17 @@ class StringPiece {
int max_size() const { return length_; }
int capacity() const { return length_; }
- int copy(char* buf, size_type n, size_type pos = 0) const;
+ size_type copy(char* buf, size_type n, size_type pos = 0) const;
bool contains(StringPiece s) const;
- int find(const StringPiece& s, size_type pos = 0) const;
- int find(char c, size_type pos = 0) const;
- int rfind(const StringPiece& s, size_type pos = npos) const;
- int rfind(char c, size_type pos = npos) const;
+ size_type find(const StringPiece& s, size_type pos = 0) const;
+ size_type find(char c, size_type pos = 0) const;
+ size_type rfind(const StringPiece& s, size_type pos = npos) const;
+ size_type rfind(char c, size_type pos = npos) const;
StringPiece substr(size_type pos, size_type n = npos) const;
-
+
static bool _equal(const StringPiece&, const StringPiece&);
};
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index b2dd6db..a872840 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -72,7 +72,7 @@ class Backtracker {
// Search state
const char* cap_[64]; // capture registers
uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked
- int nvisited_; // # of words in bitmap
+ size_t nvisited_; // # of words in bitmap
};
Backtracker::Backtracker(Prog* prog)
@@ -150,7 +150,7 @@ bool Backtracker::Visit(int id, const char* p) {
// either it didn't match or it did but we're hoping for a better match.
// Either way, don't go down that road again.
CHECK(p <= text_.end());
- int n = id*(text_.size()+1) + (p - text_.begin());
+ size_t n = id*(text_.size()+1) + (p - text_.begin());
CHECK_LT(n/32, nvisited_);
if (visited_[n/32] & (1 << (n&31)))
return false;
@@ -212,7 +212,8 @@ bool Backtracker::Visit(int id, const char* p) {
if (submatch_[0].data() == NULL || // First match so far ...
(longest_ && p > submatch_[0].end())) { // ... or better match
for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
+ submatch_[i].set(cap_[2*i],
+ static_cast<int>(cap_[2*i+1] - cap_[2*i]));
}
return true;
diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc
index d438b19..dee90a3 100644
--- a/re2/testing/compile_test.cc
+++ b/re2/testing/compile_test.cc
@@ -172,4 +172,90 @@ TEST(TestCompile, ByteRanges) {
re->Decref();
}
+static void Dump(StringPiece pattern, Regexp::ParseFlags flags,
+ string* forward, string* reverse) {
+ Regexp* re = Regexp::Parse(pattern, flags, NULL);
+ EXPECT_TRUE(re != NULL);
+
+ if (forward != NULL) {
+ Prog* prog = re->CompileToProg(0);
+ EXPECT_TRUE(prog != NULL);
+ *forward = prog->Dump();
+ delete prog;
+ }
+
+ if (reverse != NULL) {
+ Prog* prog = re->CompileToReverseProg(0);
+ EXPECT_TRUE(prog != NULL);
+ *reverse = prog->Dump();
+ delete prog;
+ }
+
+ re->Decref();
+}
+
+TEST(TestCompile, Bug26705922) {
+ // Bug in the compiler caused inefficient bytecode to be generated for Unicode
+ // groups: common suffixes were cached, but common prefixes were not factored.
+
+ string forward, reverse;
+
+ Dump("[\\x{10000}\\x{10010}]", Regexp::LikePerl, &forward, &reverse);
+ EXPECT_EQ("4. byte [f0-f0] -> 3\n"
+ "3. byte [90-90] -> 2\n"
+ "2. byte [80-80] -> 6\n"
+ "6. alt -> 1 | 5\n"
+ "1. byte [80-80] -> 7\n"
+ "5. byte [90-90] -> 7\n"
+ "7. match! 0\n",
+ forward);
+ EXPECT_EQ("6. alt -> 4 | 5\n"
+ "4. byte [80-80] -> 3\n"
+ "5. byte [90-90] -> 3\n"
+ "3. byte [80-80] -> 2\n"
+ "2. byte [90-90] -> 1\n"
+ "1. byte [f0-f0] -> 7\n"
+ "7. match! 0\n",
+ reverse);
+
+ Dump("[\\x{8000}-\\x{10FFF}]", Regexp::LikePerl, &forward, &reverse);
+ EXPECT_EQ("6. alt -> 3 | 5\n"
+ "3. byte [e8-ef] -> 2\n"
+ "5. byte [f0-f0] -> 4\n"
+ "2. byte [80-bf] -> 1\n"
+ "4. byte [90-90] -> 2\n"
+ "1. byte [80-bf] -> 7\n"
+ "7. match! 0\n",
+ forward);
+ EXPECT_EQ("3. byte [80-bf] -> 2\n"
+ "2. byte [80-bf] -> 6\n"
+ "6. alt -> 1 | 5\n"
+ "1. byte [e8-ef] -> 7\n"
+ "5. byte [90-90] -> 4\n"
+ "7. match! 0\n"
+ "4. byte [f0-f0] -> 7\n",
+ reverse);
+
+ Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse);
+ EXPECT_EQ("2. byte [80-bf] -> 8\n"
+ "8. alt -> 5 | 7\n"
+ "5. alt -> 1 | 4\n"
+ "7. byte [80-bf] -> 17\n"
+ "1. byte [c2-df] -> 18\n"
+ "4. byte [a0-bf] -> 3\n"
+ "17. alt -> 14 | 16\n"
+ "18. match! 0\n"
+ "3. byte [e0-e0] -> 18\n"
+ "14. alt -> 11 | 13\n"
+ "16. byte [80-8f] -> 15\n"
+ "11. alt -> 6 | 10\n"
+ "13. byte [80-bf] -> 12\n"
+ "15. byte [f4-f4] -> 18\n"
+ "6. byte [e1-ef] -> 18\n"
+ "10. byte [90-bf] -> 9\n"
+ "12. byte [f1-f3] -> 18\n"
+ "9. byte [f0-f0] -> 18\n",
+ reverse);
+}
+
} // namespace re2
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 6294d03..e9c7bef 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -10,6 +10,8 @@
#include "re2/testing/regexp_generator.h"
#include "re2/testing/string_generator.h"
+static const bool UsingMallocCounter = false;
+
DECLARE_bool(re2_dfa_bail_when_slow);
DEFINE_int32(size, 8, "log2(number of DFA nodes)");
@@ -92,14 +94,13 @@ TEST(SingleThreaded, BuildEntireDFA) {
s += "[ab]";
s += "b";
- //LOG(INFO) << s;
Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
CHECK(re);
int max = 24;
for (int i = 17; i < max; i++) {
- int limit = 1<<i;
- int usage;
- //int progusage, dfamem;
+ int64 limit = 1<<i;
+ int64 usage;
+ //int64 progusage, dfamem;
{
testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
Prog* prog = re->CompileToProg(limit);
@@ -113,8 +114,10 @@ TEST(SingleThreaded, BuildEntireDFA) {
}
if (!UsingMallocCounter)
continue;
- //LOG(INFO) << StringPrintf("Limit %d: prog used %d, DFA budget %d, total %d\n",
- // limit, progusage, dfamem, usage);
+ //LOG(INFO) << "limit " << limit << ", "
+ // << "prog usage " << progusage << ", "
+ // << "DFA budget " << dfamem << ", "
+ // << "total " << usage;
// Tolerate +/- 10%.
CHECK_GT(usage, limit*9/10);
CHECK_LT(usage, limit*11/10);
@@ -133,7 +136,7 @@ TEST(SingleThreaded, BuildEntireDFA) {
// position in the input, never reusing any states until it gets to the
// end of the string. This is the worst possible case for DFA execution.
static string DeBruijnString(int n) {
- CHECK_LT(n, 8*sizeof(int));
+ CHECK_LT(n, static_cast<int>(8*sizeof(int)));
CHECK_GT(n, 0);
vector<bool> did(1<<n);
@@ -222,13 +225,13 @@ TEST(SingleThreaded, SearchDFA) {
peak_usage = m.PeakHeapGrowth();
delete prog;
}
- re->Decref();
-
if (!UsingMallocCounter)
return;
- //LOG(INFO) << "usage " << usage << " " << peak_usage;
+ //LOG(INFO) << "usage " << usage << ", "
+ // << "peak usage " << peak_usage;
CHECK_LT(usage, 1<<n);
CHECK_LT(peak_usage, 1<<n);
+ re->Decref();
}
// Helper thread: searches for match, which should match,
diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc
index c5fec5b..6dc5016 100644
--- a/re2/testing/exhaustive2_test.cc
+++ b/re2/testing/exhaustive2_test.cc
@@ -23,7 +23,7 @@ TEST(EmptyString, Exhaustive) {
TEST(Punctuation, Literals) {
vector<string> alphabet = Explode("()*+?{}[]\\^$.");
vector<string> escaped = alphabet;
- for (int i = 0; i < escaped.size(); i++)
+ for (size_t i = 0; i < escaped.size(); i++)
escaped[i] = "\\" + escaped[i];
ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
2, alphabet, "", "");
diff --git a/re2/testing/exhaustive3_test.cc b/re2/testing/exhaustive3_test.cc
index 5613fcb..6e46bb4 100644
--- a/re2/testing/exhaustive3_test.cc
+++ b/re2/testing/exhaustive3_test.cc
@@ -84,7 +84,7 @@ TEST(InterestingUTF8, AB) {
"[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
vector<string> ops; // no ops
vector<string> alpha = InterestingUTF8();
- for (int i = 0; i < alpha.size(); i++)
+ for (size_t i = 0; i < alpha.size(); i++)
alpha[i] = "a" + alpha[i] + "b";
ExhaustiveTest(1, 0, atoms, ops,
1, alpha, "a%sb", "");
diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc
index f4376ee..76c1284 100644
--- a/re2/testing/filtered_re2_test.cc
+++ b/re2/testing/filtered_re2_test.cc
@@ -189,18 +189,16 @@ TEST(FilteredRE2Test, AtomTests) {
EXPECT_EQ(0, nfail);
}
-void FindAtomIndices(const vector<string> atoms,
- const vector<string> matched_atoms,
+void FindAtomIndices(const vector<string>& atoms,
+ const vector<string>& matched_atoms,
vector<int>* atom_indices) {
atom_indices->clear();
for (size_t i = 0; i < matched_atoms.size(); i++) {
- size_t j = 0;
- for (; j < atoms.size(); j++) {
+ for (size_t j = 0; j < atoms.size(); j++) {
if (matched_atoms[i] == atoms[j]) {
- atom_indices->push_back(j);
+ atom_indices->push_back(static_cast<int>(j));
break;
}
- EXPECT_LT(j, atoms.size());
}
}
}
diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc
index 29a79b1..e204ce1 100644
--- a/re2/testing/parse_test.cc
+++ b/re2/testing/parse_test.cc
@@ -133,6 +133,9 @@ static Test tests[] = {
{ "\\Q+\\E+", "plus{lit{+}}" },
{ "\\Q\\\\E", "lit{\\}" },
{ "\\Q\\\\\\E", "str{\\\\}" },
+ { "\\Qa\\E*", "star{lit{a}}" },
+ { "\\Qab\\E*", "cat{lit{a}star{lit{b}}}" },
+ { "\\Qabc\\E*", "cat{str{ab}star{lit{c}}}" },
// Test Perl \A and \z
{ "(?m)^", "bol{}" },
@@ -300,8 +303,8 @@ Test prefix_tests[] = {
{ "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" },
{ "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" },
{ "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" },
- { "(?:xx|yy)c|(?:xx|yy)d",
- "cat{alt{str{xx}str{yy}}cc{0x63-0x64}}" },
+ { ".c|.d", "cat{cc{0-0x9 0xb-0x10ffff}cc{0x63-0x64}}" },
+ { "\\Cc|\\Cd", "cat{byte{}cc{0x63-0x64}}" },
{ "x{2}|x{2}[0-9]",
"cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" },
{ "x{2}y|x{2}[0-9]y",
@@ -314,6 +317,10 @@ Test prefix_tests[] = {
"alt{cat{lit{r}alt{emp{}lit{s}}}lit{n}}" },
{ "rs|r|n",
"alt{cat{lit{r}alt{lit{s}emp{}}}lit{n}}" },
+ { "a\\C*?c|a\\C*?b",
+ "cat{lit{a}alt{cat{nstar{byte{}}lit{c}}cat{nstar{byte{}}lit{b}}}}" },
+ { "^/a/bc|^/a/de",
+ "cat{bol{}cat{str{/a/}alt{str{bc}str{de}}}}" },
};
// Test that prefix factoring works.
@@ -362,6 +369,7 @@ const char* badtests[] = {
"a{100000,}",
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
"(((x{7}){11}){13})",
+ "\\Q\\E*",
};
// Valid in Perl, bad in POSIX
diff --git a/re2/testing/re2_arg_test.cc b/re2/testing/re2_arg_test.cc
index ad6c936..d843ffa 100644
--- a/re2/testing/re2_arg_test.cc
+++ b/re2/testing/re2_arg_test.cc
@@ -95,7 +95,7 @@ const int kNumStrings = arraysize(kSuccessTable);
for (int i = 0; i < kNumStrings; ++i) { \
RE2::Arg arg(&r); \
const char* const p = kSuccessTable[i].value_string; \
- bool retval = arg.Parse(p, strlen(p)); \
+ bool retval = arg.Parse(p, static_cast<int>(strlen(p))); \
bool success = kSuccessTable[i].success[column]; \
EXPECT_EQ(retval, success) \
<< "Parsing '" << p << "' for type " #type " should return " \
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index 78e0bc5..e201c1e 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -176,7 +176,7 @@ TEST(RE2, Replace) {
{ "", NULL, NULL, NULL, NULL, 0 }
};
- for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
+ for (const ReplaceTest* t = tests; t->original != NULL; t++) {
VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite);
string one(t->original);
CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
@@ -369,12 +369,12 @@ TEST(RE2, Match) {
CHECK_EQ(port, 9000);
}
-static void TestRecursion(int size, const char *pattern) {
+static void TestRecursion(int size, const char* pattern) {
// Fill up a string repeating the pattern given
string domain;
domain.resize(size);
- int patlen = strlen(pattern);
- for (int i = 0; i < size; ++i) {
+ size_t patlen = strlen(pattern);
+ for (int i = 0; i < size; i++) {
domain[i] = pattern[i % patlen];
}
// Just make sure it doesn't crash due to too much recursion.
@@ -1410,12 +1410,56 @@ TEST(RE2, UnicodeClasses) {
// Bug reported by saito. 2009/02/17
TEST(RE2, NullVsEmptyString) {
- RE2 re2(".*");
- StringPiece v1("");
- EXPECT_TRUE(RE2::FullMatch(v1, re2));
+ RE2 re(".*");
+ EXPECT_TRUE(re.ok());
+
+ StringPiece null;
+ EXPECT_TRUE(RE2::FullMatch(null, re));
- StringPiece v2;
- EXPECT_TRUE(RE2::FullMatch(v2, re2));
+ StringPiece empty("");
+ EXPECT_TRUE(RE2::FullMatch(empty, re));
+}
+
+// Similar to the previous test, check that the null string and the empty
+// string both match, but also that the null string can only provide null
+// submatches whereas the empty string can also provide empty submatches.
+TEST(RE2, NullVsEmptyStringSubmatches) {
+ RE2 re("()|(foo)");
+ EXPECT_TRUE(re.ok());
+
+ // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
+ StringPiece matches[4];
+
+ for (int i = 0; i < arraysize(matches); i++)
+ matches[i] = "bar";
+
+ StringPiece null;
+ EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
+ matches, arraysize(matches)));
+ for (int i = 0; i < arraysize(matches); i++) {
+ EXPECT_TRUE(matches[i] == NULL);
+ EXPECT_TRUE(matches[i].data() == NULL); // always null
+ EXPECT_TRUE(matches[i] == "");
+ }
+
+ for (int i = 0; i < arraysize(matches); i++)
+ matches[i] = "bar";
+
+ StringPiece empty("");
+ EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
+ matches, arraysize(matches)));
+ EXPECT_TRUE(matches[0] == NULL);
+ EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
+ EXPECT_TRUE(matches[0] == "");
+ EXPECT_TRUE(matches[1] == NULL);
+ EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
+ EXPECT_TRUE(matches[1] == "");
+ EXPECT_TRUE(matches[2] == NULL);
+ EXPECT_TRUE(matches[2].data() == NULL);
+ EXPECT_TRUE(matches[2] == "");
+ EXPECT_TRUE(matches[3] == NULL);
+ EXPECT_TRUE(matches[3].data() == NULL);
+ EXPECT_TRUE(matches[3] == "");
}
// Issue 1816809
@@ -1529,4 +1573,23 @@ TEST(RE2, Bug21371806) {
CHECK(re.ok());
}
+TEST(RE2, Bug26356109) {
+ // Bug in parser caused by factoring of common prefixes in alternations.
+
+ // In the past, this was factored to "a\\C*?[bc]". Thus, the automaton would
+ // consume "ab" and then stop (when unanchored) whereas it should consume all
+ // of "abc" as per first-match semantics.
+ RE2 re("a\\C*?c|a\\C*?b");
+ CHECK(re.ok());
+
+ string s = "abc";
+ StringPiece m;
+
+ CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+ CHECK_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
+
+ CHECK(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
+ CHECK_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
+}
+
} // namespace re2
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index bc8daaa..141bad1 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -265,6 +265,7 @@ BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPU
BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
// Benchmark: FindAndConsume
+
void FindAndConsume(int iters, int nbytes) {
StopBenchmarkTiming();
string s;
@@ -286,9 +287,11 @@ BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
// Benchmark: successful anchored search.
void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
+ StopBenchmarkTiming();
string s;
MakeText(&s, nbytes);
BenchmarkMemoryUsage();
+ StartBenchmarkTiming();
search(iters, regexp, s, Prog::kAnchored, true);
SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
}
@@ -346,11 +349,9 @@ BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCP
// Benchmark: use regexp to find phone number.
void SearchDigits(int iters, SearchImpl* search) {
- const char *text = "650-253-0001";
- int len = strlen(text);
+ StringPiece s("650-253-0001");
BenchmarkMemoryUsage();
- search(iters, "([0-9]+)-([0-9]+)-([0-9]+)",
- StringPiece(text, len), Prog::kAnchored, true);
+ search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
SetBenchmarkItemsProcessed(iters);
}
@@ -688,7 +689,6 @@ BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
-
// Makes text of size nbytes, then calls run to search
// the text for regexp iters times.
void SearchPhone(int iters, int nbytes, ParseImpl* search) {
@@ -1344,14 +1344,14 @@ BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
#endif
BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
-static string http_smalltext =
+static string smallhttp_text =
"GET /abc HTTP/1.1";
void SmallHTTPPartialMatchPCRE(int n) {
StringPiece a;
PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
for (int i = 0; i < n; i++) {
- PCRE::PartialMatch(http_text, re, &a);
+ PCRE::PartialMatch(smallhttp_text, re, &a);
}
}
@@ -1359,7 +1359,7 @@ void SmallHTTPPartialMatchRE2(int n) {
StringPiece a;
RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
for (int i = 0; i < n; i++) {
- RE2::PartialMatch(http_text, re, &a);
+ RE2::PartialMatch(smallhttp_text, re, &a);
}
}
diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc
index d10b9a8..fd085db 100644
--- a/re2/testing/regexp_generator.cc
+++ b/re2/testing/regexp_generator.cc
@@ -134,7 +134,7 @@ void RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk,
// Generates a random postfix command sequence.
// Stops and returns true once a single sequence has been generated.
-bool RegexpGenerator::GenerateRandomPostfix(vector<string> *post, int nstk,
+bool RegexpGenerator::GenerateRandomPostfix(vector<string>* post, int nstk,
int ops, int atoms) {
for (;;) {
// Stop if we get to a single element, but only sometimes.
@@ -151,7 +151,7 @@ bool RegexpGenerator::GenerateRandomPostfix(vector<string> *post, int nstk,
// Add operators if there are enough arguments.
if (ops < maxops_ && acm_->Uniform(2) == 0) {
- const string& fmt = ops_[acm_->Uniform(ops_.size())];
+ const string& fmt = ops_[acm_->Uniform(static_cast<int32>(ops_.size()))];
int nargs = CountArgs(fmt);
if (nargs <= nstk) {
post->push_back(fmt);
@@ -165,7 +165,7 @@ bool RegexpGenerator::GenerateRandomPostfix(vector<string> *post, int nstk,
// Add atoms if there is room.
if (atoms < maxatoms_ && acm_->Uniform(2) == 0) {
- post->push_back(atoms_[acm_->Uniform(atoms_.size())]);
+ post->push_back(atoms_[acm_->Uniform(static_cast<int32>(atoms_.size()))]);
bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1);
post->pop_back();
if (ret)
diff --git a/re2/testing/regexp_test.cc b/re2/testing/regexp_test.cc
index a0e7f0b..31c76a3 100644
--- a/re2/testing/regexp_test.cc
+++ b/re2/testing/regexp_test.cc
@@ -32,7 +32,8 @@ TEST(Regexp, BigConcat) {
for (size_t i = 0; i < v.size(); i++)
x->Incref();
CHECK_EQ(x->Ref(), 1 + static_cast<int>(v.size())) << x->Ref();
- Regexp* re = Regexp::Concat(&v[0], v.size(), Regexp::NoParseFlags);
+ Regexp* re = Regexp::Concat(v.data(), static_cast<int>(v.size()),
+ Regexp::NoParseFlags);
CHECK_EQ(re->ToString(), string(v.size(), 'x'));
re->Decref();
CHECK_EQ(x->Ref(), 1) << x->Ref();
diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc
index 728ce17..f96ff20 100644
--- a/re2/testing/string_generator.cc
+++ b/re2/testing/string_generator.cc
@@ -43,14 +43,14 @@ void StringGenerator::Reset() {
// Returns false if all the numbers have been used.
bool StringGenerator::IncrementDigits() {
// First try to increment the current number.
- for (int i = digits_.size() - 1; i >= 0; i--) {
+ for (int i = static_cast<int>(digits_.size()) - 1; i >= 0; i--) {
if (++digits_[i] < static_cast<int>(alphabet_.size()))
return true;
digits_[i] = 0;
}
// If that failed, make a longer number.
- if (digits_.size() < static_cast<size_t>(maxlen_)) {
+ if (static_cast<int>(digits_.size()) < maxlen_) {
digits_.push_back(0);
return true;
}
@@ -68,7 +68,7 @@ bool StringGenerator::RandomDigits() {
int len = acm_->Uniform(maxlen_+1);
digits_.resize(len);
for (int i = 0; i < len; i++)
- digits_[i] = acm_->Uniform(alphabet_.size());
+ digits_[i] = acm_->Uniform(static_cast<int32>(alphabet_.size()));
return true;
}
@@ -110,4 +110,3 @@ void StringGenerator::GenerateNULL() {
}
} // namespace re2
-
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index 20b12e7..cb12bad 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -392,10 +392,13 @@ void TestInstance::RunSearch(Engine type,
if (kind_ == Prog::kFullMatch)
re_anchor = RE2::ANCHOR_BOTH;
- result->matched = re2_->Match(context,
- text.begin() - context.begin(),
- text.end() - context.begin(),
- re_anchor, result->submatch, nsubmatch);
+ result->matched = re2_->Match(
+ context,
+ static_cast<int>(text.begin() - context.begin()),
+ static_cast<int>(text.end() - context.begin()),
+ re_anchor,
+ result->submatch,
+ nsubmatch);
result->have_submatch = nsubmatch > 0;
break;
}
diff --git a/re2/tostring.cc b/re2/tostring.cc
index c59d4d9..0230c8c 100644
--- a/re2/tostring.cc
+++ b/re2/tostring.cc
@@ -156,12 +156,14 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
break;
case kRegexpLiteral:
- AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase);
+ AppendLiteral(t_, re->rune(),
+ (re->parse_flags() & Regexp::FoldCase) != 0);
break;
case kRegexpLiteralString:
for (int i = 0; i < re->nrunes(); i++)
- AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase);
+ AppendLiteral(t_, re->runes()[i],
+ (re->parse_flags() & Regexp::FoldCase) != 0);
if (prec < PrecConcat)
t_->append(")");
break;
diff --git a/re2/unicode.py b/re2/unicode.py
index 6dfe87b..4b2240c 100644
--- a/re2/unicode.py
+++ b/re2/unicode.py
@@ -9,7 +9,7 @@ import re
import urllib2
# Directory or URL where Unicode tables reside.
-_UNICODE_DIR = "http://www.unicode.org/Public/6.3.0/ucd"
+_UNICODE_DIR = "http://www.unicode.org/Public/8.0.0/ucd"
# Largest valid Unicode code value.
_RUNE_MAX = 0x10FFFF
diff --git a/re2/unicode_casefold.cc b/re2/unicode_casefold.cc
index 2293cc7..91a96b4 100644
--- a/re2/unicode_casefold.cc
+++ b/re2/unicode_casefold.cc
@@ -7,7 +7,7 @@
namespace re2 {
-// 1034 groups, 2089 pairs, 289 ranges
+// 1224 groups, 2469 pairs, 314 ranges
const CaseFold unicode_casefold[] = {
{ 65, 90, 32 },
{ 97, 106, -32 },
@@ -105,13 +105,16 @@ const CaseFold unicode_casefold[] = {
{ 598, 599, -205 },
{ 601, 601, -202 },
{ 603, 603, -203 },
+ { 604, 604, 42319 },
{ 608, 608, -205 },
+ { 609, 609, 42315 },
{ 611, 611, -207 },
{ 613, 613, 42280 },
{ 614, 614, 42308 },
{ 616, 616, -209 },
{ 617, 617, -211 },
{ 619, 619, 10743 },
+ { 620, 620, 42305 },
{ 623, 623, -211 },
{ 625, 625, 10749 },
{ 626, 626, -213 },
@@ -119,15 +122,19 @@ const CaseFold unicode_casefold[] = {
{ 637, 637, 10727 },
{ 640, 640, -218 },
{ 643, 643, -218 },
+ { 647, 647, 42282 },
{ 648, 648, -218 },
{ 649, 649, -69 },
{ 650, 651, -217 },
{ 652, 652, -71 },
{ 658, 658, -219 },
+ { 669, 669, 42261 },
+ { 670, 670, 42258 },
{ 837, 837, 84 },
{ 880, 883, EvenOdd },
{ 886, 887, EvenOdd },
{ 891, 893, 130 },
+ { 895, 895, 116 },
{ 902, 902, 38 },
{ 904, 906, 37 },
{ 908, 908, 64 },
@@ -168,6 +175,7 @@ const CaseFold unicode_casefold[] = {
{ 1008, 1008, -86 },
{ 1009, 1009, -80 },
{ 1010, 1010, 7 },
+ { 1011, 1011, -116 },
{ 1012, 1012, -92 },
{ 1013, 1013, -96 },
{ 1015, 1016, OddEven },
@@ -183,12 +191,15 @@ const CaseFold unicode_casefold[] = {
{ 1216, 1216, 15 },
{ 1217, 1230, OddEven },
{ 1231, 1231, -15 },
- { 1232, 1319, EvenOdd },
+ { 1232, 1327, EvenOdd },
{ 1329, 1366, 48 },
{ 1377, 1414, -48 },
{ 4256, 4293, 7264 },
{ 4295, 4295, 7264 },
{ 4301, 4301, 7264 },
+ { 5024, 5103, 38864 },
+ { 5104, 5109, 8 },
+ { 5112, 5117, -8 },
{ 7545, 7545, 35332 },
{ 7549, 7549, 3814 },
{ 7680, 7776, EvenOdd },
@@ -283,7 +294,7 @@ const CaseFold unicode_casefold[] = {
{ 11559, 11559, -7264 },
{ 11565, 11565, -7264 },
{ 42560, 42605, EvenOdd },
- { 42624, 42647, EvenOdd },
+ { 42624, 42651, EvenOdd },
{ 42786, 42799, EvenOdd },
{ 42802, 42863, EvenOdd },
{ 42873, 42876, OddEven },
@@ -292,16 +303,30 @@ const CaseFold unicode_casefold[] = {
{ 42891, 42892, OddEven },
{ 42893, 42893, -42280 },
{ 42896, 42899, EvenOdd },
- { 42912, 42921, EvenOdd },
+ { 42902, 42921, EvenOdd },
{ 42922, 42922, -42308 },
+ { 42923, 42923, -42319 },
+ { 42924, 42924, -42315 },
+ { 42925, 42925, -42305 },
+ { 42928, 42928, -42258 },
+ { 42929, 42929, -42282 },
+ { 42930, 42930, -42261 },
+ { 42931, 42931, 928 },
+ { 42932, 42935, EvenOdd },
+ { 43859, 43859, -928 },
+ { 43888, 43967, -38864 },
{ 65313, 65338, 32 },
{ 65345, 65370, -32 },
{ 66560, 66599, 40 },
{ 66600, 66639, -40 },
+ { 68736, 68786, 64 },
+ { 68800, 68850, -64 },
+ { 71840, 71871, 32 },
+ { 71872, 71903, -32 },
};
-const int num_unicode_casefold = 289;
+const int num_unicode_casefold = 314;
-// 1034 groups, 1055 pairs, 167 ranges
+// 1224 groups, 1245 pairs, 180 ranges
const CaseFold unicode_tolower[] = {
{ 65, 90, 32 },
{ 181, 181, 775 },
@@ -370,6 +395,7 @@ const CaseFold unicode_tolower[] = {
{ 837, 837, 116 },
{ 880, 882, EvenOddSkip },
{ 886, 886, EvenOdd },
+ { 895, 895, 116 },
{ 902, 902, 38 },
{ 904, 906, 37 },
{ 908, 908, 64 },
@@ -397,11 +423,12 @@ const CaseFold unicode_tolower[] = {
{ 1162, 1214, EvenOddSkip },
{ 1216, 1216, 15 },
{ 1217, 1229, OddEvenSkip },
- { 1232, 1318, EvenOddSkip },
+ { 1232, 1326, EvenOddSkip },
{ 1329, 1366, 48 },
{ 4256, 4293, 7264 },
{ 4295, 4295, 7264 },
{ 4301, 4301, 7264 },
+ { 5112, 5117, -8 },
{ 7680, 7828, EvenOddSkip },
{ 7835, 7835, -58 },
{ 7838, 7838, -7615 },
@@ -457,7 +484,7 @@ const CaseFold unicode_tolower[] = {
{ 11499, 11501, OddEvenSkip },
{ 11506, 11506, EvenOdd },
{ 42560, 42604, EvenOddSkip },
- { 42624, 42646, EvenOddSkip },
+ { 42624, 42650, EvenOddSkip },
{ 42786, 42798, EvenOddSkip },
{ 42802, 42862, EvenOddSkip },
{ 42873, 42875, OddEvenSkip },
@@ -466,12 +493,23 @@ const CaseFold unicode_tolower[] = {
{ 42891, 42891, OddEven },
{ 42893, 42893, -42280 },
{ 42896, 42898, EvenOddSkip },
- { 42912, 42920, EvenOddSkip },
+ { 42902, 42920, EvenOddSkip },
{ 42922, 42922, -42308 },
+ { 42923, 42923, -42319 },
+ { 42924, 42924, -42315 },
+ { 42925, 42925, -42305 },
+ { 42928, 42928, -42258 },
+ { 42929, 42929, -42282 },
+ { 42930, 42930, -42261 },
+ { 42931, 42931, 928 },
+ { 42932, 42934, EvenOddSkip },
+ { 43888, 43967, -38864 },
{ 65313, 65338, 32 },
{ 66560, 66599, 40 },
+ { 68736, 68786, 64 },
+ { 71840, 71871, 32 },
};
-const int num_unicode_tolower = 167;
+const int num_unicode_tolower = 180;
diff --git a/re2/unicode_groups.cc b/re2/unicode_groups.cc
index 0df585e..59087bc 100644
--- a/re2/unicode_groups.cc
+++ b/re2/unicode_groups.cc
@@ -53,6 +53,7 @@ static const URange16 Ps_range16[] = {
{ 11812, 11812 },
{ 11814, 11814 },
{ 11816, 11816 },
+ { 11842, 11842 },
{ 12296, 12296 },
{ 12298, 12298 },
{ 12300, 12300 },
@@ -63,7 +64,7 @@ static const URange16 Ps_range16[] = {
{ 12312, 12312 },
{ 12314, 12314 },
{ 12317, 12317 },
- { 64830, 64830 },
+ { 64831, 64831 },
{ 65047, 65047 },
{ 65077, 65077 },
{ 65079, 65079 },
@@ -97,7 +98,7 @@ static const URange32 Nl_range32[] = {
{ 66369, 66369 },
{ 66378, 66378 },
{ 66513, 66517 },
- { 74752, 74850 },
+ { 74752, 74862 },
};
static const URange16 No_range16[] = {
{ 178, 179 },
@@ -132,18 +133,34 @@ static const URange16 No_range16[] = {
static const URange32 No_range32[] = {
{ 65799, 65843 },
{ 65909, 65912 },
- { 65930, 65930 },
+ { 65930, 65931 },
+ { 66273, 66299 },
{ 66336, 66339 },
{ 67672, 67679 },
+ { 67705, 67711 },
+ { 67751, 67759 },
+ { 67835, 67839 },
{ 67862, 67867 },
+ { 68028, 68029 },
+ { 68032, 68047 },
+ { 68050, 68095 },
{ 68160, 68167 },
{ 68221, 68222 },
+ { 68253, 68255 },
+ { 68331, 68335 },
{ 68440, 68447 },
{ 68472, 68479 },
+ { 68521, 68527 },
+ { 68858, 68863 },
{ 69216, 69246 },
{ 69714, 69733 },
+ { 70113, 70132 },
+ { 71482, 71483 },
+ { 71914, 71922 },
+ { 93019, 93025 },
{ 119648, 119665 },
- { 127232, 127242 },
+ { 125127, 125135 },
+ { 127232, 127244 },
};
static const URange16 Lo_range16[] = {
{ 170, 170 },
@@ -168,14 +185,12 @@ static const URange16 Lo_range16[] = {
{ 1994, 2026 },
{ 2048, 2069 },
{ 2112, 2136 },
- { 2208, 2208 },
- { 2210, 2220 },
+ { 2208, 2228 },
{ 2308, 2361 },
{ 2365, 2365 },
{ 2384, 2384 },
{ 2392, 2401 },
- { 2418, 2423 },
- { 2425, 2431 },
+ { 2418, 2432 },
{ 2437, 2444 },
{ 2447, 2448 },
{ 2451, 2472 },
@@ -206,6 +221,7 @@ static const URange16 Lo_range16[] = {
{ 2749, 2749 },
{ 2768, 2768 },
{ 2784, 2785 },
+ { 2809, 2809 },
{ 2821, 2828 },
{ 2831, 2832 },
{ 2835, 2856 },
@@ -230,10 +246,9 @@ static const URange16 Lo_range16[] = {
{ 3077, 3084 },
{ 3086, 3088 },
{ 3090, 3112 },
- { 3114, 3123 },
- { 3125, 3129 },
+ { 3114, 3129 },
{ 3133, 3133 },
- { 3160, 3161 },
+ { 3160, 3162 },
{ 3168, 3169 },
{ 3205, 3212 },
{ 3214, 3216 },
@@ -249,7 +264,7 @@ static const URange16 Lo_range16[] = {
{ 3346, 3386 },
{ 3389, 3389 },
{ 3406, 3406 },
- { 3424, 3425 },
+ { 3423, 3425 },
{ 3450, 3455 },
{ 3461, 3478 },
{ 3482, 3505 },
@@ -306,11 +321,11 @@ static const URange16 Lo_range16[] = {
{ 4882, 4885 },
{ 4888, 4954 },
{ 4992, 5007 },
- { 5024, 5108 },
{ 5121, 5740 },
{ 5743, 5759 },
{ 5761, 5786 },
{ 5792, 5866 },
+ { 5873, 5880 },
{ 5888, 5900 },
{ 5902, 5905 },
{ 5920, 5937 },
@@ -324,11 +339,11 @@ static const URange16 Lo_range16[] = {
{ 6272, 6312 },
{ 6314, 6314 },
{ 6320, 6389 },
- { 6400, 6428 },
+ { 6400, 6430 },
{ 6480, 6509 },
{ 6512, 6516 },
{ 6528, 6571 },
- { 6593, 6599 },
+ { 6576, 6601 },
{ 6656, 6678 },
{ 6688, 6740 },
{ 6917, 6963 },
@@ -364,7 +379,7 @@ static const URange16 Lo_range16[] = {
{ 12704, 12730 },
{ 12784, 12799 },
{ 13312, 19893 },
- { 19968, 40908 },
+ { 19968, 40917 },
{ 40960, 40980 },
{ 40982, 42124 },
{ 42192, 42231 },
@@ -373,6 +388,8 @@ static const URange16 Lo_range16[] = {
{ 42538, 42539 },
{ 42606, 42606 },
{ 42656, 42725 },
+ { 42895, 42895 },
+ { 42999, 42999 },
{ 43003, 43009 },
{ 43011, 43013 },
{ 43015, 43018 },
@@ -381,17 +398,21 @@ static const URange16 Lo_range16[] = {
{ 43138, 43187 },
{ 43250, 43255 },
{ 43259, 43259 },
+ { 43261, 43261 },
{ 43274, 43301 },
{ 43312, 43334 },
{ 43360, 43388 },
{ 43396, 43442 },
+ { 43488, 43492 },
+ { 43495, 43503 },
+ { 43514, 43518 },
{ 43520, 43560 },
{ 43584, 43586 },
{ 43588, 43595 },
{ 43616, 43631 },
{ 43633, 43638 },
{ 43642, 43642 },
- { 43648, 43695 },
+ { 43646, 43695 },
{ 43697, 43697 },
{ 43701, 43702 },
{ 43705, 43709 },
@@ -443,19 +464,29 @@ static const URange32 Lo_range32[] = {
{ 65664, 65786 },
{ 66176, 66204 },
{ 66208, 66256 },
- { 66304, 66334 },
+ { 66304, 66335 },
{ 66352, 66368 },
{ 66370, 66377 },
+ { 66384, 66421 },
{ 66432, 66461 },
{ 66464, 66499 },
{ 66504, 66511 },
{ 66640, 66717 },
+ { 66816, 66855 },
+ { 66864, 66915 },
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
{ 67584, 67589 },
{ 67592, 67592 },
{ 67594, 67637 },
{ 67639, 67640 },
{ 67644, 67644 },
{ 67647, 67669 },
+ { 67680, 67702 },
+ { 67712, 67742 },
+ { 67808, 67826 },
+ { 67828, 67829 },
{ 67840, 67861 },
{ 67872, 67897 },
{ 67968, 68023 },
@@ -465,23 +496,70 @@ static const URange32 Lo_range32[] = {
{ 68117, 68119 },
{ 68121, 68147 },
{ 68192, 68220 },
+ { 68224, 68252 },
+ { 68288, 68295 },
+ { 68297, 68324 },
{ 68352, 68405 },
{ 68416, 68437 },
{ 68448, 68466 },
+ { 68480, 68497 },
{ 68608, 68680 },
{ 69635, 69687 },
{ 69763, 69807 },
{ 69840, 69864 },
{ 69891, 69926 },
+ { 69968, 70002 },
+ { 70006, 70006 },
{ 70019, 70066 },
{ 70081, 70084 },
+ { 70106, 70106 },
+ { 70108, 70108 },
+ { 70144, 70161 },
+ { 70163, 70187 },
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70312 },
+ { 70320, 70366 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70461, 70461 },
+ { 70480, 70480 },
+ { 70493, 70497 },
+ { 70784, 70831 },
+ { 70852, 70853 },
+ { 70855, 70855 },
+ { 71040, 71086 },
+ { 71128, 71131 },
+ { 71168, 71215 },
+ { 71236, 71236 },
{ 71296, 71338 },
- { 73728, 74606 },
+ { 71424, 71449 },
+ { 71935, 71935 },
+ { 72384, 72440 },
+ { 73728, 74649 },
+ { 74880, 75075 },
{ 77824, 78894 },
+ { 82944, 83526 },
{ 92160, 92728 },
+ { 92736, 92766 },
+ { 92880, 92909 },
+ { 92928, 92975 },
+ { 93027, 93047 },
+ { 93053, 93071 },
{ 93952, 94020 },
{ 94032, 94032 },
{ 110592, 110593 },
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
+ { 124928, 125124 },
{ 126464, 126467 },
{ 126469, 126495 },
{ 126497, 126498 },
@@ -518,6 +596,7 @@ static const URange32 Lo_range32[] = {
{ 131072, 173782 },
{ 173824, 177972 },
{ 177984, 178205 },
+ { 178208, 183969 },
{ 194560, 195101 },
};
static const URange16 Ll_range16[] = {
@@ -786,7 +865,12 @@ static const URange16 Ll_range16[] = {
{ 1315, 1315 },
{ 1317, 1317 },
{ 1319, 1319 },
+ { 1321, 1321 },
+ { 1323, 1323 },
+ { 1325, 1325 },
+ { 1327, 1327 },
{ 1377, 1415 },
+ { 5112, 5117 },
{ 7424, 7467 },
{ 7531, 7543 },
{ 7545, 7578 },
@@ -1044,6 +1128,8 @@ static const URange16 Ll_range16[] = {
{ 42643, 42643 },
{ 42645, 42645 },
{ 42647, 42647 },
+ { 42649, 42649 },
+ { 42651, 42651 },
{ 42787, 42787 },
{ 42789, 42789 },
{ 42791, 42791 },
@@ -1093,19 +1179,31 @@ static const URange16 Ll_range16[] = {
{ 42892, 42892 },
{ 42894, 42894 },
{ 42897, 42897 },
- { 42899, 42899 },
+ { 42899, 42901 },
+ { 42903, 42903 },
+ { 42905, 42905 },
+ { 42907, 42907 },
+ { 42909, 42909 },
+ { 42911, 42911 },
{ 42913, 42913 },
{ 42915, 42915 },
{ 42917, 42917 },
{ 42919, 42919 },
{ 42921, 42921 },
+ { 42933, 42933 },
+ { 42935, 42935 },
{ 43002, 43002 },
+ { 43824, 43866 },
+ { 43872, 43877 },
+ { 43888, 43967 },
{ 64256, 64262 },
{ 64275, 64279 },
{ 65345, 65370 },
};
static const URange32 Ll_range32[] = {
{ 66600, 66639 },
+ { 68800, 68850 },
+ { 71872, 71903 },
{ 119834, 119859 },
{ 119886, 119892 },
{ 119894, 119911 },
@@ -1177,18 +1275,22 @@ static const URange16 Lm_range16[] = {
{ 42232, 42237 },
{ 42508, 42508 },
{ 42623, 42623 },
+ { 42652, 42653 },
{ 42775, 42783 },
{ 42864, 42864 },
{ 42888, 42888 },
{ 43000, 43001 },
{ 43471, 43471 },
+ { 43494, 43494 },
{ 43632, 43632 },
{ 43741, 43741 },
{ 43763, 43764 },
+ { 43868, 43871 },
{ 65392, 65392 },
{ 65438, 65439 },
};
static const URange32 Lm_range32[] = {
+ { 92992, 92995 },
{ 94099, 94111 },
};
static const URange16 Nd_range16[] = {
@@ -1205,6 +1307,7 @@ static const URange16 Nd_range16[] = {
{ 3174, 3183 },
{ 3302, 3311 },
{ 3430, 3439 },
+ { 3558, 3567 },
{ 3664, 3673 },
{ 3792, 3801 },
{ 3872, 3881 },
@@ -1224,6 +1327,7 @@ static const URange16 Nd_range16[] = {
{ 43216, 43225 },
{ 43264, 43273 },
{ 43472, 43481 },
+ { 43504, 43513 },
{ 43600, 43609 },
{ 44016, 44025 },
{ 65296, 65305 },
@@ -1234,7 +1338,14 @@ static const URange32 Nd_range32[] = {
{ 69872, 69881 },
{ 69942, 69951 },
{ 70096, 70105 },
+ { 70384, 70393 },
+ { 70864, 70873 },
+ { 71248, 71257 },
{ 71360, 71369 },
+ { 71472, 71481 },
+ { 71904, 71913 },
+ { 92768, 92777 },
+ { 93008, 93017 },
{ 120782, 120831 },
};
static const URange16 Pc_range16[] = {
@@ -1405,6 +1516,7 @@ static const URange16 Lu_range16[] = {
{ 880, 880 },
{ 882, 882 },
{ 886, 886 },
+ { 895, 895 },
{ 902, 902 },
{ 904, 906 },
{ 908, 908 },
@@ -1524,10 +1636,15 @@ static const URange16 Lu_range16[] = {
{ 1314, 1314 },
{ 1316, 1316 },
{ 1318, 1318 },
+ { 1320, 1320 },
+ { 1322, 1322 },
+ { 1324, 1324 },
+ { 1326, 1326 },
{ 1329, 1366 },
{ 4256, 4293 },
{ 4295, 4295 },
{ 4301, 4301 },
+ { 5024, 5109 },
{ 7680, 7680 },
{ 7682, 7682 },
{ 7684, 7684 },
@@ -1778,6 +1895,8 @@ static const URange16 Lu_range16[] = {
{ 42642, 42642 },
{ 42644, 42644 },
{ 42646, 42646 },
+ { 42648, 42648 },
+ { 42650, 42650 },
{ 42786, 42786 },
{ 42788, 42788 },
{ 42790, 42790 },
@@ -1827,16 +1946,25 @@ static const URange16 Lu_range16[] = {
{ 42893, 42893 },
{ 42896, 42896 },
{ 42898, 42898 },
+ { 42902, 42902 },
+ { 42904, 42904 },
+ { 42906, 42906 },
+ { 42908, 42908 },
+ { 42910, 42910 },
{ 42912, 42912 },
{ 42914, 42914 },
{ 42916, 42916 },
{ 42918, 42918 },
{ 42920, 42920 },
- { 42922, 42922 },
+ { 42922, 42925 },
+ { 42928, 42932 },
+ { 42934, 42934 },
{ 65313, 65338 },
};
static const URange32 Lu_range32[] = {
{ 66560, 66599 },
+ { 68736, 68786 },
+ { 71840, 71871 },
{ 119808, 119833 },
{ 119860, 119885 },
{ 119912, 119937 },
@@ -1891,6 +2019,7 @@ static const URange16 Pd_range16[] = {
{ 11799, 11799 },
{ 11802, 11802 },
{ 11834, 11835 },
+ { 11840, 11840 },
{ 12316, 12316 },
{ 12336, 12336 },
{ 12448, 12448 },
@@ -1953,7 +2082,7 @@ static const URange16 Pe_range16[] = {
{ 12313, 12313 },
{ 12315, 12315 },
{ 12318, 12319 },
- { 64831, 64831 },
+ { 64830, 64830 },
{ 65048, 65048 },
{ 65078, 65078 },
{ 65080, 65080 },
@@ -2068,6 +2197,8 @@ static const URange16 Po_range16[] = {
{ 11806, 11807 },
{ 11818, 11822 },
{ 11824, 11833 },
+ { 11836, 11839 },
+ { 11841, 11841 },
{ 12289, 12291 },
{ 12349, 12349 },
{ 12539, 12539 },
@@ -2079,6 +2210,7 @@ static const URange16 Po_range16[] = {
{ 43124, 43127 },
{ 43214, 43215 },
{ 43256, 43258 },
+ { 43260, 43260 },
{ 43310, 43311 },
{ 43359, 43359 },
{ 43457, 43469 },
@@ -2112,21 +2244,41 @@ static const URange32 Po_range32[] = {
{ 65792, 65794 },
{ 66463, 66463 },
{ 66512, 66512 },
+ { 66927, 66927 },
{ 67671, 67671 },
{ 67871, 67871 },
{ 67903, 67903 },
{ 68176, 68184 },
{ 68223, 68223 },
+ { 68336, 68342 },
{ 68409, 68415 },
+ { 68505, 68508 },
{ 69703, 69709 },
{ 69819, 69820 },
{ 69822, 69825 },
{ 69952, 69955 },
- { 70085, 70088 },
- { 74864, 74867 },
+ { 70004, 70005 },
+ { 70085, 70089 },
+ { 70093, 70093 },
+ { 70107, 70107 },
+ { 70109, 70111 },
+ { 70200, 70205 },
+ { 70313, 70313 },
+ { 70854, 70854 },
+ { 71105, 71127 },
+ { 71233, 71235 },
+ { 71484, 71486 },
+ { 74864, 74868 },
+ { 92782, 92783 },
+ { 92917, 92917 },
+ { 92983, 92987 },
+ { 92996, 92996 },
+ { 113823, 113823 },
+ { 121479, 121483 },
};
static const URange16 Me_range16[] = {
{ 1160, 1161 },
+ { 6846, 6846 },
{ 8413, 8416 },
{ 8418, 8420 },
{ 42608, 42610 },
@@ -2135,7 +2287,7 @@ static const URange16 C_range16[] = {
{ 0, 31 },
{ 127, 159 },
{ 173, 173 },
- { 1536, 1540 },
+ { 1536, 1541 },
{ 1564, 1564 },
{ 1757, 1757 },
{ 1807, 1807 },
@@ -2150,6 +2302,7 @@ static const URange16 C_range16[] = {
};
static const URange32 C_range32[] = {
{ 69821, 69821 },
+ { 113824, 113827 },
{ 119155, 119162 },
{ 917505, 917505 },
{ 917536, 917631 },
@@ -2221,8 +2374,6 @@ static const URange16 Mc_range16[] = {
{ 6441, 6443 },
{ 6448, 6449 },
{ 6451, 6456 },
- { 6576, 6592 },
- { 6600, 6601 },
{ 6681, 6682 },
{ 6741, 6741 },
{ 6743, 6743 },
@@ -2238,7 +2389,6 @@ static const URange16 Mc_range16[] = {
{ 7073, 7073 },
{ 7078, 7079 },
{ 7082, 7082 },
- { 7084, 7085 },
{ 7143, 7143 },
{ 7146, 7148 },
{ 7150, 7150 },
@@ -2261,6 +2411,7 @@ static const URange16 Mc_range16[] = {
{ 43571, 43572 },
{ 43597, 43597 },
{ 43643, 43643 },
+ { 43645, 43645 },
{ 43755, 43755 },
{ 43758, 43759 },
{ 43765, 43765 },
@@ -2279,9 +2430,32 @@ static const URange32 Mc_range32[] = {
{ 70018, 70018 },
{ 70067, 70069 },
{ 70079, 70080 },
+ { 70188, 70190 },
+ { 70194, 70195 },
+ { 70197, 70197 },
+ { 70368, 70370 },
+ { 70402, 70403 },
+ { 70462, 70463 },
+ { 70465, 70468 },
+ { 70471, 70472 },
+ { 70475, 70477 },
+ { 70487, 70487 },
+ { 70498, 70499 },
+ { 70832, 70834 },
+ { 70841, 70841 },
+ { 70843, 70846 },
+ { 70849, 70849 },
+ { 71087, 71089 },
+ { 71096, 71099 },
+ { 71102, 71102 },
+ { 71216, 71218 },
+ { 71227, 71228 },
+ { 71230, 71230 },
{ 71340, 71340 },
{ 71342, 71343 },
{ 71350, 71350 },
+ { 71456, 71457 },
+ { 71462, 71462 },
{ 94033, 94078 },
{ 119141, 119142 },
{ 119149, 119154 },
@@ -2310,8 +2484,7 @@ static const URange16 Mn_range16[] = {
{ 2085, 2087 },
{ 2089, 2093 },
{ 2137, 2139 },
- { 2276, 2302 },
- { 2304, 2306 },
+ { 2275, 2306 },
{ 2362, 2362 },
{ 2364, 2364 },
{ 2369, 2376 },
@@ -2347,16 +2520,19 @@ static const URange16 Mn_range16[] = {
{ 2946, 2946 },
{ 3008, 3008 },
{ 3021, 3021 },
+ { 3072, 3072 },
{ 3134, 3136 },
{ 3142, 3144 },
{ 3146, 3149 },
{ 3157, 3158 },
{ 3170, 3171 },
+ { 3201, 3201 },
{ 3260, 3260 },
{ 3263, 3263 },
{ 3270, 3270 },
{ 3276, 3277 },
{ 3298, 3299 },
+ { 3329, 3329 },
{ 3393, 3396 },
{ 3405, 3405 },
{ 3426, 3427 },
@@ -2416,6 +2592,7 @@ static const URange16 Mn_range16[] = {
{ 6757, 6764 },
{ 6771, 6780 },
{ 6783, 6783 },
+ { 6832, 6845 },
{ 6912, 6915 },
{ 6964, 6964 },
{ 6966, 6970 },
@@ -2425,7 +2602,7 @@ static const URange16 Mn_range16[] = {
{ 7040, 7041 },
{ 7074, 7077 },
{ 7080, 7081 },
- { 7083, 7083 },
+ { 7083, 7085 },
{ 7142, 7142 },
{ 7144, 7145 },
{ 7149, 7149 },
@@ -2437,7 +2614,8 @@ static const URange16 Mn_range16[] = {
{ 7394, 7400 },
{ 7405, 7405 },
{ 7412, 7412 },
- { 7616, 7654 },
+ { 7416, 7417 },
+ { 7616, 7669 },
{ 7676, 7679 },
{ 8400, 8412 },
{ 8417, 8417 },
@@ -2449,7 +2627,7 @@ static const URange16 Mn_range16[] = {
{ 12441, 12442 },
{ 42607, 42607 },
{ 42612, 42621 },
- { 42655, 42655 },
+ { 42654, 42655 },
{ 42736, 42737 },
{ 43010, 43010 },
{ 43014, 43014 },
@@ -2463,11 +2641,13 @@ static const URange16 Mn_range16[] = {
{ 43443, 43443 },
{ 43446, 43449 },
{ 43452, 43452 },
+ { 43493, 43493 },
{ 43561, 43566 },
{ 43569, 43570 },
{ 43573, 43574 },
{ 43587, 43587 },
{ 43596, 43596 },
+ { 43644, 43644 },
{ 43696, 43696 },
{ 43698, 43700 },
{ 43703, 43704 },
@@ -2480,35 +2660,74 @@ static const URange16 Mn_range16[] = {
{ 44013, 44013 },
{ 64286, 64286 },
{ 65024, 65039 },
- { 65056, 65062 },
+ { 65056, 65071 },
};
static const URange32 Mn_range32[] = {
{ 66045, 66045 },
+ { 66272, 66272 },
+ { 66422, 66426 },
{ 68097, 68099 },
{ 68101, 68102 },
{ 68108, 68111 },
{ 68152, 68154 },
{ 68159, 68159 },
+ { 68325, 68326 },
{ 69633, 69633 },
{ 69688, 69702 },
- { 69760, 69761 },
+ { 69759, 69761 },
{ 69811, 69814 },
{ 69817, 69818 },
{ 69888, 69890 },
{ 69927, 69931 },
{ 69933, 69940 },
+ { 70003, 70003 },
{ 70016, 70017 },
{ 70070, 70078 },
+ { 70090, 70092 },
+ { 70191, 70193 },
+ { 70196, 70196 },
+ { 70198, 70199 },
+ { 70367, 70367 },
+ { 70371, 70378 },
+ { 70400, 70401 },
+ { 70460, 70460 },
+ { 70464, 70464 },
+ { 70502, 70508 },
+ { 70512, 70516 },
+ { 70835, 70840 },
+ { 70842, 70842 },
+ { 70847, 70848 },
+ { 70850, 70851 },
+ { 71090, 71093 },
+ { 71100, 71101 },
+ { 71103, 71104 },
+ { 71132, 71133 },
+ { 71219, 71226 },
+ { 71229, 71229 },
+ { 71231, 71232 },
{ 71339, 71339 },
{ 71341, 71341 },
{ 71344, 71349 },
{ 71351, 71351 },
+ { 71453, 71455 },
+ { 71458, 71461 },
+ { 71463, 71467 },
+ { 92912, 92916 },
+ { 92976, 92982 },
{ 94095, 94098 },
+ { 113821, 113822 },
{ 119143, 119145 },
{ 119163, 119170 },
{ 119173, 119179 },
{ 119210, 119213 },
{ 119362, 119364 },
+ { 121344, 121398 },
+ { 121403, 121452 },
+ { 121461, 121461 },
+ { 121476, 121476 },
+ { 121499, 121503 },
+ { 121505, 121519 },
+ { 125136, 125142 },
{ 917760, 917999 },
};
static const URange16 M_range16[] = {
@@ -2535,8 +2754,7 @@ static const URange16 M_range16[] = {
{ 2085, 2087 },
{ 2089, 2093 },
{ 2137, 2139 },
- { 2276, 2302 },
- { 2304, 2307 },
+ { 2275, 2307 },
{ 2362, 2364 },
{ 2366, 2383 },
{ 2385, 2391 },
@@ -2574,20 +2792,20 @@ static const URange16 M_range16[] = {
{ 3014, 3016 },
{ 3018, 3021 },
{ 3031, 3031 },
- { 3073, 3075 },
+ { 3072, 3075 },
{ 3134, 3140 },
{ 3142, 3144 },
{ 3146, 3149 },
{ 3157, 3158 },
{ 3170, 3171 },
- { 3202, 3203 },
+ { 3201, 3203 },
{ 3260, 3260 },
{ 3262, 3268 },
{ 3270, 3272 },
{ 3274, 3277 },
{ 3285, 3286 },
{ 3298, 3299 },
- { 3330, 3331 },
+ { 3329, 3331 },
{ 3390, 3396 },
{ 3398, 3400 },
{ 3402, 3405 },
@@ -2636,12 +2854,11 @@ static const URange16 M_range16[] = {
{ 6313, 6313 },
{ 6432, 6443 },
{ 6448, 6459 },
- { 6576, 6592 },
- { 6600, 6601 },
{ 6679, 6683 },
{ 6741, 6750 },
{ 6752, 6780 },
{ 6783, 6783 },
+ { 6832, 6846 },
{ 6912, 6916 },
{ 6964, 6980 },
{ 7019, 7027 },
@@ -2653,7 +2870,8 @@ static const URange16 M_range16[] = {
{ 7380, 7400 },
{ 7405, 7405 },
{ 7410, 7412 },
- { 7616, 7654 },
+ { 7416, 7417 },
+ { 7616, 7669 },
{ 7676, 7679 },
{ 8400, 8432 },
{ 11503, 11505 },
@@ -2663,7 +2881,7 @@ static const URange16 M_range16[] = {
{ 12441, 12442 },
{ 42607, 42610 },
{ 42612, 42621 },
- { 42655, 42655 },
+ { 42654, 42655 },
{ 42736, 42737 },
{ 43010, 43010 },
{ 43014, 43014 },
@@ -2676,10 +2894,11 @@ static const URange16 M_range16[] = {
{ 43335, 43347 },
{ 43392, 43395 },
{ 43443, 43456 },
+ { 43493, 43493 },
{ 43561, 43574 },
{ 43587, 43587 },
{ 43596, 43597 },
- { 43643, 43643 },
+ { 43643, 43645 },
{ 43696, 43696 },
{ 43698, 43700 },
{ 43703, 43704 },
@@ -2691,32 +2910,64 @@ static const URange16 M_range16[] = {
{ 44012, 44013 },
{ 64286, 64286 },
{ 65024, 65039 },
- { 65056, 65062 },
+ { 65056, 65071 },
};
static const URange32 M_range32[] = {
{ 66045, 66045 },
+ { 66272, 66272 },
+ { 66422, 66426 },
{ 68097, 68099 },
{ 68101, 68102 },
{ 68108, 68111 },
{ 68152, 68154 },
{ 68159, 68159 },
+ { 68325, 68326 },
{ 69632, 69634 },
{ 69688, 69702 },
- { 69760, 69762 },
+ { 69759, 69762 },
{ 69808, 69818 },
{ 69888, 69890 },
{ 69927, 69940 },
+ { 70003, 70003 },
{ 70016, 70018 },
{ 70067, 70080 },
+ { 70090, 70092 },
+ { 70188, 70199 },
+ { 70367, 70378 },
+ { 70400, 70403 },
+ { 70460, 70460 },
+ { 70462, 70468 },
+ { 70471, 70472 },
+ { 70475, 70477 },
+ { 70487, 70487 },
+ { 70498, 70499 },
+ { 70502, 70508 },
+ { 70512, 70516 },
+ { 70832, 70851 },
+ { 71087, 71093 },
+ { 71096, 71104 },
+ { 71132, 71133 },
+ { 71216, 71232 },
{ 71339, 71351 },
+ { 71453, 71467 },
+ { 92912, 92916 },
+ { 92976, 92982 },
{ 94033, 94078 },
{ 94095, 94098 },
+ { 113821, 113822 },
{ 119141, 119145 },
{ 119149, 119154 },
{ 119163, 119170 },
{ 119173, 119179 },
{ 119210, 119213 },
{ 119362, 119364 },
+ { 121344, 121398 },
+ { 121403, 121452 },
+ { 121461, 121461 },
+ { 121476, 121476 },
+ { 121499, 121503 },
+ { 121505, 121519 },
+ { 125136, 125142 },
{ 917760, 917999 },
};
static const URange16 L_range16[] = {
@@ -2735,13 +2986,14 @@ static const URange16 L_range16[] = {
{ 880, 884 },
{ 886, 887 },
{ 890, 893 },
+ { 895, 895 },
{ 902, 902 },
{ 904, 906 },
{ 908, 908 },
{ 910, 929 },
{ 931, 1013 },
{ 1015, 1153 },
- { 1162, 1319 },
+ { 1162, 1327 },
{ 1329, 1366 },
{ 1369, 1369 },
{ 1377, 1415 },
@@ -2767,14 +3019,12 @@ static const URange16 L_range16[] = {
{ 2084, 2084 },
{ 2088, 2088 },
{ 2112, 2136 },
- { 2208, 2208 },
- { 2210, 2220 },
+ { 2208, 2228 },
{ 2308, 2361 },
{ 2365, 2365 },
{ 2384, 2384 },
{ 2392, 2401 },
- { 2417, 2423 },
- { 2425, 2431 },
+ { 2417, 2432 },
{ 2437, 2444 },
{ 2447, 2448 },
{ 2451, 2472 },
@@ -2805,6 +3055,7 @@ static const URange16 L_range16[] = {
{ 2749, 2749 },
{ 2768, 2768 },
{ 2784, 2785 },
+ { 2809, 2809 },
{ 2821, 2828 },
{ 2831, 2832 },
{ 2835, 2856 },
@@ -2829,10 +3080,9 @@ static const URange16 L_range16[] = {
{ 3077, 3084 },
{ 3086, 3088 },
{ 3090, 3112 },
- { 3114, 3123 },
- { 3125, 3129 },
+ { 3114, 3129 },
{ 3133, 3133 },
- { 3160, 3161 },
+ { 3160, 3162 },
{ 3168, 3169 },
{ 3205, 3212 },
{ 3214, 3216 },
@@ -2848,7 +3098,7 @@ static const URange16 L_range16[] = {
{ 3346, 3386 },
{ 3389, 3389 },
{ 3406, 3406 },
- { 3424, 3425 },
+ { 3423, 3425 },
{ 3450, 3455 },
{ 3461, 3478 },
{ 3482, 3505 },
@@ -2909,11 +3159,13 @@ static const URange16 L_range16[] = {
{ 4882, 4885 },
{ 4888, 4954 },
{ 4992, 5007 },
- { 5024, 5108 },
+ { 5024, 5109 },
+ { 5112, 5117 },
{ 5121, 5740 },
{ 5743, 5759 },
{ 5761, 5786 },
{ 5792, 5866 },
+ { 5873, 5880 },
{ 5888, 5900 },
{ 5902, 5905 },
{ 5920, 5937 },
@@ -2927,11 +3179,11 @@ static const URange16 L_range16[] = {
{ 6272, 6312 },
{ 6314, 6314 },
{ 6320, 6389 },
- { 6400, 6428 },
+ { 6400, 6430 },
{ 6480, 6509 },
{ 6512, 6516 },
{ 6528, 6571 },
- { 6593, 6599 },
+ { 6576, 6601 },
{ 6656, 6678 },
{ 6688, 6740 },
{ 6823, 6823 },
@@ -3015,21 +3267,20 @@ static const URange16 L_range16[] = {
{ 12704, 12730 },
{ 12784, 12799 },
{ 13312, 19893 },
- { 19968, 40908 },
+ { 19968, 40917 },
{ 40960, 42124 },
{ 42192, 42237 },
{ 42240, 42508 },
{ 42512, 42527 },
{ 42538, 42539 },
{ 42560, 42606 },
- { 42623, 42647 },
+ { 42623, 42653 },
{ 42656, 42725 },
{ 42775, 42783 },
{ 42786, 42888 },
- { 42891, 42894 },
- { 42896, 42899 },
- { 42912, 42922 },
- { 43000, 43009 },
+ { 42891, 42925 },
+ { 42928, 42935 },
+ { 42999, 43009 },
{ 43011, 43013 },
{ 43015, 43018 },
{ 43020, 43042 },
@@ -3037,17 +3288,21 @@ static const URange16 L_range16[] = {
{ 43138, 43187 },
{ 43250, 43255 },
{ 43259, 43259 },
+ { 43261, 43261 },
{ 43274, 43301 },
{ 43312, 43334 },
{ 43360, 43388 },
{ 43396, 43442 },
{ 43471, 43471 },
+ { 43488, 43492 },
+ { 43494, 43503 },
+ { 43514, 43518 },
{ 43520, 43560 },
{ 43584, 43586 },
{ 43588, 43595 },
{ 43616, 43638 },
{ 43642, 43642 },
- { 43648, 43695 },
+ { 43646, 43695 },
{ 43697, 43697 },
{ 43701, 43702 },
{ 43705, 43709 },
@@ -3061,7 +3316,9 @@ static const URange16 L_range16[] = {
{ 43793, 43798 },
{ 43808, 43814 },
{ 43816, 43822 },
- { 43968, 44002 },
+ { 43824, 43866 },
+ { 43868, 43877 },
+ { 43888, 44002 },
{ 44032, 55203 },
{ 55216, 55238 },
{ 55243, 55291 },
@@ -3101,19 +3358,29 @@ static const URange32 L_range32[] = {
{ 65664, 65786 },
{ 66176, 66204 },
{ 66208, 66256 },
- { 66304, 66334 },
+ { 66304, 66335 },
{ 66352, 66368 },
{ 66370, 66377 },
+ { 66384, 66421 },
{ 66432, 66461 },
{ 66464, 66499 },
{ 66504, 66511 },
{ 66560, 66717 },
+ { 66816, 66855 },
+ { 66864, 66915 },
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
{ 67584, 67589 },
{ 67592, 67592 },
{ 67594, 67637 },
{ 67639, 67640 },
{ 67644, 67644 },
{ 67647, 67669 },
+ { 67680, 67702 },
+ { 67712, 67742 },
+ { 67808, 67826 },
+ { 67828, 67829 },
{ 67840, 67861 },
{ 67872, 67897 },
{ 67968, 68023 },
@@ -3123,24 +3390,74 @@ static const URange32 L_range32[] = {
{ 68117, 68119 },
{ 68121, 68147 },
{ 68192, 68220 },
+ { 68224, 68252 },
+ { 68288, 68295 },
+ { 68297, 68324 },
{ 68352, 68405 },
{ 68416, 68437 },
{ 68448, 68466 },
+ { 68480, 68497 },
{ 68608, 68680 },
+ { 68736, 68786 },
+ { 68800, 68850 },
{ 69635, 69687 },
{ 69763, 69807 },
{ 69840, 69864 },
{ 69891, 69926 },
+ { 69968, 70002 },
+ { 70006, 70006 },
{ 70019, 70066 },
{ 70081, 70084 },
+ { 70106, 70106 },
+ { 70108, 70108 },
+ { 70144, 70161 },
+ { 70163, 70187 },
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70312 },
+ { 70320, 70366 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70461, 70461 },
+ { 70480, 70480 },
+ { 70493, 70497 },
+ { 70784, 70831 },
+ { 70852, 70853 },
+ { 70855, 70855 },
+ { 71040, 71086 },
+ { 71128, 71131 },
+ { 71168, 71215 },
+ { 71236, 71236 },
{ 71296, 71338 },
- { 73728, 74606 },
+ { 71424, 71449 },
+ { 71840, 71903 },
+ { 71935, 71935 },
+ { 72384, 72440 },
+ { 73728, 74649 },
+ { 74880, 75075 },
{ 77824, 78894 },
+ { 82944, 83526 },
{ 92160, 92728 },
+ { 92736, 92766 },
+ { 92880, 92909 },
+ { 92928, 92975 },
+ { 92992, 92995 },
+ { 93027, 93047 },
+ { 93053, 93071 },
{ 93952, 94020 },
{ 94032, 94032 },
{ 94099, 94111 },
{ 110592, 110593 },
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
{ 119808, 119892 },
{ 119894, 119964 },
{ 119966, 119967 },
@@ -3171,6 +3488,7 @@ static const URange32 L_range32[] = {
{ 120714, 120744 },
{ 120746, 120770 },
{ 120772, 120779 },
+ { 124928, 125124 },
{ 126464, 126467 },
{ 126469, 126495 },
{ 126497, 126498 },
@@ -3207,6 +3525,7 @@ static const URange32 L_range32[] = {
{ 131072, 173782 },
{ 173824, 177972 },
{ 177984, 178205 },
+ { 178208, 183969 },
{ 194560, 195101 },
};
static const URange16 N_range16[] = {
@@ -3229,6 +3548,7 @@ static const URange16 N_range16[] = {
{ 3192, 3198 },
{ 3302, 3311 },
{ 3430, 3445 },
+ { 3558, 3567 },
{ 3664, 3673 },
{ 3792, 3801 },
{ 3872, 3891 },
@@ -3271,6 +3591,7 @@ static const URange16 N_range16[] = {
{ 43216, 43225 },
{ 43264, 43273 },
{ 43472, 43481 },
+ { 43504, 43513 },
{ 43600, 43609 },
{ 44016, 44025 },
{ 65296, 65305 },
@@ -3278,28 +3599,49 @@ static const URange16 N_range16[] = {
static const URange32 N_range32[] = {
{ 65799, 65843 },
{ 65856, 65912 },
- { 65930, 65930 },
+ { 65930, 65931 },
+ { 66273, 66299 },
{ 66336, 66339 },
{ 66369, 66369 },
{ 66378, 66378 },
{ 66513, 66517 },
{ 66720, 66729 },
{ 67672, 67679 },
+ { 67705, 67711 },
+ { 67751, 67759 },
+ { 67835, 67839 },
{ 67862, 67867 },
+ { 68028, 68029 },
+ { 68032, 68047 },
+ { 68050, 68095 },
{ 68160, 68167 },
{ 68221, 68222 },
+ { 68253, 68255 },
+ { 68331, 68335 },
{ 68440, 68447 },
{ 68472, 68479 },
+ { 68521, 68527 },
+ { 68858, 68863 },
{ 69216, 69246 },
{ 69714, 69743 },
{ 69872, 69881 },
{ 69942, 69951 },
{ 70096, 70105 },
+ { 70113, 70132 },
+ { 70384, 70393 },
+ { 70864, 70873 },
+ { 71248, 71257 },
{ 71360, 71369 },
- { 74752, 74850 },
+ { 71472, 71483 },
+ { 71904, 71922 },
+ { 74752, 74862 },
+ { 92768, 92777 },
+ { 93008, 93017 },
+ { 93019, 93025 },
{ 119648, 119665 },
{ 120782, 120831 },
- { 127232, 127242 },
+ { 125127, 125135 },
+ { 127232, 127244 },
};
static const URange16 Sk_range16[] = {
{ 94, 94 },
@@ -3325,11 +3667,15 @@ static const URange16 Sk_range16[] = {
{ 42752, 42774 },
{ 42784, 42785 },
{ 42889, 42890 },
+ { 43867, 43867 },
{ 64434, 64449 },
{ 65342, 65342 },
{ 65344, 65344 },
{ 65507, 65507 },
};
+static const URange32 Sk_range32[] = {
+ { 127995, 127999 },
+};
static const URange16 P_range16[] = {
{ 33, 35 },
{ 37, 42 },
@@ -3416,7 +3762,7 @@ static const URange16 P_range16[] = {
{ 11518, 11519 },
{ 11632, 11632 },
{ 11776, 11822 },
- { 11824, 11835 },
+ { 11824, 11842 },
{ 12289, 12291 },
{ 12296, 12305 },
{ 12308, 12319 },
@@ -3432,6 +3778,7 @@ static const URange16 P_range16[] = {
{ 43124, 43127 },
{ 43214, 43215 },
{ 43256, 43258 },
+ { 43260, 43260 },
{ 43310, 43311 },
{ 43359, 43359 },
{ 43457, 43469 },
@@ -3462,18 +3809,37 @@ static const URange32 P_range32[] = {
{ 65792, 65794 },
{ 66463, 66463 },
{ 66512, 66512 },
+ { 66927, 66927 },
{ 67671, 67671 },
{ 67871, 67871 },
{ 67903, 67903 },
{ 68176, 68184 },
{ 68223, 68223 },
+ { 68336, 68342 },
{ 68409, 68415 },
+ { 68505, 68508 },
{ 69703, 69709 },
{ 69819, 69820 },
{ 69822, 69825 },
{ 69952, 69955 },
- { 70085, 70088 },
- { 74864, 74867 },
+ { 70004, 70005 },
+ { 70085, 70089 },
+ { 70093, 70093 },
+ { 70107, 70107 },
+ { 70109, 70111 },
+ { 70200, 70205 },
+ { 70313, 70313 },
+ { 70854, 70854 },
+ { 71105, 71127 },
+ { 71233, 71235 },
+ { 71484, 71486 },
+ { 74864, 74868 },
+ { 92782, 92783 },
+ { 92917, 92917 },
+ { 92983, 92987 },
+ { 92996, 92996 },
+ { 113823, 113823 },
+ { 121479, 121483 },
};
static const URange16 S_range16[] = {
{ 36, 36 },
@@ -3500,7 +3866,7 @@ static const URange16 S_range16[] = {
{ 900, 901 },
{ 1014, 1014 },
{ 1154, 1154 },
- { 1423, 1423 },
+ { 1421, 1423 },
{ 1542, 1544 },
{ 1547, 1547 },
{ 1550, 1551 },
@@ -3544,7 +3910,7 @@ static const URange16 S_range16[] = {
{ 8274, 8274 },
{ 8314, 8316 },
{ 8330, 8332 },
- { 8352, 8378 },
+ { 8352, 8382 },
{ 8448, 8449 },
{ 8451, 8454 },
{ 8456, 8457 },
@@ -3559,21 +3925,25 @@ static const URange16 S_range16[] = {
{ 8512, 8516 },
{ 8522, 8525 },
{ 8527, 8527 },
+ { 8586, 8587 },
{ 8592, 8967 },
{ 8972, 9000 },
- { 9003, 9203 },
+ { 9003, 9210 },
{ 9216, 9254 },
{ 9280, 9290 },
{ 9372, 9449 },
- { 9472, 9983 },
- { 9985, 10087 },
+ { 9472, 10087 },
{ 10132, 10180 },
{ 10183, 10213 },
{ 10224, 10626 },
{ 10649, 10711 },
{ 10716, 10747 },
- { 10750, 11084 },
- { 11088, 11097 },
+ { 10750, 11123 },
+ { 11126, 11157 },
+ { 11160, 11193 },
+ { 11197, 11208 },
+ { 11210, 11217 },
+ { 11244, 11247 },
{ 11493, 11498 },
{ 11904, 11929 },
{ 11931, 12019 },
@@ -3603,6 +3973,7 @@ static const URange16 S_range16[] = {
{ 43048, 43051 },
{ 43062, 43065 },
{ 43639, 43641 },
+ { 43867, 43867 },
{ 64297, 64297 },
{ 64434, 64449 },
{ 65020, 65021 },
@@ -3623,15 +3994,23 @@ static const URange16 S_range16[] = {
static const URange32 S_range32[] = {
{ 65847, 65855 },
{ 65913, 65929 },
+ { 65932, 65932 },
{ 65936, 65947 },
+ { 65952, 65952 },
{ 66000, 66044 },
+ { 67703, 67704 },
+ { 68296, 68296 },
+ { 71487, 71487 },
+ { 92988, 92991 },
+ { 92997, 92997 },
+ { 113820, 113820 },
{ 118784, 119029 },
{ 119040, 119078 },
{ 119081, 119140 },
{ 119146, 119148 },
{ 119171, 119172 },
{ 119180, 119209 },
- { 119214, 119261 },
+ { 119214, 119272 },
{ 119296, 119361 },
{ 119365, 119365 },
{ 119552, 119638 },
@@ -3645,13 +4024,18 @@ static const URange32 S_range32[] = {
{ 120713, 120713 },
{ 120745, 120745 },
{ 120771, 120771 },
+ { 120832, 121343 },
+ { 121399, 121402 },
+ { 121453, 121460 },
+ { 121462, 121475 },
+ { 121477, 121478 },
{ 126704, 126705 },
{ 126976, 127019 },
{ 127024, 127123 },
{ 127136, 127150 },
- { 127153, 127166 },
+ { 127153, 127167 },
{ 127169, 127183 },
- { 127185, 127199 },
+ { 127185, 127221 },
{ 127248, 127278 },
{ 127280, 127339 },
{ 127344, 127386 },
@@ -3659,24 +4043,21 @@ static const URange32 S_range32[] = {
{ 127504, 127546 },
{ 127552, 127560 },
{ 127568, 127569 },
- { 127744, 127776 },
- { 127792, 127797 },
- { 127799, 127868 },
- { 127872, 127891 },
- { 127904, 127940 },
- { 127942, 127946 },
- { 127968, 127984 },
- { 128000, 128062 },
- { 128064, 128064 },
- { 128066, 128247 },
- { 128249, 128252 },
- { 128256, 128317 },
- { 128320, 128323 },
- { 128336, 128359 },
- { 128507, 128576 },
- { 128581, 128591 },
- { 128640, 128709 },
+ { 127744, 128377 },
+ { 128379, 128419 },
+ { 128421, 128720 },
+ { 128736, 128748 },
+ { 128752, 128755 },
{ 128768, 128883 },
+ { 128896, 128980 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
+ { 129296, 129304 },
+ { 129408, 129412 },
+ { 129472, 129472 },
};
static const URange16 So_range16[] = {
{ 166, 166 },
@@ -3684,6 +4065,7 @@ static const URange16 So_range16[] = {
{ 174, 174 },
{ 176, 176 },
{ 1154, 1154 },
+ { 1421, 1422 },
{ 1550, 1551 },
{ 1758, 1758 },
{ 1769, 1769 },
@@ -3726,6 +4108,7 @@ static const URange16 So_range16[] = {
{ 8522, 8522 },
{ 8524, 8525 },
{ 8527, 8527 },
+ { 8586, 8587 },
{ 8597, 8601 },
{ 8604, 8607 },
{ 8609, 8610 },
@@ -3741,7 +4124,7 @@ static const URange16 So_range16[] = {
{ 9003, 9083 },
{ 9085, 9114 },
{ 9140, 9179 },
- { 9186, 9203 },
+ { 9186, 9210 },
{ 9216, 9254 },
{ 9280, 9290 },
{ 9372, 9449 },
@@ -3749,13 +4132,17 @@ static const URange16 So_range16[] = {
{ 9656, 9664 },
{ 9666, 9719 },
{ 9728, 9838 },
- { 9840, 9983 },
- { 9985, 10087 },
+ { 9840, 10087 },
{ 10132, 10175 },
{ 10240, 10495 },
{ 11008, 11055 },
{ 11077, 11078 },
- { 11088, 11097 },
+ { 11085, 11123 },
+ { 11126, 11157 },
+ { 11160, 11193 },
+ { 11197, 11208 },
+ { 11210, 11217 },
+ { 11244, 11247 },
{ 11493, 11498 },
{ 11904, 11929 },
{ 11931, 12019 },
@@ -3791,24 +4178,37 @@ static const URange16 So_range16[] = {
static const URange32 So_range32[] = {
{ 65847, 65855 },
{ 65913, 65929 },
+ { 65932, 65932 },
{ 65936, 65947 },
+ { 65952, 65952 },
{ 66000, 66044 },
+ { 67703, 67704 },
+ { 68296, 68296 },
+ { 71487, 71487 },
+ { 92988, 92991 },
+ { 92997, 92997 },
+ { 113820, 113820 },
{ 118784, 119029 },
{ 119040, 119078 },
{ 119081, 119140 },
{ 119146, 119148 },
{ 119171, 119172 },
{ 119180, 119209 },
- { 119214, 119261 },
+ { 119214, 119272 },
{ 119296, 119361 },
{ 119365, 119365 },
{ 119552, 119638 },
+ { 120832, 121343 },
+ { 121399, 121402 },
+ { 121453, 121460 },
+ { 121462, 121475 },
+ { 121477, 121478 },
{ 126976, 127019 },
{ 127024, 127123 },
{ 127136, 127150 },
- { 127153, 127166 },
+ { 127153, 127167 },
{ 127169, 127183 },
- { 127185, 127199 },
+ { 127185, 127221 },
{ 127248, 127278 },
{ 127280, 127339 },
{ 127344, 127386 },
@@ -3816,24 +4216,22 @@ static const URange32 So_range32[] = {
{ 127504, 127546 },
{ 127552, 127560 },
{ 127568, 127569 },
- { 127744, 127776 },
- { 127792, 127797 },
- { 127799, 127868 },
- { 127872, 127891 },
- { 127904, 127940 },
- { 127942, 127946 },
- { 127968, 127984 },
- { 128000, 128062 },
- { 128064, 128064 },
- { 128066, 128247 },
- { 128249, 128252 },
- { 128256, 128317 },
- { 128320, 128323 },
- { 128336, 128359 },
- { 128507, 128576 },
- { 128581, 128591 },
- { 128640, 128709 },
+ { 127744, 127994 },
+ { 128000, 128377 },
+ { 128379, 128419 },
+ { 128421, 128720 },
+ { 128736, 128748 },
+ { 128752, 128755 },
{ 128768, 128883 },
+ { 128896, 128980 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
+ { 129296, 129304 },
+ { 129408, 129412 },
+ { 129472, 129472 },
};
static const URange16 Sm_range16[] = {
{ 43, 43 },
@@ -3914,7 +4312,7 @@ static const URange16 Sc_range16[] = {
{ 3065, 3065 },
{ 3647, 3647 },
{ 6107, 6107 },
- { 8352, 8378 },
+ { 8352, 8382 },
{ 43064, 43064 },
{ 65020, 65020 },
{ 65129, 65129 },
@@ -3948,7 +4346,7 @@ static const URange16 Cc_range16[] = {
};
static const URange16 Cf_range16[] = {
{ 173, 173 },
- { 1536, 1540 },
+ { 1536, 1541 },
{ 1564, 1564 },
{ 1757, 1757 },
{ 1807, 1807 },
@@ -3962,6 +4360,7 @@ static const URange16 Cf_range16[] = {
};
static const URange32 Cf_range32[] = {
{ 69821, 69821 },
+ { 113824, 113827 },
{ 119155, 119162 },
{ 917505, 917505 },
{ 917536, 917631 },
@@ -3985,29 +4384,28 @@ static const URange16 Thaana_range16[] = {
{ 1920, 1969 },
};
static const URange16 Telugu_range16[] = {
- { 3073, 3075 },
+ { 3072, 3075 },
{ 3077, 3084 },
{ 3086, 3088 },
{ 3090, 3112 },
- { 3114, 3123 },
- { 3125, 3129 },
+ { 3114, 3129 },
{ 3133, 3140 },
{ 3142, 3144 },
{ 3146, 3149 },
{ 3157, 3158 },
- { 3160, 3161 },
+ { 3160, 3162 },
{ 3168, 3171 },
{ 3174, 3183 },
{ 3192, 3199 },
};
static const URange16 Cyrillic_range16[] = {
{ 1024, 1156 },
- { 1159, 1319 },
+ { 1159, 1327 },
{ 7467, 7467 },
{ 7544, 7544 },
{ 11744, 11775 },
- { 42560, 42647 },
- { 42655, 42655 },
+ { 42560, 42655 },
+ { 65070, 65071 },
};
static const URange16 Hangul_range16[] = {
{ 4352, 4607 },
@@ -4068,22 +4466,25 @@ static const URange16 Inherited_range16[] = {
{ 1611, 1621 },
{ 1648, 1648 },
{ 2385, 2386 },
+ { 6832, 6846 },
{ 7376, 7378 },
{ 7380, 7392 },
{ 7394, 7400 },
{ 7405, 7405 },
{ 7412, 7412 },
- { 7616, 7654 },
+ { 7416, 7417 },
+ { 7616, 7669 },
{ 7676, 7679 },
{ 8204, 8205 },
{ 8400, 8432 },
{ 12330, 12333 },
{ 12441, 12442 },
{ 65024, 65039 },
- { 65056, 65062 },
+ { 65056, 65069 },
};
static const URange32 Inherited_range32[] = {
{ 66045, 66045 },
+ { 66272, 66272 },
{ 119143, 119145 },
{ 119163, 119170 },
{ 119173, 119179 },
@@ -4092,7 +4493,13 @@ static const URange32 Inherited_range32[] = {
};
static const URange32 Meroitic_Cursive_range32[] = {
{ 68000, 68023 },
- { 68030, 68031 },
+ { 68028, 68047 },
+ { 68050, 68095 },
+};
+static const URange32 Ahom_range32[] = {
+ { 71424, 71449 },
+ { 71453, 71467 },
+ { 71472, 71487 },
};
static const URange16 Han_range16[] = {
{ 11904, 11929 },
@@ -4103,7 +4510,7 @@ static const URange16 Han_range16[] = {
{ 12321, 12329 },
{ 12344, 12347 },
{ 13312, 19893 },
- { 19968, 40908 },
+ { 19968, 40917 },
{ 63744, 64109 },
{ 64112, 64217 },
};
@@ -4111,14 +4518,18 @@ static const URange32 Han_range32[] = {
{ 131072, 173782 },
{ 173824, 177972 },
{ 177984, 178205 },
+ { 178208, 183969 },
{ 194560, 195101 },
};
+static const URange32 Old_North_Arabian_range32[] = {
+ { 68224, 68255 },
+};
static const URange16 Armenian_range16[] = {
{ 1329, 1366 },
{ 1369, 1375 },
{ 1377, 1415 },
{ 1418, 1418 },
- { 1423, 1423 },
+ { 1421, 1423 },
{ 64275, 64279 },
};
static const URange16 Tamil_range16[] = {
@@ -4144,6 +4555,10 @@ static const URange16 Bopomofo_range16[] = {
{ 12549, 12589 },
{ 12704, 12730 },
};
+static const URange32 Bassa_Vah_range32[] = {
+ { 92880, 92909 },
+ { 92912, 92917 },
+};
static const URange16 Sundanese_range16[] = {
{ 7040, 7103 },
{ 7360, 7367 },
@@ -4153,7 +4568,7 @@ static const URange16 Tagalog_range16[] = {
{ 5902, 5908 },
};
static const URange16 Malayalam_range16[] = {
- { 3330, 3331 },
+ { 3329, 3331 },
{ 3333, 3340 },
{ 3342, 3344 },
{ 3346, 3386 },
@@ -4161,7 +4576,7 @@ static const URange16 Malayalam_range16[] = {
{ 3398, 3400 },
{ 3402, 3406 },
{ 3415, 3415 },
- { 3424, 3427 },
+ { 3423, 3427 },
{ 3430, 3445 },
{ 3449, 3455 },
};
@@ -4186,12 +4601,20 @@ static const URange16 Meetei_Mayek_range16[] = {
{ 43968, 44013 },
{ 44016, 44025 },
};
+static const URange32 Pahawh_Hmong_range32[] = {
+ { 92928, 92997 },
+ { 93008, 93017 },
+ { 93019, 93025 },
+ { 93027, 93047 },
+ { 93053, 93071 },
+};
static const URange16 Tai_Le_range16[] = {
{ 6480, 6509 },
{ 6512, 6516 },
};
static const URange16 Kayah_Li_range16[] = {
- { 43264, 43311 },
+ { 43264, 43309 },
+ { 43311, 43311 },
};
static const URange16 Buginese_range16[] = {
{ 6656, 6683 },
@@ -4215,13 +4638,16 @@ static const URange16 Tai_Tham_range16[] = {
{ 6816, 6829 },
};
static const URange32 Old_Italic_range32[] = {
- { 66304, 66334 },
- { 66336, 66339 },
+ { 66304, 66339 },
};
static const URange32 Old_Persian_range32[] = {
{ 66464, 66499 },
{ 66504, 66517 },
};
+static const URange32 Warang_Citi_range32[] = {
+ { 71840, 71922 },
+ { 71935, 71935 },
+};
static const URange16 Latin_range16[] = {
{ 65, 90 },
{ 97, 122 },
@@ -4246,10 +4672,11 @@ static const URange16 Latin_range16[] = {
{ 8544, 8584 },
{ 11360, 11391 },
{ 42786, 42887 },
- { 42891, 42894 },
- { 42896, 42899 },
- { 42912, 42922 },
- { 43000, 43007 },
+ { 42891, 42925 },
+ { 42928, 42935 },
+ { 42999, 43007 },
+ { 43824, 43866 },
+ { 43868, 43876 },
{ 64256, 64262 },
{ 65313, 65338 },
{ 65345, 65370 },
@@ -4271,6 +4698,30 @@ static const URange16 Georgian_range16[] = {
{ 11559, 11559 },
{ 11565, 11565 },
};
+static const URange32 Grantha_range32[] = {
+ { 70400, 70403 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70460, 70468 },
+ { 70471, 70472 },
+ { 70475, 70477 },
+ { 70480, 70480 },
+ { 70487, 70487 },
+ { 70493, 70499 },
+ { 70502, 70508 },
+ { 70512, 70516 },
+};
+static const URange32 Duployan_range32[] = {
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
+ { 113820, 113823 },
+};
static const URange16 Batak_range16[] = {
{ 7104, 7155 },
{ 7164, 7167 },
@@ -4278,9 +4729,8 @@ static const URange16 Batak_range16[] = {
static const URange16 Devanagari_range16[] = {
{ 2304, 2384 },
{ 2387, 2403 },
- { 2406, 2423 },
- { 2425, 2431 },
- { 43232, 43259 },
+ { 2406, 2431 },
+ { 43232, 43261 },
};
static const URange16 Thai_range16[] = {
{ 3585, 3642 },
@@ -4307,10 +4757,14 @@ static const URange32 Ugaritic_range32[] = {
static const URange16 Braille_range16[] = {
{ 10240, 10495 },
};
+static const URange32 Anatolian_Hieroglyphs_range32[] = {
+ { 82944, 83526 },
+};
static const URange16 Greek_range16[] = {
{ 880, 883 },
{ 885, 887 },
{ 890, 893 },
+ { 895, 895 },
{ 900, 900 },
{ 902, 902 },
{ 904, 906 },
@@ -4339,14 +4793,20 @@ static const URange16 Greek_range16[] = {
{ 8178, 8180 },
{ 8182, 8190 },
{ 8486, 8486 },
+ { 43877, 43877 },
};
static const URange32 Greek_range32[] = {
- { 65856, 65930 },
+ { 65856, 65932 },
+ { 65952, 65952 },
{ 119296, 119365 },
};
static const URange32 Lycian_range32[] = {
{ 66176, 66204 },
};
+static const URange32 Mende_Kikakui_range32[] = {
+ { 124928, 125124 },
+ { 125127, 125142 },
+};
static const URange16 Tai_Viet_range16[] = {
{ 43648, 43714 },
{ 43739, 43743 },
@@ -4374,11 +4834,14 @@ static const URange16 Syriac_range16[] = {
};
static const URange16 Runic_range16[] = {
{ 5792, 5866 },
- { 5870, 5872 },
+ { 5870, 5880 },
};
static const URange32 Gothic_range32[] = {
{ 66352, 66378 },
};
+static const URange32 Mahajani_range32[] = {
+ { 69968, 70006 },
+};
static const URange16 Katakana_range16[] = {
{ 12449, 12538 },
{ 12541, 12543 },
@@ -4405,14 +4868,19 @@ static const URange16 Ol_Chiki_range16[] = {
{ 7248, 7295 },
};
static const URange16 Limbu_range16[] = {
- { 6400, 6428 },
+ { 6400, 6430 },
{ 6432, 6443 },
{ 6448, 6459 },
{ 6464, 6464 },
{ 6468, 6479 },
};
+static const URange32 Pau_Cin_Hau_range32[] = {
+ { 72384, 72440 },
+};
static const URange16 Cherokee_range16[] = {
- { 5024, 5108 },
+ { 5024, 5109 },
+ { 5112, 5117 },
+ { 43888, 43967 },
};
static const URange32 Miao_range32[] = {
{ 93952, 94020 },
@@ -4436,8 +4904,8 @@ static const URange16 Oriya_range16[] = {
{ 2918, 2935 },
};
static const URange32 Sharada_range32[] = {
- { 70016, 70088 },
- { 70096, 70105 },
+ { 70016, 70093 },
+ { 70096, 70111 },
};
static const URange16 Gujarati_range16[] = {
{ 2689, 2691 },
@@ -4453,11 +4921,20 @@ static const URange16 Gujarati_range16[] = {
{ 2768, 2768 },
{ 2784, 2787 },
{ 2790, 2801 },
+ { 2809, 2809 },
+};
+static const URange32 Modi_range32[] = {
+ { 71168, 71236 },
+ { 71248, 71257 },
};
static const URange32 Inscriptional_Pahlavi_range32[] = {
{ 68448, 68466 },
{ 68472, 68479 },
};
+static const URange32 Manichaean_range32[] = {
+ { 68288, 68326 },
+ { 68331, 68342 },
+};
static const URange16 Khmer_range16[] = {
{ 6016, 6109 },
{ 6112, 6121 },
@@ -4465,14 +4942,24 @@ static const URange16 Khmer_range16[] = {
{ 6624, 6655 },
};
static const URange32 Cuneiform_range32[] = {
- { 73728, 74606 },
- { 74752, 74850 },
- { 74864, 74867 },
+ { 73728, 74649 },
+ { 74752, 74862 },
+ { 74864, 74868 },
+ { 74880, 75075 },
+};
+static const URange32 Khudawadi_range32[] = {
+ { 70320, 70378 },
+ { 70384, 70393 },
};
static const URange16 Mandaic_range16[] = {
{ 2112, 2139 },
{ 2142, 2142 },
};
+static const URange32 Hatran_range32[] = {
+ { 67808, 67826 },
+ { 67828, 67829 },
+ { 67835, 67839 },
+};
static const URange16 Syloti_Nagri_range16[] = {
{ 43008, 43051 },
};
@@ -4490,8 +4977,12 @@ static const URange32 Phoenician_range32[] = {
{ 67840, 67867 },
{ 67871, 67871 },
};
+static const URange32 Nabataean_range32[] = {
+ { 67712, 67742 },
+ { 67751, 67759 },
+};
static const URange16 Bengali_range16[] = {
- { 2433, 2435 },
+ { 2432, 2435 },
{ 2437, 2444 },
{ 2447, 2448 },
{ 2451, 2472 },
@@ -4544,6 +5035,9 @@ static const URange16 Javanese_range16[] = {
{ 43472, 43481 },
{ 43486, 43487 },
};
+static const URange32 Old_Permic_range32[] = {
+ { 66384, 66426 },
+};
static const URange16 Phags_Pa_range16[] = {
{ 43072, 43127 },
};
@@ -4556,7 +5050,7 @@ static const URange32 Cypriot_range32[] = {
{ 67647, 67647 },
};
static const URange16 Kannada_range16[] = {
- { 3202, 3203 },
+ { 3201, 3203 },
{ 3205, 3212 },
{ 3214, 3216 },
{ 3218, 3240 },
@@ -4571,6 +5065,10 @@ static const URange16 Kannada_range16[] = {
{ 3302, 3311 },
{ 3313, 3314 },
};
+static const URange32 Khojki_range32[] = {
+ { 70144, 70161 },
+ { 70163, 70205 },
+};
static const URange16 Mongolian_range16[] = {
{ 6144, 6145 },
{ 6148, 6148 },
@@ -4590,11 +5088,19 @@ static const URange16 Sinhala_range16[] = {
{ 3535, 3540 },
{ 3542, 3542 },
{ 3544, 3551 },
+ { 3558, 3567 },
{ 3570, 3572 },
};
+static const URange32 Sinhala_range32[] = {
+ { 70113, 70132 },
+};
static const URange32 Brahmi_range32[] = {
{ 69632, 69709 },
{ 69714, 69743 },
+ { 69759, 69759 },
+};
+static const URange32 Elbasan_range32[] = {
+ { 66816, 66855 },
};
static const URange32 Deseret_range32[] = {
{ 66560, 66639 },
@@ -4603,6 +5109,18 @@ static const URange16 Rejang_range16[] = {
{ 43312, 43347 },
{ 43359, 43359 },
};
+static const URange32 SignWriting_range32[] = {
+ { 120832, 121483 },
+ { 121499, 121503 },
+ { 121505, 121519 },
+};
+static const URange32 Multani_range32[] = {
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70313 },
+};
static const URange16 Yi_range16[] = {
{ 40960, 42124 },
{ 42128, 42182 },
@@ -4643,6 +5161,11 @@ static const URange32 Linear_B_range32[] = {
{ 65616, 65629 },
{ 65664, 65786 },
};
+static const URange32 Linear_A_range32[] = {
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
+};
static const URange32 Old_Turkic_range32[] = {
{ 68608, 68680 },
};
@@ -4658,6 +5181,15 @@ static const URange32 Lydian_range32[] = {
static const URange32 Egyptian_Hieroglyphs_range32[] = {
{ 77824, 78894 },
};
+static const URange32 Caucasian_Albanian_range32[] = {
+ { 66864, 66915 },
+ { 66927, 66927 },
+};
+static const URange32 Old_Hungarian_range32[] = {
+ { 68736, 68786 },
+ { 68800, 68850 },
+ { 68858, 68863 },
+};
static const URange16 Samaritan_range16[] = {
{ 2048, 2093 },
{ 2096, 2110 },
@@ -4668,6 +5200,18 @@ static const URange16 Lisu_range16[] = {
static const URange16 Buhid_range16[] = {
{ 5952, 5971 },
};
+static const URange32 Palmyrene_range32[] = {
+ { 67680, 67711 },
+};
+static const URange32 Tirhuta_range32[] = {
+ { 70784, 70855 },
+ { 70864, 70873 },
+};
+static const URange32 Mro_range32[] = {
+ { 92736, 92766 },
+ { 92768, 92777 },
+ { 92782, 92783 },
+};
static const URange16 Common_range16[] = {
{ 0, 64 },
{ 91, 96 },
@@ -4684,11 +5228,11 @@ static const URange16 Common_range16[] = {
{ 901, 901 },
{ 903, 903 },
{ 1417, 1417 },
+ { 1541, 1541 },
{ 1548, 1548 },
- { 1563, 1563 },
+ { 1563, 1564 },
{ 1567, 1567 },
{ 1600, 1600 },
- { 1632, 1641 },
{ 1757, 1757 },
{ 2404, 2405 },
{ 3647, 3647 },
@@ -4708,21 +5252,24 @@ static const URange16 Common_range16[] = {
{ 8294, 8304 },
{ 8308, 8318 },
{ 8320, 8334 },
- { 8352, 8378 },
+ { 8352, 8382 },
{ 8448, 8485 },
{ 8487, 8489 },
{ 8492, 8497 },
{ 8499, 8525 },
{ 8527, 8543 },
- { 8585, 8585 },
- { 8592, 9203 },
+ { 8585, 8587 },
+ { 8592, 9210 },
{ 9216, 9254 },
{ 9280, 9290 },
- { 9312, 9983 },
- { 9985, 10239 },
- { 10496, 11084 },
- { 11088, 11097 },
- { 11776, 11835 },
+ { 9312, 10239 },
+ { 10496, 11123 },
+ { 11126, 11157 },
+ { 11160, 11193 },
+ { 11197, 11208 },
+ { 11210, 11217 },
+ { 11244, 11247 },
+ { 11776, 11842 },
{ 12272, 12283 },
{ 12288, 12292 },
{ 12294, 12294 },
@@ -4741,9 +5288,10 @@ static const URange16 Common_range16[] = {
{ 42752, 42785 },
{ 42888, 42890 },
{ 43056, 43065 },
+ { 43310, 43310 },
{ 43471, 43471 },
+ { 43867, 43867 },
{ 64830, 64831 },
- { 65021, 65021 },
{ 65040, 65049 },
{ 65072, 65106 },
{ 65108, 65126 },
@@ -4764,13 +5312,15 @@ static const URange32 Common_range32[] = {
{ 65847, 65855 },
{ 65936, 65947 },
{ 66000, 66044 },
+ { 66273, 66299 },
+ { 113824, 113827 },
{ 118784, 119029 },
{ 119040, 119078 },
{ 119081, 119142 },
{ 119146, 119162 },
{ 119171, 119172 },
{ 119180, 119209 },
- { 119214, 119261 },
+ { 119214, 119272 },
{ 119552, 119638 },
{ 119648, 119665 },
{ 119808, 119892 },
@@ -4797,10 +5347,10 @@ static const URange32 Common_range32[] = {
{ 126976, 127019 },
{ 127024, 127123 },
{ 127136, 127150 },
- { 127153, 127166 },
+ { 127153, 127167 },
{ 127169, 127183 },
- { 127185, 127199 },
- { 127232, 127242 },
+ { 127185, 127221 },
+ { 127232, 127244 },
{ 127248, 127278 },
{ 127280, 127339 },
{ 127344, 127386 },
@@ -4809,24 +5359,21 @@ static const URange32 Common_range32[] = {
{ 127504, 127546 },
{ 127552, 127560 },
{ 127568, 127569 },
- { 127744, 127776 },
- { 127792, 127797 },
- { 127799, 127868 },
- { 127872, 127891 },
- { 127904, 127940 },
- { 127942, 127946 },
- { 127968, 127984 },
- { 128000, 128062 },
- { 128064, 128064 },
- { 128066, 128247 },
- { 128249, 128252 },
- { 128256, 128317 },
- { 128320, 128323 },
- { 128336, 128359 },
- { 128507, 128576 },
- { 128581, 128591 },
- { 128640, 128709 },
+ { 127744, 128377 },
+ { 128379, 128419 },
+ { 128421, 128720 },
+ { 128736, 128748 },
+ { 128752, 128755 },
{ 128768, 128883 },
+ { 128896, 128980 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
+ { 129296, 129304 },
+ { 129408, 129412 },
+ { 129472, 129472 },
{ 917505, 917505 },
{ 917536, 917631 },
};
@@ -4843,23 +5390,20 @@ static const URange16 Arabic_range16[] = {
{ 1536, 1540 },
{ 1542, 1547 },
{ 1549, 1562 },
- { 1564, 1564 },
{ 1566, 1566 },
{ 1568, 1599 },
{ 1601, 1610 },
- { 1622, 1631 },
- { 1642, 1647 },
+ { 1622, 1647 },
{ 1649, 1756 },
{ 1758, 1791 },
{ 1872, 1919 },
- { 2208, 2208 },
- { 2210, 2220 },
- { 2276, 2302 },
+ { 2208, 2228 },
+ { 2275, 2303 },
{ 64336, 64449 },
{ 64467, 64829 },
{ 64848, 64911 },
{ 64914, 64967 },
- { 65008, 65020 },
+ { 65008, 65021 },
{ 65136, 65140 },
{ 65142, 65276 },
};
@@ -4908,7 +5452,12 @@ static const URange32 Bamum_range32[] = {
};
static const URange16 Myanmar_range16[] = {
{ 4096, 4255 },
- { 43616, 43643 },
+ { 43488, 43518 },
+ { 43616, 43647 },
+};
+static const URange32 Siddham_range32[] = {
+ { 71040, 71093 },
+ { 71096, 71133 },
};
static const URange32 Avestan_range32[] = {
{ 68352, 68405 },
@@ -4925,127 +5474,159 @@ static const URange16 Hebrew_range16[] = {
{ 64323, 64324 },
{ 64326, 64335 },
};
+static const URange32 Psalter_Pahlavi_range32[] = {
+ { 68480, 68497 },
+ { 68505, 68508 },
+ { 68521, 68527 },
+};
static const URange32 Takri_range32[] = {
{ 71296, 71351 },
{ 71360, 71369 },
};
-// 3867 16-bit ranges, 723 32-bit ranges
+// 3949 16-bit ranges, 1133 32-bit ranges
const UGroup unicode_groups[] = {
- { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 },
+ { "Ahom", +1, 0, 0, Ahom_range32, 3 },
+ { "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 },
+ { "Arabic", +1, Arabic_range16, 19, Arabic_range32, 35 },
{ "Armenian", +1, Armenian_range16, 6, 0, 0 },
{ "Avestan", +1, 0, 0, Avestan_range32, 2 },
{ "Balinese", +1, Balinese_range16, 2, 0, 0 },
{ "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 },
+ { "Bassa_Vah", +1, 0, 0, Bassa_Vah_range32, 2 },
{ "Batak", +1, Batak_range16, 2, 0, 0 },
{ "Bengali", +1, Bengali_range16, 14, 0, 0 },
{ "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 },
- { "Brahmi", +1, 0, 0, Brahmi_range32, 2 },
+ { "Brahmi", +1, 0, 0, Brahmi_range32, 3 },
{ "Braille", +1, Braille_range16, 1, 0, 0 },
{ "Buginese", +1, Buginese_range16, 2, 0, 0 },
{ "Buhid", +1, Buhid_range16, 1, 0, 0 },
- { "C", +1, C_range16, 15, C_range32, 6 },
+ { "C", +1, C_range16, 15, C_range32, 7 },
{ "Canadian_Aboriginal", +1, Canadian_Aboriginal_range16, 2, 0, 0 },
{ "Carian", +1, 0, 0, Carian_range32, 1 },
+ { "Caucasian_Albanian", +1, 0, 0, Caucasian_Albanian_range32, 2 },
{ "Cc", +1, Cc_range16, 2, 0, 0 },
- { "Cf", +1, Cf_range16, 12, Cf_range32, 4 },
+ { "Cf", +1, Cf_range16, 12, Cf_range32, 5 },
{ "Chakma", +1, 0, 0, Chakma_range32, 2 },
{ "Cham", +1, Cham_range16, 4, 0, 0 },
- { "Cherokee", +1, Cherokee_range16, 1, 0, 0 },
+ { "Cherokee", +1, Cherokee_range16, 3, 0, 0 },
{ "Co", +1, Co_range16, 1, Co_range32, 2 },
- { "Common", +1, Common_range16, 88, Common_range32, 70 },
+ { "Common", +1, Common_range16, 92, Common_range32, 69 },
{ "Coptic", +1, Coptic_range16, 3, 0, 0 },
{ "Cs", +1, Cs_range16, 1, 0, 0 },
- { "Cuneiform", +1, 0, 0, Cuneiform_range32, 3 },
+ { "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 },
{ "Cypriot", +1, 0, 0, Cypriot_range32, 6 },
{ "Cyrillic", +1, Cyrillic_range16, 7, 0, 0 },
{ "Deseret", +1, 0, 0, Deseret_range32, 1 },
- { "Devanagari", +1, Devanagari_range16, 5, 0, 0 },
+ { "Devanagari", +1, Devanagari_range16, 4, 0, 0 },
+ { "Duployan", +1, 0, 0, Duployan_range32, 5 },
{ "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 1 },
+ { "Elbasan", +1, 0, 0, Elbasan_range32, 1 },
{ "Ethiopic", +1, Ethiopic_range16, 32, 0, 0 },
{ "Georgian", +1, Georgian_range16, 8, 0, 0 },
{ "Glagolitic", +1, Glagolitic_range16, 2, 0, 0 },
{ "Gothic", +1, 0, 0, Gothic_range32, 1 },
- { "Greek", +1, Greek_range16, 31, Greek_range32, 2 },
- { "Gujarati", +1, Gujarati_range16, 13, 0, 0 },
+ { "Grantha", +1, 0, 0, Grantha_range32, 15 },
+ { "Greek", +1, Greek_range16, 33, Greek_range32, 3 },
+ { "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
{ "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
- { "Han", +1, Han_range16, 11, Han_range32, 4 },
+ { "Han", +1, Han_range16, 11, Han_range32, 5 },
{ "Hangul", +1, Hangul_range16, 14, 0, 0 },
{ "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
+ { "Hatran", +1, 0, 0, Hatran_range32, 3 },
{ "Hebrew", +1, Hebrew_range16, 9, 0, 0 },
{ "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 2 },
{ "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 },
- { "Inherited", +1, Inherited_range16, 18, Inherited_range32, 6 },
+ { "Inherited", +1, Inherited_range16, 20, Inherited_range32, 7 },
{ "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 },
{ "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 },
{ "Javanese", +1, Javanese_range16, 3, 0, 0 },
{ "Kaithi", +1, 0, 0, Kaithi_range32, 1 },
{ "Kannada", +1, Kannada_range16, 14, 0, 0 },
{ "Katakana", +1, Katakana_range16, 7, Katakana_range32, 1 },
- { "Kayah_Li", +1, Kayah_Li_range16, 1, 0, 0 },
+ { "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 },
{ "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 },
{ "Khmer", +1, Khmer_range16, 4, 0, 0 },
- { "L", +1, L_range16, 370, L_range32, 116 },
+ { "Khojki", +1, 0, 0, Khojki_range32, 2 },
+ { "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
+ { "L", +1, L_range16, 376, L_range32, 178 },
{ "Lao", +1, Lao_range16, 18, 0, 0 },
- { "Latin", +1, Latin_range16, 30, 0, 0 },
+ { "Latin", +1, Latin_range16, 31, 0, 0 },
{ "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
{ "Limbu", +1, Limbu_range16, 5, 0, 0 },
+ { "Linear_A", +1, 0, 0, Linear_A_range32, 3 },
{ "Linear_B", +1, 0, 0, Linear_B_range32, 7 },
{ "Lisu", +1, Lisu_range16, 1, 0, 0 },
- { "Ll", +1, Ll_range16, 582, Ll_range32, 29 },
- { "Lm", +1, Lm_range16, 51, Lm_range32, 1 },
- { "Lo", +1, Lo_range16, 286, Lo_range32, 85 },
+ { "Ll", +1, Ll_range16, 599, Ll_range32, 31 },
+ { "Lm", +1, Lm_range16, 54, Lm_range32, 2 },
+ { "Lo", +1, Lo_range16, 290, Lo_range32, 143 },
{ "Lt", +1, Lt_range16, 10, 0, 0 },
- { "Lu", +1, Lu_range16, 576, Lu_range32, 32 },
+ { "Lu", +1, Lu_range16, 591, Lu_range32, 34 },
{ "Lycian", +1, 0, 0, Lycian_range32, 1 },
{ "Lydian", +1, 0, 0, Lydian_range32, 2 },
- { "M", +1, M_range16, 180, M_range32, 24 },
+ { "M", +1, M_range16, 180, M_range32, 56 },
+ { "Mahajani", +1, 0, 0, Mahajani_range32, 1 },
{ "Malayalam", +1, Malayalam_range16, 11, 0, 0 },
{ "Mandaic", +1, Mandaic_range16, 2, 0, 0 },
- { "Mc", +1, Mc_range16, 111, Mc_range32, 15 },
- { "Me", +1, Me_range16, 4, 0, 0 },
+ { "Manichaean", +1, 0, 0, Manichaean_range32, 2 },
+ { "Mc", +1, Mc_range16, 109, Mc_range32, 38 },
+ { "Me", +1, Me_range16, 5, 0, 0 },
{ "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 },
- { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 2 },
+ { "Mende_Kikakui", +1, 0, 0, Mende_Kikakui_range32, 2 },
+ { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 },
{ "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 },
{ "Miao", +1, 0, 0, Miao_range32, 3 },
- { "Mn", +1, Mn_range16, 194, Mn_range32, 27 },
+ { "Mn", +1, Mn_range16, 200, Mn_range32, 66 },
+ { "Modi", +1, 0, 0, Modi_range32, 2 },
{ "Mongolian", +1, Mongolian_range16, 6, 0, 0 },
- { "Myanmar", +1, Myanmar_range16, 2, 0, 0 },
- { "N", +1, N_range16, 64, N_range32, 24 },
- { "Nd", +1, Nd_range16, 35, Nd_range32, 7 },
+ { "Mro", +1, 0, 0, Mro_range32, 3 },
+ { "Multani", +1, 0, 0, Multani_range32, 5 },
+ { "Myanmar", +1, Myanmar_range16, 3, 0, 0 },
+ { "N", +1, N_range16, 66, N_range32, 45 },
+ { "Nabataean", +1, 0, 0, Nabataean_range32, 2 },
+ { "Nd", +1, Nd_range16, 37, Nd_range32, 14 },
{ "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 },
{ "Nko", +1, Nko_range16, 1, 0, 0 },
{ "Nl", +1, Nl_range16, 7, Nl_range32, 5 },
- { "No", +1, No_range16, 28, No_range32, 14 },
+ { "No", +1, No_range16, 28, No_range32, 30 },
{ "Ogham", +1, Ogham_range16, 1, 0, 0 },
{ "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 },
- { "Old_Italic", +1, 0, 0, Old_Italic_range32, 2 },
+ { "Old_Hungarian", +1, 0, 0, Old_Hungarian_range32, 3 },
+ { "Old_Italic", +1, 0, 0, Old_Italic_range32, 1 },
+ { "Old_North_Arabian", +1, 0, 0, Old_North_Arabian_range32, 1 },
+ { "Old_Permic", +1, 0, 0, Old_Permic_range32, 1 },
{ "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 },
{ "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 },
{ "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 },
{ "Oriya", +1, Oriya_range16, 14, 0, 0 },
{ "Osmanya", +1, 0, 0, Osmanya_range32, 2 },
- { "P", +1, P_range16, 126, P_range32, 15 },
+ { "P", +1, P_range16, 127, P_range32, 34 },
+ { "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 },
+ { "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 },
+ { "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 },
{ "Pc", +1, Pc_range16, 6, 0, 0 },
- { "Pd", +1, Pd_range16, 16, 0, 0 },
+ { "Pd", +1, Pd_range16, 17, 0, 0 },
{ "Pe", +1, Pe_range16, 72, 0, 0 },
{ "Pf", +1, Pf_range16, 10, 0, 0 },
{ "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 },
{ "Phoenician", +1, 0, 0, Phoenician_range32, 2 },
{ "Pi", +1, Pi_range16, 11, 0, 0 },
- { "Po", +1, Po_range16, 120, Po_range32, 15 },
- { "Ps", +1, Ps_range16, 74, 0, 0 },
+ { "Po", +1, Po_range16, 123, Po_range32, 34 },
+ { "Ps", +1, Ps_range16, 75, 0, 0 },
+ { "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 },
{ "Rejang", +1, Rejang_range16, 2, 0, 0 },
{ "Runic", +1, Runic_range16, 2, 0, 0 },
- { "S", +1, S_range16, 143, S_range32, 56 },
+ { "S", +1, S_range16, 148, S_range32, 66 },
{ "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
{ "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
{ "Sc", +1, Sc_range16, 17, 0, 0 },
{ "Sharada", +1, 0, 0, Sharada_range32, 2 },
{ "Shavian", +1, 0, 0, Shavian_range32, 1 },
- { "Sinhala", +1, Sinhala_range16, 11, 0, 0 },
- { "Sk", +1, Sk_range16, 27, 0, 0 },
+ { "Siddham", +1, 0, 0, Siddham_range32, 2 },
+ { "SignWriting", +1, 0, 0, SignWriting_range32, 3 },
+ { "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 },
+ { "Sk", +1, Sk_range16, 28, Sk_range32, 1 },
{ "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
- { "So", +1, So_range16, 108, So_range32, 45 },
+ { "So", +1, So_range16, 114, So_range32, 56 },
{ "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
{ "Sundanese", +1, Sundanese_range16, 2, 0, 0 },
{ "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 },
@@ -5057,20 +5638,22 @@ const UGroup unicode_groups[] = {
{ "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 },
{ "Takri", +1, 0, 0, Takri_range32, 2 },
{ "Tamil", +1, Tamil_range16, 16, 0, 0 },
- { "Telugu", +1, Telugu_range16, 14, 0, 0 },
+ { "Telugu", +1, Telugu_range16, 13, 0, 0 },
{ "Thaana", +1, Thaana_range16, 1, 0, 0 },
{ "Thai", +1, Thai_range16, 2, 0, 0 },
{ "Tibetan", +1, Tibetan_range16, 7, 0, 0 },
{ "Tifinagh", +1, Tifinagh_range16, 3, 0, 0 },
+ { "Tirhuta", +1, 0, 0, Tirhuta_range32, 2 },
{ "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 },
{ "Vai", +1, Vai_range16, 1, 0, 0 },
+ { "Warang_Citi", +1, 0, 0, Warang_Citi_range32, 2 },
{ "Yi", +1, Yi_range16, 2, 0, 0 },
{ "Z", +1, Z_range16, 8, 0, 0 },
{ "Zl", +1, Zl_range16, 1, 0, 0 },
{ "Zp", +1, Zp_range16, 1, 0, 0 },
{ "Zs", +1, Zs_range16, 7, 0, 0 },
};
-const int num_unicode_groups = 138;
+const int num_unicode_groups = 167;
} // namespace re2
diff --git a/re2_test.bzl b/re2_test.bzl
index a52cd9f..8dafbd5 100644
--- a/re2_test.bzl
+++ b/re2_test.bzl
@@ -3,12 +3,13 @@
# license that can be found in the LICENSE file.
# Define a bazel macro that creates cc_test for re2.
-def re2_test(name, deps=[]):
+def re2_test(name, deps=[], size="medium"):
native.cc_test(
name=name,
srcs=["re2/testing/%s.cc" % (name)],
deps=[
":re2",
":test",
- ] + deps
+ ] + deps,
+ size = size,
)
diff --git a/util/atomicops.h b/util/atomicops.h
index 6007b56..dc944e7 100644
--- a/util/atomicops.h
+++ b/util/atomicops.h
@@ -53,19 +53,19 @@ static inline void WriteMemoryBarrier() {
#elif defined(__ppc__) || defined(__powerpc64__)
static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("eieio" : : : "memory");
+ __asm__ __volatile__("lwsync" : : : "memory");
}
-#elif defined(__alpha__)
+#elif defined(__aarch64__)
static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("wmb" : : : "memory");
+ __asm__ __volatile__("dmb st" : : : "memory");
}
-#elif defined(__aarch64__)
+#elif defined(__alpha__)
static inline void WriteMemoryBarrier() {
- __asm__ __volatile__("dmb st" : : : "memory");
+ __asm__ __volatile__("wmb" : : : "memory");
}
#elif defined(__arm__) && defined(__linux__)
@@ -80,36 +80,28 @@ static inline void WriteMemoryBarrier() {
#include <intrin.h>
#include <windows.h>
+static inline void WriteMemoryBarrier() {
#if defined(_M_IX86) || defined(_M_X64)
-
-// x86 and x64 CPUs have a strong memory model that prohibits most types of
-// reordering, so a non-instruction intrinsic to suppress compiler reordering is
-// sufficient. _WriteBarrier is deprecated but is still appropriate for the
-// "old compiler" path (pre C++11).
-inline void WriteMemoryBarrier() {
+ // x86 and x64 CPUs have a strong memory model that prohibits most types of
+ // reordering, so a non-instruction intrinsic to suppress compiler reordering
+ // is sufficient. _WriteBarrier is deprecated, but is still appropriate for
+ // the "old compiler" path (pre C++11).
_WriteBarrier();
-}
-
#else
-
-// Windows
-inline void WriteMemoryBarrier() {
LONG x;
::InterlockedExchange(&x, 0);
-}
-
#endif
+}
#elif defined(OS_NACL)
-// Native Client
-inline void WriteMemoryBarrier() {
+static inline void WriteMemoryBarrier() {
__sync_synchronize();
}
#elif defined(__mips__)
-inline void WriteMemoryBarrier() {
+static inline void WriteMemoryBarrier() {
__asm__ __volatile__("sync" : : : "memory");
}
@@ -148,7 +140,13 @@ static inline void MaybeReadMemoryBarrier() {}
// Read barrier for various targets.
-#if defined(__aarch64__)
+#if defined(__ppc__) || defined(__powerpc64__)
+
+static inline void ReadMemoryBarrier() {
+ __asm__ __volatile__("lwsync" : : : "memory");
+}
+
+#elif defined(__aarch64__)
static inline void ReadMemoryBarrier() {
__asm__ __volatile__("dmb ld" : : : "memory");
@@ -162,7 +160,7 @@ static inline void ReadMemoryBarrier() {
#elif defined(__mips__)
-inline void ReadMemoryBarrier() {
+static inline void ReadMemoryBarrier() {
__asm__ __volatile__("sync" : : : "memory");
}
diff --git a/util/benchmark.cc b/util/benchmark.cc
index 03dbda4..b77e22d 100644
--- a/util/benchmark.cc
+++ b/util/benchmark.cc
@@ -124,9 +124,9 @@ void RunBench(Benchmark* b, int nthread, int siz) {
while(ns < (int)1e9 && n < (int)1e9) {
last = n;
if(ns/n == 0)
- n = 1e9;
+ n = (int)1e9;
else
- n = 1e9 / (ns/n);
+ n = (int)1e9 / static_cast<int>(ns/n);
n = max(last+1, min(n+n/2, 100*last));
n = round(n);
diff --git a/util/logging.h b/util/logging.h
index 7812ecd..feac199 100644
--- a/util/logging.h
+++ b/util/logging.h
@@ -85,6 +85,11 @@ class LogMessage {
DISALLOW_COPY_AND_ASSIGN(LogMessage);
};
+#ifdef _WIN32
+#pragma warning(push)
+#pragma warning(disable: 4722) // destructor never returns
+#endif
+
class LogMessageFatal : public LogMessage {
public:
LogMessageFatal(const char* file, int line)
@@ -97,4 +102,8 @@ class LogMessageFatal : public LogMessage {
DISALLOW_COPY_AND_ASSIGN(LogMessageFatal);
};
+#ifdef _WIN32
+#pragma warning(pop)
+#endif
+
#endif // RE2_UTIL_LOGGING_H__
diff --git a/util/mutex.h b/util/mutex.h
index 9cb6de3..b479e48 100644
--- a/util/mutex.h
+++ b/util/mutex.h
@@ -55,7 +55,9 @@ namespace re2 {
# include <pthread.h>
typedef pthread_mutex_t MutexType;
#elif defined(_WIN32)
-# define WIN32_LEAN_AND_MEAN // We only need minimal includes
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN // We only need minimal includes
+# endif
# ifdef GMUTEX_TRYLOCK
// We need Windows NT or later for TryEnterCriticalSection(). If you
// don't need that functionality, you can remove these _WIN32_WINNT
diff --git a/util/pcre.cc b/util/pcre.cc
index b52236f..9a3f32d 100644
--- a/util/pcre.cc
+++ b/util/pcre.cc
@@ -384,10 +384,10 @@ int PCRE::GlobalReplace(string *str,
int count = 0;
int vec[kVecSize] = {};
string out;
- size_t start = 0;
+ int start = 0;
bool last_match_was_empty_string = false;
- for (; start <= str->length();) {
+ while (start <= static_cast<int>(str->size())) {
// If the previous match was for the empty string, we shouldn't
// just match again: we'll match in the same way and get an
// infinite loop. Instead, we do the match in a special way:
@@ -403,18 +403,19 @@ int PCRE::GlobalReplace(string *str,
matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
vec, kVecSize);
if (matches <= 0) {
- if (start < str->length())
+ if (start < static_cast<int>(str->size()))
out.push_back((*str)[start]);
start++;
last_match_was_empty_string = false;
continue;
}
} else {
- matches = pattern.TryMatch(*str, start, UNANCHORED, true, vec, kVecSize);
+ matches = pattern.TryMatch(*str, start, UNANCHORED, true,
+ vec, kVecSize);
if (matches <= 0)
break;
}
- size_t matchstart = vec[0], matchend = vec[1];
+ int matchstart = vec[0], matchend = vec[1];
assert(matchstart >= start);
assert(matchend >= matchstart);
@@ -428,8 +429,8 @@ int PCRE::GlobalReplace(string *str,
if (count == 0)
return 0;
- if (start < str->length())
- out.append(*str, start, str->length() - start);
+ if (start < static_cast<int>(str->size()))
+ out.append(*str, start, static_cast<int>(str->size()) - start);
swap(out, *str);
return count;
}
@@ -484,7 +485,7 @@ string PCRE::QuoteMeta(const StringPiece& unquoted) {
/***** Actual matching and rewriting code *****/
bool PCRE::HitLimit() {
- return hit_limit_;
+ return hit_limit_ != 0;
}
void PCRE::ClearHitLimit() {
@@ -632,9 +633,9 @@ bool PCRE::DoMatch(const StringPiece& text,
const Arg* const args[],
int n) const {
assert(n >= 0);
- size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
- // (as for kVecSize)
- int *vec = new int[vecsize];
+ const int vecsize = (1 + n) * 3; // results + PCRE workspace
+ // (as for kVecSize)
+ int* vec = new int[vecsize];
bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
delete[] vec;
return b;
@@ -840,7 +841,7 @@ bool PCRE::Arg::parse_short_radix(const char* str,
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
if ((short)r != r) return false; // Out of range
if (dest == NULL) return true;
- *(reinterpret_cast<short*>(dest)) = r;
+ *(reinterpret_cast<short*>(dest)) = (short)r;
return true;
}
@@ -852,7 +853,7 @@ bool PCRE::Arg::parse_ushort_radix(const char* str,
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
if ((ushort)r != r) return false; // Out of range
if (dest == NULL) return true;
- *(reinterpret_cast<unsigned short*>(dest)) = r;
+ *(reinterpret_cast<unsigned short*>(dest)) = (ushort)r;
return true;
}
diff --git a/util/sparse_array.h b/util/sparse_array.h
index 8bc243b..8f71fa0 100644
--- a/util/sparse_array.h
+++ b/util/sparse_array.h
@@ -220,18 +220,25 @@ class SparseArray {
// and at the beginning and end of all public non-const member functions.
inline void DebugCheckInvariants() const;
+ static bool InitMemory() {
+#ifdef MEMORY_SANITIZER
+ return true;
+#else
+ return RunningOnValgrind();
+#endif
+ }
+
int size_;
int max_size_;
int* sparse_to_dense_;
vector<IndexValue> dense_;
- bool valgrind_;
DISALLOW_COPY_AND_ASSIGN(SparseArray);
};
template<typename Value>
SparseArray<Value>::SparseArray()
- : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(), valgrind_(RunningOnValgrind()) {}
+ : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_() {}
// IndexValue pairs: exposed in SparseArray::iterator.
template<typename Value>
@@ -272,16 +279,22 @@ void SparseArray<Value>::resize(int new_max_size) {
int* a = new int[new_max_size];
if (sparse_to_dense_) {
memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
- // Don't need to zero the memory but appease Valgrind.
- if (valgrind_) {
- for (int i = max_size_; i < new_max_size; i++)
- a[i] = 0xababababU;
- }
delete[] sparse_to_dense_;
}
sparse_to_dense_ = a;
dense_.resize(new_max_size);
+
+ // These don't need to be initialized for correctness,
+ // but Valgrind will warn about use of uninitialized memory,
+ // so initialize the new memory when compiling debug binaries.
+ // Initialize it to garbage to detect bugs in the future.
+ if (InitMemory()) {
+ for (int i = max_size_; i < new_max_size; i++) {
+ sparse_to_dense_[i] = 0xababababU;
+ dense_[i].index_ = 0xababababU;
+ }
+ }
}
max_size_ = new_max_size;
if (size_ > max_size_)
@@ -418,10 +431,9 @@ void SparseArray<Value>::create_index(int i) {
template<typename Value> SparseArray<Value>::SparseArray(int max_size) {
max_size_ = max_size;
sparse_to_dense_ = new int[max_size];
- valgrind_ = RunningOnValgrind();
dense_.resize(max_size);
// Don't need to zero the new memory, but appease Valgrind.
- if (valgrind_) {
+ if (InitMemory()) {
for (int i = 0; i < max_size; i++) {
sparse_to_dense_[i] = 0xababababU;
dense_[i].index_ = 0xababababU;
diff --git a/util/sparse_set.h b/util/sparse_set.h
index ff592a8..9dd41ee 100644
--- a/util/sparse_set.h
+++ b/util/sparse_set.h
@@ -51,28 +51,18 @@
namespace re2 {
-static bool InitMemory() {
-#ifdef MEMORY_SANITIZER
- return true;
-#else
- return RunningOnValgrind();
-#endif
-}
-
class SparseSet {
public:
SparseSet()
- : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL),
- init_memory_(InitMemory()) {}
+ : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL) {}
SparseSet(int max_size) {
max_size_ = max_size;
sparse_to_dense_ = new int[max_size];
dense_ = new int[max_size];
- init_memory_ = InitMemory();
// Don't need to zero the memory, but do so anyway
// to appease Valgrind.
- if (init_memory_) {
+ if (InitMemory()) {
for (int i = 0; i < max_size; i++) {
dense_[i] = 0xababababU;
sparse_to_dense_[i] = 0xababababU;
@@ -104,7 +94,7 @@ class SparseSet {
int* a = new int[new_max_size];
if (sparse_to_dense_) {
memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
- if (init_memory_) {
+ if (InitMemory()) {
for (int i = max_size_; i < new_max_size; i++)
a[i] = 0xababababU;
}
@@ -115,7 +105,7 @@ class SparseSet {
a = new int[new_max_size];
if (dense_) {
memmove(a, dense_, size_*sizeof a[0]);
- if (init_memory_) {
+ if (InitMemory()) {
for (int i = size_; i < new_max_size; i++)
a[i] = 0xababababU;
}
@@ -174,11 +164,18 @@ class SparseSet {
static bool less(int a, int b) { return a < b; }
private:
+ static bool InitMemory() {
+#ifdef MEMORY_SANITIZER
+ return true;
+#else
+ return RunningOnValgrind();
+#endif
+ }
+
int size_;
int max_size_;
int* sparse_to_dense_;
int* dense_;
- bool init_memory_;
DISALLOW_COPY_AND_ASSIGN(SparseSet);
};
diff --git a/util/strutil.cc b/util/strutil.cc
index 19a4640..d3a0249 100644
--- a/util/strutil.cc
+++ b/util/strutil.cc
@@ -83,7 +83,7 @@ string PrefixSuccessor(const StringPiece& prefix) {
// 255's, we just return the empty string.
bool done = false;
string limit(prefix.data(), prefix.size());
- int index = limit.length() - 1;
+ int index = static_cast<int>(limit.size()) - 1;
while (!done && index >= 0) {
if ((limit[index]&255) == 255) {
limit.erase(index);
diff --git a/util/test.cc b/util/test.cc
index 85055b2..b0167e7 100644
--- a/util/test.cc
+++ b/util/test.cc
@@ -23,18 +23,6 @@ void RegisterTest(void (*fn)(void), const char *name) {
tests[ntests++].name = name;
}
-namespace re2 {
-int64 VirtualProcessSize() {
-#ifdef _WIN32
- return 0;
-#else
- struct rusage ru;
- getrusage(RUSAGE_SELF, &ru);
- return (int64)ru.ru_maxrss*1024;
-#endif
-}
-} // namespace re2
-
int main(int argc, char **argv) {
for (int i = 0; i < ntests; i++) {
printf("%s\n", tests[i].name);
diff --git a/util/test.h b/util/test.h
index 45ca6fa..3701eab 100644
--- a/util/test.h
+++ b/util/test.h
@@ -31,20 +31,15 @@ class TestRegisterer {
#define EXPECT_GE CHECK_GE
#define EXPECT_FALSE(x) CHECK(!(x))
-const bool UsingMallocCounter = false;
namespace testing {
class MallocCounter {
public:
- MallocCounter(int x) { }
+ MallocCounter(int x) {}
static const int THIS_THREAD_ONLY = 0;
long long HeapGrowth() { return 0; }
long long PeakHeapGrowth() { return 0; }
- void Reset() { }
+ void Reset() {}
};
} // namespace testing
-namespace re2 {
-int64 VirtualProcessSize();
-} // namespace re2
-
#endif // RE2_UTIL_TEST_H__
diff --git a/util/util.h b/util/util.h
index d4b072d..c59d91f 100644
--- a/util/util.h
+++ b/util/util.h
@@ -70,9 +70,6 @@ using std::unordered_set;
#define strtoull _strtoui64
#define vsnprintf vsnprintf_s
-#pragma warning(disable: 4018) // signed/unsigned mismatch
-#pragma warning(disable: 4800) // conversion from int to bool
-
#endif
namespace re2 {
@@ -141,7 +138,7 @@ static inline uint64 Hash64StringWithSeed(const char* s, int len, uint32 seed) {
return ((uint64)x << 32) | y;
}
-int RunningOnValgrind();
+bool RunningOnValgrind();
} // namespace re2
diff --git a/util/valgrind.cc b/util/valgrind.cc
index 82f9a4c..19ec22e 100644
--- a/util/valgrind.cc
+++ b/util/valgrind.cc
@@ -9,17 +9,11 @@
namespace re2 {
-#ifndef __has_feature
-#define __has_feature(x) 0
-#endif
-
-int RunningOnValgrind() {
-#if __has_feature(memory_sanitizer)
- return true;
-#elif defined(RUNNING_ON_VALGRIND)
- return RUNNING_ON_VALGRIND;
+bool RunningOnValgrind() {
+#ifdef RUNNING_ON_VALGRIND
+ return RUNNING_ON_VALGRIND != 0;
#else
- return 0;
+ return false;
#endif
}