From 2918ba0a6307898d30f1db0dda88f60f1b00e906 Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Wed, 23 Sep 2020 15:43:41 +0900 Subject: Imported Upstream version 20200401 --- doc/syntax.html | 4 + doc/syntax.txt | 4 + re2/re2.cc | 32 +++-- re2/set.cc | 7 +- re2/unicode.py | 2 +- re2/unicode_casefold.cc | 12 +- re2/unicode_groups.cc | 361 +++++++++++++++++++++++++++++++----------------- util/mutex.h | 23 ++- 8 files changed, 295 insertions(+), 150 deletions(-) diff --git a/doc/syntax.html b/doc/syntax.html index 8a268a1..47541e5 100644 --- a/doc/syntax.html +++ b/doc/syntax.html @@ -264,6 +264,7 @@ Chakma Cham Cherokee +Chorasmian Common Coptic Cuneiform @@ -271,6 +272,7 @@ Cyrillic Deseret Devanagari +Dives_Akuru Dogra Duployan Egyptian_Hieroglyphs @@ -302,6 +304,7 @@ Katakana Kayah_Li Kharoshthi +Khitan_Small_Script Khmer Khojki Khudawadi @@ -391,6 +394,7 @@ Vai Wancho Warang_Citi +Yezidi Yi Zanabazar_Square diff --git a/doc/syntax.txt b/doc/syntax.txt index cb04bbf..ce87866 100644 --- a/doc/syntax.txt +++ b/doc/syntax.txt @@ -253,6 +253,7 @@ Caucasian_Albanian Chakma Cham Cherokee +Chorasmian Common Coptic Cuneiform @@ -260,6 +261,7 @@ Cypriot Cyrillic Deseret Devanagari +Dives_Akuru Dogra Duployan Egyptian_Hieroglyphs @@ -291,6 +293,7 @@ Kannada Katakana Kayah_Li Kharoshthi +Khitan_Small_Script Khmer Khojki Khudawadi @@ -380,6 +383,7 @@ Ugaritic Vai Wancho Warang_Citi +Yezidi Yi Zanabazar_Square diff --git a/re2/re2.cc b/re2/re2.cc index 8c9ed4d..eb8ab3e 100644 --- a/re2/re2.cc +++ b/re2/re2.cc @@ -689,9 +689,11 @@ bool RE2::Match(const StringPiece& text, Prog::kLongestMatch, matchp, &dfa_failed, NULL)) { if (dfa_failed) { if (options_.log_errors()) - LOG(ERROR) << "DFA out of memory: size " << prog->size() << ", " - << "bytemap range " << prog->bytemap_range() << ", " - << "list count " << prog->list_count(); + LOG(ERROR) << "DFA out of memory: " + << "pattern length " << pattern_.size() << ", " + << "program size " << prog->size() << ", " + << "list count " 
<< prog->list_count() << ", " + << "bytemap range " << prog->bytemap_range(); // Fall back to NFA below. skipped_test = true; break; @@ -707,9 +709,11 @@ bool RE2::Match(const StringPiece& text, matchp, &dfa_failed, NULL)) { if (dfa_failed) { if (options_.log_errors()) - LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " - << "bytemap range " << prog_->bytemap_range() << ", " - << "list count " << prog_->list_count(); + LOG(ERROR) << "DFA out of memory: " + << "pattern length " << pattern_.size() << ", " + << "program size " << prog_->size() << ", " + << "list count " << prog_->list_count() << ", " + << "bytemap range " << prog_->bytemap_range(); // Fall back to NFA below. skipped_test = true; break; @@ -731,9 +735,11 @@ bool RE2::Match(const StringPiece& text, Prog::kLongestMatch, &match, &dfa_failed, NULL)) { if (dfa_failed) { if (options_.log_errors()) - LOG(ERROR) << "DFA out of memory: size " << prog->size() << ", " - << "bytemap range " << prog->bytemap_range() << ", " - << "list count " << prog->list_count(); + LOG(ERROR) << "DFA out of memory: " + << "pattern length " << pattern_.size() << ", " + << "program size " << prog->size() << ", " + << "list count " << prog->list_count() << ", " + << "bytemap range " << prog->bytemap_range(); // Fall back to NFA below. skipped_test = true; break; @@ -771,9 +777,11 @@ bool RE2::Match(const StringPiece& text, &match, &dfa_failed, NULL)) { if (dfa_failed) { if (options_.log_errors()) - LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " - << "bytemap range " << prog_->bytemap_range() << ", " - << "list count " << prog_->list_count(); + LOG(ERROR) << "DFA out of memory: " + << "pattern length " << pattern_.size() << ", " + << "program size " << prog_->size() << ", " + << "list count " << prog_->list_count() << ", " + << "bytemap range " << prog_->bytemap_range(); // Fall back to NFA below. 
skipped_test = true; break; diff --git a/re2/set.cc b/re2/set.cc index 69af666..87db6b7 100644 --- a/re2/set.cc +++ b/re2/set.cc @@ -124,9 +124,10 @@ bool RE2::Set::Match(const StringPiece& text, std::vector* v, NULL, &dfa_failed, matches.get()); if (dfa_failed) { if (options_.log_errors()) - LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " - << "bytemap range " << prog_->bytemap_range() << ", " - << "list count " << prog_->list_count(); + LOG(ERROR) << "DFA out of memory: " + << "program size " << prog_->size() << ", " + << "list count " << prog_->list_count() << ", " + << "bytemap range " << prog_->bytemap_range(); if (error_info != NULL) error_info->kind = kOutOfMemory; return false; diff --git a/re2/unicode.py b/re2/unicode.py index 56ca811..e0f33ef 100644 --- a/re2/unicode.py +++ b/re2/unicode.py @@ -13,7 +13,7 @@ import re from six.moves import urllib # Directory or URL where Unicode tables reside. -_UNICODE_DIR = "https://www.unicode.org/Public/12.1.0/ucd" +_UNICODE_DIR = "https://www.unicode.org/Public/13.0.0/ucd" # Largest valid Unicode code value. 
_RUNE_MAX = 0x10FFFF diff --git a/re2/unicode_casefold.cc b/re2/unicode_casefold.cc index 4ea2533..8424107 100644 --- a/re2/unicode_casefold.cc +++ b/re2/unicode_casefold.cc @@ -7,7 +7,7 @@ namespace re2 { -// 1381 groups, 2792 pairs, 356 ranges +// 1384 groups, 2798 pairs, 358 ranges const CaseFold unicode_casefold[] = { { 65, 90, 32 }, { 97, 106, -32 }, @@ -349,6 +349,8 @@ const CaseFold unicode_casefold[] = { { 42948, 42948, -48 }, { 42949, 42949, -42307 }, { 42950, 42950, -35384 }, + { 42951, 42954, OddEven }, + { 42997, 42998, OddEven }, { 43859, 43859, -928 }, { 43888, 43967, -38864 }, { 65313, 65338, 32 }, @@ -366,9 +368,9 @@ const CaseFold unicode_casefold[] = { { 125184, 125217, 34 }, { 125218, 125251, -34 }, }; -const int num_unicode_casefold = 356; +const int num_unicode_casefold = 358; -// 1381 groups, 1411 pairs, 198 ranges +// 1384 groups, 1414 pairs, 200 ranges const CaseFold unicode_tolower[] = { { 65, 90, 32 }, { 181, 181, 775 }, @@ -560,6 +562,8 @@ const CaseFold unicode_tolower[] = { { 42948, 42948, -48 }, { 42949, 42949, -42307 }, { 42950, 42950, -35384 }, + { 42951, 42953, OddEvenSkip }, + { 42997, 42997, OddEven }, { 43888, 43967, -38864 }, { 65313, 65338, 32 }, { 66560, 66599, 40 }, @@ -569,7 +573,7 @@ const CaseFold unicode_tolower[] = { { 93760, 93791, 32 }, { 125184, 125217, 34 }, }; -const int num_unicode_tolower = 198; +const int num_unicode_tolower = 200; diff --git a/re2/unicode_groups.cc b/re2/unicode_groups.cc index 63e6116..7b7a3c6 100644 --- a/re2/unicode_groups.cc +++ b/re2/unicode_groups.cc @@ -125,7 +125,7 @@ static const URange16 L_range16[] = { { 2112, 2136 }, { 2144, 2154 }, { 2208, 2228 }, - { 2230, 2237 }, + { 2230, 2247 }, { 2308, 2361 }, { 2365, 2365 }, { 2384, 2384 }, @@ -201,7 +201,7 @@ static const URange16 L_range16[] = { { 3294, 3294 }, { 3296, 3297 }, { 3313, 3314 }, - { 3333, 3340 }, + { 3332, 3340 }, { 3342, 3344 }, { 3346, 3386 }, { 3389, 3389 }, @@ -372,10 +372,10 @@ static const URange16 L_range16[] = { { 
12540, 12543 }, { 12549, 12591 }, { 12593, 12686 }, - { 12704, 12730 }, + { 12704, 12735 }, { 12784, 12799 }, - { 13312, 19893 }, - { 19968, 40943 }, + { 13312, 19903 }, + { 19968, 40956 }, { 40960, 42124 }, { 42192, 42237 }, { 42240, 42508 }, @@ -387,8 +387,8 @@ static const URange16 L_range16[] = { { 42775, 42783 }, { 42786, 42888 }, { 42891, 42943 }, - { 42946, 42950 }, - { 42999, 43009 }, + { 42946, 42954 }, + { 42997, 43009 }, { 43011, 43013 }, { 43015, 43018 }, { 43020, 43042 }, @@ -425,7 +425,7 @@ static const URange16 L_range16[] = { { 43808, 43814 }, { 43816, 43822 }, { 43824, 43866 }, - { 43868, 43879 }, + { 43868, 43881 }, { 43888, 44002 }, { 44032, 55203 }, { 55216, 55238 }, @@ -511,15 +511,19 @@ static const URange32 L_range32[] = { { 68736, 68786 }, { 68800, 68850 }, { 68864, 68899 }, + { 69248, 69289 }, + { 69296, 69297 }, { 69376, 69404 }, { 69415, 69415 }, { 69424, 69445 }, + { 69552, 69572 }, { 69600, 69622 }, { 69635, 69687 }, { 69763, 69807 }, { 69840, 69864 }, { 69891, 69926 }, { 69956, 69956 }, + { 69959, 69959 }, { 69968, 70002 }, { 70006, 70006 }, { 70019, 70066 }, @@ -545,7 +549,7 @@ static const URange32 L_range32[] = { { 70493, 70497 }, { 70656, 70708 }, { 70727, 70730 }, - { 70751, 70751 }, + { 70751, 70753 }, { 70784, 70831 }, { 70852, 70853 }, { 70855, 70855 }, @@ -558,7 +562,13 @@ static const URange32 L_range32[] = { { 71424, 71450 }, { 71680, 71723 }, { 71840, 71903 }, - { 71935, 71935 }, + { 71935, 71942 }, + { 71945, 71945 }, + { 71948, 71955 }, + { 71957, 71958 }, + { 71960, 71983 }, + { 71999, 71999 }, + { 72001, 72001 }, { 72096, 72103 }, { 72106, 72144 }, { 72161, 72161 }, @@ -583,6 +593,7 @@ static const URange32 L_range32[] = { { 73066, 73097 }, { 73112, 73112 }, { 73440, 73458 }, + { 73648, 73648 }, { 73728, 74649 }, { 74880, 75075 }, { 77824, 78894 }, @@ -601,7 +612,8 @@ static const URange32 L_range32[] = { { 94176, 94177 }, { 94179, 94179 }, { 94208, 100343 }, - { 100352, 101106 }, + { 100352, 101589 }, + { 101632, 
101640 }, { 110592, 110878 }, { 110928, 110930 }, { 110948, 110951 }, @@ -680,12 +692,13 @@ static const URange32 L_range32[] = { { 126625, 126627 }, { 126629, 126633 }, { 126635, 126651 }, - { 131072, 173782 }, + { 131072, 173789 }, { 173824, 177972 }, { 177984, 178205 }, { 178208, 183969 }, { 183984, 191456 }, { 194560, 195101 }, + { 196608, 201546 }, }; static const URange16 Ll_range16[] = { { 97, 122 }, @@ -1289,9 +1302,12 @@ static const URange16 Ll_range16[] = { { 42941, 42941 }, { 42943, 42943 }, { 42947, 42947 }, + { 42952, 42952 }, + { 42954, 42954 }, + { 42998, 42998 }, { 43002, 43002 }, { 43824, 43866 }, - { 43872, 43879 }, + { 43872, 43880 }, { 43888, 43967 }, { 64256, 64262 }, { 64275, 64279 }, @@ -1386,6 +1402,7 @@ static const URange16 Lm_range16[] = { { 43741, 43741 }, { 43763, 43764 }, { 43868, 43871 }, + { 43881, 43881 }, { 65392, 65392 }, { 65438, 65439 }, }; @@ -1422,7 +1439,7 @@ static const URange16 Lo_range16[] = { { 2112, 2136 }, { 2144, 2154 }, { 2208, 2228 }, - { 2230, 2237 }, + { 2230, 2247 }, { 2308, 2361 }, { 2365, 2365 }, { 2384, 2384 }, @@ -1498,7 +1515,7 @@ static const URange16 Lo_range16[] = { { 3294, 3294 }, { 3296, 3297 }, { 3313, 3314 }, - { 3333, 3340 }, + { 3332, 3340 }, { 3342, 3344 }, { 3346, 3386 }, { 3389, 3389 }, @@ -1611,10 +1628,10 @@ static const URange16 Lo_range16[] = { { 12543, 12543 }, { 12549, 12591 }, { 12593, 12686 }, - { 12704, 12730 }, + { 12704, 12735 }, { 12784, 12799 }, - { 13312, 19893 }, - { 19968, 40943 }, + { 13312, 19903 }, + { 19968, 40956 }, { 40960, 40980 }, { 40982, 42124 }, { 42192, 42231 }, @@ -1740,15 +1757,19 @@ static const URange32 Lo_range32[] = { { 68480, 68497 }, { 68608, 68680 }, { 68864, 68899 }, + { 69248, 69289 }, + { 69296, 69297 }, { 69376, 69404 }, { 69415, 69415 }, { 69424, 69445 }, + { 69552, 69572 }, { 69600, 69622 }, { 69635, 69687 }, { 69763, 69807 }, { 69840, 69864 }, { 69891, 69926 }, { 69956, 69956 }, + { 69959, 69959 }, { 69968, 70002 }, { 70006, 70006 }, { 70019, 70066 }, 
@@ -1774,7 +1795,7 @@ static const URange32 Lo_range32[] = { { 70493, 70497 }, { 70656, 70708 }, { 70727, 70730 }, - { 70751, 70751 }, + { 70751, 70753 }, { 70784, 70831 }, { 70852, 70853 }, { 70855, 70855 }, @@ -1786,7 +1807,13 @@ static const URange32 Lo_range32[] = { { 71352, 71352 }, { 71424, 71450 }, { 71680, 71723 }, - { 71935, 71935 }, + { 71935, 71942 }, + { 71945, 71945 }, + { 71948, 71955 }, + { 71957, 71958 }, + { 71960, 71983 }, + { 71999, 71999 }, + { 72001, 72001 }, { 72096, 72103 }, { 72106, 72144 }, { 72161, 72161 }, @@ -1811,6 +1838,7 @@ static const URange32 Lo_range32[] = { { 73066, 73097 }, { 73112, 73112 }, { 73440, 73458 }, + { 73648, 73648 }, { 73728, 74649 }, { 74880, 75075 }, { 77824, 78894 }, @@ -1824,7 +1852,8 @@ static const URange32 Lo_range32[] = { { 93952, 94026 }, { 94032, 94032 }, { 94208, 100343 }, - { 100352, 101106 }, + { 100352, 101589 }, + { 101632, 101640 }, { 110592, 110878 }, { 110928, 110930 }, { 110948, 110951 }, @@ -1870,12 +1899,13 @@ static const URange32 Lo_range32[] = { { 126625, 126627 }, { 126629, 126633 }, { 126635, 126651 }, - { 131072, 173782 }, + { 131072, 173789 }, { 173824, 177972 }, { 177984, 178205 }, { 178208, 183969 }, { 183984, 191456 }, { 194560, 195101 }, + { 196608, 201546 }, }; static const URange16 Lt_range16[] = { { 453, 453 }, @@ -2487,7 +2517,9 @@ static const URange16 Lu_range16[] = { { 42940, 42940 }, { 42942, 42942 }, { 42946, 42946 }, - { 42948, 42950 }, + { 42948, 42951 }, + { 42953, 42953 }, + { 42997, 42997 }, { 65313, 65338 }, }; static const URange32 Lu_range32[] = { @@ -2588,7 +2620,7 @@ static const URange16 M_range16[] = { { 2878, 2884 }, { 2887, 2888 }, { 2891, 2893 }, - { 2902, 2903 }, + { 2901, 2903 }, { 2914, 2915 }, { 2946, 2946 }, { 3006, 3010 }, @@ -2615,7 +2647,7 @@ static const URange16 M_range16[] = { { 3402, 3405 }, { 3415, 3415 }, { 3426, 3427 }, - { 3458, 3459 }, + { 3457, 3459 }, { 3530, 3530 }, { 3535, 3540 }, { 3542, 3542 }, @@ -2662,7 +2694,7 @@ static const URange16 
M_range16[] = { { 6741, 6750 }, { 6752, 6780 }, { 6783, 6783 }, - { 6832, 6846 }, + { 6832, 6848 }, { 6912, 6916 }, { 6964, 6980 }, { 7019, 7027 }, @@ -2691,6 +2723,7 @@ static const URange16 M_range16[] = { { 43014, 43014 }, { 43019, 43019 }, { 43043, 43047 }, + { 43052, 43052 }, { 43136, 43137 }, { 43188, 43205 }, { 43232, 43249 }, @@ -2728,6 +2761,7 @@ static const URange32 M_range32[] = { { 68159, 68159 }, { 68325, 68326 }, { 68900, 68903 }, + { 69291, 69292 }, { 69446, 69456 }, { 69632, 69634 }, { 69688, 69702 }, @@ -2740,6 +2774,7 @@ static const URange32 M_range32[] = { { 70016, 70018 }, { 70067, 70080 }, { 70089, 70092 }, + { 70094, 70095 }, { 70188, 70199 }, { 70206, 70206 }, { 70367, 70378 }, @@ -2762,6 +2797,11 @@ static const URange32 M_range32[] = { { 71339, 71351 }, { 71453, 71467 }, { 71724, 71738 }, + { 71984, 71989 }, + { 71991, 71992 }, + { 71995, 71998 }, + { 72000, 72000 }, + { 72002, 72003 }, { 72145, 72151 }, { 72154, 72160 }, { 72164, 72164 }, @@ -2789,6 +2829,8 @@ static const URange32 M_range32[] = { { 94031, 94031 }, { 94033, 94087 }, { 94095, 94098 }, + { 94180, 94180 }, + { 94192, 94193 }, { 113821, 113822 }, { 119141, 119145 }, { 119149, 119154 }, @@ -2935,6 +2977,7 @@ static const URange32 Mc_range32[] = { { 70018, 70018 }, { 70067, 70069 }, { 70079, 70080 }, + { 70094, 70094 }, { 70188, 70190 }, { 70194, 70195 }, { 70197, 70197 }, @@ -2966,6 +3009,11 @@ static const URange32 Mc_range32[] = { { 71462, 71462 }, { 71724, 71726 }, { 71736, 71736 }, + { 71984, 71989 }, + { 71991, 71992 }, + { 71997, 71997 }, + { 72000, 72000 }, + { 72002, 72002 }, { 72145, 72147 }, { 72156, 72159 }, { 72164, 72164 }, @@ -2982,6 +3030,7 @@ static const URange32 Mc_range32[] = { { 73110, 73110 }, { 73461, 73462 }, { 94033, 94087 }, + { 94192, 94193 }, { 119141, 119142 }, { 119149, 119154 }, }; @@ -3051,7 +3100,7 @@ static const URange16 Mn_range16[] = { { 2879, 2879 }, { 2881, 2884 }, { 2893, 2893 }, - { 2902, 2902 }, + { 2901, 2902 }, { 2914, 2915 }, { 
2946, 2946 }, { 3008, 3008 }, @@ -3074,6 +3123,7 @@ static const URange16 Mn_range16[] = { { 3393, 3396 }, { 3405, 3405 }, { 3426, 3427 }, + { 3457, 3457 }, { 3530, 3530 }, { 3538, 3540 }, { 3542, 3542 }, @@ -3131,6 +3181,7 @@ static const URange16 Mn_range16[] = { { 6771, 6780 }, { 6783, 6783 }, { 6832, 6845 }, + { 6847, 6848 }, { 6912, 6915 }, { 6964, 6964 }, { 6966, 6970 }, @@ -3171,6 +3222,7 @@ static const URange16 Mn_range16[] = { { 43014, 43014 }, { 43019, 43019 }, { 43045, 43046 }, + { 43052, 43052 }, { 43204, 43205 }, { 43232, 43249 }, { 43263, 43263 }, @@ -3212,6 +3264,7 @@ static const URange32 Mn_range32[] = { { 68159, 68159 }, { 68325, 68326 }, { 68900, 68903 }, + { 69291, 69292 }, { 69446, 69456 }, { 69633, 69633 }, { 69688, 69702 }, @@ -3225,6 +3278,7 @@ static const URange32 Mn_range32[] = { { 70016, 70017 }, { 70070, 70078 }, { 70089, 70092 }, + { 70095, 70095 }, { 70191, 70193 }, { 70196, 70196 }, { 70198, 70199 }, @@ -3260,6 +3314,9 @@ static const URange32 Mn_range32[] = { { 71463, 71467 }, { 71727, 71735 }, { 71737, 71738 }, + { 71995, 71996 }, + { 71998, 71998 }, + { 72003, 72003 }, { 72148, 72151 }, { 72154, 72155 }, { 72160, 72160 }, @@ -3291,6 +3348,7 @@ static const URange32 Mn_range32[] = { { 92976, 92982 }, { 94031, 94031 }, { 94095, 94098 }, + { 94180, 94180 }, { 113821, 113822 }, { 119143, 119145 }, { 119163, 119170 }, @@ -3413,6 +3471,7 @@ static const URange32 N_range32[] = { { 69216, 69246 }, { 69405, 69414 }, { 69457, 69460 }, + { 69573, 69579 }, { 69714, 69743 }, { 69872, 69881 }, { 69942, 69951 }, @@ -3425,6 +3484,7 @@ static const URange32 N_range32[] = { { 71360, 71369 }, { 71472, 71483 }, { 71904, 71922 }, + { 72016, 72025 }, { 72784, 72812 }, { 73040, 73049 }, { 73120, 73129 }, @@ -3447,6 +3507,7 @@ static const URange32 N_range32[] = { { 126209, 126253 }, { 126255, 126269 }, { 127232, 127244 }, + { 130032, 130041 }, }; static const URange16 Nd_range16[] = { { 48, 57 }, @@ -3501,6 +3562,7 @@ static const URange32 Nd_range32[] 
= { { 71360, 71369 }, { 71472, 71481 }, { 71904, 71913 }, + { 72016, 72025 }, { 72784, 72793 }, { 73040, 73049 }, { 73120, 73129 }, @@ -3510,6 +3572,7 @@ static const URange32 Nd_range32[] = { { 123200, 123209 }, { 123632, 123641 }, { 125264, 125273 }, + { 130032, 130041 }, }; static const URange16 Nl_range16[] = { { 5870, 5872 }, @@ -3583,6 +3646,7 @@ static const URange32 No_range32[] = { { 69216, 69246 }, { 69405, 69414 }, { 69457, 69460 }, + { 69573, 69579 }, { 69714, 69733 }, { 70113, 70132 }, { 71482, 71483 }, @@ -3692,6 +3756,7 @@ static const URange16 P_range16[] = { { 11632, 11632 }, { 11776, 11822 }, { 11824, 11855 }, + { 11858, 11858 }, { 12289, 12291 }, { 12296, 12305 }, { 12308, 12319 }, @@ -3747,6 +3812,7 @@ static const URange32 P_range32[] = { { 68336, 68342 }, { 68409, 68415 }, { 68505, 68508 }, + { 69293, 69293 }, { 69461, 69465 }, { 69703, 69709 }, { 69819, 69820 }, @@ -3760,7 +3826,7 @@ static const URange32 P_range32[] = { { 70200, 70205 }, { 70313, 70313 }, { 70731, 70735 }, - { 70747, 70747 }, + { 70746, 70747 }, { 70749, 70749 }, { 70854, 70854 }, { 71105, 71127 }, @@ -3768,6 +3834,7 @@ static const URange32 P_range32[] = { { 71264, 71276 }, { 71484, 71486 }, { 71739, 71739 }, + { 72004, 72006 }, { 72162, 72162 }, { 72255, 72262 }, { 72346, 72348 }, @@ -3814,6 +3881,9 @@ static const URange16 Pd_range16[] = { { 65123, 65123 }, { 65293, 65293 }, }; +static const URange32 Pd_range32[] = { + { 69293, 69293 }, +}; static const URange16 Pe_range16[] = { { 41, 41 }, { 93, 93 }, @@ -4002,6 +4072,7 @@ static const URange16 Po_range16[] = { { 11836, 11839 }, { 11841, 11841 }, { 11843, 11855 }, + { 11858, 11858 }, { 12289, 12291 }, { 12349, 12349 }, { 12539, 12539 }, @@ -4069,7 +4140,7 @@ static const URange32 Po_range32[] = { { 70200, 70205 }, { 70313, 70313 }, { 70731, 70735 }, - { 70747, 70747 }, + { 70746, 70747 }, { 70749, 70749 }, { 70854, 70854 }, { 71105, 71127 }, @@ -4077,6 +4148,7 @@ static const URange32 Po_range32[] = { { 71264, 71276 }, { 
71484, 71486 }, { 71739, 71739 }, + { 72004, 72006 }, { 72162, 72162 }, { 72255, 72262 }, { 72346, 72348 }, @@ -4274,8 +4346,9 @@ static const URange16 S_range16[] = { { 10716, 10747 }, { 10750, 11123 }, { 11126, 11157 }, - { 11160, 11263 }, + { 11159, 11263 }, { 11493, 11498 }, + { 11856, 11857 }, { 11904, 11929 }, { 11931, 12019 }, { 12032, 12245 }, @@ -4304,6 +4377,7 @@ static const URange16 S_range16[] = { { 43062, 43065 }, { 43639, 43641 }, { 43867, 43867 }, + { 43882, 43883 }, { 64297, 64297 }, { 64434, 64449 }, { 65020, 65021 }, @@ -4325,7 +4399,7 @@ static const URange32 S_range32[] = { { 65847, 65855 }, { 65913, 65929 }, { 65932, 65934 }, - { 65936, 65947 }, + { 65936, 65948 }, { 65952, 65952 }, { 66000, 66044 }, { 67703, 67704 }, @@ -4372,16 +4446,15 @@ static const URange32 S_range32[] = { { 127153, 127167 }, { 127169, 127183 }, { 127185, 127221 }, - { 127248, 127340 }, - { 127344, 127404 }, + { 127245, 127405 }, { 127462, 127490 }, { 127504, 127547 }, { 127552, 127560 }, { 127568, 127569 }, { 127584, 127589 }, - { 127744, 128725 }, + { 127744, 128727 }, { 128736, 128748 }, - { 128752, 128762 }, + { 128752, 128764 }, { 128768, 128883 }, { 128896, 128984 }, { 128992, 129003 }, @@ -4390,18 +4463,20 @@ static const URange32 S_range32[] = { { 129104, 129113 }, { 129120, 129159 }, { 129168, 129197 }, - { 129280, 129291 }, - { 129293, 129393 }, - { 129395, 129398 }, - { 129402, 129442 }, - { 129445, 129450 }, - { 129454, 129482 }, + { 129200, 129201 }, + { 129280, 129400 }, + { 129402, 129483 }, { 129485, 129619 }, { 129632, 129645 }, - { 129648, 129651 }, + { 129648, 129652 }, { 129656, 129658 }, - { 129664, 129666 }, - { 129680, 129685 }, + { 129664, 129670 }, + { 129680, 129704 }, + { 129712, 129718 }, + { 129728, 129730 }, + { 129744, 129750 }, + { 129792, 129938 }, + { 129940, 129994 }, }; static const URange16 Sc_range16[] = { { 36, 36 }, @@ -4453,6 +4528,7 @@ static const URange16 Sk_range16[] = { { 42784, 42785 }, { 42889, 42890 }, { 43867, 43867 }, + 
{ 43882, 43883 }, { 64434, 64449 }, { 65342, 65342 }, { 65344, 65344 }, @@ -4610,8 +4686,9 @@ static const URange16 So_range16[] = { { 11077, 11078 }, { 11085, 11123 }, { 11126, 11157 }, - { 11160, 11263 }, + { 11159, 11263 }, { 11493, 11498 }, + { 11856, 11857 }, { 11904, 11929 }, { 11931, 12019 }, { 12032, 12245 }, @@ -4646,7 +4723,7 @@ static const URange32 So_range32[] = { { 65847, 65855 }, { 65913, 65929 }, { 65932, 65934 }, - { 65936, 65947 }, + { 65936, 65948 }, { 65952, 65952 }, { 66000, 66044 }, { 67703, 67704 }, @@ -4681,17 +4758,16 @@ static const URange32 So_range32[] = { { 127153, 127167 }, { 127169, 127183 }, { 127185, 127221 }, - { 127248, 127340 }, - { 127344, 127404 }, + { 127245, 127405 }, { 127462, 127490 }, { 127504, 127547 }, { 127552, 127560 }, { 127568, 127569 }, { 127584, 127589 }, { 127744, 127994 }, - { 128000, 128725 }, + { 128000, 128727 }, { 128736, 128748 }, - { 128752, 128762 }, + { 128752, 128764 }, { 128768, 128883 }, { 128896, 128984 }, { 128992, 129003 }, @@ -4700,18 +4776,20 @@ static const URange32 So_range32[] = { { 129104, 129113 }, { 129120, 129159 }, { 129168, 129197 }, - { 129280, 129291 }, - { 129293, 129393 }, - { 129395, 129398 }, - { 129402, 129442 }, - { 129445, 129450 }, - { 129454, 129482 }, + { 129200, 129201 }, + { 129280, 129400 }, + { 129402, 129483 }, { 129485, 129619 }, { 129632, 129645 }, - { 129648, 129651 }, + { 129648, 129652 }, { 129656, 129658 }, - { 129664, 129666 }, - { 129680, 129685 }, + { 129664, 129670 }, + { 129680, 129704 }, + { 129712, 129718 }, + { 129728, 129730 }, + { 129744, 129750 }, + { 129792, 129938 }, + { 129940, 129994 }, }; static const URange16 Z_range16[] = { { 32, 32 }, @@ -4764,7 +4842,7 @@ static const URange16 Arabic_range16[] = { { 1758, 1791 }, { 1872, 1919 }, { 2208, 2228 }, - { 2230, 2237 }, + { 2230, 2247 }, { 2259, 2273 }, { 2275, 2303 }, { 64336, 64449 }, @@ -4814,8 +4892,7 @@ static const URange32 Arabic_range32[] = { }; static const URange16 Armenian_range16[] = { { 
1329, 1366 }, - { 1369, 1416 }, - { 1418, 1418 }, + { 1369, 1418 }, { 1421, 1423 }, { 64275, 64279 }, }; @@ -4866,7 +4943,7 @@ static const URange32 Bhaiksuki_range32[] = { static const URange16 Bopomofo_range16[] = { { 746, 747 }, { 12549, 12591 }, - { 12704, 12730 }, + { 12704, 12735 }, }; static const URange32 Brahmi_range32[] = { { 69632, 69709 }, @@ -4896,7 +4973,7 @@ static const URange32 Caucasian_Albanian_range32[] = { }; static const URange32 Chakma_range32[] = { { 69888, 69940 }, - { 69942, 69958 }, + { 69942, 69959 }, }; static const URange16 Cham_range16[] = { { 43520, 43574 }, @@ -4909,6 +4986,9 @@ static const URange16 Cherokee_range16[] = { { 5112, 5117 }, { 43888, 43967 }, }; +static const URange32 Chorasmian_range32[] = { + { 69552, 69579 }, +}; static const URange16 Common_range16[] = { { 0, 64 }, { 91, 96 }, @@ -4924,7 +5004,6 @@ static const URange16 Common_range16[] = { { 894, 894 }, { 901, 901 }, { 903, 903 }, - { 1417, 1417 }, { 1541, 1541 }, { 1548, 1548 }, { 1563, 1563 }, @@ -4963,8 +5042,8 @@ static const URange16 Common_range16[] = { { 9312, 10239 }, { 10496, 11123 }, { 11126, 11157 }, - { 11160, 11263 }, - { 11776, 11855 }, + { 11159, 11263 }, + { 11776, 11858 }, { 12272, 12283 }, { 12288, 12292 }, { 12294, 12294 }, @@ -4987,6 +5066,7 @@ static const URange16 Common_range16[] = { { 43310, 43310 }, { 43471, 43471 }, { 43867, 43867 }, + { 43882, 43883 }, { 64830, 64831 }, { 65040, 65049 }, { 65072, 65106 }, @@ -5006,7 +5086,7 @@ static const URange32 Common_range32[] = { { 65792, 65794 }, { 65799, 65843 }, { 65847, 65855 }, - { 65936, 65947 }, + { 65936, 65948 }, { 66000, 66044 }, { 66273, 66299 }, { 94178, 94179 }, @@ -5050,18 +5130,16 @@ static const URange32 Common_range32[] = { { 127153, 127167 }, { 127169, 127183 }, { 127185, 127221 }, - { 127232, 127244 }, - { 127248, 127340 }, - { 127344, 127404 }, + { 127232, 127405 }, { 127462, 127487 }, { 127489, 127490 }, { 127504, 127547 }, { 127552, 127560 }, { 127568, 127569 }, { 127584, 
127589 }, - { 127744, 128725 }, + { 127744, 128727 }, { 128736, 128748 }, - { 128752, 128762 }, + { 128752, 128764 }, { 128768, 128883 }, { 128896, 128984 }, { 128992, 129003 }, @@ -5070,18 +5148,21 @@ static const URange32 Common_range32[] = { { 129104, 129113 }, { 129120, 129159 }, { 129168, 129197 }, - { 129280, 129291 }, - { 129293, 129393 }, - { 129395, 129398 }, - { 129402, 129442 }, - { 129445, 129450 }, - { 129454, 129482 }, + { 129200, 129201 }, + { 129280, 129400 }, + { 129402, 129483 }, { 129485, 129619 }, { 129632, 129645 }, - { 129648, 129651 }, + { 129648, 129652 }, { 129656, 129658 }, - { 129664, 129666 }, - { 129680, 129685 }, + { 129664, 129670 }, + { 129680, 129704 }, + { 129712, 129718 }, + { 129728, 129730 }, + { 129744, 129750 }, + { 129792, 129938 }, + { 129940, 129994 }, + { 130032, 130041 }, { 917505, 917505 }, { 917536, 917631 }, }; @@ -5123,6 +5204,16 @@ static const URange16 Devanagari_range16[] = { { 2406, 2431 }, { 43232, 43263 }, }; +static const URange32 Dives_Akuru_range32[] = { + { 71936, 71942 }, + { 71945, 71945 }, + { 71948, 71955 }, + { 71957, 71958 }, + { 71960, 71989 }, + { 71991, 71992 }, + { 71995, 72006 }, + { 72016, 72025 }, +}; static const URange32 Dogra_range32[] = { { 71680, 71739 }, }; @@ -5310,18 +5401,20 @@ static const URange16 Han_range16[] = { { 12295, 12295 }, { 12321, 12329 }, { 12344, 12347 }, - { 13312, 19893 }, - { 19968, 40943 }, + { 13312, 19903 }, + { 19968, 40956 }, { 63744, 64109 }, { 64112, 64217 }, }; static const URange32 Han_range32[] = { - { 131072, 173782 }, + { 94192, 94193 }, + { 131072, 173789 }, { 173824, 177972 }, { 177984, 178205 }, { 178208, 183969 }, { 183984, 191456 }, { 194560, 195101 }, + { 196608, 201546 }, }; static const URange16 Hangul_range16[] = { { 4352, 4607 }, @@ -5381,7 +5474,7 @@ static const URange16 Inherited_range16[] = { { 1611, 1621 }, { 1648, 1648 }, { 2385, 2388 }, - { 6832, 6846 }, + { 6832, 6848 }, { 7376, 7378 }, { 7380, 7392 }, { 7394, 7400 }, @@ -5466,6 +5559,10 
@@ static const URange32 Kharoshthi_range32[] = { { 68159, 68168 }, { 68176, 68184 }, }; +static const URange32 Khitan_Small_Script_range32[] = { + { 94180, 94180 }, + { 101120, 101589 }, +}; static const URange16 Khmer_range16[] = { { 6016, 6109 }, { 6112, 6121 }, @@ -5518,11 +5615,11 @@ static const URange16 Latin_range16[] = { { 11360, 11391 }, { 42786, 42887 }, { 42891, 42943 }, - { 42946, 42950 }, - { 42999, 43007 }, + { 42946, 42954 }, + { 42997, 43007 }, { 43824, 43866 }, { 43868, 43876 }, - { 43878, 43879 }, + { 43878, 43881 }, { 64256, 64262 }, { 65313, 65338 }, { 65345, 65370 }, @@ -5556,6 +5653,9 @@ static const URange32 Linear_B_range32[] = { static const URange16 Lisu_range16[] = { { 42192, 42239 }, }; +static const URange32 Lisu_range32[] = { + { 73648, 73648 }, +}; static const URange32 Lycian_range32[] = { { 66176, 66204 }, }; @@ -5570,8 +5670,7 @@ static const URange32 Makasar_range32[] = { { 73440, 73464 }, }; static const URange16 Malayalam_range16[] = { - { 3328, 3331 }, - { 3333, 3340 }, + { 3328, 3340 }, { 3342, 3344 }, { 3346, 3396 }, { 3398, 3400 }, @@ -5674,9 +5773,8 @@ static const URange16 New_Tai_Lue_range16[] = { { 6622, 6623 }, }; static const URange32 Newa_range32[] = { - { 70656, 70745 }, - { 70747, 70747 }, - { 70749, 70751 }, + { 70656, 70747 }, + { 70749, 70753 }, }; static const URange16 Nko_range16[] = { { 1984, 2042 }, @@ -5737,7 +5835,7 @@ static const URange16 Oriya_range16[] = { { 2876, 2884 }, { 2887, 2888 }, { 2891, 2893 }, - { 2902, 2903 }, + { 2901, 2903 }, { 2908, 2909 }, { 2911, 2915 }, { 2918, 2935 }, @@ -5792,8 +5890,7 @@ static const URange16 Saurashtra_range16[] = { { 43214, 43225 }, }; static const URange32 Sharada_range32[] = { - { 70016, 70093 }, - { 70096, 70111 }, + { 70016, 70111 }, }; static const URange32 Shavian_range32[] = { { 66640, 66687 }, @@ -5808,7 +5905,7 @@ static const URange32 SignWriting_range32[] = { { 121505, 121519 }, }; static const URange16 Sinhala_range16[] = { - { 3458, 3459 }, + { 3457, 
3459 }, { 3461, 3478 }, { 3482, 3505 }, { 3507, 3515 }, @@ -5839,7 +5936,7 @@ static const URange16 Sundanese_range16[] = { { 7360, 7367 }, }; static const URange16 Syloti_Nagri_range16[] = { - { 43008, 43051 }, + { 43008, 43052 }, }; static const URange16 Syriac_range16[] = { { 1792, 1805 }, @@ -5900,7 +5997,8 @@ static const URange32 Tamil_range32[] = { static const URange32 Tangut_range32[] = { { 94176, 94176 }, { 94208, 100343 }, - { 100352, 101106 }, + { 100352, 101119 }, + { 101632, 101640 }, }; static const URange16 Telugu_range16[] = { { 3072, 3084 }, @@ -5956,6 +6054,11 @@ static const URange32 Warang_Citi_range32[] = { { 71840, 71922 }, { 71935, 71935 }, }; +static const URange32 Yezidi_range32[] = { + { 69248, 69289 }, + { 69291, 69293 }, + { 69296, 69297 }, +}; static const URange16 Yi_range16[] = { { 40960, 42124 }, { 42128, 42182 }, @@ -5963,13 +6066,13 @@ static const URange16 Yi_range16[] = { static const URange32 Zanabazar_Square_range32[] = { { 72192, 72263 }, }; -// 3987 16-bit ranges, 1525 32-bit ranges +// 4001 16-bit ranges, 1602 32-bit ranges const UGroup unicode_groups[] = { { "Adlam", +1, 0, 0, Adlam_range32, 3 }, { "Ahom", +1, 0, 0, Ahom_range32, 3 }, { "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 }, { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 }, - { "Armenian", +1, Armenian_range16, 5, 0, 0 }, + { "Armenian", +1, Armenian_range16, 4, 0, 0 }, { "Avestan", +1, 0, 0, Avestan_range32, 2 }, { "Balinese", +1, Balinese_range16, 2, 0, 0 }, { "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 }, @@ -5991,8 +6094,9 @@ const UGroup unicode_groups[] = { { "Chakma", +1, 0, 0, Chakma_range32, 2 }, { "Cham", +1, Cham_range16, 4, 0, 0 }, { "Cherokee", +1, Cherokee_range16, 3, 0, 0 }, + { "Chorasmian", +1, 0, 0, Chorasmian_range32, 1 }, { "Co", +1, Co_range16, 1, Co_range32, 2 }, - { "Common", +1, Common_range16, 91, Common_range32, 81 }, + { "Common", +1, Common_range16, 91, Common_range32, 82 }, { "Coptic", +1, 
Coptic_range16, 3, 0, 0 }, { "Cs", +1, Cs_range16, 1, 0, 0 }, { "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 }, @@ -6000,6 +6104,7 @@ const UGroup unicode_groups[] = { { "Cyrillic", +1, Cyrillic_range16, 8, 0, 0 }, { "Deseret", +1, 0, 0, Deseret_range32, 1 }, { "Devanagari", +1, Devanagari_range16, 4, 0, 0 }, + { "Dives_Akuru", +1, 0, 0, Dives_Akuru_range32, 8 }, { "Dogra", +1, 0, 0, Dogra_range32, 1 }, { "Duployan", +1, 0, 0, Duployan_range32, 5 }, { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 2 }, @@ -6014,7 +6119,7 @@ const UGroup unicode_groups[] = { { "Gujarati", +1, Gujarati_range16, 14, 0, 0 }, { "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 }, { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 }, - { "Han", +1, Han_range16, 11, Han_range32, 6 }, + { "Han", +1, Han_range16, 11, Han_range32, 8 }, { "Hangul", +1, Hangul_range16, 14, 0, 0 }, { "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 }, { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 }, @@ -6031,33 +6136,34 @@ const UGroup unicode_groups[] = { { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 2 }, { "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 }, { "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 }, + { "Khitan_Small_Script", +1, 0, 0, Khitan_Small_Script_range32, 2 }, { "Khmer", +1, Khmer_range16, 4, 0, 0 }, { "Khojki", +1, 0, 0, Khojki_range32, 2 }, { "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 }, - { "L", +1, L_range16, 380, L_range32, 229 }, + { "L", +1, L_range16, 380, L_range32, 242 }, { "Lao", +1, Lao_range16, 11, 0, 0 }, { "Latin", +1, Latin_range16, 32, 0, 0 }, { "Lepcha", +1, Lepcha_range16, 3, 0, 0 }, { "Limbu", +1, Limbu_range16, 5, 0, 0 }, { "Linear_A", +1, 0, 0, Linear_A_range32, 3 }, { "Linear_B", +1, 0, 0, Linear_B_range32, 7 }, - { "Lisu", +1, Lisu_range16, 1, 0, 0 }, - { "Ll", +1, Ll_range16, 608, Ll_range32, 34 }, - { "Lm", +1, Lm_range16, 54, Lm_range32, 6 }, - { "Lo", +1, Lo_range16, 290, Lo_range32, 186 }, + { "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 }, + { 
"Ll", +1, Ll_range16, 611, Ll_range32, 34 }, + { "Lm", +1, Lm_range16, 55, Lm_range32, 6 }, + { "Lo", +1, Lo_range16, 290, Lo_range32, 199 }, { "Lt", +1, Lt_range16, 10, 0, 0 }, - { "Lu", +1, Lu_range16, 599, Lu_range32, 37 }, + { "Lu", +1, Lu_range16, 601, Lu_range32, 37 }, { "Lycian", +1, 0, 0, Lycian_range32, 1 }, { "Lydian", +1, 0, 0, Lydian_range32, 2 }, - { "M", +1, M_range16, 186, M_range32, 94 }, + { "M", +1, M_range16, 187, M_range32, 103 }, { "Mahajani", +1, 0, 0, Mahajani_range32, 1 }, { "Makasar", +1, 0, 0, Makasar_range32, 1 }, - { "Malayalam", +1, Malayalam_range16, 8, 0, 0 }, + { "Malayalam", +1, Malayalam_range16, 7, 0, 0 }, { "Mandaic", +1, Mandaic_range16, 2, 0, 0 }, { "Manichaean", +1, 0, 0, Manichaean_range32, 2 }, { "Marchen", +1, 0, 0, Marchen_range32, 3 }, { "Masaram_Gondi", +1, 0, 0, Masaram_Gondi_range32, 7 }, - { "Mc", +1, Mc_range16, 109, Mc_range32, 59 }, + { "Mc", +1, Mc_range16, 109, Mc_range32, 66 }, { "Me", +1, Me_range16, 5, 0, 0 }, { "Medefaidrin", +1, 0, 0, Medefaidrin_range32, 1 }, { "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 }, @@ -6065,21 +6171,21 @@ const UGroup unicode_groups[] = { { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 }, { "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 }, { "Miao", +1, 0, 0, Miao_range32, 3 }, - { "Mn", +1, Mn_range16, 207, Mn_range32, 111 }, + { "Mn", +1, Mn_range16, 210, Mn_range32, 117 }, { "Modi", +1, 0, 0, Modi_range32, 2 }, { "Mongolian", +1, Mongolian_range16, 6, Mongolian_range32, 1 }, { "Mro", +1, 0, 0, Mro_range32, 3 }, { "Multani", +1, 0, 0, Multani_range32, 5 }, { "Myanmar", +1, Myanmar_range16, 3, 0, 0 }, - { "N", +1, N_range16, 67, N_range32, 63 }, + { "N", +1, N_range16, 67, N_range32, 66 }, { "Nabataean", +1, 0, 0, Nabataean_range32, 2 }, { "Nandinagari", +1, 0, 0, Nandinagari_range32, 3 }, - { "Nd", +1, Nd_range16, 37, Nd_range32, 22 }, + { "Nd", +1, Nd_range16, 37, Nd_range32, 24 }, { "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 }, - { 
"Newa", +1, 0, 0, Newa_range32, 3 }, + { "Newa", +1, 0, 0, Newa_range32, 2 }, { "Nko", +1, Nko_range16, 2, 0, 0 }, { "Nl", +1, Nl_range16, 7, Nl_range32, 5 }, - { "No", +1, No_range16, 29, No_range32, 41 }, + { "No", +1, No_range16, 29, No_range32, 42 }, { "Nushu", +1, 0, 0, Nushu_range32, 2 }, { "Nyiakeng_Puachue_Hmong", +1, 0, 0, Nyiakeng_Puachue_Hmong_range32, 4 }, { "Ogham", +1, Ogham_range16, 1, 0, 0 }, @@ -6095,34 +6201,34 @@ const UGroup unicode_groups[] = { { "Oriya", +1, Oriya_range16, 14, 0, 0 }, { "Osage", +1, 0, 0, Osage_range32, 2 }, { "Osmanya", +1, 0, 0, Osmanya_range32, 2 }, - { "P", +1, P_range16, 131, P_range32, 51 }, + { "P", +1, P_range16, 132, P_range32, 53 }, { "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 }, { "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 }, { "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 }, { "Pc", +1, Pc_range16, 6, 0, 0 }, - { "Pd", +1, Pd_range16, 17, 0, 0 }, + { "Pd", +1, Pd_range16, 17, Pd_range32, 1 }, { "Pe", +1, Pe_range16, 72, 0, 0 }, { "Pf", +1, Pf_range16, 10, 0, 0 }, { "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 }, { "Phoenician", +1, 0, 0, Phoenician_range32, 2 }, { "Pi", +1, Pi_range16, 11, 0, 0 }, - { "Po", +1, Po_range16, 128, Po_range32, 51 }, + { "Po", +1, Po_range16, 129, Po_range32, 52 }, { "Ps", +1, Ps_range16, 75, 0, 0 }, { "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 }, { "Rejang", +1, Rejang_range16, 2, 0, 0 }, { "Runic", +1, Runic_range16, 2, 0, 0 }, - { "S", +1, S_range16, 146, S_range32, 80 }, + { "S", +1, S_range16, 148, S_range32, 81 }, { "Samaritan", +1, Samaritan_range16, 2, 0, 0 }, { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 }, { "Sc", +1, Sc_range16, 18, Sc_range32, 3 }, - { "Sharada", +1, 0, 0, Sharada_range32, 2 }, + { "Sharada", +1, 0, 0, Sharada_range32, 1 }, { "Shavian", +1, 0, 0, Shavian_range32, 1 }, { "Siddham", +1, 0, 0, Siddham_range32, 2 }, { "SignWriting", +1, 0, 0, SignWriting_range32, 3 }, { "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 }, - { "Sk", +1, 
Sk_range16, 28, Sk_range32, 1 }, + { "Sk", +1, Sk_range16, 29, Sk_range32, 1 }, { "Sm", +1, Sm_range16, 53, Sm_range32, 11 }, - { "So", +1, So_range16, 111, So_range32, 69 }, + { "So", +1, So_range16, 112, So_range32, 70 }, { "Sogdian", +1, 0, 0, Sogdian_range32, 1 }, { "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 }, { "Soyombo", +1, 0, 0, Soyombo_range32, 1 }, @@ -6136,7 +6242,7 @@ const UGroup unicode_groups[] = { { "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 }, { "Takri", +1, 0, 0, Takri_range32, 2 }, { "Tamil", +1, Tamil_range16, 16, Tamil_range32, 2 }, - { "Tangut", +1, 0, 0, Tangut_range32, 3 }, + { "Tangut", +1, 0, 0, Tangut_range32, 4 }, { "Telugu", +1, Telugu_range16, 12, 0, 0 }, { "Thaana", +1, Thaana_range16, 1, 0, 0 }, { "Thai", +1, Thai_range16, 2, 0, 0 }, @@ -6147,6 +6253,7 @@ const UGroup unicode_groups[] = { { "Vai", +1, Vai_range16, 1, 0, 0 }, { "Wancho", +1, 0, 0, Wancho_range32, 2 }, { "Warang_Citi", +1, 0, 0, Warang_Citi_range32, 2 }, + { "Yezidi", +1, 0, 0, Yezidi_range32, 3 }, { "Yi", +1, Yi_range16, 2, 0, 0 }, { "Z", +1, Z_range16, 8, 0, 0 }, { "Zanabazar_Square", +1, 0, 0, Zanabazar_Square_range32, 1 }, @@ -6154,7 +6261,7 @@ const UGroup unicode_groups[] = { { "Zp", +1, Zp_range16, 1, 0, 0 }, { "Zs", +1, Zs_range16, 7, 0, 0 }, }; -const int num_unicode_groups = 188; +const int num_unicode_groups = 192; } // namespace re2 diff --git a/util/mutex.h b/util/mutex.h index 9c49158..e2a8715 100644 --- a/util/mutex.h +++ b/util/mutex.h @@ -10,7 +10,12 @@ * You should assume the locks are *not* re-entrant. */ -#if !defined(_WIN32) +#ifdef _WIN32 +// Requires Windows Vista or Windows Server 2008 at minimum. 
+#if defined(WINVER) && WINVER >= 0x0600 +#define MUTEX_IS_WIN32_SRWLOCK +#endif +#else #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif @@ -20,7 +25,10 @@ #endif #endif -#if defined(MUTEX_IS_PTHREAD_RWLOCK) +#if defined(MUTEX_IS_WIN32_SRWLOCK) +#include <windows.h> +typedef SRWLOCK MutexType; +#elif defined(MUTEX_IS_PTHREAD_RWLOCK) #include <pthread.h> #include <stdlib.h> typedef pthread_rwlock_t MutexType; @@ -56,7 +64,16 @@ class Mutex { Mutex& operator=(const Mutex&) = delete; }; -#if defined(MUTEX_IS_PTHREAD_RWLOCK) +#if defined(MUTEX_IS_WIN32_SRWLOCK) + +Mutex::Mutex() { InitializeSRWLock(&mutex_); } +Mutex::~Mutex() { } +void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); } +void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); } +void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); } +void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); } + +#elif defined(MUTEX_IS_PTHREAD_RWLOCK) #define SAFE_PTHREAD(fncall) \ do { \ -- cgit v1.2.3