diff options
Diffstat (limited to 'src/gen-use-table.py')
-rwxr-xr-x | src/gen-use-table.py | 49 |
1 files changed, 28 insertions, 21 deletions
diff --git a/src/gen-use-table.py b/src/gen-use-table.py index a922c92..0681725 100755 --- a/src/gen-use-table.py +++ b/src/gen-use-table.py @@ -44,6 +44,7 @@ defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block') # TODO Characters that are not in Unicode Indic files, but used in USE data[0][0x034F] = defaults[0] data[0][0x2060] = defaults[0] +data[0][0x20F0] = defaults[0] for u in range (0xFE00, 0xFE0F + 1): data[0][u] = defaults[0] @@ -117,6 +118,7 @@ property_names = [ 'Top_And_Right', 'Top_And_Left', 'Top_And_Left_And_Right', + 'Bottom_And_Left', 'Bottom_And_Right', 'Top_And_Bottom_And_Right', 'Overstruck', @@ -153,7 +155,7 @@ def is_BASE(U, UISC, UGC): def is_BASE_IND(U, UISC, UGC): #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po) return (UISC in [Consonant_Dead, Modifying_Letter] or - (UGC == Po and not U in [0x104E, 0x2022]) or + (UGC == Po and not U in [0x104E, 0x2022, 0x11A3F, 0x11A45]) or False # SPEC-DRAFT-OUTDATED! U == 0x002D ) def is_BASE_NUM(U, UISC, UGC): @@ -177,6 +179,8 @@ def is_CONS_MOD(U, UISC, UGC): def is_CONS_SUB(U, UISC, UGC): #SPEC-DRAFT return UISC == Consonant_Subjoined return UISC == Consonant_Subjoined and UGC != Lo +def is_CONS_WITH_STACKER(U, UISC, UGC): + return UISC == Consonant_With_Stacker def is_HALANT(U, UISC, UGC): return UISC in [Virama, Invisible_Stacker] def is_HALANT_NUM(U, UISC, UGC): @@ -198,9 +202,7 @@ def is_OTHER(U, UISC, UGC): def is_Reserved(U, UISC, UGC): return UGC == 'Cn' def is_REPHA(U, UISC, UGC): - #return UISC == Consonant_Preceding_Repha - #SPEC-OUTDATED hack to categorize Consonant_With_Stacker and Consonant_Prefixed - return UISC in [Consonant_Preceding_Repha, Consonant_With_Stacker, Consonant_Prefixed] + return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed] def is_SYM(U, UISC, UGC): if U == 0x25CC: return False #SPEC-DRAFT #SPEC-DRAFT return UGC in [So, Sc] or UISC == Symbol_Letter @@ -210,11 +212,13 @@ def is_SYM_MOD(U, UISC, UGC): def is_VARIATION_SELECTOR(U, UISC, UGC): return 0xFE00 <= U <= 0xFE0F def is_VOWEL(U, UISC, UGC): + # https://github.com/roozbehp/unicode-data/issues/6 return (UISC == Pure_Killer or - (UGC != Lo and UISC in [Vowel, Vowel_Dependent])) + (UGC != Lo and UISC in [Vowel, Vowel_Dependent] and U not in [0xAA29])) def is_VOWEL_MOD(U, UISC, UGC): + # https://github.com/roozbehp/unicode-data/issues/6 return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or - (UGC != Lo and UISC == Bindu)) + (UGC != Lo and (UISC == Bindu or U in [0xAA29]))) use_mapping = { 'B': is_BASE, @@ -227,6 +231,7 @@ use_mapping = { 'M': is_CONS_MED, 'CM': is_CONS_MOD, 'SUB': is_CONS_SUB, + 'CS': is_CONS_WITH_STACKER, 'H': is_HALANT, 'HN': is_HALANT_NUM, 'ZWNJ': is_ZWNJ, @@ -250,7 +255,7 @@ use_positions = { }, 'M': { 'Abv': [Top], - 'Blw': [Bottom], + 'Blw': [Bottom, Bottom_And_Left], 'Pst': [Right], 'Pre': [Left], }, @@ -292,12 +297,23 @@ def map_to_use(data): if U == 0x17DD: UISC = Vowel_Dependent if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark + # TODO: https://github.com/harfbuzz/harfbuzz/pull/627 + if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom + # TODO: U+1CED should only be allowed after some of # the nasalization marks, maybe only for U+1CE9..U+1CF1. if U == 0x1CED: UISC = Tone_Mark - evals = [(k, v(U,UISC,UGC)) for k,v in items] - values = [k for k,v in evals if v] + # TODO: https://github.com/harfbuzz/harfbuzz/issues/525 + if U == 0x1A7F: UISC = Consonant_Final; UIPC = Bottom + + # TODO: https://github.com/harfbuzz/harfbuzz/pull/609 + if U == 0x20F0: UISC = Cantillation_Mark; UIPC = Top + + # TODO: https://github.com/harfbuzz/harfbuzz/pull/626 + if U == 0xA8B4: UISC = Consonant_Medial + + values = [k for k,v in items if v(U,UISC,UGC)] assert len(values) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, values) USE = values[0] @@ -336,12 +352,6 @@ def map_to_use(data): defaults = ('O', 'No_Block') data = map_to_use(data) -# Remove the outliers -singles = {} -for u in [0x034F, 0x25CC, 0x1107F]: - singles[u] = data[u] - del data[u] - print "/* == Start of generated table == */" print "/*" print " * The following table is generated by running:" @@ -439,20 +449,17 @@ page_bits = 12 print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy) print print "USE_TABLE_ELEMENT_TYPE" -print "hb_use_get_categories (hb_codepoint_t u)" +print "hb_use_get_category (hb_codepoint_t u)" print "{" print " switch (u >> %d)" % page_bits print " {" -pages = set([u>>page_bits for u in starts+ends+singles.keys()]) +pages = set([u>>page_bits for u in starts+ends]) for p in sorted(pages): print " case 0x%0Xu:" % p for (start,end) in zip (starts, ends): if p not in [start>>page_bits, end>>page_bits]: continue offset = "use_offset_0x%04xu" % start - print " if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset) - for u,d in singles.items (): - if p != u>>page_bits: continue - print " if (unlikely (u == 0x%04Xu)) return %s;" % (u, d[0]) + print " if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset) print " break;" print "" print " default:" |