diff options
author | Youngbok Shin <youngb.shin@samsung.com> | 2018-06-26 11:15:52 +0900 |
---|---|---|
committer | Youngbok Shin <youngb.shin@samsung.com> | 2018-06-26 11:15:52 +0900 |
commit | b9f425ddd6223cd82b3d35f13fbd060d3c0c0e38 (patch) | |
tree | 87220edcaa6118811d062d77d0562a3f74c7f885 /src/gen-use-table.py | |
parent | 1c272a48507bb3c7905aa30d8bf0d092b474f781 (diff) | |
download | harfbuzz-b9f425ddd6223cd82b3d35f13fbd060d3c0c0e38.tar.gz harfbuzz-b9f425ddd6223cd82b3d35f13fbd060d3c0c0e38.tar.bz2 harfbuzz-b9f425ddd6223cd82b3d35f13fbd060d3c0c0e38.zip |
Imported Upstream version 1.8.1 (tag: upstream/1.8.1)
Diffstat (limited to 'src/gen-use-table.py')
-rwxr-xr-x | src/gen-use-table.py | 132 |
1 files changed, 72 insertions, 60 deletions
diff --git a/src/gen-use-table.py b/src/gen-use-table.py index 0681725..6aa5f88 100755 --- a/src/gen-use-table.py +++ b/src/gen-use-table.py @@ -1,14 +1,16 @@ -#!/usr/bin/python +#!/usr/bin/env python -import sys +from __future__ import print_function, division, absolute_import + +import io, sys if len (sys.argv) != 5: - print >>sys.stderr, "usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt" + print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr) sys.exit (1) BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"] -files = [file (x) for x in sys.argv[1:]] +files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]] headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 2] headers.append (["UnicodeData.txt does not have a header."]) @@ -93,6 +95,7 @@ property_names = [ 'Consonant_Medial', 'Consonant_Final', 'Consonant_Head_Letter', + 'Consonant_Initial_Postfixed', 'Modifying_Letter', 'Tone_Letter', 'Tone_Mark', @@ -124,6 +127,11 @@ property_names = [ 'Overstruck', ] +try: + basestring +except NameError: + basestring = str + class PropertyValue(object): def __init__(self, name_): self.name = name_ @@ -133,6 +141,8 @@ class PropertyValue(object): return self.name == (other if isinstance(other, basestring) else other.name) def __ne__(self, other): return not (self == other) + def __hash__(self): + return hash(str(self)) property_values = {} @@ -155,7 +165,7 @@ def is_BASE(U, UISC, UGC): def is_BASE_IND(U, UISC, UGC): #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po) return (UISC in [Consonant_Dead, Modifying_Letter] or - (UGC == Po and not U in [0x104E, 0x2022, 0x11A3F, 0x11A45]) or + (UGC == Po and not U in [0x104B, 0x104E, 0x2022, 0x11A3F, 0x11A45]) or False # SPEC-DRAFT-OUTDATED! 
U == 0x002D ) def is_BASE_NUM(U, UISC, UGC): @@ -167,7 +177,9 @@ def is_BASE_OTHER(U, UISC, UGC): def is_CGJ(U, UISC, UGC): return U == 0x034F def is_CONS_FINAL(U, UISC, UGC): + # Consonant_Initial_Postfixed is new in Unicode 11; not in the spec. return ((UISC == Consonant_Final and UGC != Lo) or + UISC == Consonant_Initial_Postfixed or UISC == Consonant_Succeeding_Repha) def is_CONS_FINAL_MOD(U, UISC, UGC): #SPEC-DRAFT return UISC in [Consonant_Final_Modifier, Syllable_Modifier] @@ -325,17 +337,18 @@ def map_to_use(data): # TODO: These should die, but have UIPC in Unicode 8.0 if U in [0x953, 0x954]: UIPC = Not_Applicable - # TODO: In USE's override list but not in Unicode 8.0 + # TODO: In USE's override list but not in Unicode 11.0 if U == 0x103C: UIPC = Left - # TODO: These are not in USE's override list that we have, nor are they in Unicode 8.0 + # TODO: These are not in USE's override list that we have, nor are they in Unicode 11.0 if 0xA926 <= U <= 0xA92A: UIPC = Top if U == 0x111CA: UIPC = Bottom if U == 0x11300: UIPC = Top - if U == 0x1133C: UIPC = Bottom if U == 0x1171E: UIPC = Left # Correct?! 
if 0x1CF2 <= U <= 0x1CF3: UIPC = Right if 0x1CF8 <= U <= 0x1CF9: UIPC = Top + # https://github.com/roozbehp/unicode-data/issues/8 + if U == 0x0A51: UIPC = Bottom assert (UIPC in [Not_Applicable, Visual_Order_Left] or USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC) @@ -352,21 +365,21 @@ def map_to_use(data): defaults = ('O', 'No_Block') data = map_to_use(data) -print "/* == Start of generated table == */" -print "/*" -print " * The following table is generated by running:" -print " *" -print " * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt" -print " *" -print " * on files with these headers:" -print " *" +print ("/* == Start of generated table == */") +print ("/*") +print (" * The following table is generated by running:") +print (" *") +print (" * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt") +print (" *") +print (" * on files with these headers:") +print (" *") for h in headers: for l in h: - print " * %s" % (l.strip()) -print " */" -print -print '#include "hb-ot-shape-complex-use-private.hh"' -print + print (" * %s" % (l.strip())) +print (" */") +print () +print ('#include "hb-ot-shape-complex-use-private.hh"') +print () total = 0 used = 0 @@ -374,30 +387,29 @@ last_block = None def print_block (block, start, end, data): global total, used, last_block if block and block != last_block: - print - print - print " /* %s */" % block + print () + print () + print (" /* %s */" % block) if start % 16: - print ' ' * (20 + (start % 16 * 6)), + print (' ' * (20 + (start % 16 * 6)), end='') num = 0 assert start % 8 == 0 assert (end+1) % 8 == 0 for u in range (start, end+1): if u % 16 == 0: - print - print " /* %04X */" % u, + print () + print (" /* %04X */" % u, end='') if u in data: num += 1 d = data.get (u, defaults) - sys.stdout.write ("%6s," % d[0]) + print ("%6s," % d[0], end='') total += end - start + 1 used += num if block: last_block 
= block -uu = data.keys () -uu.sort () +uu = sorted (data.keys ()) last = -100000 num = 0 @@ -406,14 +418,14 @@ starts = [] ends = [] for k,v in sorted(use_mapping.items()): if k in use_positions and use_positions[k]: continue - print "#define %s USE_%s /* %s */" % (k, k, v.__name__[3:]) + print ("#define %s USE_%s /* %s */" % (k, k, v.__name__[3:])) for k,v in sorted(use_positions.items()): if not v: continue for suf in v.keys(): tag = k + suf - print "#define %s USE_%s" % (tag, tag) -print "" -print "static const USE_TABLE_ELEMENT_TYPE use_table[] = {" + print ("#define %s USE_%s" % (tag, tag)) +print ("") +print ("static const USE_TABLE_ELEMENT_TYPE use_table[] = {") for u in uu: if u <= last: continue @@ -433,51 +445,51 @@ for u in uu: if last >= 0: ends.append (last + 1) offset += ends[-1] - starts[-1] - print - print - print "#define use_offset_0x%04xu %d" % (start, offset) + print () + print () + print ("#define use_offset_0x%04xu %d" % (start, offset)) starts.append (start) print_block (block, start, end, data) last = end ends.append (last + 1) offset += ends[-1] - starts[-1] -print -print +print () +print () occupancy = used * 100. 
/ total page_bits = 12 -print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy) -print -print "USE_TABLE_ELEMENT_TYPE" -print "hb_use_get_category (hb_codepoint_t u)" -print "{" -print " switch (u >> %d)" % page_bits -print " {" +print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)) +print () +print ("USE_TABLE_ELEMENT_TYPE") +print ("hb_use_get_category (hb_codepoint_t u)") +print ("{") +print (" switch (u >> %d)" % page_bits) +print (" {") pages = set([u>>page_bits for u in starts+ends]) for p in sorted(pages): - print " case 0x%0Xu:" % p + print (" case 0x%0Xu:" % p) for (start,end) in zip (starts, ends): if p not in [start>>page_bits, end>>page_bits]: continue offset = "use_offset_0x%04xu" % start - print " if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset) - print " break;" - print "" -print " default:" -print " break;" -print " }" -print " return USE_O;" -print "}" -print + print (" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)) + print (" break;") + print ("") +print (" default:") +print (" break;") +print (" }") +print (" return USE_O;") +print ("}") +print () for k in sorted(use_mapping.keys()): if k in use_positions and use_positions[k]: continue - print "#undef %s" % k + print ("#undef %s" % k) for k,v in sorted(use_positions.items()): if not v: continue for suf in v.keys(): tag = k + suf - print "#undef %s" % tag -print -print "/* == End of generated table == */" + print ("#undef %s" % tag) +print () +print ("/* == End of generated table == */") # Maintain at least 50% occupancy in the table */ if occupancy < 50: |