提交 01a30a6a 编写于 作者: B Behdad Esfahbod

[indic] Remove data for scripts that don't go through this shaper

上级 7cd9269f
...@@ -6,7 +6,30 @@ if len (sys.argv) != 4: ...@@ -6,7 +6,30 @@ if len (sys.argv) != 4:
print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt" print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt"
sys.exit (1) sys.exit (1)
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"] ALLOWED_SINGLES = [0x00A0, 0x25CC]
ALLOWED_BLOCKS = [
'Basic Latin',
'Latin-1 Supplement',
'Devanagari',
'Bengali',
'Gurmukhi',
'Gujarati',
'Oriya',
'Tamil',
'Telugu',
'Kannada',
'Malayalam',
'Sinhala',
'Myanmar',
'Khmer',
'Vedic Extensions',
'General Punctuation',
'Superscripts and Subscripts',
'Devanagari Extended',
'Javanese',
'Myanmar Extended-B',
'Myanmar Extended-A',
]
files = [file (x) for x in sys.argv[1:]] files = [file (x) for x in sys.argv[1:]]
...@@ -50,7 +73,7 @@ for i,d in enumerate (data): ...@@ -50,7 +73,7 @@ for i,d in enumerate (data):
if not u in combined: if not u in combined:
combined[u] = list (defaults) combined[u] = list (defaults)
combined[u][i] = v combined[u][i] = v
combined = {k:v for k,v in combined.items() if v[2] not in BLACKLISTED_BLOCKS} combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}
data = combined data = combined
del combined del combined
num = len (data) num = len (data)
...@@ -61,7 +84,7 @@ for u in [0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3]: ...@@ -61,7 +84,7 @@ for u in [0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3]:
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out # Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {} singles = {}
for u in [0x00A0, 0x25CC]: for u in ALLOWED_SINGLES:
singles[u] = data[u] singles[u] = data[u]
del data[u] del data[u]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册