antlr/scripts/parse-extended-pictographic/parse.py

22 lines
716 B
Python
Raw Normal View History

2017-03-09 08:39:27 +08:00
from __future__ import print_function
import codecs
import re
import sys
def main(input, output):
    """Convert ExtendedPictographic data lines into ``set.add(...)`` statements.

    Each line of *input* is tested against two patterns:

    * a single code point, ``U+XXXX ; ExtendedPictographic ...``, which
      produces ``set.add(0xXXXX);``
    * a code point range, ``U+XXXX..U+YYYY ; ExtendedPictographic ...``,
      which produces ``set.add(0xXXXX, 0xYYYY);``

    Lines matching neither pattern (comments, blank lines, other
    properties) are skipped silently.

    Args:
        input: Iterable of text lines to scan.
        output: Writable file-like object; one statement is printed per
            matching line.
    """
    single_pattern = re.compile(r'^U\+([0-9a-fA-F]+)\s*;\s*ExtendedPictographic.*$')
    range_pattern = re.compile(r'^U\+([0-9a-fA-F]+)\.\.U\+([0-9a-fA-F]+)\s*;\s*ExtendedPictographic.*$')

    for raw_line in input:
        # The single code point form is tried first; a range line cannot
        # match it because '..' is neither whitespace nor ';'.
        single = single_pattern.match(raw_line)
        if single:
            statement = 'set.add(0x' + single.group(1) + ');'
            print(statement, file=output)
            continue

        span = range_pattern.match(raw_line)
        if span:
            first, last = span.groups()
            print('set.add(0x' + first + ', 0x' + last + ');', file=output)
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the path of
    # the UTF-8 encoded data file; generated statements go to stdout.
    if len(sys.argv) < 2:
        # Exit with a readable usage message instead of an IndexError
        # traceback when the input path is missing.
        sys.exit('usage: ' + sys.argv[0] + ' <input-file>')
    with codecs.open(sys.argv[1], 'r', 'utf-8') as f:
        main(f, sys.stdout)