提交 d8df7142 编写于 作者: D David Corbett 提交者: Behdad Esfahbod

Fix code point iteration in narrow Python

上级 33ca3b67
...@@ -7,6 +7,9 @@ from itertools import * ...@@ -7,6 +7,9 @@ from itertools import *
diff_symbols = "-+=*&^%$#@!~/" diff_symbols = "-+=*&^%$#@!~/"
diff_colors = ['red', 'green', 'blue'] diff_colors = ['red', 'green', 'blue']
def codepoints(s):
    """Return a generator over the ordinal of each element of *s*.

    Each item produced by iterating *s* is passed to ``ord`` unchanged; no
    surrogate handling is done here.
    """
    ordinals = (ord (ch) for ch in s)
    return ordinals
try: try:
unichr = unichr unichr = unichr
...@@ -43,6 +46,28 @@ try: ...@@ -43,6 +46,28 @@ try:
except UnicodeDecodeError: except UnicodeDecodeError:
raise ValueError('unichr() arg not in range(0x110000)') raise ValueError('unichr() arg not in range(0x110000)')
def codepoints(s):
    """Yield code points from *s*, combining UTF-16 surrogate pairs.

    Each element of *s* is read with ``ord``.  A high surrogate
    (0xD800-0xDBFF) immediately followed by a low surrogate (0xDC00-0xDFFF)
    is yielded as the single supplementary code point they encode.  Any
    unpaired surrogate -- a low one with no pending high, a high one followed
    by anything other than a low one, or a high one at the end of input --
    is yielded as 0xFFFC instead.

    NOTE(review): U+FFFC is OBJECT REPLACEMENT CHARACTER; U+FFFD is the
    conventional marker for ill-formed input -- confirm 0xFFFC is intended.
    """
    pending = None  # high surrogate still waiting for its low half
    for ch in s:
        unit = ord (ch)
        is_low = 0xDC00 <= unit <= 0xDFFF

        if is_low and pending is not None:
            # Well-formed pair: 10 payload bits from each half, offset 0x10000.
            yield 0x10000 + ((pending - 0xD800) << 10) + (unit - 0xDC00)
            pending = None
            continue

        if pending is not None:
            # The previous high surrogate was not followed by a low one.
            yield 0xFFFC
            pending = None

        if is_low:
            # Low surrogate with nothing to pair with.
            yield 0xFFFC
        elif 0xD800 <= unit <= 0xDBFF:
            pending = unit
        else:
            yield unit

    if pending is not None:
        # Input ended in the middle of a pair.
        yield 0xFFFC
except NameError: except NameError:
unichr = chr unichr = chr
...@@ -456,7 +481,7 @@ class Unicode: ...@@ -456,7 +481,7 @@ class Unicode:
@staticmethod @staticmethod
def decode (s): def decode (s):
return u','.join ("U+%04X" % ord (u) for u in tounicode (s, 'utf-8')) return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
@staticmethod @staticmethod
def parse (s): def parse (s):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册