提交 f45bea86 编写于 作者: L Lukáš Doktor

avocado.utils.astring: Be more lenient regarding utf-8 chars

People tend to mix unicode, strings and encoded strings. This patch adds
the "avocado.utils.astring.string_safe_encode" function, which tries to
accept all of the listed inputs and assumes "utf-8" is in use for
encoded strings.

This function is then used in the tabular_output to avoid
"UnicodeDecodeErrors" for all listed input types.
Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
上级 e6549fca
......@@ -154,10 +154,13 @@ def iter_tabular_output(matrix, header=None):
if header:
for column in header:
lengths.append(len(column))
str_matrix = []
for row in matrix:
str_matrix.append([])
for i, column in enumerate(row):
column = unicode(column).encode("utf-8")
col_len = len(column)
column = string_safe_encode(column)
str_matrix[-1].append(column)
col_len = len(column.decode("utf-8"))
try:
max_len = lengths[i]
if col_len > max_len:
......@@ -174,7 +177,7 @@ def iter_tabular_output(matrix, header=None):
if header:
out_line = format_string % header
yield out_line
for row in matrix:
for row in str_matrix:
out_line = format_string % tuple(row)
yield out_line
......@@ -195,6 +198,19 @@ def tabular_output(matrix, header=None):
return "\n".join(iter_tabular_output(matrix, header))
def string_safe_encode(string):
    """
    Return the input as a UTF-8 encoded byte string.

    People tend to mix unicode strings with already encoded byte strings.
    This function accepts either of them (as well as any other object) and
    always hands back UTF-8 encoded bytes.

    :param string: unicode text, a byte string assumed to contain UTF-8
                   data, or any other object (converted with ``str()``
                   first)
    :return: the UTF-8 encoded byte string
    :raise UnicodeDecodeError: when a byte string input is not valid UTF-8
    """
    # ``bytes`` is an alias of ``str`` on Python 2 and ``type(u"")`` is
    # ``unicode`` there, so this check accepts exactly what ``basestring``
    # accepted, while also being usable on Python 3.
    if not isinstance(string, (bytes, type(u""))):
        string = str(string)
    if isinstance(string, bytes):
        # Already encoded: round-trip through UTF-8 so the data is returned
        # unchanged when valid and invalid data still raises
        # UnicodeDecodeError, instead of relying on the implicit ASCII
        # decode performed by ``str.encode`` on Python 2.
        return string.decode("utf-8").encode("utf-8")
    return string.encode("utf-8")
def string_to_safe_path(string):
"""
Convert string to a valid file/dir name.
......
......@@ -17,5 +17,27 @@ class AstringTest(unittest.TestCase):
'foo bar\n'
'/bin/bar/sbrubles /home/myuser/sbrubles'))
def testUnicodeTabular(self):
    """
    Check that tabular output copes with utf-8 characters

    The first column of the input rows holds, in turn, a utf-8 encoded
    byte string, the same word as a unicode string, an encoded string
    mixing ascii and non-ascii characters, and a plain ascii string. The
    rendered table is compared against the expected text, so wrong
    column-width handling shows up as a mismatch (this exercises the
    string_safe_encode function).
    """
    rows = [
        ("\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4\xd0\xbe", 123),
        (u'\u0430\u0432\u043e\u043a\u0430\u0434\u043e', 123),
        ("avok\xc3\xa1do", 123),
        ("avocado", 123),
    ]
    # One entry per expected output line; the table is newline-separated
    # with no trailing newline.
    expected_lines = [
        "\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4\xd0\xbe 123",
        "\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4\xd0\xbe 123",
        "avok\xc3\xa1do 123",
        "avocado 123",
    ]
    rendered = astring.tabular_output(rows)
    self.assertEqual(rendered, "\n".join(expected_lines))
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册