提交 f45bea86 编写于 作者: L Lukáš Doktor

avocado.utils.astring: Be more lenient regarding utf-8 chars

People tend to mix unicode strings, plain strings and encoded strings. This
patch adds the "avocado.utils.astring.string_safe_encode" function, which
accepts all of the listed inputs and assumes "utf-8" is in use for
encoded strings.

This function is then used in the tabular_output to avoid
"UnicodeDecodeErrors" for all listed input types.
Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
上级 e6549fca
...@@ -154,10 +154,13 @@ def iter_tabular_output(matrix, header=None): ...@@ -154,10 +154,13 @@ def iter_tabular_output(matrix, header=None):
if header: if header:
for column in header: for column in header:
lengths.append(len(column)) lengths.append(len(column))
str_matrix = []
for row in matrix: for row in matrix:
str_matrix.append([])
for i, column in enumerate(row): for i, column in enumerate(row):
column = unicode(column).encode("utf-8") column = string_safe_encode(column)
col_len = len(column) str_matrix[-1].append(column)
col_len = len(column.decode("utf-8"))
try: try:
max_len = lengths[i] max_len = lengths[i]
if col_len > max_len: if col_len > max_len:
...@@ -174,7 +177,7 @@ def iter_tabular_output(matrix, header=None): ...@@ -174,7 +177,7 @@ def iter_tabular_output(matrix, header=None):
if header: if header:
out_line = format_string % header out_line = format_string % header
yield out_line yield out_line
for row in matrix: for row in str_matrix:
out_line = format_string % tuple(row) out_line = format_string % tuple(row)
yield out_line yield out_line
...@@ -195,6 +198,19 @@ def tabular_output(matrix, header=None): ...@@ -195,6 +198,19 @@ def tabular_output(matrix, header=None):
return "\n".join(iter_tabular_output(matrix, header)) return "\n".join(iter_tabular_output(matrix, header))
def string_safe_encode(string):
    """
    Return the input as a utf-8 encoded byte string.

    People tend to mix unicode strings with already encoded byte strings.
    This function accepts both, as well as any other object (which is
    converted with ``str()`` first), and assumes that byte strings are
    utf-8 encoded.

    :param string: unicode string, utf-8 encoded byte string or any other
                   object
    :return: utf-8 encoded byte string
    :raise UnicodeDecodeError: when a byte string is not valid utf-8
    """
    # Resolve the text type explicitly instead of relying on "basestring",
    # which only exists on Python 2.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    if not isinstance(string, (text_type, bytes)):
        string = str(string)
    if isinstance(string, bytes):
        # Already encoded input: decode it as utf-8 explicitly rather than
        # depending on the implicit ASCII decode (and the resulting
        # UnicodeDecodeError) that Python 2 performs in str.encode().
        string = string.decode("utf-8")
    return string.encode("utf-8")
def string_to_safe_path(string): def string_to_safe_path(string):
""" """
Convert string to a valid file/dir name. Convert string to a valid file/dir name.
......
...@@ -17,5 +17,27 @@ class AstringTest(unittest.TestCase): ...@@ -17,5 +17,27 @@ class AstringTest(unittest.TestCase):
'foo bar\n' 'foo bar\n'
'/bin/bar/sbrubles /home/myuser/sbrubles')) '/bin/bar/sbrubles /home/myuser/sbrubles'))
def testUnicodeTabular(self):
    """
    Check that tabular output copes with utf-8 characters.

    The rows mix utf-8 encoded byte strings with unicode strings of
    various lengths. The rendered table is compared against the expected
    encoded text (the cells go through string_safe_encode).
    """
    expected = ("\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4"
                "\xd0\xbe 123\n"
                "\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4"
                "\xd0\xbe 123\n"
                "avok\xc3\xa1do 123\n"
                "avocado 123")
    rows = [
        ("\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4\xd0\xbe",
         123),
        (u'\u0430\u0432\u043e\u043a\u0430\u0434\u043e', 123),
        ("avok\xc3\xa1do", 123),
        ("avocado", 123),
    ]
    rendered = astring.tabular_output(rows)
    self.assertEqual(rendered, expected)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册