diff --git a/avocado/utils/astring.py b/avocado/utils/astring.py index 2c764e85a4b5680dcee8ad0a55a44c7080837b7f..20a3fe7a2b2deb44704615b68f798fba506eaec2 100644 --- a/avocado/utils/astring.py +++ b/avocado/utils/astring.py @@ -154,10 +154,13 @@ def iter_tabular_output(matrix, header=None): if header: for column in header: lengths.append(len(column)) + str_matrix = [] for row in matrix: + str_matrix.append([]) for i, column in enumerate(row): - column = unicode(column).encode("utf-8") - col_len = len(column) + column = string_safe_encode(column) + str_matrix[-1].append(column) + col_len = len(column.decode("utf-8")) try: max_len = lengths[i] if col_len > max_len: @@ -174,7 +177,7 @@ def iter_tabular_output(matrix, header=None): if header: out_line = format_string % header yield out_line - for row in matrix: + for row in str_matrix: out_line = format_string % tuple(row) yield out_line @@ -195,6 +198,19 @@ def tabular_output(matrix, header=None): return "\n".join(iter_tabular_output(matrix, header)) +def string_safe_encode(string): + """ + People tend to mix unicode streams with encoded strings. This function + tries to replace any input with a valid utf-8 encoded byte string. + """ + if not isinstance(string, basestring): + string = str(string) + try: + return string.encode("utf-8") + except UnicodeDecodeError: + return string.decode("utf-8").encode("utf-8") + + def string_to_safe_path(string): """ Convert string to a valid file/dir name. 
diff --git a/selftests/unit/test_astring.py b/selftests/unit/test_astring.py index ff03137f7b1c8360f74b9e8e1d65bdba6b23fa08..0c0c189a4b72eb8fdfe657331985dbb41efd81a5 100644 --- a/selftests/unit/test_astring.py +++ b/selftests/unit/test_astring.py @@ -17,5 +17,27 @@ class AstringTest(unittest.TestCase): 'foo bar\n' '/bin/bar/sbrubles /home/myuser/sbrubles')) + def testUnicodeTabular(self): + """ + Verifies tabular can handle utf-8 chars properly + + It tries valid utf-8 encoded strings as well as unicode ones of + various lengths and verifies it calculates the right length and reports + the correct results. (the string_safe_encode function is in use here) + """ + + matrix = [("\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4\xd0\xbe", + 123), + (u'\u0430\u0432\u043e\u043a\u0430\u0434\u043e', 123), + ("avok\xc3\xa1do", 123), + ("avocado", 123)] + str_matrix = ("\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4" + "\xd0\xbe 123\n" + "\xd0\xb0\xd0\xb2\xd0\xbe\xd0\xba\xd0\xb0\xd0\xb4" + "\xd0\xbe 123\n" + "avok\xc3\xa1do 123\n" + "avocado 123") + self.assertEqual(astring.tabular_output(matrix), str_matrix) + if __name__ == '__main__': unittest.main()