From a0b7f3537f6c6b2de47496a9ca37dd71a7070638 Mon Sep 17 00:00:00 2001 From: Cleber Rosa Date: Fri, 6 Apr 2018 18:02:19 -0400 Subject: [PATCH] String utilities: add checks for data type (text or binary) To lay some common ground across Python versions, let's define the criteria we have for binary or text data. Signed-off-by: Cleber Rosa --- avocado/utils/astring.py | 26 ++++++++++++++++++++++++++ selftests/unit/test_astring.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/avocado/utils/astring.py b/avocado/utils/astring.py index e4b1e75f..cce7db3f 100644 --- a/avocado/utils/astring.py +++ b/avocado/utils/astring.py @@ -27,6 +27,7 @@ And not notice until their code starts failing. import itertools import re +import sys import string from six import string_types, PY3 @@ -278,3 +279,28 @@ def string_to_safe_path(input_str): for bad_chr in FS_UNSAFE_CHARS: input_str = input_str.replace(bad_chr, "_") return input_str + + +def is_bytes(data): + """ + Checks if the data given is a sequence of bytes + + And not a "text" type, that can be of multi-byte characters. + Also, this does NOT mean a bytearray type. + + :param data: the instance to be checked if it falls under the definition + of an array of bytes. + """ + return isinstance(data, bytes) + + +def is_text(data): + """ + Checks if the data given is suitable for holding text + + That is, if it can hold text that requires more than one byte for + each character. 
+ """ + if sys.version_info[0] < 3: + return isinstance(data, unicode) + return isinstance(data, str) diff --git a/selftests/unit/test_astring.py b/selftests/unit/test_astring.py index 7c225a21..56e1e0d9 100644 --- a/selftests/unit/test_astring.py +++ b/selftests/unit/test_astring.py @@ -77,6 +77,35 @@ class AstringTest(unittest.TestCase): self.assertEqual(astring.string_to_safe_path(avocado), "%s__" % avocado[:-2]) + def test_is_bytes(self): + """ + Verifies what bytes means, basically that they are the same + thing across Python 2 and 3 and can be decoded into "text" + """ + binary = b'' + text = u'' + self.assertTrue(astring.is_bytes(binary)) + self.assertFalse(astring.is_bytes(text)) + self.assertTrue(hasattr(binary, 'decode')) + self.assertTrue(astring.is_text(binary.decode())) + # on Python 2, each str member is also a single byte char + if sys.version_info[0] < 3: + self.assertTrue(astring.is_bytes(str(''))) + else: + self.assertFalse(astring.is_bytes(str(''))) + + def test_is_text(self): + """ + Verifies what text means, basically that they can represent + an extended set of characters and can be encoded into "bytes" + """ + binary = b'' + text = u'' + self.assertTrue(astring.is_text(text)) + self.assertFalse(astring.is_text(binary)) + self.assertTrue(hasattr(text, 'encode')) + self.assertTrue(astring.is_bytes(text.encode())) + if __name__ == '__main__': unittest.main() -- GitLab