未验证 提交 ca16e8fd 编写于 作者: Y yaoxuefeng 提交者: GitHub

add fs list_files_info (#36224)

上级 7cb19f57
......@@ -1106,3 +1106,35 @@ class HDFSClient(FS):
begin += blocks[i]
return trainer_files[trainer_id]
def list_files_info(self, path_list):
    """
    List the given paths and return the path and size of each entry.

    All paths are passed to a single ``ls`` command, whose output is
    reduced by ``awk`` to ``"<field 5> <field 8>"`` rows; each row is then
    parsed as ``<size> <path>``.

    Args:
        path_list(list): paths to list.

    Returns:
        file_list(list): one ``{'path': str, 'size': int}`` dict per
        parsed output row. Empty when ``path_list`` is empty or the
        command produced no output.
    """
    if not path_list:
        return []

    # Concatenating the paths into one command speeds up 'hadoop ls'.
    cmd = ("ls " + " ".join(path_list) +
           " | awk '{if ($8 != \"\") {print $5\" \"$8 }}'")
    # The return code is not inspected; only the output lines are used.
    _, lines = self._run_cmd(cmd)
    if not lines:
        logger.warning("list_files empty, path[%s]" % (path_list, ))
        return []

    file_list = []
    for line in lines:
        # split() tolerates leading/repeated whitespace in a row.
        fields = line.split()
        if len(fields) < 2:
            continue
        try:
            file_size = int(fields[0])
        except ValueError:
            # Not a "<size> <path>" row (e.g. stray log output): skip it
            # instead of aborting the whole listing.
            continue
        file_list.append({'path': fields[1], 'size': file_size})
    return file_list
......@@ -245,6 +245,15 @@ class FSTestBase(unittest.TestCase):
self.assertFalse(fs.is_dir(path))
fs.delete(path)
def _test_list_files_info(self, fs):
    """
    Smoke-test ``fs.list_files_info``.

    Calls it with an empty list, with a path before this test creates it,
    and again after creating it. Nothing is asserted on the results; the
    test only checks that the calls complete without raising.

    Args:
        fs: file-system client under test; must provide
            ``list_files_info``, ``touch`` and ``delete``.
    """
    flag_file = "./list_files_info.flag"

    # Empty input.
    fs.list_files_info([])

    # Listing before this test has created the file.
    fs.list_files_info([flag_file])

    # touch/delete are given the path itself, not the one-element list
    # that list_files_info takes, so the file that is created and removed
    # is the same one being listed.
    # NOTE(review): assumes touch/delete take a single path string, as the
    # other calls in this test suite do - confirm against the FS interface.
    fs.touch(flag_file, exist_ok=True)
    fs.list_files_info([flag_file])
    fs.delete(flag_file)
# Run the test cases only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    unittest.main()
......@@ -35,6 +35,7 @@ class FSTest2(FSTestBase):
self._test_rm(fs)
self._test_touch(fs)
self._test_dirs(fs)
self._test_list_files_info(fs)
def test_local(self):
fs = LocalFS()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册