From 6e4f39a0616ea7994e2a9dcefd7f0da798f7f25a Mon Sep 17 00:00:00 2001 From: xujiaqi01 <173596896@qq.com> Date: Wed, 5 Feb 2020 00:12:02 +0800 Subject: [PATCH] add hdfs ls retry time and sleep time, fix save inference (#22433) * add hdfs ls retry time and sleep time, fix save inference * test=develop --- python/paddle/fluid/incubate/fleet/utils/fleet_util.py | 4 ++-- python/paddle/fluid/incubate/fleet/utils/hdfs.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py index bca602fb8bf..50fde2c47bf 100644 --- a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py +++ b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py @@ -920,7 +920,7 @@ class FleetUtil(object): feeded_var_names=feeded_var_names, target_vars=target_vars, executor=executor, - main_program=program, + main_program=program.clone(), params_filename="params") else: fluid.io.save_inference_model( @@ -928,7 +928,7 @@ class FleetUtil(object): feeded_var_names=feeded_var_names, target_vars=target_vars, executor=executor, - main_program=program) + main_program=program.clone()) configs = { "fs.default.name": hadoop_fs_name, diff --git a/python/paddle/fluid/incubate/fleet/utils/hdfs.py b/python/paddle/fluid/incubate/fleet/utils/hdfs.py index 7474d418911..23a22531a45 100644 --- a/python/paddle/fluid/incubate/fleet/utils/hdfs.py +++ b/python/paddle/fluid/incubate/fleet/utils/hdfs.py @@ -22,7 +22,7 @@ from datetime import datetime import re import copy import errno - +import time import logging __all__ = ["HDFSClient"] @@ -83,6 +83,7 @@ class HDFSClient(object): ret_code = 0 ret_out = None ret_err = None + retry_sleep_second = 3 whole_commands = " ".join(whole_commands) for x in range(retry_times + 1): proc = subprocess.Popen( @@ -99,6 +100,7 @@ class HDFSClient(object): if ret_code == 0: break + time.sleep(retry_sleep_second) return ret_code, ret_out, ret_err @@ -329,7 +331,7 @@ class HDFSClient(object): ls_commands = ['-ls', hdfs_path] returncode, output, errors = self.__run_hdfs_cmd( - ls_commands, retry_times=1) + ls_commands, retry_times=10) if returncode: _logger.error("HDFS list path: {} failed".format(hdfs_path)) -- GitLab