Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
2ef6188b
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2ef6188b
编写于
7月 05, 2021
作者:
D
danleifeng
提交者:
GitHub
7月 05, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
【HeterPS】fix hdfs and fleet_util for supporting save/load/infer (#33903)
* fix hdfs and fleet_util for supporting save/load infer;test=develop
上级
70100e4f
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
234 addition
and
61 deletion
+234
-61
paddle/fluid/framework/device_worker_factory.cc
paddle/fluid/framework/device_worker_factory.cc
+0
-5
paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt
paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt
+2
-2
paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h
...mework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h
+1
-1
paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h
paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h
+2
-2
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
+2
-4
python/paddle/distributed/fleet/utils/fs.py
python/paddle/distributed/fleet/utils/fs.py
+174
-11
python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py
...addle/fluid/incubate/fleet/parameter_server/pslib/node.py
+2
-0
python/paddle/fluid/incubate/fleet/utils/fleet_util.py
python/paddle/fluid/incubate/fleet/utils/fleet_util.py
+9
-33
python/paddle/fluid/tests/unittests/hdfs_test_utils.py
python/paddle/fluid/tests/unittests/hdfs_test_utils.py
+41
-3
python/paddle/fluid/tests/unittests/test_hdfs3.py
python/paddle/fluid/tests/unittests/test_hdfs3.py
+1
-0
未找到文件。
paddle/fluid/framework/device_worker_factory.cc
浏览文件 @
2ef6188b
...
@@ -69,11 +69,6 @@ REGISTER_DEVICE_WORKER_CLASS(DownpourWorkerOpt);
...
@@ -69,11 +69,6 @@ REGISTER_DEVICE_WORKER_CLASS(DownpourWorkerOpt);
REGISTER_DEVICE_WORKER_CLASS
(
HeterCpuWorker
);
REGISTER_DEVICE_WORKER_CLASS
(
HeterCpuWorker
);
#endif
#endif
#if (defined PADDLE_WITH_NCCL || defined PADDLE_WITH_RCCL) && \
(defined PADDLE_WITH_PSLIB)
REGISTER_DEVICE_WORKER_CLASS
(
HeterBoxWorker
);
#endif
#if (defined PADDLE_WITH_NCCL || defined PADDLE_WITH_RCCL) && \
#if (defined PADDLE_WITH_NCCL || defined PADDLE_WITH_RCCL) && \
(defined PADDLE_WITH_PSLIB)
(defined PADDLE_WITH_PSLIB)
REGISTER_DEVICE_WORKER_CLASS
(
PSGPUWorker
);
REGISTER_DEVICE_WORKER_CLASS
(
PSGPUWorker
);
...
...
paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt
浏览文件 @
2ef6188b
...
@@ -8,11 +8,11 @@ IF(WITH_GPU)
...
@@ -8,11 +8,11 @@ IF(WITH_GPU)
SET
(
HETERPS_DEPS
${
HETERPS_DEPS
}
${
RPC_DEPS
}
)
SET
(
HETERPS_DEPS
${
HETERPS_DEPS
}
${
RPC_DEPS
}
)
endif
()
endif
()
nv_library
(
heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS
${
HETERPS_DEPS
}
)
nv_library
(
heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS
${
HETERPS_DEPS
}
)
nv_test
(
test_heter_comm SRCS
test_heter_comm.cu
feature_value.h DEPS heter_comm
)
nv_test
(
test_heter_comm SRCS feature_value.h DEPS heter_comm
)
nv_library
(
heter_ps SRCS heter_ps.cu DEPS heter_comm
)
nv_library
(
heter_ps SRCS heter_ps.cu DEPS heter_comm
)
ENDIF
()
ENDIF
()
IF
(
WITH_ROCM
)
IF
(
WITH_ROCM
)
hip_library
(
heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS cub device_context
)
hip_library
(
heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS cub device_context
)
hip_test
(
test_heter_comm SRCS
test_heter_comm.cu
feature_value.h DEPS heter_comm
)
hip_test
(
test_heter_comm SRCS feature_value.h DEPS heter_comm
)
hip_library
(
heter_ps SRCS heter_ps.cu DEPS heter_comm
)
hip_library
(
heter_ps SRCS heter_ps.cu DEPS heter_comm
)
ENDIF
()
ENDIF
()
paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h
浏览文件 @
2ef6188b
...
@@ -765,7 +765,7 @@ x.second );
...
@@ -765,7 +765,7 @@ x.second );
unsigned
long
long
get_num_collisions
()
const
{
return
m_collisions
;
}
unsigned
long
long
get_num_collisions
()
const
{
return
m_collisions
;
}
void
print
()
{
void
print
()
{
for
(
size_type
i
=
0
;
i
<
10
;
++
i
)
{
for
(
size_type
i
=
0
;
i
<
5
;
++
i
)
{
std
::
cout
<<
i
<<
": "
<<
m_hashtbl_values
[
i
].
first
<<
","
std
::
cout
<<
i
<<
": "
<<
m_hashtbl_values
[
i
].
first
<<
","
<<
m_hashtbl_values
[
i
].
second
<<
std
::
endl
;
<<
m_hashtbl_values
[
i
].
second
<<
std
::
endl
;
}
}
...
...
paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h
浏览文件 @
2ef6188b
...
@@ -115,7 +115,7 @@ void HeterComm<KeyType, ValType, GradType>::init_path() {
...
@@ -115,7 +115,7 @@ void HeterComm<KeyType, ValType, GradType>::init_path() {
path_
.
resize
(
total_gpu
);
path_
.
resize
(
total_gpu
);
if
(
!
topo_aware_
)
{
if
(
!
topo_aware_
)
{
VLOG
(
1
)
<<
"init path without topo aware"
;
VLOG
(
3
)
<<
"init path without topo aware"
;
for
(
int
i
=
0
;
i
<
total_gpu
;
++
i
)
{
for
(
int
i
=
0
;
i
<
total_gpu
;
++
i
)
{
path_
[
i
].
resize
(
total_gpu
);
path_
[
i
].
resize
(
total_gpu
);
for
(
int
j
=
0
;
j
<
total_gpu
;
++
j
)
{
for
(
int
j
=
0
;
j
<
total_gpu
;
++
j
)
{
...
@@ -130,7 +130,7 @@ void HeterComm<KeyType, ValType, GradType>::init_path() {
...
@@ -130,7 +130,7 @@ void HeterComm<KeyType, ValType, GradType>::init_path() {
}
}
}
}
}
else
{
}
else
{
VLOG
(
1
)
<<
"init path with topo aware"
;
VLOG
(
3
)
<<
"init path with topo aware"
;
for
(
int
i
=
0
;
i
<
total_gpu
;
++
i
)
{
for
(
int
i
=
0
;
i
<
total_gpu
;
++
i
)
{
path_
[
i
].
resize
(
total_gpu
);
path_
[
i
].
resize
(
total_gpu
);
for
(
int
j
=
0
;
j
<
total_gpu
;
++
j
)
{
for
(
int
j
=
0
;
j
<
total_gpu
;
++
j
)
{
...
...
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
浏览文件 @
2ef6188b
...
@@ -68,8 +68,6 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
...
@@ -68,8 +68,6 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
thread_keys_
.
resize
(
thread_keys_thread_num_
);
thread_keys_
.
resize
(
thread_keys_thread_num_
);
for
(
int
i
=
0
;
i
<
thread_keys_thread_num_
;
i
++
)
{
for
(
int
i
=
0
;
i
<
thread_keys_thread_num_
;
i
++
)
{
thread_keys_
[
i
].
resize
(
thread_keys_shard_num_
);
thread_keys_
[
i
].
resize
(
thread_keys_shard_num_
);
for
(
int
j
=
0
;
j
<
thread_keys_shard_num_
;
j
++
)
{
}
}
}
const
std
::
deque
<
Record
>&
vec_data
=
input_channel
->
GetData
();
const
std
::
deque
<
Record
>&
vec_data
=
input_channel
->
GetData
();
size_t
total_len
=
vec_data
.
size
();
size_t
total_len
=
vec_data
.
size
();
...
@@ -255,7 +253,7 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
...
@@ -255,7 +253,7 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
}
}
}
}
#endif
#endif
VLOG
(
1
)
<<
"GpuPs build hbmps done"
;
VLOG
(
3
)
<<
"GpuPs build hbmps done"
;
device_mutex
[
dev
]
->
unlock
();
device_mutex
[
dev
]
->
unlock
();
}
}
...
@@ -295,7 +293,7 @@ void PSGPUWrapper::BuildGPUPS(uint64_t table_id, int feature_dim) {
...
@@ -295,7 +293,7 @@ void PSGPUWrapper::BuildGPUPS(uint64_t table_id, int feature_dim) {
HeterPs_
=
HeterPsBase
::
get_instance
(
size_max
,
resource_
);
HeterPs_
=
HeterPsBase
::
get_instance
(
size_max
,
resource_
);
HeterPs_
->
set_nccl_comm_and_size
(
inner_comms_
,
inter_comms_
,
node_size_
);
HeterPs_
->
set_nccl_comm_and_size
(
inner_comms_
,
inter_comms_
,
node_size_
);
auto
build_func
=
[
this
,
&
gpu_task
,
&
feature_keys_count
](
int
i
)
{
auto
build_func
=
[
this
,
&
gpu_task
,
&
feature_keys_count
](
int
i
)
{
std
::
cout
<<
"building table: "
<<
i
<<
std
::
endl
;
VLOG
(
3
)
<<
"building table: "
<<
i
;
this
->
HeterPs_
->
build_ps
(
i
,
gpu_task
->
device_keys_
[
i
].
data
(),
this
->
HeterPs_
->
build_ps
(
i
,
gpu_task
->
device_keys_
[
i
].
data
(),
gpu_task
->
device_values_
[
i
].
data
(),
gpu_task
->
device_values_
[
i
].
data
(),
feature_keys_count
[
i
],
500000
,
2
);
feature_keys_count
[
i
],
500000
,
2
);
...
...
python/paddle/distributed/fleet/utils/fs.py
浏览文件 @
2ef6188b
...
@@ -111,6 +111,10 @@ class FS(object):
...
@@ -111,6 +111,10 @@ class FS(object):
def
touch
(
self
,
fs_path
,
exist_ok
=
True
):
def
touch
(
self
,
fs_path
,
exist_ok
=
True
):
raise
NotImplementedError
raise
NotImplementedError
@
abc
.
abstractmethod
def
cat
(
self
,
fs_path
=
None
):
raise
NotImplementedError
class
LocalFS
(
FS
):
class
LocalFS
(
FS
):
"""
"""
...
@@ -676,14 +680,35 @@ class HDFSClient(FS):
...
@@ -676,14 +680,35 @@ class HDFSClient(FS):
return
True
return
True
def
upload_dir
(
self
,
local_dir
,
dest_dir
,
overwrite
=
False
):
"""
upload dir to hdfs
Args:
local_dir(str): local dir
dest_dir(str): hdfs dest dir
overwrite(bool): whether to delete an existing dest_dir/<basename of local_dir> on HDFS before uploading
Returns:
return code
"""
local_dir
=
local_dir
.
rstrip
(
"/"
)
dest_dir
=
dest_dir
.
rstrip
(
"/"
)
local_basename
=
os
.
path
.
basename
(
local_dir
)
if
self
.
is_exist
(
dest_dir
+
"/"
+
local_basename
)
and
overwrite
:
self
.
delete
(
dest_dir
+
"/"
+
local_basename
)
if
not
self
.
is_exist
(
dest_dir
):
self
.
mkdirs
(
dest_dir
)
self
.
_try_upload
(
local_dir
,
dest_dir
)
# can't retry
# can't retry
def
upload
(
self
,
local_path
,
fs_path
):
def
upload
(
self
,
local_path
,
fs_path
,
multi_processes
=
1
,
overwrite
=
False
):
"""
"""
Upload the local path to remote HDFS.
Upload the local path to remote HDFS.
Args:
Args:
local_path(str): The local path.
local_path(str): The local path.
fs_path(str): The HDFS path.
fs_path(str): The HDFS path.
multi_processes(int|1): the number of processes uploading data at the same time, default=1
overwrite(bool|False): will overwrite file on HDFS or not
Examples:
Examples:
...
@@ -700,21 +725,67 @@ class HDFSClient(FS):
...
@@ -700,21 +725,67 @@ class HDFSClient(FS):
client = HDFSClient(hadoop_home, configs)
client = HDFSClient(hadoop_home, configs)
client.upload("test_hdfs_client", "hdfs:/test_hdfs_client")
client.upload("test_hdfs_client", "hdfs:/test_hdfs_client")
"""
"""
if
self
.
is_exist
(
fs_path
):
raise
FSFileExistsError
(
"{} exists"
.
format
(
fs_path
))
def
__subprocess_upload
(
hdfs_path_single
,
datas
):
for
data
in
datas
:
self
.
_try_upload
(
data
,
hdfs_path_single
)
def
get_local_files
(
path
):
"""
get local files
Args:
path(str): local path
Returns:
list of local files
"""
rlist
=
[]
if
not
os
.
path
.
exists
(
path
):
return
rlist
if
os
.
path
.
isdir
(
path
):
for
file
in
os
.
listdir
(
path
):
t
=
os
.
path
.
join
(
path
,
file
)
rlist
.
append
(
t
)
else
:
rlist
.
append
(
path
)
return
rlist
local
=
LocalFS
()
local
=
LocalFS
()
if
not
local
.
is_exist
(
local_path
):
if
not
local
.
is_exist
(
local_path
):
raise
FSFileNotExistsError
(
"{} not exists"
.
format
(
local_path
))
raise
FSFileNotExistsError
(
"{} not exists"
.
format
(
local_path
))
# upload_dir
if
local
.
is_dir
(
local_path
):
self
.
upload_dir
(
local_path
,
fs_path
,
overwrite
=
overwrite
)
return
# upload files
all_files
=
get_local_files
(
local_path
)
if
not
all_files
:
print
(
"there are nothing need to upload, function exit"
)
return
if
self
.
is_exist
(
fs_path
)
and
overwrite
:
self
.
delete
(
fs_path
)
self
.
mkdirs
(
fs_path
)
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
self
.
_split_files
(
all_files
,
i
,
multi_processes
)
p
=
multiprocessing
.
Process
(
target
=
__subprocess_upload
,
args
=
(
fs_path
,
process_datas
))
procs
.
append
(
p
)
p
.
start
()
return
self
.
_try_upload
(
local_path
,
fs_path
)
# complete the processes
for
proc
in
procs
:
proc
.
join
()
@
_handle_errors
()
@
_handle_errors
()
def
_try_upload
(
self
,
local_path
,
fs_path
):
def
_try_upload
(
self
,
local_path
,
fs_path
):
cmd
=
"put {} {}"
.
format
(
local_path
,
fs_path
)
cmd
=
"put {} {}"
.
format
(
local_path
,
fs_path
)
ret
=
0
ret
=
0
try
:
try
:
ret
,
lines
=
self
.
_run_cmd
(
cmd
)
ret
,
_
=
self
.
_run_cmd
(
cmd
)
if
ret
!=
0
:
if
ret
!=
0
:
raise
ExecuteError
(
cmd
)
raise
ExecuteError
(
cmd
)
except
Exception
as
e
:
except
Exception
as
e
:
...
@@ -722,13 +793,15 @@ class HDFSClient(FS):
...
@@ -722,13 +793,15 @@ class HDFSClient(FS):
raise
e
raise
e
# can't retry
# can't retry
def
download
(
self
,
fs_path
,
local_path
):
def
download
(
self
,
fs_path
,
local_path
,
multi_processes
=
1
,
overwrite
=
False
):
"""
"""
Download remote HDFS path to the local.
Download remote HDFS path to the local.
Args:
Args:
fs_path(str): The HDFS path.
fs_path(str): The HDFS path.
local_path(str): The local path.
local_path(str): The local path.
multi_processes(int|1): the number of processes downloading data at the same time, default=1
overwrite(bool): is overwrite
Examples:
Examples:
...
@@ -745,17 +818,43 @@ class HDFSClient(FS):
...
@@ -745,17 +818,43 @@ class HDFSClient(FS):
client = HDFSClient(hadoop_home, configs)
client = HDFSClient(hadoop_home, configs)
client.download("hdfs:/test_hdfs_client", "./")
client.download("hdfs:/test_hdfs_client", "./")
"""
"""
def
__subprocess_download
(
local_path
,
datas
):
"""
download file from HDFS
Args:
local_path(str): the local file path
datas(list): the list of HDFS file paths to download
"""
for
data
in
datas
:
self
.
_try_download
(
data
,
local_path
)
if
not
self
.
is_exist
(
fs_path
):
if
not
self
.
is_exist
(
fs_path
):
raise
FSFileNotExistsError
(
"{} not exits"
.
format
(
fs_path
))
raise
FSFileNotExistsError
(
"{} not exits"
.
format
(
fs_path
))
# download file
return
self
.
_try_download
(
fs_path
,
local_path
)
if
self
.
is_file
(
fs_path
):
return
self
.
_try_download
(
fs_path
,
local_path
)
# download dir
_
,
all_files
=
self
.
ls_dir
(
fs_path
)
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
self
.
_split_files
(
all_files
,
i
,
multi_processes
)
p
=
multiprocessing
.
Process
(
target
=
__subprocess_download
,
args
=
(
local_path
,
process_datas
))
procs
.
append
(
p
)
p
.
start
()
# complete the processes
for
proc
in
procs
:
proc
.
join
()
@
_handle_errors
()
@
_handle_errors
()
def
_try_download
(
self
,
fs_path
,
local_path
):
def
_try_download
(
self
,
fs_path
,
local_path
):
cmd
=
"get {} {}"
.
format
(
fs_path
,
local_path
)
cmd
=
"get {} {}"
.
format
(
fs_path
,
local_path
)
ret
=
0
ret
=
0
try
:
try
:
ret
,
lines
=
self
.
_run_cmd
(
cmd
)
ret
,
_
=
self
.
_run_cmd
(
cmd
)
if
ret
!=
0
:
if
ret
!=
0
:
raise
ExecuteError
(
cmd
)
raise
ExecuteError
(
cmd
)
except
Exception
as
e
:
except
Exception
as
e
:
...
@@ -803,7 +902,7 @@ class HDFSClient(FS):
...
@@ -803,7 +902,7 @@ class HDFSClient(FS):
if
out_hdfs
and
not
self
.
is_exist
(
fs_path
):
if
out_hdfs
and
not
self
.
is_exist
(
fs_path
):
cmd
=
"mkdir -p {}"
.
format
(
fs_path
)
cmd
=
"mkdir -p {}"
.
format
(
fs_path
)
ret
,
lines
=
self
.
_run_cmd
(
cmd
)
ret
,
_
=
self
.
_run_cmd
(
cmd
)
if
ret
!=
0
:
if
ret
!=
0
:
raise
ExecuteError
(
cmd
)
raise
ExecuteError
(
cmd
)
...
@@ -939,7 +1038,71 @@ class HDFSClient(FS):
...
@@ -939,7 +1038,71 @@ class HDFSClient(FS):
cmd
=
"touchz {}"
.
format
(
fs_path
)
cmd
=
"touchz {}"
.
format
(
fs_path
)
ret
,
_
=
self
.
_run_cmd
(
cmd
)
ret
,
_
=
self
.
_run_cmd
(
cmd
)
if
ret
!=
0
:
if
ret
!=
0
:
raise
ExecuteError
raise
ExecuteError
(
cmd
)
def
need_upload_download
(
self
):
def
need_upload_download
(
self
):
return
True
return
True
def
cat
(
self
,
fs_path
=
None
):
"""
Cat a remote HDFS file.
Args:
fs_path(str): The HDFS file path.
Returns:
file content
Examples:
.. code-block:: text
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
"fs.default.name": "hdfs://xxx.hadoop.com:54310",
"hadoop.job.ugi": "hello,hello123"
}
client = HDFSClient(hadoop_home, configs)
client.cat("hdfs:/test_hdfs_client")
"""
if
self
.
is_file
(
fs_path
):
output
=
self
.
_try_cat
(
fs_path
)
return
"
\n
"
.
join
(
output
)
else
:
return
""
@
_handle_errors
()
def
_try_cat
(
self
,
fs_path
):
cmd
=
"cat {}"
.
format
(
fs_path
)
ret
,
output
=
self
.
_run_cmd
(
cmd
)
if
ret
!=
0
:
raise
ExecuteError
(
cmd
)
return
output
def
_split_files
(
self
,
files
,
trainer_id
,
trainers
):
"""
split file list
Args:
files(list): file list
trainer_id(int): trainer mpi rank id
trainers(int): all trainers num
Returns:
filelist(list): file list of current trainer
"""
remainder
=
len
(
files
)
%
trainers
blocksize
=
len
(
files
)
//
trainers
blocks
=
[
blocksize
]
*
trainers
for
i
in
range
(
remainder
):
blocks
[
i
]
+=
1
trainer_files
=
[[]]
*
trainers
begin
=
0
for
i
in
range
(
trainers
):
trainer_files
[
i
]
=
files
[
begin
:
begin
+
blocks
[
i
]]
begin
+=
blocks
[
i
]
return
trainer_files
[
trainer_id
]
python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py
浏览文件 @
2ef6188b
...
@@ -13,6 +13,8 @@
...
@@ -13,6 +13,8 @@
"""Defination of Server and Worker."""
"""Defination of Server and Worker."""
from
.
import
ps_pb2
as
pslib
from
.
import
ps_pb2
as
pslib
# NOTE: reduce is no longer a builtin in Python 3; it must be imported from functools
from
functools
import
reduce
class
Server
(
object
):
class
Server
(
object
):
...
...
python/paddle/fluid/incubate/fleet/utils/fleet_util.py
浏览文件 @
2ef6188b
...
@@ -435,11 +435,7 @@ class FleetUtil(object):
...
@@ -435,11 +435,7 @@ class FleetUtil(object):
f
.
write
(
pre_content
+
"
\n
"
)
f
.
write
(
pre_content
+
"
\n
"
)
f
.
write
(
content
+
"
\n
"
)
f
.
write
(
content
+
"
\n
"
)
client
.
delete
(
donefile_path
)
client
.
delete
(
donefile_path
)
client
.
upload
(
client
.
upload
(
donefile_name
,
output_path
)
output_path
,
donefile_name
,
multi_processes
=
1
,
overwrite
=
False
)
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
(
day
,
pass_id
,
donefile_name
))
(
day
,
pass_id
,
donefile_name
))
else
:
else
:
...
@@ -448,11 +444,7 @@ class FleetUtil(object):
...
@@ -448,11 +444,7 @@ class FleetUtil(object):
else
:
else
:
with
open
(
donefile_name
,
"w"
)
as
f
:
with
open
(
donefile_name
,
"w"
)
as
f
:
f
.
write
(
content
+
"
\n
"
)
f
.
write
(
content
+
"
\n
"
)
client
.
upload
(
client
.
upload
(
donefile_name
,
output_path
)
output_path
,
donefile_name
,
multi_processes
=
1
,
overwrite
=
False
)
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
(
day
,
pass_id
,
donefile_name
))
(
day
,
pass_id
,
donefile_name
))
fleet
.
_role_maker
.
_barrier_worker
()
fleet
.
_role_maker
.
_barrier_worker
()
...
@@ -547,11 +539,7 @@ class FleetUtil(object):
...
@@ -547,11 +539,7 @@ class FleetUtil(object):
f
.
write
(
pre_content
+
"
\n
"
)
f
.
write
(
pre_content
+
"
\n
"
)
f
.
write
(
xbox_str
+
"
\n
"
)
f
.
write
(
xbox_str
+
"
\n
"
)
client
.
delete
(
donefile_path
)
client
.
delete
(
donefile_path
)
client
.
upload
(
client
.
upload
(
donefile_name
,
output_path
)
output_path
,
donefile_name
,
multi_processes
=
1
,
overwrite
=
False
)
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
(
day
,
pass_id
,
donefile_name
))
(
day
,
pass_id
,
donefile_name
))
else
:
else
:
...
@@ -560,11 +548,7 @@ class FleetUtil(object):
...
@@ -560,11 +548,7 @@ class FleetUtil(object):
else
:
else
:
with
open
(
donefile_name
,
"w"
)
as
f
:
with
open
(
donefile_name
,
"w"
)
as
f
:
f
.
write
(
xbox_str
+
"
\n
"
)
f
.
write
(
xbox_str
+
"
\n
"
)
client
.
upload
(
client
.
upload
(
donefile_name
,
output_path
)
output_path
,
donefile_name
,
multi_processes
=
1
,
overwrite
=
False
)
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
self
.
rank0_error
(
"write %s/%s %s succeed"
%
\
(
day
,
pass_id
,
donefile_name
))
(
day
,
pass_id
,
donefile_name
))
fleet
.
_role_maker
.
_barrier_worker
()
fleet
.
_role_maker
.
_barrier_worker
()
...
@@ -638,11 +622,7 @@ class FleetUtil(object):
...
@@ -638,11 +622,7 @@ class FleetUtil(object):
%
(
file_num
,
key_num
)
%
(
file_num
,
key_num
)
with
open
(
donefile_name
,
"w"
)
as
f
:
with
open
(
donefile_name
,
"w"
)
as
f
:
f
.
write
(
meta_str
)
f
.
write
(
meta_str
)
client
.
upload
(
client
.
upload
(
donefile_name
,
model_path
)
model_path
,
donefile_name
,
multi_processes
=
1
,
overwrite
=
False
)
self
.
rank0_error
(
"write %s succeed"
%
donefile_path
)
self
.
rank0_error
(
"write %s succeed"
%
donefile_path
)
fleet
.
_role_maker
.
_barrier_worker
()
fleet
.
_role_maker
.
_barrier_worker
()
...
@@ -962,7 +942,7 @@ class FleetUtil(object):
...
@@ -962,7 +942,7 @@ class FleetUtil(object):
if
not
client
.
is_exist
(
dest
):
if
not
client
.
is_exist
(
dest
):
client
.
makedirs
(
dest
)
client
.
makedirs
(
dest
)
client
.
upload
(
dest
,
model_nam
e
)
client
.
upload
(
model_name
,
dest
,
multi_processes
=
5
,
overwrite
=
Tru
e
)
fleet
.
_role_maker
.
_barrier_worker
()
fleet
.
_role_maker
.
_barrier_worker
()
...
@@ -1059,12 +1039,8 @@ class FleetUtil(object):
...
@@ -1059,12 +1039,8 @@ class FleetUtil(object):
dest
=
"%s/%s/delta-%s/dnn_plugin/"
%
(
output_path
,
day
,
dest
=
"%s/%s/delta-%s/dnn_plugin/"
%
(
output_path
,
day
,
pass_id
)
pass_id
)
if
not
client
.
is_exist
(
dest
):
if
not
client
.
is_exist
(
dest
):
client
.
makedirs
(
dest
)
client
.
mkdirs
(
dest
)
client
.
upload
(
model_name
,
dest
,
multi_processes
=
5
,
overwrite
=
True
)
if
os
.
path
.
isdir
(
model_name
):
client
.
upload_dir
(
dest
,
model_name
)
else
:
client
.
upload
(
dest
,
model_name
)
fleet
.
_role_maker
.
_barrier_worker
()
fleet
.
_role_maker
.
_barrier_worker
()
...
@@ -1248,7 +1224,7 @@ class FleetUtil(object):
...
@@ -1248,7 +1224,7 @@ class FleetUtil(object):
start
=
0
start
=
0
split_path
=
[]
split_path
=
[]
for
i
in
range
(
splits_per_day
):
for
i
in
range
(
splits_per_day
):
h
=
start
/
60
h
=
start
/
/
60
m
=
start
%
60
m
=
start
%
60
if
h
<
left_train_hour
or
h
>
right_train_hour
:
if
h
<
left_train_hour
or
h
>
right_train_hour
:
start
+=
split_interval
start
+=
split_interval
...
...
python/paddle/fluid/tests/unittests/hdfs_test_utils.py
浏览文件 @
2ef6188b
...
@@ -110,6 +110,24 @@ class FSTestBase(unittest.TestCase):
...
@@ -110,6 +110,24 @@ class FSTestBase(unittest.TestCase):
fs
.
delete
(
dst_file
)
fs
.
delete
(
dst_file
)
fs
.
delete
(
src_file
)
fs
.
delete
(
src_file
)
def
_test_upload_dir
(
self
,
fs
):
# upload dir
src_file
=
os
.
path
.
abspath
(
"./test_upload_dir"
)
dst_file
=
os
.
path
.
abspath
(
"./test_uolpad_dir"
)
file1
=
os
.
path
.
abspath
(
"./test_upload_dir/file1"
)
file2
=
os
.
path
.
abspath
(
"./test_upload_dir/file2"
)
local
=
LocalFS
()
local
.
mkdirs
(
src_file
)
local
.
touch
(
file1
)
local
.
touch
(
file2
)
fs
.
upload
(
src_file
,
dst_file
)
self
.
assertTrue
(
fs
.
is_exist
(
dst_file
))
fs
.
delete
(
dst_file
)
local
.
delete
(
src_file
)
def
_test_try_download
(
self
,
fs
):
def
_test_try_download
(
self
,
fs
):
src_file
=
os
.
path
.
abspath
(
"./test_try_download.src"
)
src_file
=
os
.
path
.
abspath
(
"./test_try_download.src"
)
dst_file
=
os
.
path
.
abspath
(
"./test_try_download.dst"
)
dst_file
=
os
.
path
.
abspath
(
"./test_try_download.dst"
)
...
@@ -152,15 +170,35 @@ class FSTestBase(unittest.TestCase):
...
@@ -152,15 +170,35 @@ class FSTestBase(unittest.TestCase):
pass
pass
local
=
LocalFS
()
local
=
LocalFS
()
local
.
touch
(
src_file
)
fs
.
touch
(
src_file
)
fs
.
delete
(
dst_file
)
local
.
delete
(
dst_file
)
assert
fs
.
need_upload_download
()
assert
fs
.
need_upload_download
()
self
.
assertFalse
(
fs
.
is_exist
(
dst_file
))
fs
.
download
(
src_file
,
dst_file
)
self
.
assertTrue
(
local
.
is_exist
(
dst_file
))
local
.
delete
(
dst_file
)
fs
.
delete
(
src_file
)
def
_test_download_dir
(
self
,
fs
):
src_file
=
os
.
path
.
abspath
(
"./test_download_dir_src"
)
dst_file
=
os
.
path
.
abspath
(
"./test_download_dir_dst"
)
file1
=
os
.
path
.
abspath
(
"./test_download_dir_src/file1"
)
file2
=
os
.
path
.
abspath
(
"./test_download_dir_src/file2"
)
fs
.
delete
(
dst_file
)
fs
.
delete
(
dst_file
)
fs
.
delete
(
src_file
)
fs
.
delete
(
src_file
)
fs
.
mkdirs
(
src_file
)
fs
.
touch
(
file1
)
fs
.
touch
(
file2
)
fs
.
download
(
src_file
,
dst_file
)
self
.
assertTrue
(
local
.
is_exist
(
dst_file
))
local
=
LocalFS
()
local
.
delete
(
dst_file
)
fs
.
delete
(
src_file
)
def
_test_mkdirs
(
self
,
fs
):
def
_test_mkdirs
(
self
,
fs
):
dir_name
=
"./test_mkdir"
dir_name
=
"./test_mkdir"
fs
.
mkdirs
(
dir_name
)
fs
.
mkdirs
(
dir_name
)
...
...
python/paddle/fluid/tests/unittests/test_hdfs3.py
浏览文件 @
2ef6188b
...
@@ -38,6 +38,7 @@ class FSTest3(FSTestBase):
...
@@ -38,6 +38,7 @@ class FSTest3(FSTestBase):
self
.
_test_try_download
(
fs
)
self
.
_test_try_download
(
fs
)
self
.
_test_upload
(
fs
)
self
.
_test_upload
(
fs
)
self
.
_test_upload_dir
(
fs
)
self
.
_test_download
(
fs
)
self
.
_test_download
(
fs
)
def
test_local
(
self
):
def
test_local
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录