Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
94a3789f
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
94a3789f
编写于
4月 10, 2020
作者:
H
hutuxian
提交者:
GitHub
4月 10, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add AfsAPI in PaddleBox (#23419)
* Introduces AfsAPI to resolve slow downloading. * Mainly used in PaddleBox
上级
d7dd4e1d
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
247 addition
and
7 deletion
+247
-7
paddle/fluid/framework/data_feed.cc
paddle/fluid/framework/data_feed.cc
+11
-2
paddle/fluid/framework/fleet/box_wrapper.cc
paddle/fluid/framework/fleet/box_wrapper.cc
+1
-0
paddle/fluid/framework/fleet/box_wrapper.h
paddle/fluid/framework/fleet/box_wrapper.h
+216
-0
paddle/fluid/pybind/box_helper_py.cc
paddle/fluid/pybind/box_helper_py.cc
+2
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+7
-5
python/paddle/fluid/tests/unittests/test_boxps.py
python/paddle/fluid/tests/unittests/test_boxps.py
+10
-0
未找到文件。
paddle/fluid/framework/data_feed.cc
浏览文件 @
94a3789f
...
@@ -370,8 +370,17 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
...
@@ -370,8 +370,17 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
while
(
this
->
PickOneFile
(
&
filename
))
{
while
(
this
->
PickOneFile
(
&
filename
))
{
VLOG
(
3
)
<<
"PickOneFile, filename="
<<
filename
VLOG
(
3
)
<<
"PickOneFile, filename="
<<
filename
<<
", thread_id="
<<
thread_id_
;
<<
", thread_id="
<<
thread_id_
;
int
err_no
=
0
;
#ifdef PADDLE_WITH_BOX_PS
this
->
fp_
=
fs_open_read
(
filename
,
&
err_no
,
this
->
pipe_command_
);
if
(
BoxWrapper
::
GetInstance
()
->
UseAfsApi
())
{
this
->
fp_
=
BoxWrapper
::
GetInstance
()
->
afs_manager
->
GetFile
(
filename
,
this
->
pipe_command_
);
}
else
{
#endif
int
err_no
=
0
;
this
->
fp_
=
fs_open_read
(
filename
,
&
err_no
,
this
->
pipe_command_
);
#ifdef PADDLE_WITH_BOX_PS
}
#endif
CHECK
(
this
->
fp_
!=
nullptr
);
CHECK
(
this
->
fp_
!=
nullptr
);
__fsetlocking
(
&*
(
this
->
fp_
),
FSETLOCKING_BYCALLER
);
__fsetlocking
(
&*
(
this
->
fp_
),
FSETLOCKING_BYCALLER
);
paddle
::
framework
::
ChannelWriter
<
T
>
writer
(
input_channel_
);
paddle
::
framework
::
ChannelWriter
<
T
>
writer
(
input_channel_
);
...
...
paddle/fluid/framework/fleet/box_wrapper.cc
浏览文件 @
94a3789f
...
@@ -27,6 +27,7 @@ namespace framework {
...
@@ -27,6 +27,7 @@ namespace framework {
std
::
shared_ptr
<
BoxWrapper
>
BoxWrapper
::
s_instance_
=
nullptr
;
std
::
shared_ptr
<
BoxWrapper
>
BoxWrapper
::
s_instance_
=
nullptr
;
cudaStream_t
BoxWrapper
::
stream_list_
[
8
];
cudaStream_t
BoxWrapper
::
stream_list_
[
8
];
std
::
shared_ptr
<
boxps
::
BoxPSBase
>
BoxWrapper
::
boxps_ptr_
=
nullptr
;
std
::
shared_ptr
<
boxps
::
BoxPSBase
>
BoxWrapper
::
boxps_ptr_
=
nullptr
;
AfsManager
*
BoxWrapper
::
afs_manager
=
nullptr
;
void
BasicAucCalculator
::
compute
()
{
void
BasicAucCalculator
::
compute
()
{
double
*
table
[
2
]
=
{
&
_table
[
0
][
0
],
&
_table
[
1
][
0
]};
double
*
table
[
2
]
=
{
&
_table
[
0
][
0
],
&
_table
[
1
][
0
]};
...
...
paddle/fluid/framework/fleet/box_wrapper.h
浏览文件 @
94a3789f
...
@@ -15,7 +15,13 @@ limitations under the License. */
...
@@ -15,7 +15,13 @@ limitations under the License. */
#pragma once
#pragma once
#ifdef PADDLE_WITH_BOX_PS
#ifdef PADDLE_WITH_BOX_PS
#include <afs_filesystem.h>
#include <boxps_public.h>
#include <boxps_public.h>
#include <dirent.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#endif
#endif
#include <glog/logging.h>
#include <glog/logging.h>
#include <algorithm>
#include <algorithm>
...
@@ -36,6 +42,7 @@ limitations under the License. */
...
@@ -36,6 +42,7 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/fluid/string/string_helper.h"
// Size in bytes of the chunk buffer used when copying an AFS file into a pipe.
// The expansion is parenthesized so the macro behaves as a single value inside
// larger expressions (e.g. `x / BUF_SIZE`).
#define BUF_SIZE (1024 * 1024)
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -120,6 +127,203 @@ class BasicAucCalculator {
...
@@ -120,6 +127,203 @@ class BasicAucCalculator {
std
::
mutex
_table_mutex
;
std
::
mutex
_table_mutex
;
};
};
// Reads one AFS file through an afs::Reader obtained from the given file
// system handle. The handle is borrowed (never deleted here); the reader is
// owned by this object and closed in the destructor.
class AfsStreamFile {
 public:
  explicit AfsStreamFile(afs::AfsFileSystem* afsfile)
      : afsfile_(afsfile), reader_(nullptr) {}

  // The object owns its reader; a copy would hand the same reader to
  // CloseReader twice, so copying is disabled.
  AfsStreamFile(const AfsStreamFile&) = delete;
  AfsStreamFile& operator=(const AfsStreamFile&) = delete;

  virtual ~AfsStreamFile() { CloseReaderIfOpen(); }

  // Opens `path` for reading.
  // Returns -1 when `path` is null and 0 on success. Raises a
  // PreconditionNotMet error (via PADDLE_ENFORCE_NE) when OpenReader returns
  // a null reader.
  virtual int Open(const char* path) {
    if (path == nullptr) {
      return -1;
    }
    // Release a reader left over from an earlier Open so it is not leaked.
    CloseReaderIfOpen();
    reader_ = afsfile_->OpenReader(path);
    PADDLE_ENFORCE_NE(reader_, nullptr,
                      platform::errors::PreconditionNotMet(
                          "OpenReader for file[%s] failed.", path));
    return 0;
  }

  // Forwards to the reader's Read(buf, len) and returns its result unchanged.
  // Returns -1 when no file is open instead of dereferencing a null reader.
  // NOTE(review): callers in this file treat a result > 0 as "bytes
  // available"; the exact return contract of afs::Reader::Read is not visible
  // here.
  virtual int Read(char* buf, int len) {
    if (reader_ == nullptr) {
      return -1;
    }
    return reader_->Read(buf, len);
  }

 private:
  // Closes the current reader, if any, and resets the pointer.
  void CloseReaderIfOpen() {
    if (reader_ != nullptr) {
      afsfile_->CloseReader(reader_);
      reader_ = nullptr;
    }
  }

  afs::AfsFileSystem* afsfile_;
  afs::Reader* reader_;
};
// Owns a connected afs::AfsFileSystem handle and exposes GetFile(), which
// streams an AFS file through a shell command and returns the command's
// stdout as a FILE.
class AfsManager {
 public:
  // Creates the AFS handle, then calls Init(true, true) and Connect() on it.
  //
  // fs_name:   passed through to afs::AfsFileSystem.
  // fs_ugi:    credentials in the form "user,password"; split at the first
  //            comma.
  // conf_path: passed through to afs::AfsFileSystem.
  //
  // Raises PreconditionNotMet when fs_ugi has no comma, or when Init/Connect
  // return a non-zero code.
  AfsManager(const std::string& fs_name, const std::string& fs_ugi,
             const std::string& conf_path) {
    const auto split = fs_ugi.find(",");
    // Without a separator, substr() would silently yield user == pwd ==
    // fs_ugi; reject the malformed value instead.
    const bool has_separator = (split != std::string::npos);
    PADDLE_ENFORCE_EQ(has_separator, true,
                      platform::errors::PreconditionNotMet(
                          "fs_ugi should be in the form of 'user,password'."));
    std::string user = fs_ugi.substr(0, split);
    std::string pwd = fs_ugi.substr(split + 1);
    _afshandler = new afs::AfsFileSystem(fs_name.c_str(), user.c_str(),
                                         pwd.c_str(), conf_path.c_str());
    // The password is deliberately kept out of the log.
    VLOG(0) << "AFSAPI Init: user: " << user;
    int ret = _afshandler->Init(true, true);
    PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
                                  "Called AFSAPI Init Interface Failed."));
    ret = _afshandler->Connect();
    PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
                                  "Called AFSAPI Connect Interface Failed"));
  }

  // Disconnects, destroys and deletes the AFS handle.
  virtual ~AfsManager() {
    if (_afshandler != nullptr) {
      _afshandler->DisConnect();
      _afshandler->Destroy();
      delete _afshandler;
      _afshandler = nullptr;
    }
  }

  // Copies the AFS file `path` into `wfp` in BUF_SIZE chunks, then closes
  // `wfp`. Takes ownership of `wfp`: it is closed on every exit path so the
  // process reading the other end of the pipe always observes end-of-file.
  //
  // Raises PreconditionNotMet when the file cannot be opened.
  static void ReadFromAfs(const std::string& path, FILE* wfp,
                          afs::AfsFileSystem* _afshandler) {
    std::unique_ptr<FILE, int (*)(FILE*)> pipe_guard(wfp, &fclose);
    AfsStreamFile read_stream(_afshandler);
    int ret = read_stream.Open(path.c_str());
    PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
                                  "Called AFSAPI Open file %s Failed.",
                                  path.c_str()));
    // Heap buffer with automatic lifetime (previously calloc'ed memory was
    // released with `delete`, which is undefined behaviour).
    std::vector<char> buffer(BUF_SIZE);
    int size = 0;
    while ((size = read_stream.Read(buffer.data(), BUF_SIZE)) > 0) {
      const size_t written = fwrite(buffer.data(), 1, size, wfp);
      if (written != static_cast<size_t>(size)) {
        // The consumer went away or the pipe failed; further writes are
        // pointless.
        LOG(WARNING) << "write to pipe failed while reading afs file["
                     << path << "]";
        break;
      }
    }
    fflush(wfp);
  }

  // Starts `command` under /bin/sh with its stdout and/or stdin connected to
  // pipes.
  //
  // command:  shell command line handed to `sh -c`.
  // fp_read:  set to a FILE reading the child's stdout when `read` is true.
  // fp_write: set to a FILE writing to the child's stdin when `write` is true.
  // pid:      set to the child's process id.
  //
  // Returns 0 on success. Failures are reported with LOG(FATAL).
  int PopenBidirectionalInternal(const char* command,
                                 FILE*& fp_read,   // NOLINT
                                 FILE*& fp_write,  // NOLINT
                                 pid_t& pid,       // NOLINT
                                 bool read,        // NOLINT
                                 bool write) {
    std::lock_guard<std::mutex> g(g_flock);
    int fd_read[2];
    int fd_write[2];
    if (read) {
      if (pipe(fd_read) != 0) {
        LOG(FATAL) << "create read pipe failed";
        return -1;
      }
    }
    if (write) {
      if (pipe(fd_write) != 0) {
        LOG(FATAL) << "create write pipe failed";
        return -1;
      }
    }
    pid = vfork();
    if (pid < 0) {
      LOG(FATAL) << "fork failed";
      return -1;
    }
    if (pid == 0) {
      // Child: wire the pipe ends onto stdout/stdin.
      if (read) {
        if (-1 == dup2(fd_read[1], STDOUT_FILENO)) {
          LOG(FATAL) << "dup2 failed";
        }
        close(fd_read[1]);
        close(fd_read[0]);
      }
      if (write) {
        if (-1 == dup2(fd_write[0], STDIN_FILENO)) {
          LOG(FATAL) << "dup2 failed";
        }
        close(fd_write[0]);
        close(fd_write[1]);
      }
      // Close every inherited descriptor above stderr so the shell does not
      // keep the parent's files and sockets open.
      DIR* dir = opendir("/proc/self/fd");
      if (dir != nullptr) {
        // The directory stream has a descriptor of its own; closing it in
        // the middle of the scan would break readdir().
        const int dir_fd = dirfd(dir);
        struct dirent* item = nullptr;
        while ((item = readdir(dir)) != nullptr) {
          int fd = atoi(item->d_name);
          if (fd >= 3 && fd != dir_fd) {
            (void)close(fd);
          }
        }
        closedir(dir);
      }
      execl("/bin/sh", "sh", "-c", command, static_cast<char*>(nullptr));
      // Only reached when execl failed. A vfork child must leave through
      // _exit(): exit() would run the parent's atexit handlers and flush its
      // stdio buffers.
      _exit(127);
    } else {
      // Parent: keep the opposite pipe ends and wrap them in FILE streams.
      if (read) {
        close(fd_read[1]);
        fcntl(fd_read[0], F_SETFD, FD_CLOEXEC);
        fp_read = fdopen(fd_read[0], "r");
        if (nullptr == fp_read) {
          LOG(FATAL) << "fdopen failed.";
          return -1;
        }
      }
      if (write) {
        close(fd_write[0]);
        fcntl(fd_write[1], F_SETFD, FD_CLOEXEC);
        fp_write = fdopen(fd_write[1], "w");
        if (nullptr == fp_write) {
          LOG(FATAL) << "fdopen failed.";
          return -1;
        }
      }
      return 0;
    }
  }

  // Returns a FILE that yields the output of `pipe_command` when that command
  // is fed the contents of the AFS file `path` on its stdin.
  //
  // A leading "afs:" is stripped from `path`. The file is copied into the
  // command by a detached thread running ReadFromAfs. The returned pointer's
  // deleter waits for the child process, closes the stream and logs the exit
  // status.
  //
  // Raises PreconditionNotMet when the child process cannot be started.
  std::shared_ptr<FILE> GetFile(const std::string& path,
                                const std::string& pipe_command) {
    pid_t pid = 0;
    FILE* wfp = nullptr;
    FILE* rfp = nullptr;
    // Always use set -eo pipefail. Fail fast and be aware of exit codes.
    std::string cmd = "set -eo pipefail; " + pipe_command;
    int ret =
        PopenBidirectionalInternal(cmd.c_str(), rfp, wfp, pid, true, true);
    PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
                                  "Called PopenBidirectionalInternal Failed"));
    std::string filename(path);
    if (filename.compare(0, 4, "afs:") == 0) {
      filename = filename.substr(4);
    }
    // NOTE(review): the detached thread keeps using _afshandler, so this
    // manager has to outlive every transfer it started.
    std::thread read_thread(&AfsManager::ReadFromAfs, filename, wfp,
                            _afshandler);
    read_thread.detach();
    return {rfp, [pid](FILE* fp) {
              int wstatus = -1;
              int ret = -1;
              do {
                ret = waitpid(pid, &wstatus, 0);
              } while (ret == -1 && errno == EINTR);
              // fclose() may overwrite errno, so remember waitpid's value
              // before closing the stream.
              const int wait_errno = errno;
              fclose(fp);
              const bool no_child = (wstatus == -1 && wait_errno == ECHILD);
              if (wstatus == 0 || wstatus == (128 + SIGPIPE) * 256 ||
                  no_child) {
                VLOG(3) << "pclose_bidirectional pid[" << pid << "], status["
                        << wstatus << "]";
              } else {
                LOG(WARNING) << "pclose_bidirectional pid[" << pid << "]"
                             << ", ret[" << ret << "] shell open fail";
              }
              if (no_child) {
                LOG(WARNING) << "errno is ECHILD";
              }
            }};
  }

 private:
  afs::AfsFileSystem* _afshandler = nullptr;
  // Serializes child-process creation in PopenBidirectionalInternal.
  std::mutex g_flock;
};
class
BoxWrapper
{
class
BoxWrapper
{
public:
public:
virtual
~
BoxWrapper
()
{}
virtual
~
BoxWrapper
()
{}
...
@@ -224,6 +428,14 @@ class BoxWrapper {
...
@@ -224,6 +428,14 @@ class BoxWrapper {
return
s_instance_
;
return
s_instance_
;
}
}
void
InitAfsAPI
(
const
std
::
string
&
fs_name
,
const
std
::
string
&
fs_ugi
,
const
std
::
string
&
conf_path
)
{
afs_manager
=
new
AfsManager
(
fs_name
,
fs_ugi
,
conf_path
);
use_afs_api_
=
true
;
}
bool
UseAfsApi
()
const
{
return
use_afs_api_
;
}
const
std
::
unordered_set
<
std
::
string
>&
GetOmitedSlot
()
const
{
const
std
::
unordered_set
<
std
::
string
>&
GetOmitedSlot
()
const
{
return
slot_name_omited_in_feedpass_
;
return
slot_name_omited_in_feedpass_
;
}
}
...
@@ -521,6 +733,10 @@ class BoxWrapper {
...
@@ -521,6 +733,10 @@ class BoxWrapper {
std
::
vector
<
std
::
string
>
metric_name_list_
;
std
::
vector
<
std
::
string
>
metric_name_list_
;
std
::
vector
<
int
>
slot_vector_
;
std
::
vector
<
int
>
slot_vector_
;
std
::
vector
<
LoDTensor
>
keys_tensor
;
// Cache for pull_sparse
std
::
vector
<
LoDTensor
>
keys_tensor
;
// Cache for pull_sparse
bool
use_afs_api_
=
false
;
public:
static
AfsManager
*
afs_manager
;
};
};
#endif
#endif
...
...
paddle/fluid/pybind/box_helper_py.cc
浏览文件 @
94a3789f
...
@@ -81,6 +81,8 @@ void BindBoxWrapper(py::module* m) {
...
@@ -81,6 +81,8 @@ void BindBoxWrapper(py::module* m) {
py
::
call_guard
<
py
::
gil_scoped_release
>
())
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"flip_pass_flag"
,
&
framework
::
BoxWrapper
::
FlipPassFlag
,
.
def
(
"flip_pass_flag"
,
&
framework
::
BoxWrapper
::
FlipPassFlag
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"init_afs_api"
,
&
framework
::
BoxWrapper
::
InitAfsAPI
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"finalize"
,
&
framework
::
BoxWrapper
::
Finalize
,
.
def
(
"finalize"
,
&
framework
::
BoxWrapper
::
Finalize
,
py
::
call_guard
<
py
::
gil_scoped_release
>
());
py
::
call_guard
<
py
::
gil_scoped_release
>
());
}
// end BoxWrapper
}
// end BoxWrapper
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
94a3789f
...
@@ -1488,11 +1488,13 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -1488,11 +1488,13 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"is_compiled_with_mkldnn"
,
IsCompiledWithMKLDNN
);
m
.
def
(
"is_compiled_with_mkldnn"
,
IsCompiledWithMKLDNN
);
m
.
def
(
"is_compiled_with_brpc"
,
IsCompiledWithBrpc
);
m
.
def
(
"is_compiled_with_brpc"
,
IsCompiledWithBrpc
);
m
.
def
(
"is_compiled_with_dist"
,
IsCompiledWithDIST
);
m
.
def
(
"is_compiled_with_dist"
,
IsCompiledWithDIST
);
m
.
def
(
"run_cmd"
,
[](
const
std
::
string
&
cmd
,
int
time_out
=
-
1
,
m
.
def
(
"run_cmd"
,
int
sleep_inter
=
-
1
)
->
const
std
::
string
{
[](
const
std
::
string
&
cmd
,
int
time_out
=
-
1
,
return
paddle
::
framework
::
shell_get_command_output
(
cmd
,
time_out
,
int
sleep_inter
=
-
1
)
->
const
std
::
string
{
sleep_inter
);
return
paddle
::
framework
::
shell_get_command_output
(
cmd
,
time_out
,
});
sleep_inter
);
},
py
::
arg
(
"cmd"
),
py
::
arg
(
"time_out"
)
=
-
1
,
py
::
arg
(
"sleep_inter"
)
=
-
1
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
m
.
def
(
"is_float16_supported"
,
[](
const
platform
::
CUDAPlace
&
place
)
->
bool
{
m
.
def
(
"is_float16_supported"
,
[](
const
platform
::
CUDAPlace
&
place
)
->
bool
{
// Only GPUs with Compute Capability >= 53 support float16
// Only GPUs with Compute Capability >= 53 support float16
...
...
python/paddle/fluid/tests/unittests/test_boxps.py
浏览文件 @
94a3789f
...
@@ -76,6 +76,16 @@ class TestTranspile(unittest.TestCase):
...
@@ -76,6 +76,16 @@ class TestTranspile(unittest.TestCase):
print
(
e
)
print
(
e
)
class TestRunCmd(unittest.TestCase):
    """Checks core.run_cmd with default and with explicit timing arguments."""

    def test_run_cmd(self):
        shell_cmd = "ls; echo $?"

        def last_line_as_int(output):
            # `echo $?` prints the exit status of `ls` as the final line.
            return int(output.strip().split('\n')[-1])

        status_default = last_line_as_int(core.run_cmd(shell_cmd))
        status_explicit = last_line_as_int(core.run_cmd(shell_cmd, -1, -1))
        self.assertTrue(status_default == 0)
        self.assertTrue(status_explicit == 0)
class
TestBoxPSPreload
(
unittest
.
TestCase
):
class
TestBoxPSPreload
(
unittest
.
TestCase
):
""" TestCases for BoxPS Preload """
""" TestCases for BoxPS Preload """
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录