Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
bd1c1724
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bd1c1724
编写于
12月 14, 2018
作者:
H
heqiaozhi
提交者:
dongdaxiang
12月 14, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ps_instance doc
上级
35ce6ac2
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
199 addition
and
219 deletion
+199
-219
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+8
-7
paddle/fluid/framework/async_executor.cc
paddle/fluid/framework/async_executor.cc
+54
-60
paddle/fluid/framework/executor_thread_worker.cc
paddle/fluid/framework/executor_thread_worker.cc
+79
-86
paddle/fluid/framework/executor_thread_worker.h
paddle/fluid/framework/executor_thread_worker.h
+58
-66
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
bd1c1724
#
windows treat symbolic file as a real file, which is different with unix
#
We create a hidden file and compile it instead of origin source file.
#windows treat symbolic file as a real file, which is different with unix
#We create a hidden file and compile it instead of origin source file.
function
(
windows_symbolic TARGET
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS
)
...
...
@@ -11,7 +11,7 @@ function(windows_symbolic TARGET)
message
(
FATAL
"
${
src
}
.cc and
${
src
}
.cu must exsits, and
${
src
}
.cu must be symbolic file."
)
endif
()
# only copy the xx.cu to
.xx.cu when the content are modified
#only copy the xx.cu to
.xx.cu when the content are modified
set
(
copy_flag 1
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu
)
file
(
READ
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cc SOURCE_STR
)
...
...
@@ -32,7 +32,7 @@ endfunction()
add_subdirectory
(
ir
)
add_subdirectory
(
details
)
#
ddim lib
#ddim lib
proto_library
(
framework_proto SRCS framework.proto
)
proto_library
(
async_executor_proto SRCS data_feed.proto
)
...
...
@@ -89,8 +89,8 @@ nv_test(data_device_transform_test SRCS data_device_transform_test.cu
if
(
WITH_GPU
)
if
(
WIN32
)
#
windows treat symbolic file as a real file, which is different with unix
#
We create a hidden file and compile it instead of origin source file.
#
windows treat symbolic file as a real file, which is different with unix
#
We create a hidden file and compile it instead of origin source file.
windows_symbolic
(
hidden_file SRCS data_type_transform.cu
)
nv_library
(
data_type_transform SRCS .data_type_transform.cu DEPS tensor
)
add_dependencies
(
data_type_transform hidden_file
)
...
...
@@ -137,7 +137,8 @@ cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator
nv_test
(
op_registry_test SRCS op_registry_test.cc DEPS op_registry
)
py_proto_compile
(
framework_py_proto SRCS framework.proto data_feed.proto
)
# Generate an empty __init__.py to make framework_py_proto as a valid python module.
#Generate an empty \
__init__.py to make framework_py_proto as a valid python module.
add_custom_target
(
framework_py_proto_init ALL COMMAND
${
CMAKE_COMMAND
}
-E touch __init__.py
)
add_dependencies
(
framework_py_proto framework_py_proto_init
)
if
(
NOT WIN32
)
...
...
paddle/fluid/framework/async_executor.cc
浏览文件 @
bd1c1724
...
...
@@ -30,7 +30,7 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/pybind/pybind.h"
#ifdef PADDLE_WITH_PSLIB
#include
"pslib.h"
#include
<pslib.h>
#endif
namespace
paddle
{
...
...
@@ -70,8 +70,7 @@ void PrepareReaders(std::vector<std::shared_ptr<DataFeed>>& readers, // NOLINT
#ifdef PADDLE_WITH_PSLIB
void
AsyncExecutor
::
InitServer
(
const
std
::
string
&
dist_desc
,
int
index
)
{
_pslib_ptr
=
std
::
shared_ptr
<
paddle
::
distributed
::
PSlib
>
(
_pslib_ptr
=
std
::
shared_ptr
<
paddle
::
distributed
::
PSlib
>
(
new
paddle
::
distributed
::
PSlib
());
_pslib_ptr
->
init_server
(
dist_desc
,
index
);
InitParamConfig
();
...
...
@@ -82,38 +81,41 @@ void AsyncExecutor::InitWorker(const std::string& dist_desc,
int
node_num
,
int
index
)
{
_pslib_ptr
=
std
::
shared_ptr
<
paddle
::
distributed
::
PSlib
>
(
new
paddle
::
distributed
::
PSlib
());
_pslib_ptr
->
init_worker
(
dist_desc
,
(
uint64_t
*
)(
host_sign_list
.
data
()),
node_num
,
index
);
_pslib_ptr
->
init_worker
(
dist_desc
,
static_cast
<
uint64_t
*>
(
host_sign_list
.
data
()),
node_num
,
index
);
InitParamConfig
();
}
uint64_t
AsyncExecutor
::
StartServer
()
{
return
_pslib_ptr
->
run_server
();
}
uint64_t
AsyncExecutor
::
StartServer
()
{
return
_pslib_ptr
->
run_server
();
}
void
AsyncExecutor
::
StopServer
()
{
_pslib_ptr
->
stop_server
();
}
void
AsyncExecutor
::
StopServer
()
{
_pslib_ptr
->
stop_server
();
}
void
AsyncExecutor
::
GatherServers
(
const
std
::
vector
<
uint64_t
>&
host_sign_list
,
int
node_num
)
{
_pslib_ptr
->
gather_servers
((
uint64_t
*
)(
host_sign_list
.
data
()),
node_num
);
void
AsyncExecutor
::
GatherServers
(
const
std
::
vector
<
uint64_t
>&
host_sign_list
,
int
node_num
)
{
_pslib_ptr
->
gather_servers
(
static_cast
<
uint64_t
*>
(
host_sign_list
.
data
()),
node_num
);
}
void
AsyncExecutor
::
InitParamConfig
()
{
for
(
int
i
=
0
;
i
<
_pslib_ptr
->
get_param
()
->
server_param
().
\
downpour_server_param
().
\
downpour_table_param_size
();
for
(
int
i
=
0
;
i
<
_pslib_ptr
->
get_param
()
->
server_param
()
.
downpour_server_param
()
.
downpour_table_param_size
();
++
i
)
{
if
(
_pslib_ptr
->
get_param
()
->
server_param
().
\
downpour_server_param
().
downpour_table_param
(
i
).
\
table_class
().
find
(
"SparseTable"
)
!=
-
1
)
{
_param_config
.
fea_dim
=
_pslib_ptr
->
get_param
()
->
server_param
().
\
downpour_server_param
().
\
downpour_table_param
(
i
).
\
accessor
().
fea_dim
();
if
(
_pslib_ptr
->
get_param
()
->
server_param
()
.
downpour_server_param
()
.
downpour_table_param
(
i
)
.
table_class
()
.
find
(
"SparseTable"
)
!=
-
1
)
{
_param_config
.
fea_dim
=
_pslib_ptr
->
get_param
()
->
server_param
()
.
downpour_server_param
()
.
downpour_table_param
(
i
)
.
accessor
()
.
fea_dim
();
break
;
}
}
...
...
@@ -123,27 +125,23 @@ void AsyncExecutor::InitParamConfig() {
_param_config
.
tmp_push_sparse_wait_times
=
static_cast
<
int32_t
>
(
_pslib_ptr
->
get_param
()
->
trainer_param
().
push_sparse_per_batch
());
for
(
auto
t
=
0u
;
t
<
_pslib_ptr
->
get_param
()
->
trainer_param
().
skip_op_size
();
for
(
auto
t
=
0u
;
t
<
_pslib_ptr
->
get_param
()
->
trainer_param
().
skip_op_size
();
++
t
)
{
_param_config
.
skip_op
.
push_back
(
_pslib_ptr
->
get_param
()
->
trainer_param
().
skip_op
(
t
));
}
for
(
auto
t
=
0u
;
t
<
_pslib_ptr
->
get_param
()
->
trainer_param
().
sparse_table_size
();
++
t
)
{
t
<
_pslib_ptr
->
get_param
()
->
trainer_param
().
sparse_table_size
();
++
t
)
{
auto
&
table
=
_pslib_ptr
->
get_param
()
->
trainer_param
().
sparse_table
(
t
);
std
::
vector
<
std
::
string
>
tmp_sparse_variable_name
;
for
(
int
i
=
0u
;
i
<
table
.
slot_value_size
();
++
i
)
{
tmp_sparse_variable_name
.
push_back
(
table
.
slot_value
(
i
));
_param_config
.
slot_alias_to_table
[
table
.
slot_key
(
i
)]
=
table
.
table_id
();
_param_config
.
slot_alias_to_table
[
table
.
slot_key
(
i
)]
=
table
.
table_id
();
}
std
::
vector
<
std
::
string
>
tmp_sparse_gradient_variable_name
;
for
(
auto
i
=
0u
;
i
<
table
.
slot_gradient_size
();
++
i
)
{
tmp_sparse_gradient_variable_name
.
push_back
(
table
.
slot_gradient
(
i
));
tmp_sparse_gradient_variable_name
.
push_back
(
table
.
slot_gradient
(
i
));
}
_param_config
.
slot_input_vec
[
table
.
table_id
()]
=
std
::
move
(
tmp_sparse_variable_name
);
...
...
@@ -153,8 +151,7 @@ void AsyncExecutor::InitParamConfig() {
}
for
(
auto
t
=
0u
;
t
<
_pslib_ptr
->
get_param
()
->
trainer_param
().
dense_table_size
();
++
t
)
{
t
<
_pslib_ptr
->
get_param
()
->
trainer_param
().
dense_table_size
();
++
t
)
{
auto
&
table
=
_pslib_ptr
->
get_param
()
->
trainer_param
().
dense_table
(
t
);
std
::
vector
<
std
::
string
>
tmp_dense_variable_name
;
for
(
int
i
=
0u
;
i
<
table
.
dense_variable_name_size
();
++
i
)
{
...
...
@@ -198,8 +195,7 @@ void AsyncExecutor::InitModel() {
regions
.
emplace_back
(
std
::
move
(
reg
));
}
auto
push_status
=
_pslib_ptr
->
_worker_ptr
->
push_dense_param
(
auto
push_status
=
_pslib_ptr
->
_worker_ptr
->
push_dense_param
(
regions
.
data
(),
regions
.
size
(),
table_id
);
push_status
.
wait
();
auto
status
=
push_status
.
get
();
...
...
@@ -225,14 +221,14 @@ void AsyncExecutor::SaveModel(const std::string& path) {
void
AsyncExecutor
::
PrepareDenseThread
(
const
std
::
string
&
mode
)
{
if
(
mode
==
"mpi"
)
{
DensePullThreadParam
param
;
param
.
ps_client
=
_pslib_ptr
->
_worker_ptr
;
;
param
.
ps_client
=
_pslib_ptr
->
_worker_ptr
;
param
.
threshold
=
1
;
param
.
training_thread_num
=
actual_thread_num
;
param
.
root_scope
=
root_scope_
;
param
.
dense_params
=
&
_param_config
.
dense_variable_name
;
_pull_dense_thread
=
std
::
shared_ptr
<
DensePullThread
>
(
new
DensePullThread
(
param
));
_pull_dense_thread
=
std
::
shared_ptr
<
DensePullThread
>
(
new
DensePullThread
(
param
));
_pull_dense_thread
->
start
();
}
}
...
...
@@ -243,8 +239,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
const
std
::
vector
<
std
::
string
>&
filelist
,
const
int
thread_num
,
const
std
::
vector
<
std
::
string
>&
fetch_var_names
,
const
std
::
string
&
mode
,
const
bool
debug
)
{
const
std
::
string
&
mode
,
const
bool
debug
)
{
std
::
vector
<
std
::
thread
>
threads
;
auto
&
block
=
main_program
.
Block
(
0
);
...
...
@@ -308,7 +303,6 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
fetch_var_names
,
root_scope_
,
thidx
,
debug
);
}
// start executing ops in multiple threads
for
(
int
thidx
=
0
;
thidx
<
actual_thread_num
;
++
thidx
)
{
threads
.
push_back
(
...
...
paddle/fluid/framework/executor_thread_worker.cc
浏览文件 @
bd1c1724
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/executor_thread_worker.h"
#include <algorithm>
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
...
...
@@ -95,8 +96,7 @@ void DensePullThread::wait_all() {
t
.
wait
();
auto
status
=
t
.
get
();
if
(
status
!=
0
)
{
LOG
(
WARNING
)
<<
"pull dense failed times:"
<<
++
_pull_dense_fail_times
;
LOG
(
WARNING
)
<<
"pull dense failed times:"
<<
++
_pull_dense_fail_times
;
}
}
...
...
@@ -108,8 +108,8 @@ void DensePullThread::wait_all() {
_pull_dense_status
.
resize
(
0
);
}
void
DensePullThread
::
increase_thread_version
(
int
thread_id
,
uint64_t
table_id
)
{
void
DensePullThread
::
increase_thread_version
(
int
thread_id
,
uint64_t
table_id
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
_mutex_for_version
);
_training_versions
[
table_id
][
thread_id
]
++
;
}
...
...
@@ -174,7 +174,6 @@ void ExecutorThreadWorker::SetFetchVarNames(
fetch_var_names
.
end
());
}
void
ExecutorThreadWorker
::
SetDevice
()
{
#if defined _WIN32 || defined __APPLE__
return
;
...
...
@@ -351,8 +350,7 @@ void AsyncExecutorThreadWorker::TrainOneNetwork() {
}
bool
need_skip
=
false
;
for
(
auto
t
=
0u
;
t
<
_param_config
->
skip_op
.
size
();
++
t
)
{
if
(
op
->
Type
().
find
(
_param_config
->
skip_op
[
t
])
!=
std
::
string
::
npos
)
{
if
(
op
->
Type
().
find
(
_param_config
->
skip_op
[
t
])
!=
std
::
string
::
npos
)
{
need_skip
=
true
;
break
;
}
...
...
@@ -437,13 +435,12 @@ void AsyncExecutorThreadWorker::PushDense(int table_id) {
regions
.
emplace_back
(
std
::
move
(
reg
));
}
auto
status
=
_pslib_ptr
->
_worker_ptr
->
push_dense
(
regions
.
data
(),
regions
.
size
(),
table_id
);
auto
status
=
_pslib_ptr
->
_worker_ptr
->
push_dense
(
regions
.
data
(),
regions
.
size
(),
table_id
);
_push_dense_status
.
push_back
(
std
::
move
(
status
));
}
void
AsyncExecutorThreadWorker
::
PullSparse
(
int
table_id
)
{
auto
&
features
=
_features
[
table_id
];
auto
&
feature_value
=
_feature_value
[
table_id
];
auto
fea_dim
=
_param_config
->
fea_dim
;
...
...
@@ -451,8 +448,7 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) {
features
.
clear
();
features
.
resize
(
0
);
features
.
reserve
(
MAX_FEASIGN_NUM
);
const
std
::
vector
<
std
::
string
>&
feed_vec
=
thread_reader_
->
GetUseSlotAlias
();
const
std
::
vector
<
std
::
string
>&
feed_vec
=
thread_reader_
->
GetUseSlotAlias
();
// slot_idx = 0 is label TODO
for
(
auto
slot_idx
=
1u
;
slot_idx
<
feed_vec
.
size
();
++
slot_idx
)
{
Variable
*
var
=
thread_scope_
->
FindVar
(
feed_vec
[
slot_idx
]);
...
...
@@ -468,7 +464,7 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) {
features
.
push_back
(
static_cast
<
uint64_t
>
(
ids
[
i
]));
}
}
check_pull_push_memory
(
features
,
feature_value
,
fea_dim
);
check_pull_push_memory
(
features
,
&
feature_value
,
fea_dim
);
std
::
vector
<
float
*>
pull_feature_value
;
for
(
auto
i
=
0u
;
i
<
features
.
size
();
++
i
)
{
...
...
@@ -480,7 +476,7 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) {
_pull_sparse_status
.
push_back
(
std
::
move
(
status
));
auto
&
push_g
=
_feature_push_value
[
table_id
];
check_pull_push_memory
(
features
,
push_g
,
fea_dim
);
check_pull_push_memory
(
features
,
&
push_g
,
fea_dim
);
collect_feasign_info
(
table_id
);
}
...
...
@@ -497,8 +493,7 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) {
std
::
vector
<
float
>
init_value
(
fea_dim
);
const
std
::
vector
<
std
::
string
>&
feed_vec
=
thread_reader_
->
GetUseSlotAlias
();
const
std
::
vector
<
std
::
string
>&
feed_vec
=
thread_reader_
->
GetUseSlotAlias
();
// slot_idx = 0 is label TODO
for
(
auto
slot_idx
=
1u
;
slot_idx
<
feed_vec
.
size
();
++
slot_idx
)
{
Variable
*
var
=
thread_scope_
->
FindVar
(
feed_vec
[
slot_idx
]);
...
...
@@ -508,8 +503,8 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) {
Variable
*
var_emb
=
thread_scope_
->
FindVar
(
_param_config
->
slot_input_vec
[
table_id
][
slot_idx
-
1
]);
LoDTensor
*
tensor_emb
=
var_emb
->
GetMutable
<
LoDTensor
>
();
float
*
ptr
=
tensor_emb
->
mutable_data
<
float
>
(
{
len
,
slot_dim
},
platform
::
CPUPlace
());
float
*
ptr
=
tensor_emb
->
mutable_data
<
float
>
(
{
len
,
slot_dim
},
platform
::
CPUPlace
());
memset
(
ptr
,
0
,
sizeof
(
float
)
*
len
*
slot_dim
);
auto
&
tensor_lod
=
tensor
->
lod
()[
0
];
...
...
@@ -518,12 +513,12 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) {
for
(
auto
index
=
0u
;
index
<
len
;
++
index
)
{
if
(
ids
[
index
]
==
0u
)
{
memcpy
(
ptr
+
slot_dim
*
index
,
init_value
.
data
()
+
2
,
sizeof
(
float
)
*
slot_dim
);
memcpy
(
ptr
+
slot_dim
*
index
,
init_value
.
data
()
+
2
,
sizeof
(
float
)
*
slot_dim
);
continue
;
}
memcpy
(
ptr
+
slot_dim
*
index
,
fea_value
[
fea_idx
].
data
()
+
2
,
sizeof
(
float
)
*
slot_dim
);
memcpy
(
ptr
+
slot_dim
*
index
,
fea_value
[
fea_idx
].
data
()
+
2
,
sizeof
(
float
)
*
slot_dim
);
fea_idx
++
;
}
}
...
...
@@ -534,31 +529,34 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) {
auto
fea_dim
=
_param_config
->
fea_dim
;
auto
&
features
=
_features
[
table_id
];
auto
&
push_g
=
_feature_push_value
[
table_id
];
check_pull_push_memory
(
features
,
push_g
,
fea_dim
);
CHECK
(
push_g
.
size
()
==
features
.
size
()
+
1
)
<<
"push_g size:"
<<
push_g
.
size
()
<<
" features size:"
<<
features
.
size
();
check_pull_push_memory
(
features
,
&
push_g
,
fea_dim
);
CHECK
(
push_g
.
size
()
==
features
.
size
()
+
1
)
<<
"push_g size:"
<<
push_g
.
size
()
<<
" features size:"
<<
features
.
size
();
uint64_t
fea_idx
=
0u
;
auto
&
fea_info
=
_fea_info
[
table_id
];
int
offset
=
2
;
const
std
::
vector
<
std
::
string
>&
feed_vec
=
thread_reader_
->
GetUseSlotAlias
();
// slot_idx = 0 is label
for
(
auto
slot_idx
=
1u
;
slot_idx
<
feed_vec
.
size
();
++
slot_idx
)
{
if
(
_param_config
->
slot_alias_to_table
.
find
(
feed_vec
[
slot_idx
])
==
_param_config
->
slot_alias_to_table
.
end
())
{
LOG
(
ERROR
)
<<
"ERROR slot_idx:"
<<
slot_idx
<<
" name:"
<<
feed_vec
[
slot_idx
];
}
else
if
(
_param_config
->
slot_alias_to_table
[
feed_vec
[
slot_idx
]]
!=
table_id
)
{
if
(
_param_config
->
slot_alias_to_table
.
find
(
feed_vec
[
slot_idx
])
==
_param_config
->
slot_alias_to_table
.
end
())
{
LOG
(
ERROR
)
<<
"ERROR slot_idx:"
<<
slot_idx
<<
" name:"
<<
feed_vec
[
slot_idx
];
}
else
if
(
_param_config
->
slot_alias_to_table
[
feed_vec
[
slot_idx
]]
!=
table_id
)
{
continue
;
}
Variable
*
g_var
=
thread_scope_
->
FindVar
(
_param_config
->
gradient_var
[
table_id
][
slot_idx
-
1
]);
CHECK
(
g_var
!=
nullptr
)
<<
"var["
<<
_param_config
->
gradient_var
[
table_id
][
slot_idx
-
1
]
<<
"] not found"
;
CHECK
(
g_var
!=
nullptr
)
<<
"var["
<<
_param_config
->
gradient_var
[
table_id
][
slot_idx
-
1
]
<<
"] not found"
;
LoDTensor
*
g_tensor
=
g_var
->
GetMutable
<
LoDTensor
>
();
if
(
g_tensor
==
NULL
)
{
LOG
(
ERROR
)
<<
"var["
<<
_param_config
->
gradient_var
[
table_id
][
slot_idx
-
1
]
<<
"] not found"
;
LOG
(
ERROR
)
<<
"var["
<<
_param_config
->
gradient_var
[
table_id
][
slot_idx
-
1
]
<<
"] not found"
;
exit
(
-
1
);
}
float
*
g
=
g_tensor
->
data
<
float
>
();
...
...
@@ -571,28 +569,27 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) {
exit
(
-
1
);
}
int
len
=
tensor
->
numel
();
CHECK
(
slot_dim
*
len
==
g_tensor
->
numel
())
<<
"len:"
<<
len
<<
" g_numel:"
<<
g_tensor
->
numel
();
CHECK
(
len
==
tensor
->
numel
())
<<
"len:"
<<
len
<<
"t_numel:"
<<
tensor
->
numel
();
CHECK
(
slot_dim
*
len
==
g_tensor
->
numel
())
<<
"len:"
<<
len
<<
" g_numel:"
<<
g_tensor
->
numel
();
CHECK
(
len
==
tensor
->
numel
())
<<
"len:"
<<
len
<<
"t_numel:"
<<
tensor
->
numel
();
int64_t
*
ids
=
tensor
->
data
<
int64_t
>
();
for
(
auto
id_idx
=
0u
;
id_idx
<
len
;
++
id_idx
)
{
if
(
ids
[
id_idx
]
==
0
)
{
g
+=
slot_dim
;
continue
;
}
memcpy
(
push_g
[
fea_idx
].
data
()
+
offset
,
g
,
sizeof
(
float
)
*
slot_dim
);
memcpy
(
push_g
[
fea_idx
].
data
()
+
offset
,
g
,
sizeof
(
float
)
*
slot_dim
);
push_g
[
fea_idx
][
0
]
=
1.0
f
;
CHECK
(
fea_idx
<
fea_info
.
size
())
<<
"fea_idx:"
<<
fea_idx
<<
" size:"
<<
fea_info
.
size
();
CHECK
(
fea_idx
<
fea_info
.
size
())
<<
"fea_idx:"
<<
fea_idx
<<
" size:"
<<
fea_info
.
size
();
push_g
[
fea_idx
][
1
]
=
static_cast
<
float
>
(
fea_info
[
fea_idx
].
label
);
g
+=
slot_dim
;
fea_idx
++
;
}
}
CHECK
(
fea_idx
==
features
.
size
())
<<
"fea_idx:"
<<
fea_idx
<<
" features size:"
<<
features
.
size
();
CHECK
(
fea_idx
==
features
.
size
())
<<
"fea_idx:"
<<
fea_idx
<<
" features size:"
<<
features
.
size
();
CHECK_GT
(
features
.
size
(),
0
);
std
::
vector
<
float
*>
push_g_vec
;
...
...
@@ -600,13 +597,12 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) {
push_g_vec
.
push_back
(
push_g
[
i
].
data
());
}
auto
status
=
_pslib_ptr
->
_worker_ptr
->
push_sparse
(
table_id
,
features
.
data
(),
(
const
float
**
)
push_g_vec
.
data
(),
features
.
size
());
table_id
,
features
.
data
(),
(
const
float
**
)
push_g_vec
.
data
(),
features
.
size
());
_push_sparse_status
.
push_back
(
std
::
move
(
status
));
}
void
AsyncExecutorThreadWorker
::
collect_feasign_info
(
int
table_id
)
{
void
AsyncExecutorThreadWorker
::
collect_feasign_info
(
int
table_id
)
{
auto
&
fea_info
=
_fea_info
[
table_id
];
auto
&
feature
=
_features
[
table_id
];
fea_info
.
resize
(
feature
.
size
());
...
...
@@ -633,31 +629,28 @@ void AsyncExecutorThreadWorker::collect_feasign_info(
}
}
}
CHECK
(
global_index
==
feature
.
size
())
<<
"expect fea info size:"
<<
feature
.
size
()
<<
" real:"
<<
global_index
;
CHECK
(
global_index
==
feature
.
size
())
<<
"expect fea info size:"
<<
feature
.
size
()
<<
" real:"
<<
global_index
;
}
void
AsyncExecutorThreadWorker
::
check_pull_push_memory
(
const
std
::
vector
<
uint64_t
>&
features
,
std
::
vector
<
std
::
vector
<
float
>>&
push_g
,
int
dim
)
{
push_g
.
resize
(
features
.
size
()
+
1
);
for
(
auto
&
t
:
push_g
)
{
std
::
vector
<
std
::
vector
<
float
>>*
push_g
,
int
dim
)
{
push_g
->
resize
(
features
.
size
()
+
1
);
for
(
auto
&
t
:
*
push_g
)
{
t
.
resize
(
dim
);
}
}
void
AsyncExecutorThreadWorker
::
check_pull_push_memory
(
const
std
::
vector
<
uint64_t
>&
features
,
std
::
vector
<
float
*>&
push_g
,
const
std
::
vector
<
uint64_t
>&
features
,
std
::
vector
<
float
*>*
push_g
,
int
dim
)
{
if
(
features
.
size
()
>
push_g
.
size
())
{
push_g
.
reserve
(
features
.
size
()
+
1
);
auto
size
=
features
.
size
()
-
push_g
.
size
()
+
1
;
if
(
features
.
size
()
>
push_g
->
size
())
{
push_g
->
reserve
(
features
.
size
()
+
1
);
auto
size
=
features
.
size
()
-
push_g
->
size
()
+
1
;
for
(
auto
i
=
0u
;
i
<
size
;
++
i
)
{
float
*
ptr
=
new
float
[
dim
];
push_g
.
push_back
(
ptr
);
push_g
->
push_back
(
ptr
);
}
}
}
...
...
paddle/fluid/framework/executor_thread_worker.h
浏览文件 @
bd1c1724
...
...
@@ -26,7 +26,7 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_PSLIB
#include
"pslib.h"
#include
<pslib.h>
#endif
namespace
paddle
{
...
...
@@ -34,7 +34,7 @@ namespace framework {
void
CreateTensor
(
Variable
*
var
,
proto
::
VarType
::
Type
var_type
);
#ifdef PADDLE_WITH_PSLIB
const
static
uint32_t
MAX_FEASIGN_NUM
=
1000
*
100
*
100
;
static
const
uint32_t
MAX_FEASIGN_NUM
=
1000
*
100
*
100
;
struct
AsyncWorkerParamConfig
{
int
slot_dim
;
...
...
@@ -66,8 +66,8 @@ struct DensePullThreadParam {
class
DensePullThread
{
public:
explicit
DensePullThread
(
const
DensePullThreadParam
&
param
)
:
_running
(
false
)
{
explicit
DensePullThread
(
const
DensePullThreadParam
&
param
)
:
_running
(
false
)
{
_ps_client
=
param
.
ps_client
;
_threshold
=
param
.
threshold
;
_thread_num
=
param
.
training_thread_num
;
...
...
@@ -75,8 +75,7 @@ class DensePullThread {
_sleep_time_ms
=
param
.
sleep_time_ms
;
for
(
auto
&
t
:
*
param
.
dense_params
)
{
_dense_variable_name
[
t
.
first
].
insert
(
_dense_variable_name
[
t
.
first
].
end
(),
_dense_variable_name
[
t
.
first
].
insert
(
_dense_variable_name
[
t
.
first
].
end
(),
t
.
second
.
begin
(),
t
.
second
.
end
());
_training_versions
[
t
.
first
].
resize
(
_thread_num
,
0
);
_last_versions
[
t
.
first
]
=
0
;
...
...
@@ -136,7 +135,7 @@ class DensePullThread {
class
ExecutorThreadWorker
{
public:
ExecutorThreadWorker
()
ExecutorThreadWorker
()
:
thread_id_
(
-
1
),
root_scope_
(
NULL
),
thread_scope_
(
NULL
),
debug_
(
false
)
{}
virtual
~
ExecutorThreadWorker
()
{}
...
...
@@ -161,10 +160,8 @@ ExecutorThreadWorker()
#ifdef PADDLE_WITH_PSLIB
virtual
void
SetPSlibPtr
(
std
::
shared_ptr
<
paddle
::
distributed
::
PSlib
>
pslib_ptr
)
{}
virtual
void
SetPullDenseThread
(
std
::
shared_ptr
<
DensePullThread
>
dpt
)
{}
virtual
void
SetParamConfig
(
AsyncWorkerParamConfig
*
param_config
)
{}
virtual
void
SetPullDenseThread
(
std
::
shared_ptr
<
DensePullThread
>
dpt
)
{}
virtual
void
SetParamConfig
(
AsyncWorkerParamConfig
*
param_config
)
{}
#endif
private:
...
...
@@ -195,7 +192,7 @@ ExecutorThreadWorker()
};
#ifdef PADDLE_WITH_PSLIB
class
AsyncExecutorThreadWorker
:
public
ExecutorThreadWorker
{
class
AsyncExecutorThreadWorker
:
public
ExecutorThreadWorker
{
public:
AsyncExecutorThreadWorker
()
{}
virtual
~
AsyncExecutorThreadWorker
()
{}
...
...
@@ -211,13 +208,10 @@ class AsyncExecutorThreadWorker: public ExecutorThreadWorker {
void
PushSparse
(
int
table_id
);
void
PushDense
(
int
table_id
);
void
check_pull_push_memory
(
const
std
::
vector
<
uint64_t
>&
features
,
std
::
vector
<
float
*>&
push_g
,
int
dim
);
void
check_pull_push_memory
(
const
std
::
vector
<
uint64_t
>&
features
,
std
::
vector
<
std
::
vector
<
float
>>&
push_g
,
int
dim
);
std
::
vector
<
float
*>*
push_g
,
int
dim
);
void
check_pull_push_memory
(
const
std
::
vector
<
uint64_t
>&
features
,
std
::
vector
<
std
::
vector
<
float
>>*
push_g
,
int
dim
);
void
collect_feasign_info
(
int
table_id
);
private:
...
...
@@ -232,7 +226,6 @@ class AsyncExecutorThreadWorker: public ExecutorThreadWorker {
std
::
map
<
uint64_t
,
std
::
vector
<
std
::
vector
<
float
>>>
_feature_value
;
std
::
map
<
uint64_t
,
std
::
vector
<
std
::
vector
<
float
>>>
_feature_push_value
;
std
::
shared_ptr
<
paddle
::
distributed
::
PSlib
>
_pslib_ptr
;
std
::
shared_ptr
<
DensePullThread
>
_pull_dense_thread
;
...
...
@@ -243,7 +236,6 @@ class AsyncExecutorThreadWorker: public ExecutorThreadWorker {
std
::
vector
<::
std
::
future
<
int32_t
>>
_push_dense_status
;
AsyncWorkerParamConfig
*
_param_config
;
};
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录