Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
540c5dc0
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
540c5dc0
编写于
9月 04, 2019
作者:
R
rensilin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
save_model_params_local
Change-Id: I65ba0979c822db14c45a9c9fd6b00bc54e630cf3
上级
76e8be34
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
100 addition
and
38 deletion
+100
-38
paddle/fluid/train/custom_trainer/feed/accessor/dense_input_accessor.cc
...rain/custom_trainer/feed/accessor/dense_input_accessor.cc
+51
-36
paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h
.../train/custom_trainer/feed/accessor/input_data_accessor.h
+11
-0
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.cc
...ain/custom_trainer/feed/executor/multi_thread_executor.cc
+26
-1
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h
...rain/custom_trainer/feed/executor/multi_thread_executor.h
+3
-0
paddle/fluid/train/custom_trainer/feed/io/file_system.h
paddle/fluid/train/custom_trainer/feed/io/file_system.h
+4
-0
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
...luid/train/custom_trainer/feed/process/learner_process.cc
+5
-1
未找到文件。
paddle/fluid/train/custom_trainer/feed/accessor/dense_input_accessor.cc
浏览文件 @
540c5dc0
...
...
@@ -70,6 +70,52 @@ int32_t DenseInputAccessor::pull_dense(size_t table_id) {
/// Forward step of the dense input accessor.
///
/// Delegates to collect_persistables(scope) and then, when asynchronous
/// pulling is enabled (_need_async_pull), increments _pull_request_num.
///
/// @param samples  not used by this implementation
/// @param num      not used by this implementation
/// @param scope    scope handed through to collect_persistables
/// @return 0 on success; otherwise the non-zero status reported by
///         collect_persistables
int32_t DenseInputAccessor::forward(SampleInstance* samples, size_t num,
        paddle::framework::Scope* scope) {
    // Do not swallow a failure from collect_persistables: report it to the
    // caller and skip the pull-request bump for a step that did not succeed.
    const int32_t status = collect_persistables(scope);
    if (status != 0) {
        return status;
    }
    if (_need_async_pull) {
        ++_pull_request_num;
    }
    return 0;
}
/// Backward step: pushes the dense gradients held in `scope` to the
/// parameter server table `_table_id`.
///
/// Does nothing when `_need_gradient` is false. Otherwise one Region is built
/// per entry of `_x_variables`, pointing at that variable's gradient tensor
/// data (`variable.gradient_name`, `variable.dim` floats), and all regions
/// are handed to `ps_client->push_dense`.
///
/// When FLAGS_feed_trainer_debug_dense_name is non-empty, the gradient values
/// of the variable with that name are logged at VLOG(2).
///
/// @param samples  not used by this implementation
/// @param num      not used by this implementation
/// @param scope    scope holding the gradient variables
/// @return always 0
int32_t DenseInputAccessor::backward(SampleInstance* samples, size_t num,
        paddle::framework::Scope* scope) {
    if (!_need_gradient) {
        return 0;
    }

    // Collect one region per dense variable, backed by its gradient tensor.
    std::vector<paddle::ps::Region> regions;
    for (auto& variable : _x_variables) {
        auto* tensor = scope->Var(variable.gradient_name)->
            GetMutable<paddle::framework::LoDTensor>();
        auto* grad_data = tensor->mutable_data<float>(_trainer_context->cpu_place);
        regions.emplace_back(grad_data, variable.dim);
    }

    auto* ps_client = _trainer_context->pslib->ps_client();
    // The returned status is not waited on here (the `.get()` call below is
    // disabled).
    // NOTE(review): if push_dense is asynchronous, the gradient buffers
    // referenced by `regions` must stay valid until it completes - confirm.
    auto push_status = ps_client->push_dense(regions.data(), regions.size(), _table_id);
    //push_status.get();

    if (!FLAGS_feed_trainer_debug_dense_name.empty()) {
        std::stringstream ssm;
        for (auto& variable : _x_variables) {
            ssm.str("");
            if (variable.name != FLAGS_feed_trainer_debug_dense_name) {
                continue;
            }
            auto& tensor = scope->Var(variable.gradient_name)->
                Get<paddle::framework::LoDTensor>();
            const auto* var_data = tensor.data<float>();
            // Dump the gradient as a comma-separated list.
            for (size_t data_idx = 0; data_idx < variable.dim; ++data_idx) {
                if (data_idx > 0)
                    ssm << ",";
                ssm << var_data[data_idx];
            }
            VLOG(2) << "[DEBUG]push_dense: " << ssm.str();
        }
    }
    return 0;
}
int32_t
DenseInputAccessor
::
collect_persistables
(
paddle
::
framework
::
Scope
*
scope
)
{
// 首次同步pull,之后异步pull
if
(
_data_buffer
==
nullptr
)
{
_pull_mutex
.
lock
();
...
...
@@ -95,7 +141,9 @@ int32_t DenseInputAccessor::forward(SampleInstance* samples, size_t num,
paddle
::
framework
::
DDim
ddim
(
shape_ptr
,
variable
.
shape
.
size
());
auto
*
tensor
=
ScopeHelper
::
resize_lod_tensor
(
scope
,
variable
.
name
,
ddim
);
auto
*
grad_tensor
=
ScopeHelper
::
resize_lod_tensor
(
scope
,
variable
.
gradient_name
,
ddim
);
VLOG
(
5
)
<<
"fill scope variable:"
<<
variable
.
name
<<
", "
<<
variable
.
gradient_name
;
VLOG
(
5
)
<<
"fill scope variable:"
<<
variable
.
name
<<
", "
<<
variable
.
gradient_name
<<
", data_buffer: "
<<
_data_buffer
+
data_buffer_idx
<<
", dim: "
<<
variable
.
dim
*
sizeof
(
float
);
auto
*
var_data
=
tensor
->
mutable_data
<
float
>
(
_trainer_context
->
cpu_place
);
memcpy
(
var_data
,
_data_buffer
+
data_buffer_idx
,
variable
.
dim
*
sizeof
(
float
));
data_buffer_idx
+=
variable
.
dim
;
...
...
@@ -120,45 +168,12 @@ int32_t DenseInputAccessor::forward(SampleInstance* samples, size_t num,
VLOG
(
2
)
<<
"[DEBUG]pull_dense: "
<<
ssm
.
str
();
}
}
if
(
_need_async_pull
)
{
++
_pull_request_num
;
}
return
0
;
}
int32_t
DenseInputAccessor
::
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
)
{
if
(
!
_need_gradient
)
{
return
0
;
}
size_t
data_buffer_idx
=
0
;
std
::
vector
<
paddle
::
ps
::
Region
>
regions
;
int32_t
DenseInputAccessor
::
collect_persistables_name
(
std
::
vector
<
std
::
string
>&
persistables
)
{
for
(
auto
&
variable
:
_x_variables
)
{
auto
*
tensor
=
scope
->
Var
(
variable
.
gradient_name
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
auto
*
grad_data
=
tensor
->
mutable_data
<
float
>
(
_trainer_context
->
cpu_place
);
regions
.
emplace_back
(
grad_data
,
variable
.
dim
);
}
auto
*
ps_client
=
_trainer_context
->
pslib
->
ps_client
();
auto
push_status
=
ps_client
->
push_dense
(
regions
.
data
(),
regions
.
size
(),
_table_id
);
//push_status.get();
if
(
!
FLAGS_feed_trainer_debug_dense_name
.
empty
())
{
std
::
stringstream
ssm
;
for
(
auto
&
variable
:
_x_variables
)
{
ssm
.
str
(
""
);
if
(
variable
.
name
!=
FLAGS_feed_trainer_debug_dense_name
)
{
continue
;
}
auto
&
tensor
=
scope
->
Var
(
variable
.
gradient_name
)
->
Get
<
paddle
::
framework
::
LoDTensor
>
();
const
auto
*
var_data
=
tensor
.
data
<
float
>
();
for
(
size_t
data_idx
=
0
;
data_idx
<
variable
.
dim
;
++
data_idx
)
{
if
(
data_idx
>
0
)
ssm
<<
","
;
ssm
<<
var_data
[
data_idx
];
}
VLOG
(
2
)
<<
"[DEBUG]push_dense: "
<<
ssm
.
str
();
}
persistables
.
push_back
(
variable
.
name
);
}
return
0
;
}
...
...
paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h
浏览文件 @
540c5dc0
...
...
@@ -38,6 +38,12 @@ public:
// 后向,一般用于更新梯度,在训练网络执行后调用
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
::
paddle
::
framework
::
Scope
*
scope
)
=
0
;
// Collects the names of the persistable variables into `persistables`.
// This default implementation adds nothing and returns 0.
// NOTE(review): the original comment also mentioned copying values into the
// Scope, but this method receives no Scope - presumably that part is handled
// by collect_persistables(); confirm.
virtual
int32_t
collect_persistables_name
(
std
::
vector
<
std
::
string
>&
persistables
)
{
return
0
;}
// Fills in the values of the persistable variables in `scope`; used when
// saving. This default implementation does nothing and returns 0.
virtual
int32_t
collect_persistables
(
paddle
::
framework
::
Scope
*
scope
)
{
return
0
;}
protected:
size_t
_table_id
=
0
;
bool
_need_gradient
=
false
;
...
...
@@ -144,6 +150,11 @@ public:
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
virtual
int32_t
collect_persistables_name
(
std
::
vector
<
std
::
string
>&
persistables
);
virtual
int32_t
collect_persistables
(
paddle
::
framework
::
Scope
*
scope
);
protected:
virtual
int32_t
pull_dense
(
size_t
table_id
);
...
...
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.cc
浏览文件 @
540c5dc0
...
...
@@ -52,6 +52,7 @@ int MultiThreadExecutor::initialize(YAML::Node exe_config,
CHECK
(
_trainer_context
->
file_system
->
exists
(
model_config_path
))
<<
"miss model config file:"
<<
model_config_path
;
_model_config
=
YAML
::
LoadFile
(
model_config_path
);
_persistables
.
clear
();
for
(
const
auto
&
accessor_config
:
_model_config
[
"input_accessor"
])
{
auto
accessor_class
=
accessor_config
[
"class"
].
as
<
std
::
string
>
();
auto
*
accessor_ptr
=
CREATE_INSTANCE
(
DataInputAccessor
,
accessor_class
);
...
...
@@ -66,7 +67,10 @@ int MultiThreadExecutor::initialize(YAML::Node exe_config,
_table_to_accessors
[
table_id
]
=
{
accessor_ptr
};
}
}
}
CHECK
(
accessor_ptr
->
collect_persistables_name
(
_persistables
)
==
0
)
<<
"collect_persistables Failed, class:"
<<
accessor_class
;
}
std
::
sort
(
_persistables
.
begin
(),
_persistables
.
end
());
// 持久化变量名一定要排序
// Monitor组件
for
(
const
auto
&
monitor_config
:
_model_config
[
"monitor"
])
{
...
...
@@ -79,6 +83,27 @@ int MultiThreadExecutor::initialize(YAML::Node exe_config,
return
ret
;
}
/// Saves the variables listed in `_persistables` into the single file
/// `filename`.
///
/// A scope object is taken from `_scope_obj_pool`, every input accessor is
/// asked to collect its persistables into that scope, and a one-op program
/// ("save_combine" with input "X" = `_persistables` and attribute
/// "file_path" = `filename`) is run on a CPU executor against the scope.
///
/// NOTE: the parent directory of `filename` is not created here; the
/// file-system mkdir call that would do so is disabled in this function.
///
/// @param filename  destination path given to the save_combine op
/// @return always 0
int32_t MultiThreadExecutor::save_persistables(const std::string& filename) {
    // auto fs = _trainer_context->file_system;
    // fs->mkdir(fs->path_split(filename).first);
    auto scope_obj = _scope_obj_pool->get();
    auto* scope = scope_obj.get();

    // Let each accessor place its persistable values into the scope.
    for (const auto& accessor : _input_accessors) {
        accessor->collect_persistables(scope);
    }

    // Build a program consisting of a single save_combine op in block 0.
    framework::ProgramDesc prog;
    auto* save_op = prog.MutableBlock(0)->AppendOp();
    save_op->SetType("save_combine");
    save_op->SetInput("X", _persistables);
    save_op->SetAttr("file_path", filename);
    save_op->CheckAttrs();

    platform::CPUPlace place;
    framework::Executor exe(place);
    exe.Run(prog, scope, 0, true, true);
    return 0;
}
paddle
::
framework
::
Channel
<
DataItem
>
MultiThreadExecutor
::
run
(
paddle
::
framework
::
Channel
<
DataItem
>
input
,
const
DataParser
*
parser
)
{
...
...
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h
浏览文件 @
540c5dc0
...
...
@@ -46,6 +46,8 @@ public:
//执行训练
virtual
paddle
::
framework
::
Channel
<
DataItem
>
run
(
paddle
::
framework
::
Channel
<
DataItem
>
input
,
const
DataParser
*
parser
);
virtual
int32_t
save_persistables
(
const
std
::
string
&
filename
);
virtual
bool
is_dump_all_model
()
{
return
_need_dump_all_model
;
...
...
@@ -79,6 +81,7 @@ protected:
std
::
vector
<
std
::
shared_ptr
<
DataInputAccessor
>>
_input_accessors
;
std
::
map
<
uint32_t
,
std
::
vector
<
DataInputAccessor
*>>
_table_to_accessors
;
std
::
shared_ptr
<
paddle
::
ps
::
ObjectPool
<::
paddle
::
framework
::
Scope
>>
_scope_obj_pool
;
std
::
vector
<
std
::
string
>
_persistables
;
};
}
// namespace feed
...
...
paddle/fluid/train/custom_trainer/feed/io/file_system.h
浏览文件 @
540c5dc0
...
...
@@ -25,6 +25,10 @@ public:
virtual
bool
exists
(
const
std
::
string
&
path
)
=
0
;
virtual
void
mkdir
(
const
std
::
string
&
path
)
=
0
;
virtual
std
::
string
path_join
(
const
std
::
string
&
dir
,
const
std
::
string
&
path
);
// Variadic overload: joins `dir` and `path` first, then joins the result with
// the remaining components in `paths`, left to right, by delegating to the
// other path_join overloads.
template <class... STRS>
std::string path_join(const std::string& dir, const std::string& path, const STRS&... paths) {
    const std::string head = path_join(dir, path);
    return path_join(head, paths...);
}
virtual
std
::
pair
<
std
::
string
,
std
::
string
>
path_split
(
const
std
::
string
&
path
);
protected:
};
...
...
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
浏览文件 @
540c5dc0
...
...
@@ -27,6 +27,7 @@ int LearnerProcess::initialize(std::shared_ptr<TrainerContext> context_ptr) {
}
int
LearnerProcess
::
wait_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
way
)
{
auto
fs
=
_context_ptr
->
file_system
;
auto
*
ps_client
=
_context_ptr
->
pslib
->
ps_client
();
auto
*
environment
=
_context_ptr
->
environment
.
get
();
auto
*
epoch_accessor
=
_context_ptr
->
epoch_accessor
.
get
();
...
...
@@ -39,18 +40,21 @@ int LearnerProcess::wait_save_model(uint64_t epoch_id, ModelSaveWay way) {
paddle
::
platform
::
Timer
timer
;
timer
.
Start
();
std
::
set
<
uint32_t
>
table_set
;
auto
model_dir
=
epoch_accessor
->
model_save_path
(
epoch_id
,
way
);
for
(
auto
&
executor
:
_executors
)
{
const
auto
&
table_accessors
=
executor
->
table_accessors
();
for
(
auto
&
itr
:
table_accessors
)
{
table_set
.
insert
(
itr
.
first
);
}
auto
save_path
=
fs
->
path_join
(
model_dir
,
executor
->
train_exe_name
()
+
"_param"
);
VLOG
(
2
)
<<
"Start save model, save_path:"
<<
save_path
;
executor
->
save_persistables
(
save_path
);
}
int
ret_size
=
0
;
auto
table_num
=
table_set
.
size
();
std
::
future
<
int
>
rets
[
table_num
];
for
(
auto
table_id
:
table_set
)
{
VLOG
(
2
)
<<
"Start save model, table_id:"
<<
table_id
;
auto
model_dir
=
epoch_accessor
->
model_save_path
(
epoch_id
,
way
);
rets
[
ret_size
++
]
=
ps_client
->
save
(
table_id
,
model_dir
,
std
::
to_string
((
int
)
way
));
}
int
all_ret
=
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录