Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
aaea8a39
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aaea8a39
编写于
9月 09, 2019
作者:
X
xiexionghang
浏览文件
操作
浏览文件
下载
差异文件
merge master
上级
6efc30f5
5beaf463
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
84 addition
and
11 deletion
+84
-11
paddle/fluid/train/custom_trainer/feed/accessor/dense_input_accessor.cc
...rain/custom_trainer/feed/accessor/dense_input_accessor.cc
+20
-5
paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h
.../train/custom_trainer/feed/accessor/input_data_accessor.h
+11
-0
paddle/fluid/train/custom_trainer/feed/conf/trainer.yaml
paddle/fluid/train/custom_trainer/feed/conf/trainer.yaml
+2
-0
paddle/fluid/train/custom_trainer/feed/dataset/dataset_container.cc
...id/train/custom_trainer/feed/dataset/dataset_container.cc
+1
-1
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.cc
...ain/custom_trainer/feed/executor/multi_thread_executor.cc
+28
-1
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h
...rain/custom_trainer/feed/executor/multi_thread_executor.h
+3
-0
paddle/fluid/train/custom_trainer/feed/io/file_system.h
paddle/fluid/train/custom_trainer/feed/io/file_system.h
+4
-0
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
...luid/train/custom_trainer/feed/process/learner_process.cc
+5
-1
paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py
...luid/train/custom_trainer/feed/scripts/create_programs.py
+8
-0
paddle/fluid/train/custom_trainer/feed/shuffler/shuffler.cc
paddle/fluid/train/custom_trainer/feed/shuffler/shuffler.cc
+2
-3
未找到文件。
paddle/fluid/train/custom_trainer/feed/accessor/dense_input_accessor.cc
浏览文件 @
aaea8a39
...
...
@@ -69,6 +69,15 @@ int32_t DenseInputAccessor::pull_dense(size_t table_id) {
int32_t
DenseInputAccessor
::
forward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
)
{
collect_persistables
(
scope
);
if
(
_need_async_pull
)
{
++
_pull_request_num
;
}
return
0
;
}
int32_t
DenseInputAccessor
::
collect_persistables
(
paddle
::
framework
::
Scope
*
scope
)
{
// 首次同步pull,之后异步pull
if
(
_data_buffer
==
nullptr
)
{
_pull_mutex
.
lock
();
...
...
@@ -94,7 +103,9 @@ int32_t DenseInputAccessor::forward(SampleInstance* samples, size_t num,
paddle
::
framework
::
DDim
ddim
(
shape_ptr
,
variable
.
shape
.
size
());
auto
*
tensor
=
ScopeHelper
::
resize_lod_tensor
(
scope
,
variable
.
name
,
ddim
);
auto
*
grad_tensor
=
ScopeHelper
::
resize_lod_tensor
(
scope
,
variable
.
gradient_name
,
ddim
);
VLOG
(
5
)
<<
"fill scope variable:"
<<
variable
.
name
<<
", "
<<
variable
.
gradient_name
;
VLOG
(
5
)
<<
"fill scope variable:"
<<
variable
.
name
<<
", "
<<
variable
.
gradient_name
<<
", data_buffer: "
<<
_data_buffer
+
data_buffer_idx
<<
", dim: "
<<
variable
.
dim
*
sizeof
(
float
);
auto
*
var_data
=
tensor
->
mutable_data
<
float
>
(
_trainer_context
->
cpu_place
);
memcpy
(
var_data
,
_data_buffer
+
data_buffer_idx
,
variable
.
dim
*
sizeof
(
float
));
data_buffer_idx
+=
variable
.
dim
;
...
...
@@ -107,8 +118,12 @@ int32_t DenseInputAccessor::forward(SampleInstance* samples, size_t num,
VLOG
(
2
)
<<
"[Debug][PullDense]"
<<
ScopeHelper
::
to_string
(
scope
,
variable
.
name
);
}
}
if
(
_need_async_pull
)
{
++
_pull_request_num
;
return
0
;
}
int32_t
DenseInputAccessor
::
collect_persistables_name
(
std
::
vector
<
std
::
string
>&
persistables
)
{
for
(
auto
&
variable
:
_x_variables
)
{
persistables
.
push_back
(
variable
.
name
);
}
return
0
;
}
...
...
@@ -117,12 +132,12 @@ int32_t DenseInputAccessor::backward(SampleInstance* samples, size_t num,
paddle
::
framework
::
Scope
*
scope
)
{
if
(
!
_need_gradient
)
{
return
0
;
}
}
size_t
data_buffer_idx
=
0
;
std
::
vector
<
paddle
::
ps
::
Region
>
regions
;
for
(
auto
&
variable
:
_x_variables
)
{
auto
*
tensor
=
scope
->
Var
(
variable
.
gradient_name
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
auto
*
grad_data
=
tensor
->
mutable_data
<
float
>
(
_trainer_context
->
cpu_place
);
regions
.
emplace_back
(
grad_data
,
variable
.
dim
);
}
...
...
paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h
浏览文件 @
aaea8a39
...
...
@@ -38,6 +38,12 @@ public:
// 后向,一般用于更新梯度,在训练网络执行后调用
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
::
paddle
::
framework
::
Scope
*
scope
)
=
0
;
// 收集持久化变量的名称, 并将值拷贝到Scope
virtual
int32_t
collect_persistables_name
(
std
::
vector
<
std
::
string
>&
persistables
)
{
return
0
;}
// 填充持久化变量的值,用于保存
virtual
int32_t
collect_persistables
(
paddle
::
framework
::
Scope
*
scope
)
{
return
0
;}
protected:
size_t
_table_id
=
0
;
bool
_need_gradient
=
false
;
...
...
@@ -144,6 +150,11 @@ public:
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
virtual
int32_t
collect_persistables_name
(
std
::
vector
<
std
::
string
>&
persistables
);
virtual
int32_t
collect_persistables
(
paddle
::
framework
::
Scope
*
scope
);
protected:
virtual
int32_t
pull_dense
(
size_t
table_id
);
...
...
paddle/fluid/train/custom_trainer/feed/conf/trainer.yaml
浏览文件 @
aaea8a39
...
...
@@ -30,6 +30,8 @@ dataset:
pipeline_cmd
:
'
./tool/ins_weight.py
|
awk
-f
./tool/format_newcate_hotnews.awk'
parser
:
class
:
AbacusTextDataParser
shuffler
:
class
:
LocalShuffler
epoch
:
epoch_class
:
TimelyEpochAccessor
...
...
paddle/fluid/train/custom_trainer/feed/dataset/dataset_container.cc
浏览文件 @
aaea8a39
...
...
@@ -31,7 +31,7 @@ int DatasetContainer::initialize(
_data_root_paths
=
config
[
"root_path"
].
as
<
std
::
vector
<
std
::
string
>>
();
_data_split_interval
=
config
[
"data_spit_interval"
].
as
<
int
>
();
_data_path_formater
=
config
[
"data_path_formater"
].
as
<
std
::
string
>
();
std
::
string
shuffler
=
config
[
"shuffler"
][
"
name
"
].
as
<
std
::
string
>
();
std
::
string
shuffler
=
config
[
"shuffler"
][
"
class
"
].
as
<
std
::
string
>
();
_shuffler
.
reset
(
CREATE_INSTANCE
(
Shuffler
,
shuffler
));
_shuffler
->
initialize
(
config
,
context
);
std
::
string
data_reader_class
=
config
[
"data_reader"
].
as
<
std
::
string
>
();
...
...
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.cc
浏览文件 @
aaea8a39
...
...
@@ -2,6 +2,8 @@
#include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
#include "paddle/fluid/train/custom_trainer/feed/monitor/monitor.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/program_desc.h"
namespace
paddle
{
namespace
custom_trainer
{
...
...
@@ -55,6 +57,7 @@ int MultiThreadExecutor::initialize(YAML::Node exe_config,
CHECK
(
_trainer_context
->
file_system
->
exists
(
model_config_path
))
<<
"miss model config file:"
<<
model_config_path
;
_model_config
=
YAML
::
LoadFile
(
model_config_path
);
_persistables
.
clear
();
for
(
const
auto
&
accessor_config
:
_model_config
[
"input_accessor"
])
{
auto
accessor_class
=
accessor_config
[
"class"
].
as
<
std
::
string
>
();
auto
*
accessor_ptr
=
CREATE_INSTANCE
(
DataInputAccessor
,
accessor_class
);
...
...
@@ -69,7 +72,10 @@ int MultiThreadExecutor::initialize(YAML::Node exe_config,
_table_to_accessors
[
table_id
]
=
{
accessor_ptr
};
}
}
}
CHECK
(
accessor_ptr
->
collect_persistables_name
(
_persistables
)
==
0
)
<<
"collect_persistables Failed, class:"
<<
accessor_class
;
}
// std::sort(_persistables.begin(), _persistables.end()); // 持久化变量名一定要排序
// Monitor组件
for
(
const
auto
&
monitor_config
:
_model_config
[
"monitor"
])
{
...
...
@@ -82,6 +88,27 @@ int MultiThreadExecutor::initialize(YAML::Node exe_config,
return
ret
;
}
int32_t
MultiThreadExecutor
::
save_persistables
(
const
std
::
string
&
filename
)
{
// auto fs = _trainer_context->file_system;
// fs->mkdir(fs->path_split(filename).first);
auto
scope_obj
=
_scope_obj_pool
->
get
();
for
(
size_t
i
=
0
;
i
<
_input_accessors
.
size
();
++
i
)
{
_input_accessors
[
i
]
->
collect_persistables
(
scope_obj
.
get
());
}
framework
::
ProgramDesc
prog
;
auto
*
block
=
prog
.
MutableBlock
(
0
);
auto
*
op
=
block
->
AppendOp
();
op
->
SetType
(
"save_combine"
);
op
->
SetInput
(
"X"
,
_persistables
);
op
->
SetAttr
(
"file_path"
,
filename
);
op
->
CheckAttrs
();
platform
::
CPUPlace
place
;
framework
::
Executor
exe
(
place
);
exe
.
Run
(
prog
,
scope_obj
.
get
(),
0
,
true
,
true
);
return
0
;
}
paddle
::
framework
::
Channel
<
DataItem
>
MultiThreadExecutor
::
run
(
paddle
::
framework
::
Channel
<
DataItem
>
input
,
const
DataParser
*
parser
)
{
...
...
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h
浏览文件 @
aaea8a39
...
...
@@ -46,6 +46,8 @@ public:
//执行训练
virtual
paddle
::
framework
::
Channel
<
DataItem
>
run
(
paddle
::
framework
::
Channel
<
DataItem
>
input
,
const
DataParser
*
parser
);
virtual
int32_t
save_persistables
(
const
std
::
string
&
filename
);
virtual
bool
is_dump_all_model
()
{
return
_need_dump_all_model
;
...
...
@@ -80,6 +82,7 @@ protected:
std
::
vector
<
std
::
shared_ptr
<
DataInputAccessor
>>
_input_accessors
;
std
::
map
<
uint32_t
,
std
::
vector
<
DataInputAccessor
*>>
_table_to_accessors
;
std
::
shared_ptr
<
paddle
::
ps
::
ObjectPool
<::
paddle
::
framework
::
Scope
>>
_scope_obj_pool
;
std
::
vector
<
std
::
string
>
_persistables
;
};
}
// namespace feed
...
...
paddle/fluid/train/custom_trainer/feed/io/file_system.h
浏览文件 @
aaea8a39
...
...
@@ -25,6 +25,10 @@ public:
virtual
bool
exists
(
const
std
::
string
&
path
)
=
0
;
virtual
void
mkdir
(
const
std
::
string
&
path
)
=
0
;
virtual
std
::
string
path_join
(
const
std
::
string
&
dir
,
const
std
::
string
&
path
);
template
<
class
...
STRS
>
std
::
string
path_join
(
const
std
::
string
&
dir
,
const
std
::
string
&
path
,
const
STRS
&
...
paths
)
{
return
path_join
(
path_join
(
dir
,
path
),
paths
...);
}
virtual
std
::
pair
<
std
::
string
,
std
::
string
>
path_split
(
const
std
::
string
&
path
);
protected:
};
...
...
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
浏览文件 @
aaea8a39
...
...
@@ -27,6 +27,7 @@ int LearnerProcess::initialize(std::shared_ptr<TrainerContext> context_ptr) {
}
int
LearnerProcess
::
wait_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
way
)
{
auto
fs
=
_context_ptr
->
file_system
;
auto
*
ps_client
=
_context_ptr
->
pslib
->
ps_client
();
auto
*
environment
=
_context_ptr
->
environment
.
get
();
auto
*
epoch_accessor
=
_context_ptr
->
epoch_accessor
.
get
();
...
...
@@ -39,18 +40,21 @@ int LearnerProcess::wait_save_model(uint64_t epoch_id, ModelSaveWay way) {
paddle
::
platform
::
Timer
timer
;
timer
.
Start
();
std
::
set
<
uint32_t
>
table_set
;
auto
model_dir
=
epoch_accessor
->
model_save_path
(
epoch_id
,
way
);
for
(
auto
&
executor
:
_executors
)
{
const
auto
&
table_accessors
=
executor
->
table_accessors
();
for
(
auto
&
itr
:
table_accessors
)
{
table_set
.
insert
(
itr
.
first
);
}
auto
save_path
=
fs
->
path_join
(
model_dir
,
executor
->
train_exe_name
()
+
"_param"
);
VLOG
(
2
)
<<
"Start save model, save_path:"
<<
save_path
;
executor
->
save_persistables
(
save_path
);
}
int
ret_size
=
0
;
auto
table_num
=
table_set
.
size
();
std
::
future
<
int
>
rets
[
table_num
];
for
(
auto
table_id
:
table_set
)
{
VLOG
(
2
)
<<
"Start save model, table_id:"
<<
table_id
;
auto
model_dir
=
epoch_accessor
->
model_save_path
(
epoch_id
,
way
);
rets
[
ret_size
++
]
=
ps_client
->
save
(
table_id
,
model_dir
,
std
::
to_string
((
int
)
way
));
}
int
all_ret
=
0
;
...
...
paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py
浏览文件 @
aaea8a39
...
...
@@ -124,6 +124,14 @@ class ModelBuilder:
with
open
(
os
.
path
.
join
(
self
.
_save_path
,
name
+
'.pbtxt'
),
'w'
)
as
fout
:
fout
.
write
(
str
(
program
))
fluid
.
io
.
save_inference_model
(
self
.
_save_path
,
[
var
.
name
for
var
in
inputs
],
outputs
,
executor
=
None
,
main_program
=
test_program
,
model_filename
=
'inference_program'
,
program_only
=
True
)
params
=
filter
(
fluid
.
io
.
is_parameter
,
main_program
.
list_vars
())
vars
=
[]
sums
=
[]
...
...
paddle/fluid/train/custom_trainer/feed/shuffler/shuffler.cc
浏览文件 @
aaea8a39
#pragma once
#include "paddle/fluid/framework/archive.h"
#include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
#include "paddle/fluid/train/custom_trainer/feed/shuffler/shuffler.h"
...
...
@@ -30,7 +29,7 @@ public:
return
0
;
}
};
REGIST_CLASS
(
DataPars
er
,
LocalShuffler
);
REGIST_CLASS
(
Shuffl
er
,
LocalShuffler
);
class
GlobalShuffler
:
public
Shuffler
{
public:
...
...
@@ -109,7 +108,7 @@ private:
uint32_t
_max_concurrent_num
=
0
;
};
REGIST_CLASS
(
DataPars
er
,
GlobalShuffler
);
REGIST_CLASS
(
Shuffl
er
,
GlobalShuffler
);
}
// namespace feed
}
// namespace custom_trainer
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录