Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
b410c4e1
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b410c4e1
编写于
8月 27, 2019
作者:
Y
yaopenghui
浏览文件
操作
浏览文件
下载
差异文件
add cost monitor
上级
f0d571da
d1e62a0a
变更
70
隐藏空白更改
内联
并排
Showing
70 changed file
with
3333 addition
and
476 deletion
+3333
-476
BCLOUD
BCLOUD
+4
-2
paddle/fluid/string/to_string.h
paddle/fluid/string/to_string.h
+10
-0
paddle/fluid/train/custom_trainer/feed/accessor/dense_input_accessor.cc
...rain/custom_trainer/feed/accessor/dense_input_accessor.cc
+152
-0
paddle/fluid/train/custom_trainer/feed/accessor/epoch_accessor.cc
...luid/train/custom_trainer/feed/accessor/epoch_accessor.cc
+75
-21
paddle/fluid/train/custom_trainer/feed/accessor/epoch_accessor.h
...fluid/train/custom_trainer/feed/accessor/epoch_accessor.h
+26
-7
paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h
.../train/custom_trainer/feed/accessor/input_data_accessor.h
+168
-0
paddle/fluid/train/custom_trainer/feed/accessor/label_input_accessor.cc
...rain/custom_trainer/feed/accessor/label_input_accessor.cc
+74
-0
paddle/fluid/train/custom_trainer/feed/accessor/sparse_input_accessor.cc
...ain/custom_trainer/feed/accessor/sparse_input_accessor.cc
+224
-0
paddle/fluid/train/custom_trainer/feed/common/pipeline.h
paddle/fluid/train/custom_trainer/feed/common/pipeline.h
+43
-26
paddle/fluid/train/custom_trainer/feed/common/pslib_warpper.cc
...e/fluid/train/custom_trainer/feed/common/pslib_warpper.cc
+81
-0
paddle/fluid/train/custom_trainer/feed/common/pslib_warpper.h
...le/fluid/train/custom_trainer/feed/common/pslib_warpper.h
+51
-0
paddle/fluid/train/custom_trainer/feed/common/registerer.cc
paddle/fluid/train/custom_trainer/feed/common/registerer.cc
+3
-3
paddle/fluid/train/custom_trainer/feed/common/registerer.h
paddle/fluid/train/custom_trainer/feed/common/registerer.h
+9
-9
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.cc
...d/train/custom_trainer/feed/common/runtime_environment.cc
+22
-5
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.h
...id/train/custom_trainer/feed/common/runtime_environment.h
+22
-17
paddle/fluid/train/custom_trainer/feed/common/scope_helper.h
paddle/fluid/train/custom_trainer/feed/common/scope_helper.h
+55
-0
paddle/fluid/train/custom_trainer/feed/common/yaml_helper.h
paddle/fluid/train/custom_trainer/feed/common/yaml_helper.h
+32
-0
paddle/fluid/train/custom_trainer/feed/conf/gflags.conf
paddle/fluid/train/custom_trainer/feed/conf/gflags.conf
+4
-1
paddle/fluid/train/custom_trainer/feed/conf/ps_table_config
paddle/fluid/train/custom_trainer/feed/conf/ps_table_config
+120
-0
paddle/fluid/train/custom_trainer/feed/conf/trainer.yaml
paddle/fluid/train/custom_trainer/feed/conf/trainer.yaml
+49
-27
paddle/fluid/train/custom_trainer/feed/dataset/abacus_data_reader.cc
...d/train/custom_trainer/feed/dataset/abacus_data_reader.cc
+76
-0
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.cc
...le/fluid/train/custom_trainer/feed/dataset/data_reader.cc
+18
-57
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h
+22
-4
paddle/fluid/train/custom_trainer/feed/dataset/dataset.cc
paddle/fluid/train/custom_trainer/feed/dataset/dataset.cc
+18
-18
paddle/fluid/train/custom_trainer/feed/dataset/dataset.h
paddle/fluid/train/custom_trainer/feed/dataset/dataset.h
+7
-4
paddle/fluid/train/custom_trainer/feed/dataset/dataset_container.cc
...id/train/custom_trainer/feed/dataset/dataset_container.cc
+29
-4
paddle/fluid/train/custom_trainer/feed/dataset/dataset_container.h
...uid/train/custom_trainer/feed/dataset/dataset_container.h
+7
-5
paddle/fluid/train/custom_trainer/feed/executor/executor.cc
paddle/fluid/train/custom_trainer/feed/executor/executor.cc
+28
-30
paddle/fluid/train/custom_trainer/feed/executor/executor.h
paddle/fluid/train/custom_trainer/feed/executor/executor.h
+7
-21
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.cc
...ain/custom_trainer/feed/executor/multi_thread_executor.cc
+150
-0
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h
...rain/custom_trainer/feed/executor/multi_thread_executor.h
+61
-0
paddle/fluid/train/custom_trainer/feed/io/auto_file_system.cc
...le/fluid/train/custom_trainer/feed/io/auto_file_system.cc
+5
-24
paddle/fluid/train/custom_trainer/feed/io/file_system.h
paddle/fluid/train/custom_trainer/feed/io/file_system.h
+2
-12
paddle/fluid/train/custom_trainer/feed/io/hadoop_file_system.cc
.../fluid/train/custom_trainer/feed/io/hadoop_file_system.cc
+7
-6
paddle/fluid/train/custom_trainer/feed/io/local_file_system.cc
...e/fluid/train/custom_trainer/feed/io/local_file_system.cc
+5
-5
paddle/fluid/train/custom_trainer/feed/main.cc
paddle/fluid/train/custom_trainer/feed/main.cc
+50
-21
paddle/fluid/train/custom_trainer/feed/monitor/monitor.h
paddle/fluid/train/custom_trainer/feed/monitor/monitor.h
+1
-1
paddle/fluid/train/custom_trainer/feed/process/init_env_process.cc
...uid/train/custom_trainer/feed/process/init_env_process.cc
+2
-14
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
...luid/train/custom_trainer/feed/process/learner_process.cc
+107
-64
paddle/fluid/train/custom_trainer/feed/process/learner_process.h
...fluid/train/custom_trainer/feed/process/learner_process.h
+6
-12
paddle/fluid/train/custom_trainer/feed/process/process.cc
paddle/fluid/train/custom_trainer/feed/process/process.cc
+2
-2
paddle/fluid/train/custom_trainer/feed/process/process.h
paddle/fluid/train/custom_trainer/feed/process/process.h
+1
-1
paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py
...luid/train/custom_trainer/feed/scripts/create_programs.py
+22
-2
paddle/fluid/train/custom_trainer/feed/scripts/example.py
paddle/fluid/train/custom_trainer/feed/scripts/example.py
+3
-1
paddle/fluid/train/custom_trainer/feed/scripts/join.py
paddle/fluid/train/custom_trainer/feed/scripts/join.py
+67
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/example/main_program
...in/custom_trainer/feed/scripts/model/example/main_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/example/model.yaml
...rain/custom_trainer/feed/scripts/model/example/model.yaml
+49
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/example/startup_program
...custom_trainer/feed/scripts/model/example/startup_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/example/test_program
...in/custom_trainer/feed/scripts/model/example/test_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/join/main_program
...train/custom_trainer/feed/scripts/model/join/main_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/join/model.yaml
...d/train/custom_trainer/feed/scripts/model/join/model.yaml
+103
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/join/startup_program
...in/custom_trainer/feed/scripts/model/join/startup_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/join/test_program
...train/custom_trainer/feed/scripts/model/join/test_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/update/main_program
...ain/custom_trainer/feed/scripts/model/update/main_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/update/model.yaml
...train/custom_trainer/feed/scripts/model/update/model.yaml
+84
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/update/startup_program
.../custom_trainer/feed/scripts/model/update/startup_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/model/update/test_program
...ain/custom_trainer/feed/scripts/model/update/test_program
+0
-0
paddle/fluid/train/custom_trainer/feed/scripts/update.py
paddle/fluid/train/custom_trainer/feed/scripts/update.py
+60
-0
paddle/fluid/train/custom_trainer/feed/tool/format_newcate_hotnews.awk
...train/custom_trainer/feed/tool/format_newcate_hotnews.awk
+21
-0
paddle/fluid/train/custom_trainer/feed/tool/gdbinit
paddle/fluid/train/custom_trainer/feed/tool/gdbinit
+697
-0
paddle/fluid/train/custom_trainer/feed/tool/ins_weight.py
paddle/fluid/train/custom_trainer/feed/tool/ins_weight.py
+122
-0
paddle/fluid/train/custom_trainer/feed/tool/xbox_compressor_mf.py
...luid/train/custom_trainer/feed/tool/xbox_compressor_mf.py
+162
-0
paddle/fluid/train/custom_trainer/feed/tool/xbox_decompressor_mf.awk
...d/train/custom_trainer/feed/tool/xbox_decompressor_mf.awk
+52
-0
paddle/fluid/train/custom_trainer/feed/tool/xbox_pb_converter
...le/fluid/train/custom_trainer/feed/tool/xbox_pb_converter
+0
-0
paddle/fluid/train/custom_trainer/feed/tool/xbox_pb_deconverter
.../fluid/train/custom_trainer/feed/tool/xbox_pb_deconverter
+0
-0
paddle/fluid/train/custom_trainer/feed/trainer_context.h
paddle/fluid/train/custom_trainer/feed/trainer_context.h
+20
-22
paddle/fluid/train/custom_trainer/feed/unit_test/test_create_programs.cc
...ain/custom_trainer/feed/unit_test/test_create_programs.cc
+11
-8
paddle/fluid/train/custom_trainer/feed/unit_test/test_datareader.cc
...id/train/custom_trainer/feed/unit_test/test_datareader.cc
+8
-7
paddle/fluid/train/custom_trainer/feed/unit_test/test_datareader_omp.cc
...rain/custom_trainer/feed/unit_test/test_datareader_omp.cc
+6
-5
paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc
...luid/train/custom_trainer/feed/unit_test/test_executor.cc
+11
-8
未找到文件。
BCLOUD
浏览文件 @
b410c4e1
WORKROOT('../../../')
COMPILER('gcc482')
CPPFLAGS('-D_GNU_SOURCE -DNDEBUG')
GLOBAL_CFLAGS_STR = '-g -O
0
-pipe -fopenmp '
GLOBAL_CFLAGS_STR = '-g -O
3
-pipe -fopenmp '
CFLAGS(GLOBAL_CFLAGS_STR)
GLOBAL_CXXFLAGS_STR = GLOBAL_CFLAGS_STR + ' -std=c++11 '
CXXFLAGS(GLOBAL_CXXFLAGS_STR)
...
...
@@ -35,6 +35,7 @@ CONFIGS('baidu/third-party/pybind11@v2.2.4@git_branch')
CONFIGS('baidu/third-party/python@gcc482output@git_branch')
CONFIGS('baidu/third-party/yaml-cpp@yaml-cpp_0-6-2-0_GEN_PD_BL@git_tag')
CONFIGS('baidu/third-party/openmpi@openmpi_1-4-5-0-feed_mlarch@git_branch')
CONFIGS('baidu/paddlepaddle/pslib@no_abacus_in_proto@git_branch')
CONFIGS('third-64/gtest@base')
HEADERS('paddle/fluid/memory/*.h', '$INC/paddle/fluid/memory/')
HEADERS('paddle/fluid/memory/detail/*.h', '$INC/paddle/fluid/memory/detail/')
...
...
@@ -74,6 +75,7 @@ NEED_OUTPUT("baidu/third-party/openmpi")
OUTPUT('paddle/fluid/train/custom_trainer/feed/conf', '$OUT')
OUTPUT('paddle/fluid/train/custom_trainer/feed/scripts', '$OUT')
def UT_FILE(filename):
UT_DIR = 'paddle/fluid/train/custom_trainer/feed/unit_test'
import os
...
...
@@ -81,7 +83,7 @@ def UT_FILE(filename):
custom_trainer_src = GLOB('paddle/fluid/train/custom_trainer/feed/*/*.cc', Exclude(UT_FILE('*')))
CPPFLAGS_STR = '-DHPPL_STUB_FUNC -DLAPACK_FOUND -DPADDLE_DISABLE_PROFILER -DPADDLE_NO_PYTHON -DCUSTOM_TRAINER -DPADDLE_ON_INFERENCE -DPADDLE_USE_DSO -DPADDLE_USE_PTHREAD_BARRIER -DPADDLE_USE_PTHREAD_SPINLOCK -DPADDLE_VERSION=0.0.0 -DPADDLE_WITH_AVX -DPADDLE_WITH_MKLML -DPADDLE_WITH_XBYAK -DXBYAK64 -DXBYAK_NO_OP_NAMES -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -DPYBIND_AVX_MKLML' + r" -DPADDLE_REVISION=\"%s@%s@%s\"" % (REPO_URL(), REPO_BRANCH(), REPO_REVISION())
CFLAGS_STR = '-m64 -fPIC -fno-omit-frame-pointer -Werror -Wall -Wextra -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix -Wno-error=sign-compare -Wno-error=unused-local-typedefs -Wno-error=maybe-uninitialized -
fopenmp -mavx -O0
-DNDEBUG '
CFLAGS_STR = '-m64 -fPIC -fno-omit-frame-pointer -Werror -Wall -Wextra -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix -Wno-error=sign-compare -Wno-error=unused-local-typedefs -Wno-error=maybe-uninitialized -
Wno-narrowing -Wnarrowing -fopenmp -mavx -O3
-DNDEBUG '
CXXFLAGS_STR = '-std=c++11 ' + CFLAGS_STR
SharedLibrary("paddle_fluid_avx_mklml", PreBuilt(True))
...
...
paddle/fluid/string/to_string.h
浏览文件 @
b410c4e1
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <sstream>
#include <string>
#include <vector>
#include <typeindex>
namespace
paddle
{
...
...
@@ -31,6 +32,15 @@ inline std::string to_string(T v) {
return
sout
.
str
();
}
template
<
typename
T
>
inline
std
::
string
to_string
(
const
std
::
vector
<
T
>&
v_list
)
{
std
::
ostringstream
sout
;
for
(
const
auto
&
v
:
v_list
)
{
sout
<<
v
<<
" "
;
}
return
sout
.
str
();
}
template
<
>
inline
std
::
string
to_string
(
std
::
type_index
t
)
{
return
t
.
name
();
...
...
paddle/fluid/train/custom_trainer/feed/accessor/dense_input_accessor.cc
0 → 100644
浏览文件 @
b410c4e1
#include "paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
int
DenseInputAccessor
::
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
CHECK
(
DataInputAccessor
::
initialize
(
config
,
context_ptr
)
==
0
);
_total_dim
=
0
;
_pull_request_num
.
store
(
0
);
for
(
const
auto
&
input
:
config
[
"input"
])
{
DenseInputVariable
variable
;
variable
.
name
=
input
[
"name"
].
as
<
std
::
string
>
();
variable
.
gradient_name
=
paddle
::
framework
::
GradVarName
(
variable
.
name
);
variable
.
shape
=
input
[
"shape"
].
as
<
std
::
vector
<
int
>>
();
variable
.
dim
=
1
;
for
(
int
i
=
0
;
i
<
variable
.
shape
.
size
();
++
i
)
{
if
(
variable
.
shape
[
i
]
<=
0
)
{
variable
.
shape
[
i
]
=
1
;
}
variable
.
dim
*=
variable
.
shape
[
i
];
}
_total_dim
+=
variable
.
dim
;
_x_variables
.
emplace_back
(
variable
);
}
if
(
config
[
"async_pull"
]
&&
config
[
"async_pull"
].
as
<
bool
>
())
{
_need_async_pull
=
true
;
}
return
0
;
}
int32_t
DenseInputAccessor
::
create
(
::
paddle
::
framework
::
Scope
*
scope
)
{
size_t
data_buffer_idx
=
0
;
std
::
vector
<
paddle
::
ps
::
Region
>
regions
;
for
(
auto
&
variable
:
_x_variables
)
{
auto
*
tensor
=
scope
->
Var
(
variable
.
name
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
auto
*
data
=
tensor
->
data
<
float
>
();
regions
.
emplace_back
(
data
,
variable
.
dim
);
}
auto
*
ps_client
=
_trainer_context
->
pslib
->
ps_client
();
auto
push_status
=
ps_client
->
push_dense_param
(
regions
.
data
(),
regions
.
size
(),
_table_id
);
return
push_status
.
get
();
}
// rpc拉取数据,需保证单线程运行
int32_t
DenseInputAccessor
::
pull_dense
(
size_t
table_id
)
{
float
*
data_buffer
=
NULL
;
if
(
_data_buffer
==
nullptr
)
{
_data_buffer
=
new
float
[
_total_dim
];
}
// TODO 使用双buffer DataBuffer,避免训练期改写,当前异步SGD下,问题不大
data_buffer
=
_data_buffer
;
size_t
data_buffer_idx
=
0
;
std
::
vector
<
paddle
::
ps
::
Region
>
regions
;
for
(
auto
&
variable
:
_x_variables
)
{
regions
.
emplace_back
(
data_buffer
+
data_buffer_idx
,
variable
.
dim
);
data_buffer_idx
+=
variable
.
dim
;
}
auto
*
ps_client
=
_trainer_context
->
pslib
->
ps_client
();
auto
push_status
=
ps_client
->
pull_dense
(
regions
.
data
(),
regions
.
size
(),
table_id
);
return
push_status
.
get
();
}
int32_t
DenseInputAccessor
::
forward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
)
{
// 首次同步pull,之后异步pull
if
(
_data_buffer
==
nullptr
)
{
_pull_mutex
.
lock
();
if
(
_data_buffer
==
nullptr
)
{
CHECK
(
pull_dense
(
_table_id
)
==
0
);
_async_pull_thread
=
std
::
make_shared
<
std
::
thread
>
(
[
this
]()
{
while
(
_need_async_pull
)
{
if
(
_pull_request_num
>
0
)
{
pull_dense
(
_table_id
);
_pull_request_num
=
0
;
}
else
{
usleep
(
50000
);
}
}
});
}
_pull_mutex
.
unlock
();
}
size_t
data_buffer_idx
=
0
;
for
(
auto
&
variable
:
_x_variables
)
{
auto
*
shape_ptr
=
&
(
variable
.
shape
[
0
]);
paddle
::
framework
::
DDim
ddim
(
shape_ptr
,
variable
.
shape
.
size
());
auto
*
tensor
=
ScopeHelper
::
resize_lod_tensor
(
scope
,
variable
.
name
,
ddim
);
auto
*
grad_tensor
=
ScopeHelper
::
resize_lod_tensor
(
scope
,
variable
.
gradient_name
,
ddim
);
VLOG
(
5
)
<<
"fill scope variable:"
<<
variable
.
name
<<
", "
<<
variable
.
gradient_name
;
auto
*
var_data
=
tensor
->
mutable_data
<
float
>
(
_trainer_context
->
cpu_place
);
memcpy
(
var_data
,
_data_buffer
+
data_buffer_idx
,
variable
.
dim
*
sizeof
(
float
));
data_buffer_idx
+=
variable
.
dim
;
}
if
(
_need_async_pull
)
{
++
_pull_request_num
;
}
return
0
;
}
int32_t
DenseInputAccessor
::
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
)
{
if
(
!
_need_gradient
)
{
return
0
;
}
size_t
data_buffer_idx
=
0
;
std
::
vector
<
paddle
::
ps
::
Region
>
regions
;
for
(
auto
&
variable
:
_x_variables
)
{
auto
*
tensor
=
scope
->
Var
(
variable
.
gradient_name
)
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
auto
*
grad_data
=
tensor
->
mutable_data
<
float
>
(
_trainer_context
->
cpu_place
);
regions
.
emplace_back
(
grad_data
,
variable
.
dim
);
}
auto
*
ps_client
=
_trainer_context
->
pslib
->
ps_client
();
auto
push_status
=
ps_client
->
push_dense
(
regions
.
data
(),
regions
.
size
(),
_table_id
);
//return push_status.get();
return
0
;
}
int32_t
EbdVariableInputAccessor
::
forward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
)
{
CHECK
(
_x_variables
.
size
()
==
1
);
CHECK
(
_x_variables
[
0
].
shape
.
size
()
==
1
);
auto
&
variable
=
_x_variables
[
0
];
auto
*
tensor
=
ScopeHelper
::
resize_lod_tensor
(
scope
,
variable
.
name
,
{
num
,
variable
.
shape
[
0
]});
auto
*
var_data
=
tensor
->
mutable_data
<
float
>
(
_trainer_context
->
cpu_place
);
for
(
size_t
i
=
0
;
i
<
num
;
++
i
)
{
auto
&
sample
=
samples
[
i
];
CHECK
(
sample
.
embedx
.
size
()
==
variable
.
dim
);
memcpy
(
var_data
,
sample
.
embedx
.
data
(),
variable
.
dim
*
sizeof
(
float
));
var_data
+=
variable
.
dim
;
}
return
0
;
}
int32_t
EbdVariableInputAccessor
::
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
)
{
return
0
;
}
REGIST_CLASS
(
DataInputAccessor
,
DenseInputAccessor
);
REGIST_CLASS
(
DataInputAccessor
,
EbdVariableInputAccessor
);
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/accessor/epoch_accessor.cc
浏览文件 @
b410c4e1
...
...
@@ -23,6 +23,7 @@ namespace feed {
if
(
!
fs
->
exists
(
_done_file_path
))
{
VLOG
(
0
)
<<
"missing done file, path:"
<<
_done_file_path
;
return
-
1
;
}
std
::
string
done_text
=
fs
->
tail
(
_done_file_path
);
...
...
@@ -32,43 +33,90 @@ namespace feed {
_last_checkpoint_path
=
get_status
<
std
::
string
>
(
EpochStatusFiled
::
CheckpointPathField
);
return
0
;
}
int32_t
EpochAccessor
::
epoch_done
(
uint64_t
epoch_id
)
{
struct
timeval
now
;
gettimeofday
(
&
now
,
NULL
);
if
(
need_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveTrainCheckpoint
))
{
_last_checkpoint_epoch_id
=
epoch_id
;
_last_checkpoint_path
=
model_save_path
(
epoch_id
,
ModelSaveWay
::
ModelSaveTrainCheckpoint
);
}
set_status
(
EpochStatusFiled
::
EpochIdField
,
epoch_id
);
set_status
(
EpochStatusFiled
::
TimestampField
,
now
.
tv_sec
);
set_status
(
EpochStatusFiled
::
CheckpointIdField
,
_last_checkpoint_epoch_id
);
set_status
(
EpochStatusFiled
::
CheckpointPathField
,
_last_checkpoint_path
);
set_status
(
EpochStatusFiled
::
DateField
,
format_timestamp
(
epoch_id
,
"%Y%m%d"
));
int
HourlyEpochAccessor
::
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
EpochAccessor
::
initialize
(
config
,
context_ptr
);
// 非主节点不做状态持久化
if
(
!
_trainer_context
->
environment
->
is_master_node
(
EnvironmentRole
::
WORKER
))
{
return
0
;
}
auto
fs
=
_trainer_context
->
file_system
.
get
();
std
::
string
done_str
=
paddle
::
string
::
join_strings
(
_done_status
,
'\t'
);
// 保留末尾1000数据
std
::
string
tail_done_info
=
paddle
::
string
::
trim_spaces
(
fs
->
tail
(
_done_file_path
,
1000
));
if
(
tail_done_info
.
size
()
>
0
)
{
tail_done_info
=
tail_done_info
+
"
\n
"
+
done_str
;
}
else
{
tail_done_info
=
done_str
;
}
VLOG
(
2
)
<<
"Write epoch donefile to "
<<
_done_file_path
<<
", str:"
<<
done_str
;
bool
write_success
=
false
;
while
(
true
)
{
fs
->
remove
(
_done_file_path
);
auto
fp
=
fs
->
open_write
(
_done_file_path
,
""
);
if
(
fwrite
(
tail_done_info
.
c_str
(),
tail_done_info
.
length
(),
1
,
&*
fp
)
==
1
)
{
break
;
}
sleep
(
10
);
}
VLOG
(
2
)
<<
"Write epoch donefile success"
;
return
0
;
}
void
HourlyEpochAccessor
::
next_epoch
()
{
int
TimelyEpochAccessor
::
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
_time_zone_seconds
=
config
[
"time_zone_seconds"
].
as
<
int
>
();
_train_time_interval
=
config
[
"train_time_interval"
].
as
<
int
>
();
CHECK
(
_train_time_interval
>
0
&&
(
_train_time_interval
%
SecondsPerMin
)
==
0
);
_train_num_per_day
=
SecondsPerDay
/
_train_time_interval
;
return
EpochAccessor
::
initialize
(
config
,
context_ptr
);
}
void
TimelyEpochAccessor
::
next_epoch
()
{
_current_epoch_id
=
next_epoch_id
(
_current_epoch_id
);
}
std
::
string
HourlyEpochAccessor
::
text
(
uint64_t
epoch_id
)
{
return
format_timestamp
(
epoch_id
,
"%Y%m%d delta-%H"
);
std
::
string
TimelyEpochAccessor
::
text
(
uint64_t
epoch_id
)
{
auto
delta
=
delta_id
(
epoch_id
);
std
::
string
date
=
format_timestamp
(
epoch_id
,
"%Y%m%d%H%M"
);
return
string
::
format_string
(
"%s delta-%d"
,
date
.
c_str
(),
delta
);
}
uint64_t
Hour
lyEpochAccessor
::
next_epoch_id
(
uint64_t
epoch_id
)
{
uint64_t
Time
lyEpochAccessor
::
next_epoch_id
(
uint64_t
epoch_id
)
{
if
(
epoch_id
==
0
)
{
struct
timeval
now
;
gettimeofday
(
&
now
,
NULL
);
return
now
.
tv_sec
/
(
24
*
3600
)
*
(
24
*
3600
);
// 归整到零点
return
now
.
tv_sec
/
SecondsPerDay
*
SecondsPerDay
;
}
return
epoch_id
+
3600
;
return
epoch_id
+
_train_time_interval
;
}
bool
HourlyEpochAccessor
::
is_last_epoch
(
uint64_t
epoch_id
)
{
return
((
epoch_id
/
3600
)
%
24
)
==
23
;
bool
TimelyEpochAccessor
::
is_last_epoch
(
uint64_t
epoch_id
)
{
auto
delta
=
delta_id
(
epoch_id
);
return
delta
==
_train_num_per_day
;
}
uint64_t
Hour
lyEpochAccessor
::
epoch_time_interval
()
{
return
3600
;
uint64_t
Time
lyEpochAccessor
::
epoch_time_interval
()
{
return
_train_time_interval
;
}
uint64_t
Hour
lyEpochAccessor
::
epoch_timestamp
(
uint64_t
epoch_id
)
{
uint64_t
Time
lyEpochAccessor
::
epoch_timestamp
(
uint64_t
epoch_id
)
{
return
epoch_id
;
}
bool
Hour
lyEpochAccessor
::
need_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
)
{
bool
Time
lyEpochAccessor
::
need_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
)
{
if
(
epoch_id
==
0
)
{
return
false
;
}
...
...
@@ -78,24 +126,30 @@ namespace feed {
case
ModelSaveWay
::
ModelSaveInferenceBase
:
return
is_last_epoch
(
epoch_id
);
case
ModelSaveWay
::
ModelSaveTrainCheckpoint
:
return
((
epoch_id
/
3600
)
%
8
)
==
0
;
return
((
epoch_id
/
SecondsPerHour
)
%
8
)
==
0
;
}
return
false
;
}
std
::
string
HourlyEpochAccessor
::
model_save_path
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
)
{
std
::
string
TimelyEpochAccessor
::
model_save_path
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
)
{
int32_t
delta
=
delta_id
(
epoch_id
);
std
::
string
date
=
format_timestamp
(
epoch_id
,
"%Y%m%d"
);
std
::
string
date_with_hour
=
format_timestamp
(
epoch_id
,
"%Y%m%d%H"
);
switch
(
save_way
)
{
case
ModelSaveWay
::
ModelSaveInferenceDelta
:
return
_trainer_context
->
file_system
->
path_join
(
_model_root_path
,
"/xbox/delta-"
+
std
::
to_string
(
epoch_id
));
return
_trainer_context
->
file_system
->
path_join
(
_model_root_path
,
string
::
format_string
(
"xbox/%s/delta-%d"
,
date
.
c_str
(),
delta
));
case
ModelSaveWay
::
ModelSaveInferenceBase
:
return
_trainer_context
->
file_system
->
path_join
(
_model_root_path
,
"/xbox/base"
);
return
_trainer_context
->
file_system
->
path_join
(
_model_root_path
,
string
::
format_string
(
"xbox/%s/base"
,
date
.
c_str
()));
case
ModelSaveWay
::
ModelSaveTrainCheckpoint
:
return
_trainer_context
->
file_system
->
path_join
(
_model_root_path
,
"/xbox/checkpoint"
);
return
_trainer_context
->
file_system
->
path_join
(
_model_root_path
,
string
::
format_string
(
"batch_model/%s"
,
date_with_hour
.
c_str
()));
}
return
""
;
}
REGIST
ER_CLASS
(
EpochAccessor
,
Hour
lyEpochAccessor
);
REGIST
_CLASS
(
EpochAccessor
,
Time
lyEpochAccessor
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/accessor/epoch_accessor.h
浏览文件 @
b410c4e1
...
...
@@ -31,24 +31,35 @@ public:
virtual
const
std
::
string
&
checkpoint_path
()
{
return
_last_checkpoint_path
;
}
virtual
int32_t
epoch_done
(
uint64_t
epoch_id
);
template
<
class
T
>
T
get_status
(
EpochStatusFiled
field
)
{
auto
status
=
paddle
::
string
::
trim_spaces
(
_done_status
[
static_cast
<
int
>
(
field
)]);
return
boost
::
lexical_cast
<
T
>
(
status
.
c_str
());
}
virtual
void
next_epoch
()
=
0
;
template
<
class
T
>
void
set_status
(
EpochStatusFiled
field
,
const
T
&
status
)
{
auto
str_status
=
paddle
::
string
::
to_string
(
status
);
_done_status
[
static_cast
<
int
>
(
field
)]
=
str_status
;
return
;
}
virtual
std
::
string
model_root_path
()
{
return
_model_root_path
;
}
virtual
void
next_epoch
()
=
0
;
virtual
std
::
string
text
(
uint64_t
epoch_id
)
=
0
;
virtual
uint64_t
next_epoch_id
(
uint64_t
epoch_id
)
=
0
;
virtual
bool
is_last_epoch
(
uint64_t
epoch_id
)
=
0
;
//epoch间的数据时间间隔(秒)
virtual
uint64_t
epoch_time_interval
()
=
0
;
//获取epoch的样本数据时间
virtual
uint64_t
epoch_timestamp
(
uint64_t
epoch_id
)
=
0
;
virtual
bool
need_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
)
=
0
;
virtual
std
::
string
model_save_path
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
)
=
0
;
protected:
...
...
@@ -61,12 +72,12 @@ protected:
std
::
vector
<
std
::
string
>
_done_status
;
//当前完成状态,统一存成string
};
REGIST
ER
_REGISTERER
(
EpochAccessor
);
REGIST_REGISTERER
(
EpochAccessor
);
class
Hour
lyEpochAccessor
:
public
EpochAccessor
{
class
Time
lyEpochAccessor
:
public
EpochAccessor
{
public:
Hour
lyEpochAccessor
()
{}
virtual
~
Hour
lyEpochAccessor
()
{}
Time
lyEpochAccessor
()
{}
virtual
~
Time
lyEpochAccessor
()
{}
virtual
int
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
);
virtual
void
next_epoch
();
...
...
@@ -77,6 +88,14 @@ public:
virtual
uint64_t
epoch_timestamp
(
uint64_t
epoch_id
);
virtual
bool
need_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
);
virtual
std
::
string
model_save_path
(
uint64_t
epoch_id
,
ModelSaveWay
save_way
);
private:
inline
size_t
delta_id
(
uint64_t
epoch_id
)
{
return
((
epoch_id
+
_time_zone_seconds
)
%
SecondsPerDay
)
/
_train_time_interval
;
}
uint32_t
_time_zone_seconds
;
// 相对UTC时差(秒)
uint32_t
_train_time_interval
;
// 训练时间间隔(秒)
uint32_t
_train_num_per_day
;
// 天级训练总轮数
};
}
// namespace feed
...
...
paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h
0 → 100644
浏览文件 @
b410c4e1
#pragma once
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/train/custom_trainer/feed/accessor/accessor.h"
#include "paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h"
#include "paddle/fluid/train/custom_trainer/feed/common/scope_helper.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
class
DataInputAccessor
:
public
Accessor
{
public:
DataInputAccessor
()
{}
virtual
~
DataInputAccessor
()
{}
virtual
int
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
_trainer_context
=
context_ptr
.
get
();
_table_id
=
config
[
"table_id"
].
as
<
int
>
();
_need_gradient
=
config
[
"need_gradient"
].
as
<
bool
>
();
return
0
;
}
// 创建,一般用于模型冷启的随机初始化
virtual
int32_t
create
(
::
paddle
::
framework
::
Scope
*
scope
)
{
return
0
;
}
// 前向, 一般用于填充输入,在训练网络执行前调用
virtual
int32_t
forward
(
SampleInstance
*
samples
,
size_t
num
,
::
paddle
::
framework
::
Scope
*
scope
)
=
0
;
// 后向,一般用于更新梯度,在训练网络执行后调用
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
::
paddle
::
framework
::
Scope
*
scope
)
=
0
;
protected:
size_t
_table_id
=
0
;
bool
_need_gradient
=
false
;
TrainerContext
*
_trainer_context
=
nullptr
;
};
REGIST_REGISTERER
(
DataInputAccessor
);
struct
LabelInputVariable
{
std
::
string
label_name
;
std
::
string
output_name
;
size_t
label_dim
=
0
;
};
class
LabelInputAccessor
:
public
DataInputAccessor
{
public:
LabelInputAccessor
()
{}
virtual
~
LabelInputAccessor
()
{}
virtual
int
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
);
virtual
int32_t
forward
(
SampleInstance
*
samples
,
size_t
num
,
::
paddle
::
framework
::
Scope
*
scope
);
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
::
paddle
::
framework
::
Scope
*
scope
);
protected:
size_t
_label_total_dim
=
0
;
std
::
vector
<
LabelInputVariable
>
_labels
;
};
struct
SparseInputVariable
{
size_t
slot_dim
;
size_t
total_dim
;
std
::
string
name
;
std
::
string
gradient_name
;
std
::
vector
<
int32_t
>
slot_idx
;
std
::
vector
<
uint16_t
>
slot_list
;
};
struct
SparseVarRuntimeData
{
uint32_t
row_size
;
uint32_t
total_size
;
float
*
variable_data
;
float
*
gradient_data
;
};
class
BaseSparseInputAccessor
:
public
DataInputAccessor
{
public:
BaseSparseInputAccessor
()
{}
virtual
~
BaseSparseInputAccessor
()
{}
virtual
int
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
);
// forword过程的input填充
virtual
int32_t
forward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
// 取得单个SparseKey的PullValue, 实现单个SparseValue的填充
virtual
void
fill_input
(
float
*
var_data
,
const
float
*
pull_raw
,
paddle
::
ps
::
ValueAccessor
&
,
SparseInputVariable
&
,
SampleInstance
&
)
=
0
;
// 所有SparseValue填充完成后,调用,可进一步全局处理
virtual
void
post_process_input
(
float
*
var_data
,
SparseInputVariable
&
,
SampleInstance
*
,
size_t
num
)
=
0
;
// backward过程的梯度push
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
// SparseGradValue会被依次调用,用于整理push的梯度
virtual
void
fill_gradient
(
float
*
push_value
,
const
float
*
gradient_raw
,
paddle
::
ps
::
ValueAccessor
&
,
SparseInputVariable
&
,
SampleInstance
&
)
=
0
;
protected:
// 输入层列表
std
::
vector
<
SparseInputVariable
>
_x_variables
;
};
struct
DenseInputVariable
{
size_t
dim
;
std
::
string
name
;
std
::
vector
<
int
>
shape
;
std
::
string
gradient_name
;
};
class
DenseInputAccessor
:
public
DataInputAccessor
{
public:
DenseInputAccessor
()
{}
virtual
~
DenseInputAccessor
()
{
if
(
_data_buffer
)
{
delete
[]
_data_buffer
;
}
_need_async_pull
=
false
;
if
(
_async_pull_thread
)
{
_async_pull_thread
->
join
();
}
}
virtual
int
initialize
(
YAML
::
Node
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
);
virtual
int32_t
create
(
::
paddle
::
framework
::
Scope
*
scope
);
virtual
int32_t
forward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
protected:
virtual
int32_t
pull_dense
(
size_t
table_id
);
size_t
_total_dim
=
0
;
std
::
mutex
_pull_mutex
;
bool
_need_async_pull
=
false
;
float
*
_data_buffer
=
nullptr
;
std
::
atomic
<
int
>
_pull_request_num
;
std
::
vector
<
DenseInputVariable
>
_x_variables
;
std
::
shared_ptr
<
std
::
thread
>
_async_pull_thread
;
};
class
EbdVariableInputAccessor
:
public
DenseInputAccessor
{
public:
EbdVariableInputAccessor
()
{}
virtual
~
EbdVariableInputAccessor
()
{}
virtual
int32_t
forward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
virtual
int32_t
backward
(
SampleInstance
*
samples
,
size_t
num
,
paddle
::
framework
::
Scope
*
scope
);
};
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/accessor/label_input_accessor.cc
0 → 100644
浏览文件 @
b410c4e1
#include "paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
// Parses the "input" list from config; each entry describes one label
// variable: its feed-scope name, the matching net-output name, and its shape.
// Negative (dynamic) shape entries contribute 0 to the flattened dim.
int LabelInputAccessor::initialize(YAML::Node config,
    std::shared_ptr<TrainerContext> context_ptr) {
    _trainer_context = context_ptr.get();
    _label_total_dim = 0;
    for (const auto& input_node : config["input"]) {
        LabelInputVariable label_var;
        label_var.label_name = input_node["label_name"].as<std::string>();
        label_var.output_name = input_node["output_name"].as<std::string>();
        const auto dims = input_node["shape"].as<std::vector<int>>();
        size_t flat_dim = 0;
        for (int d : dims) {
            if (d > 0) {
                flat_dim += d;
            }
        }
        label_var.label_dim = flat_dim;
        _label_total_dim += label_var.label_dim;
        _labels.emplace_back(label_var);
    }
    return 0;
}
// Copies each sample's label slice into the per-label input tensors, and
// resizes the matching output tensors so the net can write predictions later.
int32_t LabelInputAccessor::forward(SampleInstance* samples, size_t num,
    paddle::framework::Scope* scope) {
    if (num < 1) {
        return 0;
    }
    size_t label_offset = 0;  // offset of the current label inside sample.labels
    for (auto& label : _labels) {
        auto* label_tensor = ScopeHelper::resize_lod_tensor(
            scope, label.label_name, {num, label.label_dim});
        // The output tensor is only resized here; the net fills it in later.
        ScopeHelper::resize_lod_tensor(
            scope, label.output_name, {num, label.label_dim});
        auto* dst = label_tensor->mutable_data<float>(_trainer_context->cpu_place);
        for (size_t row = 0; row < num; ++row) {
            auto& sample = samples[row];
            CHECK(sample.labels.size() > label_offset);
            const float* src = sample.labels.data() + label_offset;
            memcpy(dst + row * label.label_dim, src,
                label.label_dim * sizeof(float));
        }
        label_offset += label.label_dim;
    }
    return 0;
}
// Copies the net's per-label output tensors back into each sample's
// `predicts` buffer (concatenated in _labels order).
int32_t LabelInputAccessor::backward(SampleInstance* samples, size_t num,
    paddle::framework::Scope* scope) {
    if (num < 1) {
        return 0;
    }
    for (size_t row = 0; row < num; ++row) {
        auto& sample = samples[row];
        sample.predicts.resize(_label_total_dim);
        float* predict_dst = sample.predicts.data();
        size_t predict_offset = 0;  // write position inside sample.predicts
        for (auto& label : _labels) {
            auto* out_tensor = scope->Var(label.output_name)
                ->GetMutable<paddle::framework::LoDTensor>();
            const float* out_data =
                out_tensor->mutable_data<float>(_trainer_context->cpu_place);
            memcpy(predict_dst + predict_offset,
                out_data + row * label.label_dim,
                label.label_dim * sizeof(float));
            predict_offset += label.label_dim;
        }
    }
    return 0;
}
REGIST_CLASS
(
DataInputAccessor
,
LabelInputAccessor
);
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/accessor/sparse_input_accessor.cc
0 → 100644
浏览文件 @
b410c4e1
#include <math.h>
#include <vector>
#include <utility>
#include "paddle/fluid/string/string_helper.h"
#include "paddle/fluid/train/custom_trainer/feed/common/scope_helper.h"
#include "paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
// Parses the sparse-input section of the config. Each "input" entry maps a
// list of feature slots onto one network variable whose flattened dim is
// slot_count * slot_dim. slot_idx is a reverse map: slot id -> position in
// slot_list, -1 when the slot does not feed this variable.
int BaseSparseInputAccessor::initialize(YAML::Node config,
    std::shared_ptr<TrainerContext> context_ptr) {
    CHECK(DataInputAccessor::initialize(config, context_ptr) == 0);
    for (const auto& input : config["input"]) {
        SparseInputVariable variable;
        variable.name = input["name"].as<std::string>();
        variable.gradient_name = paddle::framework::GradVarName(variable.name);
        auto slots = input["slots"].as<std::vector<int>>();
        // UINT16_MAX + 1 entries so that slot id 65535 itself is addressable;
        // sizing with UINT16_MAX left the largest uint16_t id out of range.
        variable.slot_idx.resize(UINT16_MAX + 1, -1);
        for (size_t i = 0; i < slots.size(); ++i) {
            uint16_t slot = (uint16_t)slots[i];
            variable.slot_idx[slot] = i;
            variable.slot_list.push_back(slot);
        }
        variable.slot_dim = input["slot_dim"].as<int>();
        variable.total_dim = variable.slot_list.size() * variable.slot_dim;
        _x_variables.push_back(variable);
    }
    return 0;
}
// Pulls sparse parameters for every feature in the batch and scatters them
// into the per-variable input tensors in the scope.
int32_t BaseSparseInputAccessor::forward(SampleInstance* samples,
    size_t num, paddle::framework::Scope* scope) {
    CHECK(num > 0);
    auto* ps_client = _trainer_context->pslib->ps_client();
    auto* value_accessor = ps_client->table_accessor(_table_id);

    size_t key_num = 0;
    for (size_t i = 0; i < num; ++i) {
        key_num += samples[i].features.size();
    }
    std::vector<uint64_t> keys(key_num);
    // std::vector instead of raw new[]/delete[]: exception-safe, no leak on
    // any return path.
    std::vector<float*> pull_values(key_num);
    auto pull_value_dim = value_accessor->select_dim();

    // Build the pull request: one key + one destination buffer per feature.
    size_t key_idx = 0;
    for (size_t i = 0; i < num; ++i) {
        auto& features = samples[i].features;
        for (auto& feature_item : features) {
            feature_item.weights.resize(pull_value_dim, 0.0);
            keys[key_idx] = feature_item.sign();
            pull_values[key_idx++] = &(feature_item.weights[0]);
        }
    }
    auto pull_status = ps_client->pull_sparse(
        pull_values.data(), _table_id, keys.data(), key_num);
    auto ret = pull_status.get();
    if (ret != 0) {
        VLOG(0) << "pull sparse failed, table_id:" << _table_id
            << ", key_num:" << key_num << ", ret:" << ret;
        return ret;
    }

    // Runtime bookkeeping outlives this call (it is freed in backward());
    // its address is smuggled through the scope as an int64 value.
    auto* runtime_data_ptr = new std::vector<SparseVarRuntimeData>();
    auto& var_runtime_data = *runtime_data_ptr;
    var_runtime_data.resize(_x_variables.size());
    int64_t runtime_data_for_scope = (int64_t)runtime_data_ptr;
    ScopeHelper::fill_value(scope, _trainer_context->cpu_place,
        "sparse_runtime_data", runtime_data_for_scope);

    // Resize and zero the input and gradient tensors of every sparse variable.
    for (size_t i = 0; i < _x_variables.size(); ++i) {
        const auto& variable = _x_variables[i];
        var_runtime_data[i].row_size = num;
        var_runtime_data[i].total_size = num * variable.total_dim;
        auto* tensor = ScopeHelper::resize_lod_tensor(
            scope, variable.name, {num, variable.total_dim});
        auto* grad_tensor = ScopeHelper::resize_lod_tensor(
            scope, variable.gradient_name, {num, variable.total_dim});
        VLOG(5) << "fill scope variable:" << variable.name
            << ", " << variable.gradient_name;
        var_runtime_data[i].variable_data =
            tensor->mutable_data<float>(_trainer_context->cpu_place);
        var_runtime_data[i].gradient_data =
            grad_tensor->mutable_data<float>(_trainer_context->cpu_place);
        memset((void*)var_runtime_data[i].variable_data, 0,
            var_runtime_data[i].total_size * sizeof(float));
        memset((void*)var_runtime_data[i].gradient_data, 0,
            var_runtime_data[i].total_size * sizeof(float));
    }

    // Scatter pulled values: each feature contributes to every variable that
    // lists its slot.
    for (size_t samp_idx = 0; samp_idx < num; ++samp_idx) {
        auto& features = samples[samp_idx].features;
        for (auto& feature_item : features) {
            for (size_t i = 0; i < _x_variables.size(); ++i) {
                auto& variable = _x_variables[i];
                auto slot_idx = variable.slot_idx[feature_item.slot()];
                if (slot_idx < 0) {
                    continue;
                }
                float* item_data = var_runtime_data[i].variable_data +
                    samp_idx * variable.total_dim + variable.slot_dim * slot_idx;
                fill_input(item_data, &(feature_item.weights[0]),
                    *value_accessor, variable, samples[samp_idx]);
            }
        }
    }

    // Per-variable post processing hook (e.g. the log transform done by
    // AbacusSparseJoinAccessor).
    for (size_t i = 0; i < _x_variables.size(); ++i) {
        auto& variable = _x_variables[i];
        post_process_input(var_runtime_data[i].variable_data,
            variable, samples, num);
    }
    return 0;
}
// Gathers per-feature gradients from the variables' gradient tensors and
// pushes them to the sparse table. Also frees the runtime data that
// forward() allocated and stashed in the scope.
int32_t BaseSparseInputAccessor::backward(SampleInstance* samples,
    size_t num, paddle::framework::Scope* scope) {
    int64_t runtime_data_for_scope = *ScopeHelper::get_value<int64_t>(
        scope, _trainer_context->cpu_place, "sparse_runtime_data");
    auto* runtime_data_ptr =
        (std::vector<SparseVarRuntimeData>*)runtime_data_for_scope;
    auto& var_runtime_data = *runtime_data_ptr;
    // Free the forward() allocation on every return path below.
    DoneGuard gurad([runtime_data_ptr](){
        delete runtime_data_ptr;
    });
    if (!_need_gradient) {
        return 0;
    }
    auto* ps_client = _trainer_context->pslib->ps_client();
    auto* value_accessor = ps_client->table_accessor(_table_id);

    size_t key_num = 0;
    for (size_t i = 0; i < num; ++i) {
        key_num += samples[i].features.size();
    }
    // push_back-based fill: a feature whose slot feeds several variables
    // produces one push entry per match, so the final count can exceed the
    // feature count (the old fixed-size new[] buffers could overflow then).
    std::vector<uint64_t> keys;
    keys.reserve(key_num);
    std::vector<float*> push_values;
    push_values.reserve(key_num);
    auto push_value_dim = value_accessor->update_dim();

    for (size_t samp_idx = 0; samp_idx < num; ++samp_idx) {
        auto& features = samples[samp_idx].features;
        for (auto& feature_item : features) {
            feature_item.gradients.resize(push_value_dim, 0.0);
            for (size_t i = 0; i < _x_variables.size(); ++i) {
                auto& variable = _x_variables[i];
                auto slot_idx = variable.slot_idx[feature_item.slot()];
                if (slot_idx < 0) {
                    continue;
                }
                const float* grad_data = var_runtime_data[i].gradient_data +
                    samp_idx * variable.total_dim + variable.slot_dim * slot_idx;
                fill_gradient(&(feature_item.gradients[0]), grad_data,
                    *value_accessor, variable, samples[samp_idx]);
                keys.push_back(feature_item.sign());
                push_values.push_back(&(feature_item.gradients[0]));
            }
        }
    }
    auto push_status = ps_client->push_sparse(_table_id, keys.data(),
        (const float**)push_values.data(), keys.size());
    // Fire-and-forget by design; the push status is intentionally not waited on.
    //auto ret = push_status.get();
    return 0;
}
// Join-phase accessor: accumulates raw pulled values into the input tensor
// and log-transforms the leading show/ctr columns. Gradients are never
// pushed in this phase.
class AbacusSparseJoinAccessor : public BaseSparseInputAccessor {
public:
    AbacusSparseJoinAccessor() {}
    virtual ~AbacusSparseJoinAccessor() {}

    // Adds the pulled value (first slot_dim floats) onto the slot's segment.
    virtual void fill_input(float* var_data, const float* pull_raw,
        paddle::ps::ValueAccessor& value_accessor,
        SparseInputVariable& variable, SampleInstance& sample) {
        for (size_t i = 0; i < variable.slot_dim; ++i) {
            var_data[i] += pull_raw[i];
        }
    }

    // Walks every (row, slot) segment and rewrites its first two floats:
    // col0 -> log(show+1), col1 -> log(clk+1) - log(show+1).
    // NOTE(review): assumes slot_dim >= 2 — confirm against config.
    virtual void post_process_input(float* var_data,
        SparseInputVariable& variable, SampleInstance* samples, size_t num) {
        for (size_t i = 0; i < num * variable.slot_list.size(); ++i) {
            var_data[0] = log(var_data[0] + 1);  // show
            var_data[1] = log(var_data[1] + 1) - var_data[0];  // ctr
            var_data += variable.slot_dim;
        }
    }

    // The join phase never pushes gradients back; reaching here is a bug.
    virtual void fill_gradient(float* push_value, const float* gradient_raw,
        paddle::ps::ValueAccessor& value_accessor,
        SparseInputVariable& variable, SampleInstance& sample) {
        CHECK(false);
        return;
    }
};
REGIST_CLASS
(
DataInputAccessor
,
AbacusSparseJoinAccessor
);
// Update-phase accessor: reads embedding values starting at offset 2 of the
// pulled record and pushes show/clk counters plus gradients on backward.
class AbacusSparseUpdateAccessor : public BaseSparseInputAccessor {
public:
    AbacusSparseUpdateAccessor() {}
    virtual ~AbacusSparseUpdateAccessor() {}

    // Adds pull_raw[2 .. 2+slot_dim) onto the slot's segment; the first two
    // pulled floats are skipped (presumably show/clk — verify against the
    // table accessor layout).
    virtual void fill_input(float* var_data, const float* pull_raw,
        paddle::ps::ValueAccessor& value_accessor,
        SparseInputVariable& variable, SampleInstance& sample) {
        for (size_t i = 0; i < variable.slot_dim; ++i) {
            var_data[i] += pull_raw[i + 2];
        }
    }

    // No post processing in the update phase.
    virtual void post_process_input(float* var_data,
        SparseInputVariable& variable, SampleInstance* samples, size_t num) {
        return;
    }

    // push_value layout: [0] show count, [1] click (labels[0]),
    // [2..2+slot_dim) gradient. Accumulates across calls for the same key.
    // NOTE(review): assumes sample.labels is non-empty — confirm with caller.
    virtual void fill_gradient(float* push_value, const float* gradient_raw,
        paddle::ps::ValueAccessor& value_accessor,
        SparseInputVariable& variable, SampleInstance& sample) {
        push_value[0] += 1;
        push_value[1] += sample.labels[0];
        for (size_t i = 0; i < variable.slot_dim; ++i) {
            push_value[i + 2] += gradient_raw[i];
        }
        return;
    }
};
REGIST_CLASS
(
DataInputAccessor
,
AbacusSparseUpdateAccessor
);
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/common/pipeline.h
浏览文件 @
b410c4e1
...
...
@@ -6,12 +6,22 @@ namespace paddle {
namespace
custom_trainer
{
namespace
feed
{
// Scope guard: runs the supplied callback when the guard is destroyed.
// The callback is moved in, and an empty std::function is tolerated —
// the original called _func() unconditionally, which would throw
// std::bad_function_call out of a destructor when given an empty callback.
class DoneGuard {
public:
    DoneGuard(std::function<void()> func) : _func(std::move(func)) {}
    virtual ~DoneGuard() {
        if (_func) {
            _func();
        }
    }
private:
    std::function<void()> _func;  // invoked at most once, on destruction
};
class
PipelineOptions
{
public:
PipelineOptions
()
=
default
;
uint32_t
buffer_data_num
=
400
;
//缓冲区数据个数,需大于batch_size
uint32_t
batch_size
=
100
;
//从pipe读数据的batch大小
bool
need_hold_input_data
=
false
;
//是否保存input流数据,否则消费后释放
uint32_t
batch_size
=
10
;
// pipe输出的batch大小
uint32_t
thread_num
=
1
;
// converter的并发线程数
float
input_output_rate
=
1
;
// 输入/输出 qps流量比
uint32_t
buffer_batch_count
=
4
;
// pipe预存count组batch数据
bool
need_hold_input_data
=
false
;
// 是否保存input流数据,否则消费后释放
};
/*
...
...
@@ -29,7 +39,8 @@ public:
Pipeline
()
{}
Pipeline
(
Pipeline
&&
)
=
delete
;
Pipeline
(
const
Pipeline
&
)
=
delete
;
typedef
std
::
function
<
int
(
const
TypeIn
*
,
TypeOut
*
,
size_t
num
)
>
PipeDataConverter
;
typedef
std
::
function
<
int
(
TypeIn
*
,
size_t
in_num
,
TypeOut
*
,
size_t
*
out_num
,
size_t
thread_idx
)
>
PipeDataConverter
;
int
initialize
(
const
PipelineOptions
&
options
,
::
paddle
::
framework
::
Channel
<
TypeIn
>
input_channel
,
...
...
@@ -42,18 +53,16 @@ public:
_converter
=
data_converter
;
_input_channel
=
input_channel
;
_output_channel
=
::
paddle
::
framework
::
MakeChannel
<
TypeOut
>
();
auto
batch_size
=
options
.
batch_siz
e
;
auto
buffer_data_num
=
options
.
buffer_data_num
;
_input_
channel
->
SetBlockSize
(
batch_size
);
_output_
channel
->
SetBlockSize
(
batch_size
);
_
input_data_buffer
.
resize
(
buffer_data_num
);
_output_data_buffer
.
resize
(
buffer_data_num
);
if
(
buffer_data_num
/
batch_size
<
3
)
{
buffer_data_num
=
batch_size
*
3
;
_output_channel
->
SetBlockSize
(
options
.
batch_size
);
size_t
input_batch_size
=
options
.
batch_size
*
options
.
input_output_rat
e
;
_input_channel
->
SetBlockSize
(
input_batch_size
)
;
_input_
data_buffer
.
resize
(
input_batch_size
*
options
.
buffer_batch_count
);
_output_
data_buffer
.
resize
(
options
.
batch_size
*
options
.
buffer_batch_count
);
_
output_channel
->
SetCapacity
(
_output_data_buffer
.
size
()
);
if
(
_options
.
need_hold_input_data
)
{
_input_channel_backup
=
::
paddle
::
framework
::
MakeChannel
<
TypeIn
>
();
_input_channel_backup
->
SetBlockSize
(
input_batch_size
)
;
}
buffer_data_num
=
(
buffer_data_num
/
batch_size
)
*
batch_size
;
_output_channel
->
SetCapacity
(
buffer_data_num
);
CHECK
(
_input_channel
!=
nullptr
)
<<
" Input Channel is null"
;
_convert_thread
=
std
::
make_shared
<
std
::
thread
>
([
this
](){
async_convert_data
();
...
...
@@ -63,7 +72,9 @@ public:
template
<
class
PreTypeIn
>
int
connect_to
(
Pipeline
<
PreTypeIn
,
TypeIn
>&
pre_pipeline
,
PipeDataConverter
data_converter
)
{
PipelineOptions
&
options
,
PipeDataConverter
data_converter
)
{
// 保证全局batch一致
options
.
batch_size
=
pre_pipeline
.
options
().
batch_size
/
options
.
input_output_rate
;
return
initialize
(
pre_pipeline
.
options
(),
pre_pipeline
.
output_chnnel
(),
data_converter
);
}
...
...
@@ -87,30 +98,36 @@ public:
inline
::
paddle
::
framework
::
Channel
<
TypeOut
>
output_chnnel
()
{
return
_output_channel
;
}
// 返回对input_channel的消费备份
inline
::
paddle
::
framework
::
Channel
<
TypeIn
>
backup_channel
()
{
return
_input_channel_backup
;
}
private:
void
async_convert_data
()
{
size_t
convete_batch_size
=
_input_data_buffer
.
size
()
/
4
;
if
(
convete_batch_size
<
_options
.
batch_size
*
3
)
{
convete_batch_size
=
3
*
_options
.
batch_size
;
}
convete_batch_size
=
(
convete_batch_size
/
_options
.
batch_size
)
*
_options
.
batch_size
;
size_t
input_batch_size
=
_options
.
batch_size
*
_options
.
input_output_rate
;
while
(
!
_is_read_end
)
{
while
(
_output_channel
->
Size
()
<
_input_data_buffer
.
size
())
{
size_t
read_size
=
_input_channel
->
Read
(
convete
_batch_size
,
&
_input_data_buffer
[
0
]);
Read
(
input
_batch_size
,
&
_input_data_buffer
[
0
]);
if
(
read_size
==
0
)
{
_is_read_end
=
true
;
break
;
}
CHECK
(
_converter
(
&
_input_data_buffer
[
0
],
&
_output_data_buffer
[
0
],
read_size
)
==
0
)
<<
"Data Converter Do Failed"
;
_output_channel
->
WriteMove
(
read_size
,
&
_output_data_buffer
[
0
]);
if
(
_options
.
need_hold_input_data
)
{
size_t
write_size
=
0
;
CHECK
(
_converter
(
&
_input_data_buffer
[
0
],
read_size
,
&
_output_data_buffer
[
0
],
&
write_size
,
0
)
==
0
)
<<
"Data Converter Do Failed"
;
_output_channel
->
WriteMove
(
write_size
,
&
_output_data_buffer
[
0
]);
if
(
_input_channel_backup
)
{
_input_channel_backup
->
WriteMove
(
read_size
,
&
_input_data_buffer
[
0
]);
}
}
sleep
(
1
);
}
_output_channel
->
Close
();
if
(
_input_channel_backup
)
{
_input_channel_backup
->
Close
();
}
}
...
...
paddle/fluid/train/custom_trainer/feed/common/pslib_warpper.cc
0 → 100644
浏览文件 @
b410c4e1
#include <fcntl.h>
#include <fstream>
#include <sstream>
#include "json2pb/json_to_pb.h"
#include <google/protobuf/text_format.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include "paddle/fluid/train/custom_trainer/feed/common/pslib_warpper.h"
#include "paddle/fluid/train/custom_trainer/feed/common/runtime_environment.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
int
PSlib
::
initialize
(
const
std
::
string
&
conf_path
,
RuntimeEnvironment
*
environment
)
{
_environment
=
environment
;
init_gflag
();
int
file_descriptor
=
open
(
conf_path
.
c_str
(),
O_RDONLY
);
if
(
file_descriptor
==
-
1
){
LOG
(
ERROR
)
<<
"FATAL: cant open "
<<
conf_path
;
return
-
1
;
}
google
::
protobuf
::
io
::
FileInputStream
fileInput
(
file_descriptor
);
if
(
!
google
::
protobuf
::
TextFormat
::
Parse
(
&
fileInput
,
&
_ps_param
))
{
LOG
(
ERROR
)
<<
"FATAL: fail to parse "
<<
conf_path
;
return
-
1
;
}
close
(
file_descriptor
);
init_server
();
init_client
();
return
0
;
}
// Starts the parameter server on nodes holding the PSERVER role, then joins
// the server rendezvous (gather_ps_servers) on every node so clients learn
// the server list.
int PSlib::init_server() {
    if (_environment->is_role(EnvironmentRole::PSERVER)) {
        _server_ptr.reset(paddle::ps::PSServerFactory::create(_ps_param));
        _server_ptr->configure(_ps_param, *(_environment->ps_environment()),
            _environment->rank_id(EnvironmentRole::PSERVER));
        _server_ptr->start();
    }
    // Called by all roles: collects the started servers' endpoints.
    _environment->ps_environment()->gather_ps_servers();
    return 0;
}
// Creates and configures the PS client; runs on every node (rank taken from
// the ALL role).
int PSlib::init_client() {
    _client_ptr.reset(paddle::ps::PSClientFactory::create(_ps_param));
    _client_ptr->configure(_ps_param, *(_environment->ps_environment()),
        _environment->rank_id(EnvironmentRole::ALL));
    return 0;
}
// Returns the server instance (non-owning; nullptr on non-PSERVER nodes).
paddle::ps::PSServer* PSlib::ps_server() {
    return _server_ptr.get();
}
// Returns the client instance (non-owning; valid after initialize()).
paddle::ps::PSClient* PSlib::ps_client() {
    return _client_ptr.get();
}
// Returns a pointer to the parsed PS parameter proto (owned by this object).
paddle::PSParameter* PSlib::get_param() {
    return &_ps_param;
}
// Seeds brpc/gflags with fixed defaults before the PS stack starts.
// The original held the argv array in a std::shared_ptr<char*> created from
// new char*[cnt]; the default deleter uses scalar delete on an array
// allocation, which is undefined behavior. A stack array needs no ownership
// management at all (ParseCommandLineFlags only reads during the call).
void PSlib::init_gflag() {
    int cnt = 4;
    char p0[] = "exe default";
    char p1[] = "-max_body_size=314217728";
    char p2[] = "-bthread_concurrency=40";
    char p3[] = "-socket_max_unwritten_bytes=2048000000";
    char* params[] = {p0, p1, p2, p3};
    char** params_ptr = params;
    ::google::ParseCommandLineFlags(&cnt, &params_ptr, true);
}
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/common/pslib_warpper.h
0 → 100644
浏览文件 @
b410c4e1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// Hide BLOG
#define BUTIL_LOGGING_H_
#define COMPACT_GOOGLE_LOG_NOTICE COMPACT_GOOGLE_LOG_INFO
#include "communicate/ps_server.h"
#include "communicate/ps_client.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
class
RuntimeEnvironment
;
class
PSlib
{
public:
PSlib
()
{}
virtual
~
PSlib
()
{}
int
initialize
(
const
std
::
string
&
conf_path
,
RuntimeEnvironment
*
environment
);
virtual
paddle
::
ps
::
PSServer
*
ps_server
();
virtual
paddle
::
ps
::
PSClient
*
ps_client
();
virtual
paddle
::
PSParameter
*
get_param
();
private:
void
init_gflag
();
virtual
int
init_server
();
virtual
int
init_client
();
paddle
::
PSParameter
_ps_param
;
RuntimeEnvironment
*
_environment
;
std
::
shared_ptr
<
paddle
::
ps
::
PSServer
>
_server_ptr
;
std
::
shared_ptr
<
paddle
::
ps
::
PSClient
>
_client_ptr
;
};
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/common/registerer.cc
浏览文件 @
b410c4e1
...
...
@@ -3,12 +3,12 @@ namespace paddle {
namespace
custom_trainer
{
namespace
feed
{
BaseClassMap
&
global_factory_map
()
{
BaseClassMap
&
global_
reg_
factory_map
()
{
static
BaseClassMap
*
base_class
=
new
BaseClassMap
();
return
*
base_class
;
}
BaseClassMap
&
global_factory_map_cpp
()
{
return
global_factory_map
();
BaseClassMap
&
global_
reg_
factory_map_cpp
()
{
return
global_
reg_
factory_map
();
}
}
// feed
...
...
paddle/fluid/train/custom_trainer/feed/common/registerer.h
浏览文件 @
b410c4e1
...
...
@@ -63,23 +63,23 @@ typedef std::map<std::string, FactoryMap> BaseClassMap;
#ifdef __cplusplus
extern
"C"
{
#endif
BaseClassMap
&
global_factory_map
();
BaseClassMap
&
global_
reg_
factory_map
();
#ifdef __cplusplus
}
#endif
BaseClassMap
&
global_factory_map_cpp
();
BaseClassMap
&
global_
reg_
factory_map_cpp
();
#define REGIST
ER
_REGISTERER(base_class) \
#define REGIST_REGISTERER(base_class) \
class base_class ## Registerer { \
public: \
static base_class *CreateInstanceByName(const ::std::string &name) { \
if (global_factory_map_cpp().find(#base_class) \
== global_factory_map_cpp().end()) { \
if (global_
reg_
factory_map_cpp().find(#base_class) \
== global_
reg_
factory_map_cpp().end()) { \
LOG(ERROR) << "Can't Find BaseClass For CreateClass with:" << #base_class; \
return NULL; \
} \
FactoryMap &map = global_factory_map_cpp()[#base_class]; \
FactoryMap &map = global_
reg_
factory_map_cpp()[#base_class]; \
FactoryMap::iterator iter = map.find(name); \
if (iter == map.end()) { \
LOG(ERROR) << "Can't Find Class For Create with:" << name; \
...
...
@@ -90,7 +90,7 @@ BaseClassMap& global_factory_map_cpp();
} \
};
#define REGIST
ER
_CLASS(clazz, name) \
#define REGIST_CLASS(clazz, name) \
class ObjectFactory##name : public ObjectFactory { \
public: \
Any NewInstance() { \
...
...
@@ -98,14 +98,14 @@ BaseClassMap& global_factory_map_cpp();
} \
}; \
void register_factory_##name() { \
FactoryMap &map = global_factory_map_cpp()[#clazz]; \
FactoryMap &map = global_
reg_
factory_map_cpp()[#clazz]; \
if (map.find(#name) == map.end()) { \
map[#name] = new ObjectFactory##name(); \
} \
} \
void register_factory_##name() __attribute__((constructor));
#define CREATE_
CLASS
(base_class, name) \
#define CREATE_
INSTANCE
(base_class, name) \
base_class##Registerer::CreateInstanceByName(name)
}
//namespace feed
...
...
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.cc
浏览文件 @
b410c4e1
...
...
@@ -93,9 +93,14 @@ public:
return
-
1
;
}
_roles_node_info
.
resize
(
static_cast
<
int
>
(
EnvironmentRole
::
ALL
)
+
1
);
set
_role
(
EnvironmentRole
::
ALL
);
add
_role
(
EnvironmentRole
::
ALL
);
return
0
;
}
virtual
paddle
::
ps
::
PSEnvironment
*
ps_environment
()
{
static
paddle
::
ps
::
MpiPSEnvironment
ps_environment
;
return
&
ps_environment
;
}
virtual
uint32_t
rank_id
(
EnvironmentRole
role
)
{
return
mpi_node_info
(
role
).
rank_id
;
...
...
@@ -103,7 +108,7 @@ public:
virtual
uint32_t
node_num
(
EnvironmentRole
role
)
{
return
mpi_node_info
(
role
).
node_num
;
}
virtual
int
set
_role
(
EnvironmentRole
role
)
{
virtual
int
add
_role
(
EnvironmentRole
role
)
{
auto
&
node_info
=
mpi_node_info
(
role
);
if
(
node_info
.
rank_id
<
0
)
{
if
(
role
==
EnvironmentRole
::
ALL
)
{
...
...
@@ -115,8 +120,12 @@ public:
MPI_Comm_rank
(
node_info
.
mpi_comm
,
&
(
node_info
.
rank_id
));
MPI_Comm_size
(
node_info
.
mpi_comm
,
&
(
node_info
.
node_num
));
}
_role_set
.
insert
(
role
);
return
0
;
}
virtual
bool
is_role
(
EnvironmentRole
role
)
{
return
_role_set
.
count
(
role
)
>
0
;
}
virtual
void
barrier
(
EnvironmentRole
role
)
{
MPI_Barrier
(
mpi_node_info
(
role
).
mpi_comm
);
...
...
@@ -154,9 +163,10 @@ protected:
}
private:
std
::
set
<
EnvironmentRole
>
_role_set
;
std
::
vector
<
MpiNodeInfo
>
_roles_node_info
;
};
REGIST
ER
_CLASS
(
RuntimeEnvironment
,
MPIRuntimeEnvironment
);
REGIST_CLASS
(
RuntimeEnvironment
,
MPIRuntimeEnvironment
);
//用于本地模式单机训练
class
LocalRuntimeEnvironment
:
public
RuntimeEnvironment
{
...
...
@@ -169,15 +179,22 @@ public:
virtual
int
wireup
()
{
return
0
;
}
virtual
paddle
::
ps
::
PSEnvironment
*
ps_environment
()
{
static
paddle
::
ps
::
LocalPSEnvironment
ps_environment
;
return
&
ps_environment
;
}
virtual
uint32_t
rank_id
(
EnvironmentRole
role
)
{
return
0
;
}
virtual
uint32_t
node_num
(
EnvironmentRole
role
)
{
return
1
;
}
virtual
int
set
_role
(
EnvironmentRole
role
)
{
virtual
int
add
_role
(
EnvironmentRole
role
)
{
return
0
;
}
virtual
bool
is_role
(
EnvironmentRole
role
)
{
return
true
;
}
virtual
void
barrier
(
EnvironmentRole
role
)
{
return
;
}
...
...
@@ -196,7 +213,7 @@ protected:
VLOG
(
static_cast
<
int
>
(
level
))
<<
log_str
;
}
};
REGIST
ER
_CLASS
(
RuntimeEnvironment
,
LocalRuntimeEnvironment
);
REGIST_CLASS
(
RuntimeEnvironment
,
LocalRuntimeEnvironment
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.h
浏览文件 @
b410c4e1
...
...
@@ -6,6 +6,7 @@
*/
#pragma once
#include <yaml-cpp/yaml.h>
#include "communicate/ps_env.h"
#include "paddle/fluid/framework/archive.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/fluid/train/custom_trainer/feed/common/registerer.h"
...
...
@@ -38,45 +39,49 @@ class RuntimeEnvironment {
public:
RuntimeEnvironment
();
virtual
~
RuntimeEnvironment
();
//配置初始化
//
配置初始化
virtual
int
initialize
(
YAML
::
Node
config
)
=
0
;
//设置role
virtual
int
set_role
(
EnvironmentRole
role
)
=
0
;
//环境初始化,会在所有依赖模块initialize后调用
// 设置role
virtual
int
add_role
(
EnvironmentRole
role
)
=
0
;
// 判断role
virtual
bool
is_role
(
EnvironmentRole
role
)
=
0
;
// 环境初始化,会在所有依赖模块initialize后调用
virtual
int
wireup
()
=
0
;
//多线程可调用接口 Start
//当前环境rank_idx
//
多线程可调用接口 Start
//
当前环境rank_idx
virtual
uint32_t
rank_id
(
EnvironmentRole
role
)
=
0
;
//运行环境节点数
//
运行环境节点数
virtual
uint32_t
node_num
(
EnvironmentRole
role
)
=
0
;
//环境内主节点
//
环境内主节点
virtual
bool
is_master_node
(
EnvironmentRole
role
);
//For PS
virtual
paddle
::
ps
::
PSEnvironment
*
ps_environment
()
=
0
;
//环境定制化log
//
环境定制化log
template
<
class
...
ARGS
>
void
log
(
EnvironmentRole
role
,
EnvironmentLogType
type
,
EnvironmentLogLevel
level
,
const
char
*
fmt
,
ARGS
&&
...
args
)
{
print_log
(
role
,
type
,
level
,
paddle
::
string
::
format_string
(
fmt
,
args
...));
}
//多线程可调用接口 End
//
多线程可调用接口 End
//接口只允许在主线程调用 Start
//barrier 指定role的节点
//
接口只允许在主线程调用 Start
//
barrier 指定role的节点
virtual
void
barrier
(
EnvironmentRole
role
)
=
0
;
//bcast 广播
//
bcast 广播
virtual
void
bcast
(
paddle
::
framework
::
BinaryArchive
&
ar
,
int
root_id
,
EnvironmentRole
role
)
=
0
;
//all_reduce sum element 规约元素
//
all_reduce sum element 规约元素
virtual
double
all_reduce_ele
(
double
x
)
=
0
;
//all_reduce sum array 规约数组
//
all_reduce sum array 规约数组
virtual
void
all_reduce_arr
(
double
*
x
,
int
n
)
=
0
;
//接口只允许在主线程调用 End
//
接口只允许在主线程调用 End
protected:
virtual
void
print_log
(
EnvironmentRole
role
,
EnvironmentLogType
type
,
EnvironmentLogLevel
level
,
const
std
::
string
&
log_str
)
=
0
;
};
REGIST
ER
_REGISTERER
(
RuntimeEnvironment
);
REGIST_REGISTERER
(
RuntimeEnvironment
);
std
::
string
format_timestamp
(
time_t
time
,
const
char
*
format
);
inline
std
::
string
format_timestamp
(
time_t
time
,
const
std
::
string
&
format
)
{
...
...
paddle/fluid/train/custom_trainer/feed/common/scope_helper.h
0 → 100644
浏览文件 @
b410c4e1
#pragma once
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/lod_tensor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
// Static helpers for reading/writing variables in a paddle framework Scope.
class ScopeHelper {
public:
    // Const access to an existing variable's payload of type T.
    template <class T>
    static const T& var(paddle::framework::Scope* scope,
        const std::string& name) {
        return scope->Var(name)->Get<T>();
    }

    // Mutable access; Var() creates the variable if it does not exist.
    template <class T>
    static T* mutable_var(paddle::framework::Scope* scope,
        const std::string& name) {
        return scope->Var(name)->GetMutable<T>();
    }

    // Gets (or creates) a tensor-like variable and resizes it to dim.
    template <class T>
    static T* resize_variable(paddle::framework::Scope* scope,
        const std::string& name, const paddle::framework::DDim& dim) {
        auto* tensor = scope->Var(name)->GetMutable<T>();
        tensor->Resize(dim);
        return tensor;
    }

    // Convenience overload of resize_variable for LoDTensor.
    static paddle::framework::LoDTensor* resize_lod_tensor(
        paddle::framework::Scope* scope,
        const std::string& name, const paddle::framework::DDim& dim) {
        return resize_variable<paddle::framework::LoDTensor>(scope, name, dim);
    }

    // Stores a single scalar value into a 1-element tensor named `name`.
    template <class T>
    static void fill_value(paddle::framework::Scope* scope,
        paddle::platform::Place place, const std::string& name, T& value) {
        auto* tensor =
            resize_variable<paddle::framework::Tensor>(scope, name, {1});
        T* data = tensor->mutable_data<T>(place);
        *data = value;
        return;
    }

    // Returns a pointer to the tensor's data buffer, typed as T.
    // Counterpart of fill_value for scalar retrieval.
    template <class T>
    static T* get_value(paddle::framework::Scope* scope,
        paddle::platform::Place place, const std::string& name) {
        auto* tensor =
            scope->Var(name)->GetMutable<paddle::framework::Tensor>();
        return tensor->mutable_data<T>(place);
    }
};
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/common/yaml_helper.h
0 → 100644
浏览文件 @
b410c4e1
#pragma once
#include <glog/logging.h>
#include <yaml-cpp/yaml.h>
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
// Small helpers for safe reads from YAML nodes.
class YamlHelper {
public:
    // Probing node["key"] directly would insert `key` into the node as a
    // side effect, so membership is checked by scanning the map entries.
    static bool has_key(const YAML::Node& node, const std::string& key) {
        CHECK(node.Type() == YAML::NodeType::Map);
        for (auto itr = node.begin(); itr != node.end(); ++itr) {
            if (itr->first.as<std::string>() == key) {
                return true;
            }
        }
        return false;
    }

    // Returns node[key] converted to T, or default_v when the key is absent.
    template <class T>
    static T get_with_default(YAML::Node node, const std::string& key,
        const T& default_v) {
        return has_key(node, key) ? node[key].as<T>() : default_v;
    }
};
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/conf/gflags.conf
浏览文件 @
b410c4e1
-
log_dir
=
log
-
v
=
10
-
v
=
4
-
logbufsecs
=
0
-
pslib_push_dense_merge_limit
=
1
-
pslib_push_sparse_merge_limit
=
1
paddle/fluid/train/custom_trainer/feed/conf/ps_table_config
0 → 100644
浏览文件 @
b410c4e1
server_param {
downpour_server_param {
downpour_table_param {
table_id: 0
table_class: "DownpourSparseTable"
shard_num: 1950
accessor {
accessor_class: "DownpourCtrAccessor"
sparse_sgd_param {
learning_rate: 0.05
initial_g2sum: 3.0
initial_range: 0.0001
weight_bounds: -10.0
weight_bounds: 10.0
}
fea_dim: 11
embedx_dim: 8
embedx_threshold: 10
downpour_accessor_param {
nonclk_coeff: 0.1
click_coeff: 1
base_threshold: 1.5
delta_threshold: 0.25
delta_keep_days: 16
delete_after_unseen_days: 30
show_click_decay_rate: 0.98
delete_threshold: 0.8
}
table_accessor_save_param {
param: 1
converter: "(tool/xbox_compressor_mf.py | tool/xbox_pb_converter)"
deconverter: "(tool/xbox_pb_deconverter | tool/xbox_decompressor_mf.awk)"
}
table_accessor_save_param {
param: 2
converter: "(tool/xbox_compressor_mf.py | tool/xbox_pb_converter)"
deconverter: "(tool/xbox_pb_deconverter | tool/xbox_decompressor_mf.awk)"
}
}
type: PS_SPARSE_TABLE
compress_in_save: true
}
downpour_table_param {
table_id: 1
table_class: "DownpourDenseTable"
accessor {
accessor_class: "DownpourDenseValueAccessor"
dense_sgd_param {
name: "adam"
adam {
learning_rate: 5e-06
avg_decay_rate: 0.999993
ada_decay_rate: 0.9999
ada_epsilon: 1e-08
mom_decay_rate: 0.99
}
naive {
learning_rate: 0.0002
}
}
fea_dim: 2571127
}
type: PS_DENSE_TABLE
compress_in_save: true
}
downpour_table_param {
table_id: 2
table_class: "DownpourDenseDoubleTable"
accessor {
accessor_class: "DownpourDenseValueDoubleAccessor"
dense_sgd_param {
name: "summarydouble"
summary {
summary_decay_rate: 0.999999
}
}
fea_dim: 13464
}
type: PS_DENSE_TABLE
compress_in_save: true
}
downpour_table_param {
table_id: 3
table_class: "DownpourDenseTable"
accessor {
accessor_class: "DownpourDenseValueAccessor"
dense_sgd_param {
name: "adam"
adam {
learning_rate: 5e-06
avg_decay_rate: 0.999993
ada_decay_rate: 0.9999
ada_epsilon: 1e-08
mom_decay_rate: 0.99
}
naive {
learning_rate: 0.0002
}
}
fea_dim: 2072615
}
type: PS_DENSE_TABLE
compress_in_save: true
}
service_param {
server_class: "DownpourBrpcPsServer"
client_class: "DownpourBrpcPsClient"
service_class: "DownpourPsService"
start_server_port: 0
server_thread_num: 12
}
}
}
fs_client_param {
uri: "afs://xingtian.afs.baidu.com:9902"
user: "mlarch"
passwd: "Fv1M87"
hadoop_bin: "$HADOOP_HOME/bin/hadoop"
}
paddle/fluid/train/custom_trainer/feed/conf/trainer.yaml
浏览文件 @
b410c4e1
train_thread_num
:
10
train_thread_num
:
10
environment
:
environment_class
:
LocalRuntimeEnvironment
environment
:
environment_class
:
LocalRuntimeEnvironment
ps
:
./conf/ps_table_config
io
:
file_systems
:
afs
:
class
:
HadoopFileSystem
buffer_size
:
1024000
ugis
:
io
:
file_systems
:
afs
:
class
:
HadoopFileSystem
buffer_size
:
1024000
ugis
:
'
default'
:
'
feed_video,D3a0z8'
'
xingtian.afs.baidu.com:9902'
:
'
feed_video,D3a0z8'
default
:
class
:
LocalFileSystem
buffer_size
:
1024000
dataset
:
data_list
:
train_sample
:
prefetch_num
:
2
default
:
class
:
LocalFileSystem
buffer_size
:
1024000
dataset
:
data_list
:
train_sample
:
prefetch_num
:
2
root_path
:
[
./sample
]
data_spit_interval
:
300
data_path_formater
:
'
%Y%m%d/%H%M'
data_reader
:
LineDataReader
done_file
:
to.hadoop.done
filename_prefix
:
part
pipeline_cmd
:
cat
parser
:
class
:
Line
DataParser
data_spit_interval
:
300
data_path_formater
:
'
%Y%m%d/%H%M'
data_reader
:
LineDataReader
done_file
:
to.hadoop.done
filename_prefix
:
part
pipeline_cmd
:
'
./tool/ins_weight.py
|
awk
-f
./tool/format_newcate_hotnews.awk'
parser
:
class
:
AbacusText
DataParser
epoch
:
epoch_class
:
HourlyEpochAccessor
model_root_path
:
./model/
epoch_class
:
TimelyEpochAccessor
model_root_path
:
./model/
train_time_interval
:
600
time_zone_seconds
:
28800
executor
:
-
name
:
join
class
:
SimpleExecutor
train_data_name
:
train_sample
train_batch_size
:
32
input_parse_thread_num
:
10
push_gradient_thread_num
:
16
train_thread_num
:
16
need_dump_all_model
:
true
-
name
:
update
class
:
SimpleExecutor
train_data_name
:
train_sample
train_batch_size
:
32
input_parse_thread_num
:
10
push_gradient_thread_num
:
16
train_thread_num
:
16
need_dump_all_model
:
false
paddle/fluid/train/custom_trainer/feed/dataset/abacus_data_reader.cc
0 → 100755
浏览文件 @
b410c4e1
#include "paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h"
#include <cstdio>
#include <atomic>
#include <glog/logging.h>
#include <omp.h>
#include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
/* Parses Abacus-format plain-text feasign samples.
 * Line layout (after the id handled by LineDataParser):
 *   <show> <click> then a sequence of tokens, where a token is either
 *   '*...' (skipped), '$<type>' (validated, skipped), '#' (terminator),
 *   '@...' (read but unused), or '<sign>:<slot>' (a feature).
 */
class AbacusTextDataParser : public LineDataParser {
public:
    AbacusTextDataParser() {}
    virtual ~AbacusTextDataParser() {}

    // Converts one raw DataItem into a SampleInstance: one label (click)
    // plus the parsed feature list. Returns 0 on success; malformed
    // features abort via CHECK.
    virtual int parse_to_sample(const DataItem& data, SampleInstance& instance) const {
        instance.id = data.id;
        instance.labels.resize(1);
        size_t len = data.data.size();
        const char* str = data.data.c_str();
        const char* line_end = str + len;  // NOTE(review): computed but never used
        char* cursor = NULL;
        // First field is the show count; parsed but currently unused.
        int show = (int)strtol(str, &cursor, 10);
        str = cursor;
        instance.labels[0] = (float)strtol(str, &cursor, 10);
        // click
        str = cursor;
        // NOTE(review): advancing with count_nonspaces before dispatching on
        // *str looks like it should skip *spaces* between tokens — confirm
        // the intended semantics of paddle::string::count_nonspaces here.
        while (*(str += paddle::string::count_nonspaces(str)) != 0) {
            if (*str == '*') {
                // '*'-prefixed token: skip its payload entirely.
                str++;
                size_t len = paddle::string::count_nonspaces(str);
                str += len;
            } else if (*str == '$') {
                // '$'-prefixed sample type: must parse as an integer
                // (comma operator — the CHECK only tests cursor != str).
                str++;
                CHECK(((int)strtol(str, &cursor, 10), cursor != str)) << " sample type parse err:" << str;
                str = cursor;
            } else if (*str == '#') {
                // '#' terminates the feature section.
                str++;
                break;
            } else if (*str == '@') {
                // '@'-prefixed token: payload copied into all_str but unused.
                str++;
                size_t len = paddle::string::count_nonspaces(str);
                std::string all_str(str, str + len);
                str += len;
            } else {
                // Feature token: "<sign>:<slot>".
                FeatureItem feature_item;
                feature_item.sign() = (uint64_t)strtoull(str, &cursor, 10);
                if (cursor == str) {
                    //FIXME abacus never produces this case (original note)
                    str++;
                    continue;
                }
                str = cursor;
                CHECK(*str++ == ':');
                CHECK(!isspace(*str));
                CHECK((feature_item.slot() = (int)strtol(str, &cursor, 10), cursor != str)) << " format error: " << str;
                str = cursor;
                instance.features.emplace_back(feature_item);
            }
        }
        VLOG(5) << "parse sample success, id:" << instance.id
            << ", fea_sum:" << instance.features.size()
            << ", label:" << instance.labels[0];
        return 0;
    }
};
REGIST_CLASS
(
DataParser
,
AbacusTextDataParser
);
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.cc
浏览文件 @
b410c4e1
...
...
@@ -12,51 +12,21 @@ namespace paddle {
namespace
custom_trainer
{
namespace
feed
{
class
LineDataParser
:
public
DataParser
{
public:
LineDataParser
()
{}
virtual
~
LineDataParser
()
{}
virtual
int
initialize
(
const
YAML
::
Node
&
config
,
std
::
shared_ptr
<
TrainerContext
>
context
)
{
return
0
;
}
virtual
int
parse
(
const
char
*
str
,
size_t
len
,
DataItem
&
data
)
const
{
size_t
pos
=
0
;
while
(
pos
<
len
&&
str
[
pos
]
!=
' '
)
{
++
pos
;
}
if
(
pos
>=
len
)
{
VLOG
(
2
)
<<
"fail to parse line: "
<<
std
::
string
(
str
,
len
)
<<
", strlen: "
<<
len
;
return
-
1
;
}
VLOG
(
5
)
<<
"getline: "
<<
str
<<
" , pos: "
<<
pos
<<
", len: "
<<
len
;
data
.
id
.
assign
(
str
,
pos
);
data
.
data
.
assign
(
str
+
pos
+
1
,
len
-
pos
-
1
);
return
0
;
}
virtual
int
parse
(
const
char
*
str
,
DataItem
&
data
)
const
{
size_t
pos
=
0
;
while
(
str
[
pos
]
!=
'\0'
&&
str
[
pos
]
!=
' '
)
{
++
pos
;
}
if
(
str
[
pos
]
==
'\0'
)
{
VLOG
(
2
)
<<
"fail to parse line: "
<<
str
<<
", get '
\\
0' at pos: "
<<
pos
;
return
-
1
;
}
VLOG
(
5
)
<<
"getline: "
<<
str
<<
" , pos: "
<<
pos
;
data
.
id
.
assign
(
str
,
pos
);
data
.
data
.
assign
(
str
+
pos
+
1
);
return
0
;
int
LineDataParser
::
parse
(
const
char
*
str
,
size_t
len
,
DataItem
&
data
)
const
{
size_t
pos
=
0
;
while
(
pos
<
len
&&
str
[
pos
]
!=
' '
)
{
++
pos
;
}
virtual
int
parse_to_sample
(
const
DataItem
&
data
,
SampleInstance
&
instance
)
const
{
return
0
;
if
(
pos
>=
len
)
{
VLOG
(
2
)
<<
"fail to parse line: "
<<
std
::
string
(
str
,
len
)
<<
", strlen: "
<<
len
;
return
-
1
;
}
};
REGISTER_CLASS
(
DataParser
,
LineDataParser
);
VLOG
(
5
)
<<
"getline: "
<<
str
<<
" , pos: "
<<
pos
<<
", len: "
<<
len
;
data
.
id
.
assign
(
str
,
pos
);
data
.
data
.
assign
(
str
+
pos
+
1
,
len
-
pos
-
1
);
return
0
;
}
REGIST_CLASS
(
DataParser
,
LineDataParser
);
/********************************
* feasign压缩格式
...
...
@@ -335,10 +305,6 @@ public:
return
0
;
}
virtual
int
parse
(
const
char
*
str
,
DataItem
&
data
)
const
{
}
virtual
int
parse_to_sample
(
const
DataItem
&
data
,
SampleInstance
&
instance
)
const
{
instance
.
id
=
data
.
id
;
if
(
data
.
data
.
empty
())
{
...
...
@@ -428,10 +394,10 @@ private:
std
::
shared_ptr
<
SignCacheDict
>
_index
;
};
REGIST
ER
_CLASS
(
DataParser
,
ArchiveDataParse
);
REGIST_CLASS
(
DataParser
,
ArchiveDataParse
);
int
DataReader
::
initialize
(
const
YAML
::
Node
&
config
,
std
::
shared_ptr
<
TrainerContext
>
context
)
{
_parser
.
reset
(
CREATE_
CLASS
(
DataParser
,
config
[
"parser"
][
"class"
].
as
<
std
::
string
>
()));
_parser
.
reset
(
CREATE_
INSTANCE
(
DataParser
,
config
[
"parser"
][
"class"
].
as
<
std
::
string
>
()));
if
(
_parser
==
nullptr
)
{
VLOG
(
2
)
<<
"fail to get parser: "
<<
config
[
"parser"
][
"class"
].
as
<
std
::
string
>
();
return
-
1
;
...
...
@@ -457,7 +423,7 @@ public:
if
(
config
[
"file_system"
]
&&
config
[
"file_system"
][
"class"
])
{
_file_system
.
reset
(
CREATE_
CLASS
(
FileSystem
,
config
[
"file_system"
][
"class"
].
as
<
std
::
string
>
()));
CREATE_
INSTANCE
(
FileSystem
,
config
[
"file_system"
][
"class"
].
as
<
std
::
string
>
()));
if
(
_file_system
==
nullptr
||
_file_system
->
initialize
(
config
[
"file_system"
],
context
)
!=
0
)
{
VLOG
(
2
)
<<
"fail to create class: "
...
...
@@ -467,7 +433,7 @@ public:
}
else
if
(
context
->
file_system
!=
nullptr
)
{
_file_system
=
context
->
file_system
;
}
else
{
_file_system
.
reset
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
_file_system
.
reset
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
if
(
_file_system
==
nullptr
||
_file_system
->
initialize
(
YAML
::
Load
(
""
),
context
)
!=
0
)
{
VLOG
(
2
)
<<
"fail to init file system"
;
return
-
1
;
...
...
@@ -565,11 +531,6 @@ public:
is_failed
=
true
;
continue
;
}
if
(
_file_system
->
err_no
()
!=
0
)
{
_file_system
->
reset_err_no
();
is_failed
=
true
;
continue
;
}
}
// omp end
...
...
@@ -593,7 +554,7 @@ private:
std
::
string
_filename_prefix
;
std
::
shared_ptr
<
FileSystem
>
_file_system
;
};
REGIST
ER
_CLASS
(
DataReader
,
LineDataReader
);
REGIST_CLASS
(
DataReader
,
LineDataReader
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h
浏览文件 @
b410c4e1
...
...
@@ -20,6 +20,8 @@ namespace feed {
class
TrainerContext
;
struct
FeatureItem
{
std
::
vector
<
float
>
weights
;
std
::
vector
<
float
>
gradients
;
public:
FeatureItem
()
{
}
...
...
@@ -76,13 +78,12 @@ public:
virtual
~
DataParser
()
{}
virtual
int
initialize
(
const
YAML
::
Node
&
config
,
std
::
shared_ptr
<
TrainerContext
>
context
)
=
0
;
virtual
int
parse
(
const
std
::
string
&
str
,
DataItem
&
data
)
const
{
return
parse
(
str
.
c_str
(),
data
);
return
parse
(
str
.
c_str
(),
str
.
size
(),
data
);
}
virtual
int
parse
(
const
char
*
str
,
size_t
len
,
DataItem
&
data
)
const
=
0
;
virtual
int
parse
(
const
char
*
str
,
DataItem
&
data
)
const
=
0
;
virtual
int
parse_to_sample
(
const
DataItem
&
data
,
SampleInstance
&
instance
)
const
=
0
;
};
REGIST
ER
_REGISTERER
(
DataParser
);
REGIST_REGISTERER
(
DataParser
);
class
DataReader
{
public:
...
...
@@ -104,7 +105,24 @@ protected:
std
::
shared_ptr
<
DataParser
>
_parser
;
//数据格式转换
std
::
string
_pipeline_cmd
;
//将文件流,重定向到pipeline_cmd,再读入
};
REGISTER_REGISTERER
(
DataReader
);
REGIST_REGISTERER
(
DataReader
);
// Baseline parser: splits a raw line into "<id> <payload>" (see parse in
// the .cc file) and performs no sample conversion. Subclasses such as the
// Abacus parser override parse_to_sample to decode the payload.
class LineDataParser : public DataParser {
public:
    LineDataParser() {}
    virtual ~LineDataParser() {}
    // No configuration needed for plain line splitting.
    virtual int initialize(const YAML::Node& config, std::shared_ptr<TrainerContext> context) {
        return 0;
    }
    // Splits str[0..len) at the first space into DataItem{id, data};
    // implemented in data_reader.cc. Returns -1 when no space is found.
    virtual int parse(const char* str, size_t len, DataItem& data) const;
    // Intentionally a no-op: the base class carries raw lines only.
    virtual int parse_to_sample(const DataItem& data, SampleInstance& instance) const {
        return 0;
    }
};
}
//namespace feed
}
//namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/dataset/dataset.cc
浏览文件 @
b410c4e1
...
...
@@ -48,30 +48,30 @@ inline DatasetStatus Dataset::epoch_data_status(
return
_data_containers
[
data_name
]
->
epoch_data_status
(
epoch_id
);
}
// Returns the data paths of the named container for `epoch_id`.
// NOTE(review): operator[] default-inserts a null shared_ptr for an
// unknown data_name, which would then be dereferenced — callers are
// presumably expected to pass only registered names; confirm.
inline std::vector<std::string> Dataset::epoch_data_path(
    const std::string& data_name, uint64_t epoch_id) {
    return _data_containers[data_name]->epoch_data_path(epoch_id);
}
// Collects the data paths of every registered data container for the
// given epoch and returns them as a single flat list.
inline std::vector<std::string> Dataset::epoch_data_path(uint64_t epoch_id) {
    std::vector<std::string> results;
    for (const auto& name_and_container : _data_containers) {
        auto container_paths =
            name_and_container.second->epoch_data_path(epoch_id);
        for (auto& path : container_paths) {
            results.emplace_back(path);
        }
    }
    return results;
}
// Returns the raw (possibly still compressed) data channel of the named
// container for `epoch_id`.
// NOTE(review): like epoch_data_path above, an unknown data_name would
// default-insert a null container and crash — confirm callers' contract.
inline ::paddle::framework::Channel<DataItem> Dataset::fetch_data(
    const std::string& data_name, uint64_t epoch_id) {
    return _data_containers[data_name]->fetch(epoch_id);
}
SampleInstancePipe
Dataset
::
fetch_sample
(
const
std
::
string
&
data_name
,
uint32_t
batch_size
,
uint64_t
epoch_id
)
{
inline
const
DataParser
*
Dataset
::
data_parser
(
const
std
::
string
&
data_name
)
{
auto
*
data_container
=
_data_containers
[
data_name
].
get
();
auto
data_channel
=
data_container
->
fetch
(
epoch_id
);
const
auto
*
data_parser
=
data_container
->
data_parser
();
PipelineOptions
options
;
options
.
batch_size
=
batch_size
;
options
.
need_hold_input_data
=
true
;
options
.
buffer_data_num
=
batch_size
*
10
;
SampleInstancePipe
pipe
=
make_sample_instance_channel
();
pipe
->
initialize
(
options
,
data_channel
,
[
data_parser
]
(
const
DataItem
*
data
,
SampleInstance
*
sample
,
size_t
num
)
->
int
{
int
ret
=
0
;
for
(
int
i
=
0
;
i
<
num
;
++
i
,
++
data
,
++
sample
)
{
ret
|=
data_parser
->
parse_to_sample
(
*
data
,
*
sample
);
}
return
ret
;
});
return
pipe
;
return
data_container
->
data_parser
();
}
...
...
paddle/fluid/train/custom_trainer/feed/dataset/dataset.h
浏览文件 @
b410c4e1
...
...
@@ -29,14 +29,17 @@ public:
virtual
DatasetStatus
epoch_data_status
(
uint64_t
epoch_id
);
virtual
DatasetStatus
epoch_data_status
(
const
std
::
string
&
data_name
,
uint64_t
epoch_id
);
//获取数据路径
virtual
std
::
vector
<
std
::
string
>
epoch_data_path
(
uint64_t
epoch_id
);
virtual
std
::
vector
<
std
::
string
>
epoch_data_path
(
const
std
::
string
&
data_name
,
uint64_t
epoch_id
);
//返回各DataContainer内的原始数据(maybe 压缩格式)
virtual
::
paddle
::
framework
::
Channel
<
DataItem
>
fetch_data
(
const
std
::
string
&
data_name
,
uint64_t
epoch_id
);
//以管道形式返回标准样本流,管道内会对数据做异步转换
virtual
SampleInstancePipe
fetch_sample
(
const
std
::
string
&
data_name
,
uint32_t
batch_size
,
uint64_t
epoch_id
);
//获取DataItem解析器
virtual
const
DataParser
*
data_parser
(
const
std
::
string
&
data_name
);
private:
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
DatasetContainer
>>
_data_containers
;
};
...
...
paddle/fluid/train/custom_trainer/feed/dataset/dataset_container.cc
浏览文件 @
b410c4e1
...
...
@@ -31,7 +31,7 @@ int DatasetContainer::initialize(
_data_split_interval
=
config
[
"data_spit_interval"
].
as
<
int
>
();
_data_path_formater
=
config
[
"data_path_formater"
].
as
<
std
::
string
>
();
std
::
string
data_reader_class
=
config
[
"data_reader"
].
as
<
std
::
string
>
();
DataReader
*
data_reader
=
CREATE_
CLASS
(
DataReader
,
data_reader_class
);
DataReader
*
data_reader
=
CREATE_
INSTANCE
(
DataReader
,
data_reader_class
);
_data_reader
.
reset
(
data_reader
);
return
_data_reader
->
initialize
(
config
,
context
);
}
...
...
@@ -41,6 +41,21 @@ std::shared_ptr<DatasetInfo> DatasetContainer::dataset(uint64_t timestamp) {
auto
data_idx
=
timestamp
/
epoch_accessor
->
epoch_time_interval
();
return
_dataset_list
[
data_idx
%
_prefetch_num
];
}
// Builds the list of candidate data directories for `epoch_id`: for each
// configured root path, one directory per data split that falls inside the
// epoch's time window, named via the configured timestamp formatter.
std::vector<std::string> DatasetContainer::epoch_data_path(uint64_t epoch_id) {
    std::vector<std::string> results;
    auto* epoch_accessor = _trainer_context->epoch_accessor.get();
    time_t timestamp = epoch_accessor->epoch_timestamp(epoch_id);
    size_t data_num = data_num_for_train(
        timestamp, epoch_accessor->epoch_time_interval(), _data_split_interval);
    // Round the epoch start up to the next split boundary.
    uint64_t data_timestamp = timestamp;
    if (timestamp % _data_split_interval != 0) {
        data_timestamp = (timestamp / _data_split_interval + 1) * _data_split_interval;
    }
    for (int root_idx = 0; root_idx < _data_root_paths.size(); ++root_idx) {
        for (int split_idx = 0; split_idx < data_num; ++split_idx) {
            std::string path_suffix = format_timestamp(
                data_timestamp + split_idx * _data_split_interval, _data_path_formater);
            std::string data_dir = _trainer_context->file_system->path_join(
                _data_root_paths[root_idx], path_suffix);
            results.emplace_back(data_dir);
        }
    }
    return results;
}
void
DatasetContainer
::
pre_detect_data
(
uint64_t
epoch_id
)
{
int
status
=
0
;
...
...
@@ -55,7 +70,7 @@ void DatasetContainer::pre_detect_data(uint64_t epoch_id) {
async_download_data
(
timestamp
);
}));
}
for
(
int
detect_idx
=
0
;
detect_idx
<
_prefetch_num
;
++
detect_idx
)
{
for
(
int
detect_idx
=
0
;
detect_idx
<
_prefetch_num
;
++
detect_idx
,
++
epoch_id
)
{
if
(
DatasetStatus
::
Empty
!=
data_status
(
timestamp
))
{
continue
;
}
...
...
@@ -74,6 +89,7 @@ void DatasetContainer::pre_detect_data(uint64_t epoch_id) {
dataset_info
->
timestamp
=
timestamp
;
dataset_info
->
file_path_list
=
std
::
move
(
data_path_list
);
dataset_info
->
status
=
DatasetStatus
::
Detected
;
VLOG
(
2
)
<<
epoch_accessor
->
text
(
epoch_id
)
<<
", data is detected"
;
}
timestamp
+=
epoch_accessor
->
epoch_time_interval
();
}
...
...
@@ -149,16 +165,25 @@ void DatasetContainer::async_download_data(uint64_t start_timestamp) {
}
while
(
!
_stop_download
)
{
auto
dataset_info
=
dataset
(
start_timestamp
);
while
(
data_status
(
start_timestamp
)
!=
DatasetStatus
::
Detected
)
{
while
(
data_status
(
start_timestamp
)
==
DatasetStatus
::
Empty
)
{
sleep
(
30
);
}
dataset_info
->
status
=
DatasetStatus
::
Downloding
;
VLOG
(
2
)
<<
"Start download data, data_timestap:"
<<
start_timestamp
<<
", for epoch:"
<<
epoch_accessor
->
text
(
start_timestamp
);
const
auto
&
file_list
=
dataset_info
->
file_path_list
;
dataset_info
->
data_channel
->
Clear
();
while
(
_data_reader
->
read_all
(
file_list
,
dataset_info
->
data_channel
)
!=
0
)
{
dataset_info
->
data_channel
->
Clear
();
VLOG
(
0
)
<<
"timestamp:"
<<
start_timestamp
<<
" data read failed, retry"
;
VLOG
(
0
)
<<
"Failed download data, data_timestap:"
<<
start_timestamp
<<
", for epoch:"
<<
epoch_accessor
->
text
(
start_timestamp
)
<<
", Retry it"
;
sleep
(
30
);
}
VLOG
(
2
)
<<
"End download data num:"
<<
dataset_info
->
data_channel
->
Size
()
<<
", data_timestap:"
<<
start_timestamp
<<
", for epoch:"
<<
epoch_accessor
->
text
(
start_timestamp
);
dataset_info
->
status
=
DatasetStatus
::
Ready
;
start_timestamp
+=
epoch_accessor
->
epoch_time_interval
();
}
}
...
...
paddle/fluid/train/custom_trainer/feed/dataset/dataset_container.h
浏览文件 @
b410c4e1
...
...
@@ -49,20 +49,22 @@ public:
}
virtual
int
initialize
(
const
YAML
::
Node
&
config
,
std
::
shared_ptr
<
TrainerContext
>
context
);
//触发可预取的数据判断
//
触发可预取的数据判断
virtual
void
pre_detect_data
(
uint64_t
epoch_id
);
//获取数据状态
// 获取epoch对应的样本数据目录
std
::
vector
<
std
::
string
>
epoch_data_path
(
uint64_t
epoch_id
);
// 获取数据状态
virtual
DatasetStatus
epoch_data_status
(
uint64_t
epoch_id
);
//获取特定epoch_i样本,如果数据未ready,Channel内为空指针
//
获取特定epoch_i样本,如果数据未ready,Channel内为空指针
virtual
::
paddle
::
framework
::
Channel
<
DataItem
>
fetch
(
uint64_t
epoch_id
);
//获取DataItem解析器
//
获取DataItem解析器
virtual
const
DataParser
*
data_parser
()
{
return
_data_reader
->
get_parser
();
}
protected:
virtual
DatasetStatus
data_status
(
uint64_t
timestamp
);
virtual
int
read_data_list
(
const
std
::
string
&
data_dir
,
std
::
vector
<
std
::
string
>&
data_list
);
//异步样本download
//
异步样本download
virtual
void
async_download_data
(
uint64_t
start_timestamp
);
virtual
std
::
shared_ptr
<
DatasetInfo
>
dataset
(
uint64_t
timestamp
);
...
...
paddle/fluid/train/custom_trainer/feed/executor/executor.cc
浏览文件 @
b410c4e1
#include <sstream>
#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
#include "paddle/fluid/framework/program_desc.h"
...
...
@@ -50,57 +51,53 @@ public:
virtual
~
SimpleExecutor
()
{};
virtual
int
initialize
(
YAML
::
Node
exe_config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
paddle
::
framework
::
InitDevices
(
false
);
if
(
exe_config
[
"num_threads"
])
{
paddle
::
platform
::
SetNumThreads
(
exe_config
[
"num_threads"
].
as
<
int
>
());
}
if
(
!
exe_config
[
"startup_program"
]
||
!
exe_config
[
"main_program"
])
{
VLOG
(
2
)
<<
"fail to load config"
;
return
-
1
;
}
//if (exe_config["num_threads"]) {
//}
paddle
::
platform
::
SetNumThreads
(
1
);
std
::
string
name
=
exe_config
[
"name"
].
as
<
std
::
string
>
();
std
::
string
main_program
=
YamlHelper
::
get_with_default
(
exe_config
,
"main_program"
,
string
::
format_string
(
"./model/%s/main_program"
,
name
.
c_str
()));
std
::
string
startup_program
=
YamlHelper
::
get_with_default
(
exe_config
,
"startup_program"
,
string
::
format_string
(
"./model/%s/startup_program"
,
name
.
c_str
()));
try
{
_context
.
reset
(
new
SimpleExecutor
::
Context
(
context_ptr
->
cpu_place
));
auto
startup_program
=
Load
(
&
_context
->
executor
,
exe_config
[
"startup_program"
].
as
<
std
::
string
>
()
);
if
(
startup_program
==
nullptr
)
{
VLOG
(
2
)
<<
"fail to load startup_program: "
<<
exe_config
[
"startup_program"
].
as
<
std
::
string
>
()
;
_context
->
startup_program
=
Load
(
&
_context
->
executor
,
startup_program
);
if
(
_context
->
startup_program
==
nullptr
)
{
VLOG
(
0
)
<<
"fail to load startup_program: "
<<
startup_program
;
return
-
1
;
}
_context
->
executor
.
Run
(
*
startup_program
,
this
->
scope
(),
0
,
false
,
true
);
_context
->
main_program
=
Load
(
&
_context
->
executor
,
exe_config
[
"main_program"
].
as
<
std
::
string
>
());
_context
->
main_program
=
Load
(
&
_context
->
executor
,
main_program
);
if
(
_context
->
main_program
==
nullptr
)
{
VLOG
(
2
)
<<
"fail to load main_program: "
<<
exe_config
[
"main_program"
].
as
<
std
::
string
>
()
;
VLOG
(
0
)
<<
"fail to load main_program: "
<<
main_program
;
return
-
1
;
}
_context
->
prepare_context
=
_context
->
executor
.
Prepare
(
*
_context
->
main_program
,
0
);
_context
->
executor
.
CreateVariables
(
*
_context
->
main_program
,
this
->
scope
(),
0
);
}
catch
(
::
paddle
::
platform
::
EnforceNotMet
&
err
)
{
VLOG
(
2
)
<<
err
.
what
();
VLOG
(
0
)
<<
err
.
what
();
_context
.
reset
(
nullptr
);
return
-
1
;
}
return
0
;
}
virtual
int
run
()
{
virtual
int
initialize_scope
(
::
paddle
::
framework
::
Scope
*
scope
)
{
_context
->
executor
.
Run
(
*
_context
->
startup_program
,
scope
,
0
,
false
,
true
);
_context
->
executor
.
CreateVariables
(
*
_context
->
main_program
,
scope
,
0
);
return
0
;
}
virtual
int
run
(
::
paddle
::
framework
::
Scope
*
scope
)
{
if
(
_context
==
nullptr
)
{
VLOG
(
2
)
<<
"need initialize before run"
;
return
-
1
;
}
try
{
_context
->
executor
.
RunPreparedContext
(
_context
->
prepare_context
.
get
(),
this
->
scope
()
,
false
,
/* don't create local scope each time*/
false
/* don't create variable each time */
);
_context
->
executor
.
RunPreparedContext
(
_context
->
prepare_context
.
get
(),
scope
,
false
,
/* don't create local scope each time*/
false
/* don't create variable each time */
);
// For some other vector like containers not cleaned after each batch.
_context
->
tensor_array_batch_cleaner
.
CollectNoTensorVars
(
this
->
scope
()
);
_context
->
tensor_array_batch_cleaner
.
CollectNoTensorVars
(
scope
);
_context
->
tensor_array_batch_cleaner
.
ResetNoTensorVars
();
}
catch
(
::
paddle
::
platform
::
EnforceNotMet
&
err
)
{
VLOG
(
2
)
<<
err
.
what
();
...
...
@@ -115,13 +112,14 @@ protected:
const
::
paddle
::
platform
::
Place
&
place
;
::
paddle
::
framework
::
Executor
executor
;
::
std
::
unique_ptr
<::
paddle
::
framework
::
ProgramDesc
>
main_program
;
::
std
::
unique_ptr
<::
paddle
::
framework
::
ProgramDesc
>
startup_program
;
::
std
::
unique_ptr
<
framework
::
ExecutorPrepareContext
>
prepare_context
;
details
::
TensorArrayBatchCleaner
tensor_array_batch_cleaner
;
};
std
::
unique_ptr
<
Context
>
_context
;
};
REGIST
ER
_CLASS
(
Executor
,
SimpleExecutor
);
REGIST_CLASS
(
Executor
,
SimpleExecutor
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/executor/executor.h
浏览文件 @
b410c4e1
...
...
@@ -13,30 +13,16 @@ public:
Executor
()
{}
virtual
~
Executor
()
{}
//初始化,包括进行训练网络&配置加载工作
//
初始化,包括进行训练网络&配置加载工作
virtual
int
initialize
(
YAML
::
Node
exe_config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
=
0
;
//scope 可用于填充&取 var
virtual
::
paddle
::
framework
::
Scope
*
scope
()
{
return
&
_scope
;
}
//直接取var
template
<
class
T
>
const
T
&
var
(
const
std
::
string
&
name
)
{
return
_scope
.
Var
(
name
)
->
Get
<
T
>
();
}
template
<
class
T
>
T
*
mutable_var
(
const
std
::
string
&
name
)
{
return
_scope
.
Var
(
name
)
->
GetMutable
<
T
>
();
}
// 初始化scope, 后续反复执行训练,不再初始化
virtual
int
initialize_scope
(
::
paddle
::
framework
::
Scope
*
scope
)
=
0
;
// 执行训练
virtual
int
run
(
::
paddle
::
framework
::
Scope
*
scope
)
=
0
;
//执行训练
virtual
int
run
()
=
0
;
virtual
bool
is_dump_all_model
()
{
return
false
;
}
// cost time millisecond
virtual
uint64_t
epoch_cost
()
const
{
return
0
;
...
...
@@ -44,7 +30,7 @@ public:
protected:
::
paddle
::
framework
::
Scope
_scope
;
};
REGIST
ER
_REGISTERER
(
Executor
);
REGIST_REGISTERER
(
Executor
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.cc
0 → 100644
浏览文件 @
b410c4e1
#include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
// Initializes the multi-threaded trainer from `exe_config`:
//  1. reads batch/thread-count settings,
//  2. creates and initializes one Executor per training thread (in
//     parallel via OpenMP),
//  3. sets up a pooled-Scope allocator,
//  4. loads model.yaml and instantiates/initializes its input accessors,
//     grouping them by parameter-server table_id.
// Returns 0 on success; any hard failure aborts via CHECK.
int MultiThreadExecutor::initialize(YAML::Node exe_config,
    std::shared_ptr<TrainerContext> context_ptr) {
    int ret = 0;
    _trainer_context = context_ptr.get();
    _train_data_name = exe_config["train_data_name"].as<std::string>();
    _train_batch_size = exe_config["train_batch_size"].as<int>();
    _input_parse_thread_num = exe_config["input_parse_thread_num"].as<int>();
    _push_gradient_thread_num = exe_config["push_gradient_thread_num"].as<int>();
    _train_thread_num = exe_config["train_thread_num"].as<int>();
    _need_dump_all_model = exe_config["need_dump_all_model"].as<bool>();
    CHECK(_train_thread_num > 0 && _train_batch_size > 0);
    _thread_executors.resize(_train_thread_num);
    auto e_class = exe_config["class"].as<std::string>();
    _train_exe_name = exe_config["name"].as<std::string>();

    // Create the per-thread executors in parallel; each slot is written by
    // exactly one OMP thread, so no further synchronization is needed.
    omp_set_num_threads(_train_thread_num);
    #pragma omp parallel for
    for (int i = 0; i < _train_thread_num; ++i) {
        auto* e_ptr = CREATE_INSTANCE(Executor, e_class);
        _thread_executors[i].reset(e_ptr);
        if (e_ptr->initialize(exe_config, context_ptr) != 0) {
            VLOG(0) << "executor initialize failed, name:" << _train_exe_name
                << " class:" << e_class;
            ret = -1;
        }
    }
    CHECK(ret == 0);

    // Scope buffer pool: scopes are pre-initialized once (via executor 0)
    // and recycled across batches. Pool is sized 8 scopes per train thread.
    _scope_obj_pool.reset(new paddle::ps::ObjectPool<::paddle::framework::Scope>(
        [this]() -> ::paddle::framework::Scope* {
            auto* scope = new ::paddle::framework::Scope();
            _thread_executors[0]->initialize_scope(scope);
            return scope;
        }, _train_thread_num * 8, 0, _train_thread_num * 8));

    // Load the model network configuration (./model/<exe_name>/model.yaml).
    std::string model_config_path = _trainer_context->file_system->path_join(
        "./model", string::format_string("%s/model.yaml", _train_exe_name.c_str()));
    CHECK(_trainer_context->file_system->exists(model_config_path))
        << "miss model config file:" << model_config_path;
    _model_config = YAML::LoadFile(model_config_path);
    for (const auto& accessor_config : _model_config["input_accessor"]) {
        auto accessor_class = accessor_config["class"].as<std::string>();
        auto* accessor_ptr = CREATE_INSTANCE(DataInputAccessor, accessor_class);
        _input_accessors.emplace_back(accessor_ptr);
        CHECK(accessor_ptr->initialize(accessor_config, context_ptr) == 0)
            << "InputAccessor init Failed, class:" << accessor_class;
        if (accessor_config["table_id"]) {
            // Group accessors by PS table so table-level ops can find them.
            auto table_id = accessor_config["table_id"].as<int>();
            if (_table_to_accessors.count(table_id) > 0) {
                _table_to_accessors[table_id].push_back(accessor_ptr);
            } else {
                _table_to_accessors[table_id] = {accessor_ptr};
            }
        }
    }
    return ret;
}
// Runs one epoch of training over `input` as a three-stage pipeline:
//   1. parse stage  : DataItem -> SampleInstance batch -> pooled Scope
//                     (accessors' forward() fills the scope; the raw
//                     SampleInstance array pointer and count are stashed in
//                     the scope as int64 values "sample_data"/"sample_num"),
//   2. train stage  : runs the per-thread executor on each scope,
//   3. gradient stage: accessors' backward() pushes gradients, then the
//                     SampleInstance array is freed.
// Returns the channel holding the raw input data (held for replay/dump).
//
// Fix: the gradient stage previously assigned each accessor's backward()
// status directly to out_items[out_idx] inside the accessor loop, so a
// failure from any accessor but the last was silently discarded. Statuses
// are now OR-accumulated (0 == success for every accessor; any non-zero
// status is preserved).
paddle::framework::Channel<DataItem> MultiThreadExecutor::run(
    paddle::framework::Channel<DataItem> input, const DataParser* parser) {
    PipelineOptions input_pipe_option;
    input_pipe_option.need_hold_input_data = true;
    input_pipe_option.batch_size = 1;
    input_pipe_option.thread_num = _input_parse_thread_num;
    input_pipe_option.input_output_rate = _train_batch_size;
    input_pipe_option.buffer_batch_count = _train_thread_num;
    auto input_pipe = std::make_shared<Pipeline<DataItem, ScopePoolObj>>();
    input_pipe->initialize(input_pipe_option, input,
        [this, parser](DataItem* item, size_t item_num,
            ScopePoolObj* scope, size_t* scope_num, size_t thread_idx) -> int {
            *scope_num = 1;
            auto scope_obj = _scope_obj_pool->get();
            // Ownership of `samples` is transferred through the scope to the
            // gradient stage, which deletes it.
            auto* samples = new SampleInstance[item_num];
            for (size_t i = 0; i < item_num; ++i) {
                CHECK(parser->parse_to_sample(item[i], samples[i]) == 0);
            }
            for (size_t i = 0; i < _input_accessors.size(); ++i) {
                _input_accessors[i]->forward(samples, item_num, scope_obj.get());
            }
            // Smuggle the raw pointer and count through the scope as int64s.
            int64_t data_for_scope = (int64_t)samples;
            ScopeHelper::fill_value(scope_obj.get(),
                _trainer_context->cpu_place, "sample_data", data_for_scope);
            data_for_scope = (int64_t)item_num;
            ScopeHelper::fill_value(scope_obj.get(),
                _trainer_context->cpu_place, "sample_num", data_for_scope);
            *scope = std::move(scope_obj);
            return 0;
        });

    PipelineOptions train_pipe_option;
    train_pipe_option.input_output_rate = 1;
    train_pipe_option.thread_num = _train_thread_num;
    train_pipe_option.buffer_batch_count = 2 * _train_thread_num;
    auto train_pipe = std::make_shared<Pipeline<ScopePoolObj, ScopePoolObj>>();
    train_pipe->connect_to(*input_pipe, train_pipe_option,
        [this](ScopePoolObj* in_items, size_t in_num,
            ScopePoolObj* out_items, size_t* out_num, size_t thread_idx) -> int {
            auto* executor = _thread_executors[thread_idx].get();
            size_t& out_idx = *out_num;
            for (out_idx = 0; out_idx < in_num; ++out_idx) {
                // NOTE(review): the actual training run is disabled here —
                // presumably work-in-progress at this commit; confirm before
                // relying on this executor for real training.
                //CHECK(executor->run(in_items[out_idx].get()) == 0);
                out_items[out_idx] = std::move(in_items[out_idx]);
            }
            return 0;
        });

    PipelineOptions gradient_pipe_option;
    gradient_pipe_option.input_output_rate = 1;
    gradient_pipe_option.thread_num = _push_gradient_thread_num;
    gradient_pipe_option.buffer_batch_count = 2 * _train_thread_num;
    auto gradient_pipe = std::make_shared<Pipeline<ScopePoolObj, int>>();
    gradient_pipe->connect_to(*train_pipe, gradient_pipe_option,
        [this](ScopePoolObj* in_items, size_t in_num,
            int* out_items, size_t* out_num, size_t thread_idx) -> int {
            size_t& out_idx = *out_num;
            for (out_idx = 0; out_idx < in_num; ++out_idx) {
                auto* scope = in_items[out_idx].get();
                auto sample_num = *ScopeHelper::get_value<int64_t>(
                    scope, _trainer_context->cpu_place, "sample_num");
                auto* samples = (SampleInstance*)(*ScopeHelper::get_value<int64_t>(
                    scope, _trainer_context->cpu_place, "sample_data"));
                // Accumulate statuses so no accessor's failure is lost.
                int backward_status = 0;
                for (size_t i = 0; i < _input_accessors.size(); ++i) {
                    backward_status |=
                        _input_accessors[i]->backward(samples, sample_num, scope);
                }
                out_items[out_idx] = backward_status;
                // Samples are reclaimed only here, after every pipe stage is
                // done with them (original note: 所有pipe完成后,再回收sample).
                delete[] samples;
            }
            return 0;
        });

    // Drain the final stage; this drives the whole pipeline to completion.
    std::vector<int> gradient_status;
    while (gradient_pipe->read(gradient_status) > 0) {
    }
    return input_pipe->backup_channel();
}
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/executor/multi_thread_executor.h
0 → 100644
浏览文件 @
b410c4e1
#pragma once
#include <functional>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
#include "paddle/fluid/train/custom_trainer/feed/accessor/input_data_accessor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
typedef
paddle
::
ps
::
ObjectPool
<::
paddle
::
framework
::
Scope
>::
PooledObject
ScopePoolObj
;
class
MultiThreadExecutor
{
public:
MultiThreadExecutor
()
{}
virtual
~
MultiThreadExecutor
()
{}
//初始化,包括进行训练网络&配置加载工作
virtual
int
initialize
(
YAML
::
Node
exe_config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
);
//执行训练
virtual
paddle
::
framework
::
Channel
<
DataItem
>
run
(
paddle
::
framework
::
Channel
<
DataItem
>
input
,
const
DataParser
*
parser
);
virtual
bool
is_dump_all_model
()
{
return
_need_dump_all_model
;
}
virtual
const
std
::
string
&
train_exe_name
()
{
return
_train_exe_name
;
}
virtual
const
std
::
string
&
train_data_name
()
{
return
_train_data_name
;
}
virtual
const
std
::
map
<
uint32_t
,
std
::
vector
<
DataInputAccessor
*>>&
table_accessors
()
{
return
_table_to_accessors
;
}
virtual
ScopePoolObj
fetch_scope
()
{
ScopePoolObj
scope_obj
(
_scope_obj_pool
->
get
());
return
scope_obj
;
}
protected:
std
::
string
_train_data_name
;
size_t
_train_batch_size
=
32
;
size_t
_train_thread_num
=
12
;
size_t
_input_parse_thread_num
=
10
;
size_t
_push_gradient_thread_num
=
10
;
bool
_need_dump_all_model
=
false
;
YAML
::
Node
_model_config
;
std
::
string
_train_exe_name
;
TrainerContext
*
_trainer_context
=
nullptr
;
std
::
vector
<
std
::
shared_ptr
<
Executor
>>
_thread_executors
;
std
::
vector
<
std
::
shared_ptr
<
DataInputAccessor
>>
_input_accessors
;
std
::
map
<
uint32_t
,
std
::
vector
<
DataInputAccessor
*>>
_table_to_accessors
;
std
::
shared_ptr
<
paddle
::
ps
::
ObjectPool
<::
paddle
::
framework
::
Scope
>>
_scope_obj_pool
;
};
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/io/auto_file_system.cc
浏览文件 @
b410c4e1
...
...
@@ -17,7 +17,7 @@ public:
_file_system
.
clear
();
if
(
config
&&
config
[
"file_systems"
]
&&
config
[
"file_systems"
].
Type
()
==
YAML
::
NodeType
::
Map
)
{
for
(
auto
&
prefix_fs
:
config
[
"file_systems"
])
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
prefix_fs
.
second
[
"class"
].
as
<
std
::
string
>
(
""
)));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
prefix_fs
.
second
[
"class"
].
as
<
std
::
string
>
(
""
)));
if
(
fs
==
nullptr
)
{
LOG
(
FATAL
)
<<
"fail to create class: "
<<
prefix_fs
.
second
[
"class"
].
as
<
std
::
string
>
(
""
);
return
-
1
;
...
...
@@ -31,7 +31,7 @@ public:
}
if
(
_file_system
.
find
(
"default"
)
==
_file_system
.
end
())
{
LOG
(
WARNING
)
<<
"miss default file_system, use LocalFileSystem as default"
;
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
if
(
fs
==
nullptr
||
fs
->
initialize
(
YAML
::
Load
(
""
),
context
)
!=
0
)
{
return
-
1
;
}
...
...
@@ -62,8 +62,8 @@ public:
return
get_file_system
(
path
)
->
list
(
path
);
}
std
::
string
tail
(
const
std
::
string
&
path
)
override
{
return
get_file_system
(
path
)
->
tail
(
path
);
std
::
string
tail
(
const
std
::
string
&
path
,
size_t
tail_num
=
1
)
override
{
return
get_file_system
(
path
)
->
tail
(
path
,
tail_num
);
}
bool
exists
(
const
std
::
string
&
path
)
override
{
...
...
@@ -86,29 +86,10 @@ public:
return
_file_system
[
"default"
].
get
();
}
int
err_no
()
const
override
{
if
(
_err_no
==
0
)
{
for
(
const
auto
&
file_system
:
_file_system
)
{
if
(
file_system
.
second
->
err_no
()
!=
0
)
{
const_cast
<
int
&>
(
_err_no
)
=
-
1
;
break
;
}
}
}
return
FileSystem
::
err_no
();
}
void
reset_err_no
()
override
{
_err_no
=
0
;
for
(
auto
&
file_system
:
_file_system
)
{
file_system
.
second
->
reset_err_no
();
}
}
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
FileSystem
>>
_file_system
;
};
REGIST
ER
_CLASS
(
FileSystem
,
AutoFileSystem
);
REGIST_CLASS
(
FileSystem
,
AutoFileSystem
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/io/file_system.h
浏览文件 @
b410c4e1
...
...
@@ -21,24 +21,14 @@ public:
virtual
int64_t
file_size
(
const
std
::
string
&
path
)
=
0
;
virtual
void
remove
(
const
std
::
string
&
path
)
=
0
;
virtual
std
::
vector
<
std
::
string
>
list
(
const
std
::
string
&
path
)
=
0
;
virtual
std
::
string
tail
(
const
std
::
string
&
path
)
=
0
;
virtual
std
::
string
tail
(
const
std
::
string
&
path
,
size_t
tail_num
=
1
)
=
0
;
virtual
bool
exists
(
const
std
::
string
&
path
)
=
0
;
virtual
void
mkdir
(
const
std
::
string
&
path
)
=
0
;
virtual
std
::
string
path_join
(
const
std
::
string
&
dir
,
const
std
::
string
&
path
);
virtual
std
::
pair
<
std
::
string
,
std
::
string
>
path_split
(
const
std
::
string
&
path
);
virtual
int
err_no
()
const
{
return
_err_no
;
}
inline
operator
bool
()
{
return
err_no
()
==
0
;
}
virtual
void
reset_err_no
()
{
_err_no
=
0
;
}
protected:
int
_err_no
=
0
;
};
REGIST
ER
_REGISTERER
(
FileSystem
);
REGIST_REGISTERER
(
FileSystem
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/io/hadoop_file_system.cc
浏览文件 @
b410c4e1
...
...
@@ -33,6 +33,7 @@ public:
std
::
shared_ptr
<
FILE
>
open_read
(
const
std
::
string
&
path
,
const
std
::
string
&
converter
)
override
{
int
err_no
=
0
;
std
::
string
cmd
;
if
(
string
::
end_with
(
path
,
".gz"
))
{
cmd
=
string
::
format_string
(
...
...
@@ -43,11 +44,12 @@ public:
bool
is_pipe
=
true
;
shell_add_read_converter
(
cmd
,
is_pipe
,
converter
);
return
shell_open
(
cmd
,
is_pipe
,
"r"
,
_buffer_size
,
&
_
err_no
);
return
shell_open
(
cmd
,
is_pipe
,
"r"
,
_buffer_size
,
&
err_no
);
}
std
::
shared_ptr
<
FILE
>
open_write
(
const
std
::
string
&
path
,
const
std
::
string
&
converter
)
override
{
int
err_no
=
0
;
std
::
string
cmd
=
string
::
format_string
(
"%s -put -
\"
%s
\"
"
,
hdfs_command
(
path
).
c_str
(),
path
.
c_str
());
bool
is_pipe
=
true
;
...
...
@@ -57,11 +59,10 @@ public:
}
shell_add_write_converter
(
cmd
,
is_pipe
,
converter
);
return
shell_open
(
cmd
,
is_pipe
,
"w"
,
_buffer_size
,
&
_
err_no
);
return
shell_open
(
cmd
,
is_pipe
,
"w"
,
_buffer_size
,
&
err_no
);
}
int64_t
file_size
(
const
std
::
string
&
path
)
override
{
_err_no
=
-
1
;
LOG
(
FATAL
)
<<
"not support"
;
return
0
;
}
...
...
@@ -107,13 +108,13 @@ public:
return
list
;
}
std
::
string
tail
(
const
std
::
string
&
path
)
override
{
std
::
string
tail
(
const
std
::
string
&
path
,
size_t
tail_num
=
1
)
override
{
if
(
path
==
""
)
{
return
""
;
}
return
shell_get_command_output
(
string
::
format_string
(
"%s -text %s | tail -
1 "
,
hdfs_command
(
path
).
c_str
(),
path
.
c_str
()
));
"%s -text %s | tail -
%u"
,
hdfs_command
(
path
).
c_str
(),
path
.
c_str
(),
tail_num
));
}
bool
exists
(
const
std
::
string
&
path
)
override
{
...
...
@@ -189,7 +190,7 @@ private:
std
::
string
_hdfs_command
;
std
::
unordered_map
<
std
::
string
,
std
::
string
>
_ugi
;
};
REGIST
ER
_CLASS
(
FileSystem
,
HadoopFileSystem
);
REGIST_CLASS
(
FileSystem
,
HadoopFileSystem
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/io/local_file_system.cc
浏览文件 @
b410c4e1
...
...
@@ -64,10 +64,10 @@ public:
if
(
path
==
""
)
{
return
{};
}
int
err_no
;
std
::
shared_ptr
<
FILE
>
pipe
;
pipe
=
shell_popen
(
string
::
format_string
(
"find %s -maxdepth 1 -type f"
,
path
.
c_str
()),
"r"
,
&
_
err_no
);
string
::
format_string
(
"find %s -maxdepth 1 -type f"
,
path
.
c_str
()),
"r"
,
&
err_no
);
string
::
LineFileReader
reader
;
std
::
vector
<
std
::
string
>
list
;
...
...
@@ -78,12 +78,12 @@ public:
return
list
;
}
std
::
string
tail
(
const
std
::
string
&
path
)
override
{
std
::
string
tail
(
const
std
::
string
&
path
,
size_t
tail_num
=
1
)
override
{
if
(
path
==
""
)
{
return
""
;
}
return
shell_get_command_output
(
string
::
format_string
(
"tail -
1 %s "
,
path
.
c_str
()));
return
shell_get_command_output
(
string
::
format_string
(
"tail -
%u %s "
,
tail_num
,
path
.
c_str
()));
}
bool
exists
(
const
std
::
string
&
path
)
override
{
...
...
@@ -115,7 +115,7 @@ public:
private:
size_t
_buffer_size
=
0
;
};
REGIST
ER
_CLASS
(
FileSystem
,
LocalFileSystem
);
REGIST_CLASS
(
FileSystem
,
LocalFileSystem
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/main.cc
浏览文件 @
b410c4e1
#include <time.h>
#include <fstream>
#include <yaml-cpp/yaml.h>
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/train/custom_trainer/feed/process/process.h"
#include "paddle/fluid/train/custom_trainer/feed/process/init_env_process.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -22,28 +22,57 @@ int main(int argc, char* argv[]) {
//load trainer config
auto
trainer_context_ptr
=
std
::
make_shared
<
TrainerContext
>
();
trainer_context_ptr
->
trainer_config
=
YAML
::
LoadFile
(
FLAGS_feed_trainer_conf_path
);
std
::
vector
<
std
::
string
>
process_name_list
=
{
"InitEnvProcess"
,
"LearnerProcess"
};
for
(
const
auto
&
process_name
:
process_name_list
)
{
Process
*
process
=
CREATE_CLASS
(
Process
,
process_name
);
if
(
process
==
NULL
)
{
VLOG
(
1
)
<<
"Process:"
<<
process_name
<<
" does not exist"
;
return
-
1
;
}
if
(
process
->
initialize
(
trainer_context_ptr
)
!=
0
)
{
VLOG
(
1
)
<<
"Process:"
<<
process_name
<<
" initialize failed"
;
return
-
1
;
}
trainer_context_ptr
->
process_list
.
push_back
(
std
::
shared_ptr
<
Process
>
(
process
));
//environment
auto
&
config
=
trainer_context_ptr
->
trainer_config
;
std
::
string
env_class
=
config
[
"environment"
][
"environment_class"
].
as
<
std
::
string
>
();
trainer_context_ptr
->
environment
.
reset
(
CREATE_INSTANCE
(
RuntimeEnvironment
,
env_class
));
if
(
trainer_context_ptr
->
environment
->
initialize
(
config
[
"environment"
])
!=
0
)
{
return
-
1
;
}
auto
*
environment
=
trainer_context_ptr
->
environment
.
get
();
environment
->
wireup
();
if
(
environment
->
node_num
(
EnvironmentRole
::
ALL
)
==
1
)
{
environment
->
add_role
(
EnvironmentRole
::
WORKER
);
environment
->
add_role
(
EnvironmentRole
::
PSERVER
);
}
else
if
(
environment
->
rank_id
(
EnvironmentRole
::
ALL
)
%
2
==
0
)
{
environment
->
add_role
(
EnvironmentRole
::
WORKER
);
}
else
{
environment
->
add_role
(
EnvironmentRole
::
PSERVER
);
}
for
(
auto
&
process
:
trainer_context_ptr
->
process_list
)
{
process
->
run
();
trainer_context_ptr
->
pslib
.
reset
(
new
PSlib
());
std
::
string
ps_config
=
config
[
"environment"
][
"ps"
].
as
<
std
::
string
>
();
trainer_context_ptr
->
pslib
->
initialize
(
ps_config
,
environment
);
//VLOG(3) << "Node Start With Role:" << role;
if
(
environment
->
is_role
(
EnvironmentRole
::
WORKER
))
{
std
::
vector
<
std
::
string
>
process_name_list
=
{
"InitEnvProcess"
,
"LearnerProcess"
};
for
(
const
auto
&
process_name
:
process_name_list
)
{
Process
*
process
=
CREATE_INSTANCE
(
Process
,
process_name
);
if
(
process
==
NULL
)
{
VLOG
(
1
)
<<
"Process:"
<<
process_name
<<
" does not exist"
;
return
-
1
;
}
if
(
process
->
initialize
(
trainer_context_ptr
)
!=
0
)
{
VLOG
(
1
)
<<
"Process:"
<<
process_name
<<
" initialize failed"
;
return
-
1
;
}
trainer_context_ptr
->
process_list
.
push_back
(
std
::
shared_ptr
<
Process
>
(
process
));
}
for
(
auto
&
process
:
trainer_context_ptr
->
process_list
)
{
process
->
run
();
}
}
//TODO exit control
bool
running
=
true
;
while
(
running
)
{
sleep
(
10000
);
}
return
0
;
}
paddle/fluid/train/custom_trainer/feed/monitor/monitor.h
浏览文件 @
b410c4e1
...
...
@@ -43,7 +43,7 @@ protected:
std
::
shared_ptr
<
TrainerContext
>
_context_ptr
;
};
REGIST
ER
_REGISTERER
(
Monitor
);
REGIST_REGISTERER
(
Monitor
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/process/init_env_process.cc
浏览文件 @
b410c4e1
...
...
@@ -20,22 +20,16 @@ int InitEnvProcess::initialize(std::shared_ptr<TrainerContext> context_ptr) {
context_ptr
->
cpu_place
=
paddle
::
platform
::
CPUPlace
();
YAML
::
Node
config
=
_context_ptr
->
trainer_config
;
//environment
std
::
string
env_class
=
config
[
"environment"
][
"environment_class"
].
as
<
std
::
string
>
();
context_ptr
->
environment
.
reset
(
CREATE_CLASS
(
RuntimeEnvironment
,
env_class
));
if
(
context_ptr
->
environment
->
initialize
(
config
[
"environment"
])
!=
0
)
{
return
-
1
;
}
//file_system
context_ptr
->
file_system
.
reset
(
CREATE_
CLASS
(
FileSystem
,
"AutoFileSystem"
));
context_ptr
->
file_system
.
reset
(
CREATE_
INSTANCE
(
FileSystem
,
"AutoFileSystem"
));
if
(
context_ptr
->
file_system
->
initialize
(
config
[
"io"
],
context_ptr
)
!=
0
)
{
return
-
1
;
}
//epoch
std
::
string
epoch_class
=
config
[
"epoch"
][
"epoch_class"
].
as
<
std
::
string
>
();
context_ptr
->
epoch_accessor
.
reset
(
CREATE_
CLASS
(
EpochAccessor
,
epoch_class
));
context_ptr
->
epoch_accessor
.
reset
(
CREATE_
INSTANCE
(
EpochAccessor
,
epoch_class
));
if
(
context_ptr
->
epoch_accessor
->
initialize
(
config
[
"epoch"
],
context_ptr
)
!=
0
)
{
return
-
1
;
}
...
...
@@ -55,12 +49,6 @@ int InitEnvProcess::run() {
VLOG
(
3
)
<<
"Trainer Resume From epoch:"
<<
epoch_accessor
->
current_epoch_id
();
auto
next_epoch_id
=
epoch_accessor
->
next_epoch_id
(
epoch_accessor
->
current_epoch_id
());
_context_ptr
->
dataset
->
pre_detect_data
(
next_epoch_id
);
//step 1. psserver init
//step2. psserver load
VLOG
(
3
)
<<
"Psserver Start Success"
;
//context_ptr->pslib_client()->load_model();
VLOG
(
3
)
<<
"Psserver Load Model Success"
;
return
0
;
}
...
...
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
浏览文件 @
b410c4e1
...
...
@@ -3,6 +3,7 @@
*Train样本
*/
#include <omp.h>
#include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
#include "paddle/fluid/train/custom_trainer/feed/dataset/dataset.h"
#include "paddle/fluid/train/custom_trainer/feed/accessor/epoch_accessor.h"
#include "paddle/fluid/train/custom_trainer/feed/process/learner_process.h"
...
...
@@ -14,23 +15,11 @@ namespace feed {
int
LearnerProcess
::
initialize
(
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
int
ret
=
Process
::
initialize
(
context_ptr
);
auto
&
config
=
_context_ptr
->
trainer_config
;
_train_thread_num
=
config
[
"train_thread_num"
].
as
<
int
>
();
_threads_executor
.
resize
(
_train_thread_num
);
if
(
config
[
"executor"
])
{
_executor_num
=
config
[
"executor"
].
size
();
omp_set_num_threads
(
_train_thread_num
);
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
_train_thread_num
;
++
i
)
{
_threads_executor
[
i
].
resize
(
_executor_num
);
for
(
int
e
=
0
;
e
<
_executor_num
;
++
e
)
{
auto
e_class
=
config
[
"executor"
][
e
][
"class"
].
as
<
std
::
string
>
();
auto
*
e_ptr
=
CREATE_CLASS
(
Executor
,
e_class
);
_threads_executor
[
i
][
e
].
reset
(
e_ptr
);
if
(
e_ptr
->
initialize
(
config
[
"executor"
][
e
],
context_ptr
)
!=
0
)
{
ret
=
-
1
;
}
}
_executors
.
resize
(
config
[
"executor"
].
size
());
for
(
size_t
i
=
0
;
i
<
_executors
.
size
();
++
i
)
{
_executors
[
i
].
reset
(
new
MultiThreadExecutor
());
CHECK
(
_executors
[
i
]
->
initialize
(
config
[
"executor"
][
i
],
context_ptr
)
==
0
);
}
}
return
0
;
...
...
@@ -39,9 +28,12 @@ int LearnerProcess::initialize(std::shared_ptr<TrainerContext> context_ptr) {
std
::
future
<
int
>
LearnerProcess
::
save_model
(
uint64_t
epoch_id
,
int
table_id
,
ModelSaveWay
way
)
{
std
::
promise
<
int
>
p
;
auto
ret
=
p
.
get_future
();
if
(
_context_ptr
->
epoch_accessor
->
need_save_model
(
epoch_id
,
way
))
{
//TODO
//context_ptr->pslib_client()->save();
auto
*
ps_client
=
_context_ptr
->
pslib
->
ps_client
();
auto
*
epoch_accessor
=
_context_ptr
->
epoch_accessor
.
get
();
if
(
epoch_accessor
->
need_save_model
(
epoch_id
,
way
))
{
VLOG
(
2
)
<<
"Start save model, table_id:"
<<
table_id
;
auto
model_dir
=
epoch_accessor
->
model_save_path
(
epoch_id
,
way
);
return
ps_client
->
save
(
table_id
,
model_dir
,
std
::
to_string
((
int
)
way
));
}
else
{
p
.
set_value
(
0
);
}
...
...
@@ -53,14 +45,19 @@ int LearnerProcess::wait_save_model(uint64_t epoch_id, ModelSaveWay way) {
if
(
!
environment
->
is_master_node
(
EnvironmentRole
::
WORKER
))
{
return
0
;
}
std
::
set
<
uint32_t
>
table_set
;
for
(
auto
&
executor
:
_executors
)
{
const
auto
&
table_accessors
=
executor
->
table_accessors
();
for
(
auto
&
itr
:
table_accessors
)
{
table_set
.
insert
(
itr
.
first
);
}
}
int
ret_size
=
0
;
auto
table_num
=
_context_ptr
->
params_table_lis
t
.
size
();
auto
table_num
=
table_se
t
.
size
();
std
::
future
<
int
>
rets
[
table_num
];
for
(
int
i
=
0
;
i
<
table_num
;
++
i
)
{
auto
table_id
=
_context_ptr
->
params_table_list
[
i
].
table_id
();
for
(
auto
table_id
:
table_set
)
{
rets
[
ret_size
++
]
=
save_model
(
epoch_id
,
table_id
,
way
);
}
int
all_ret
=
0
;
for
(
int
i
=
0
;
i
<
ret_size
;
++
i
)
{
rets
[
i
].
wait
();
...
...
@@ -69,6 +66,36 @@ int LearnerProcess::wait_save_model(uint64_t epoch_id, ModelSaveWay way) {
return
all_ret
;
}
int
LearnerProcess
::
load_model
(
uint64_t
epoch_id
)
{
auto
*
environment
=
_context_ptr
->
environment
.
get
();
if
(
!
environment
->
is_master_node
(
EnvironmentRole
::
WORKER
))
{
return
0
;
}
std
::
set
<
uint32_t
>
loaded_table_set
;
auto
model_dir
=
_context_ptr
->
epoch_accessor
->
checkpoint_path
();
for
(
auto
&
executor
:
_executors
)
{
const
auto
&
table_accessors
=
executor
->
table_accessors
();
for
(
auto
&
itr
:
table_accessors
)
{
if
(
loaded_table_set
.
count
(
itr
.
first
))
{
continue
;
}
auto
table_model_path
=
_context_ptr
->
file_system
->
path_join
(
model_dir
,
string
::
format_string
(
"%03d"
,
itr
.
first
));
if
(
_context_ptr
->
file_system
->
list
(
table_model_path
).
size
()
==
0
)
{
VLOG
(
2
)
<<
"miss table_model:"
<<
table_model_path
<<
", initialize by default"
;
auto
scope
=
std
::
move
(
executor
->
fetch_scope
());
CHECK
(
itr
.
second
[
0
]
->
create
(
scope
.
get
())
==
0
);
}
else
{
auto
status
=
_context_ptr
->
ps_client
()
->
load
(
itr
.
first
,
model_dir
,
std
::
to_string
((
int
)
ModelSaveWay
::
ModelSaveTrainCheckpoint
));
CHECK
(
status
.
get
()
==
0
)
<<
"table load failed, id:"
<<
itr
.
first
;
}
loaded_table_set
.
insert
(
itr
.
first
);
}
}
return
0
;
}
int
LearnerProcess
::
run
()
{
auto
*
dataset
=
_context_ptr
->
dataset
.
get
();
auto
*
environment
=
_context_ptr
->
environment
.
get
();
...
...
@@ -76,61 +103,82 @@ int LearnerProcess::run() {
uint64_t
epoch_id
=
epoch_accessor
->
current_epoch_id
();
environment
->
log
(
EnvironmentRole
::
WORKER
,
EnvironmentLogType
::
MASTER_LOG
,
EnvironmentLogLevel
::
NOTICE
,
"Resume train with epoch_id:%d
label:
%s"
,
epoch_id
,
_context_ptr
->
epoch_accessor
->
text
(
epoch_id
).
c_str
());
"Resume train with epoch_id:%d %s"
,
epoch_id
,
_context_ptr
->
epoch_accessor
->
text
(
epoch_id
).
c_str
());
//尝试加载模型 or 初始化
CHECK
(
load_model
(
epoch_id
)
==
0
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
//判断是否先dump出base
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveInferenceBase
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
while
(
true
)
{
epoch_accessor
->
next_epoch
();
bool
already_dump_inference_model
=
false
;
epoch_id
=
epoch_accessor
->
current_epoch_id
();
std
::
string
epoch_log_title
=
paddle
::
string
::
format_string
(
std
::
string
epoch_log_title
=
paddle
::
string
::
format_string
(
"train epoch_id:%d label:%s"
,
epoch_id
,
epoch_accessor
->
text
(
epoch_id
).
c_str
());
std
::
string
data_path
=
paddle
::
string
::
to_string
<
std
::
string
>
(
dataset
->
epoch_data_path
(
epoch_id
));
//Step1. 等待样本ready
environment
->
log
(
EnvironmentRole
::
WORKER
,
EnvironmentLogType
::
MASTER_LOG
,
EnvironmentLogLevel
::
NOTICE
,
"Start %s, wait data ready"
,
epoch_log_title
.
c_str
());
while
(
dataset
->
epoch_data_status
(
epoch_id
)
!=
DatasetStatus
::
Ready
)
{
sleep
(
30
);
dataset
->
pre_detect_data
(
epoch_id
);
{
environment
->
log
(
EnvironmentRole
::
WORKER
,
EnvironmentLogType
::
MASTER_LOG
,
EnvironmentLogLevel
::
NOTICE
,
"%s, data not ready, wait 30s"
,
epoch_log_title
.
c_str
());
}
environment
->
log
(
EnvironmentRole
::
WORKER
,
EnvironmentLogType
::
MASTER_LOG
,
EnvironmentLogLevel
::
NOTICE
,
"%s, data is ready, start traning"
,
epoch_log_title
.
c_str
());
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
"%s, wait data ready:%s"
,
epoch_log_title
.
c_str
(),
data_path
.
c_str
());
while
(
dataset
->
epoch_data_status
(
epoch_id
)
!=
DatasetStatus
::
Ready
)
{
sleep
(
30
);
dataset
->
pre_detect_data
(
epoch_id
);
environment
->
log
(
EnvironmentRole
::
WORKER
,
EnvironmentLogType
::
MASTER_LOG
,
EnvironmentLogLevel
::
NOTICE
,
"data not ready, wait 30s"
);
}
environment
->
log
(
EnvironmentRole
::
WORKER
,
EnvironmentLogType
::
MASTER_LOG
,
EnvironmentLogLevel
::
NOTICE
,
"Start %s, data is ready"
,
epoch_log_title
.
c_str
());
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
}
//Step2. 运行训练网络
bool
already_dump_inference_model
=
false
;
for
(
int
i
=
0
;
i
<
_executor_num
;
++
i
)
{
std
::
vector
<
std
::
shared_ptr
<
std
::
thread
>>
train_threads
(
_train_thread_num
);
for
(
int
thread_id
=
0
;
thread_id
<
_train_thread_num
;
++
thread_id
)
{
train_threads
[
i
].
reset
(
new
std
::
thread
([
this
](
int
exe_idx
,
int
thread_idx
)
{
auto
*
executor
=
_threads_executor
[
thread_idx
][
exe_idx
].
get
();
run_executor
(
executor
);
},
i
,
thread_id
));
}
for
(
int
i
=
0
;
i
<
_train_thread_num
;
++
i
)
{
train_threads
[
i
]
->
join
();
{
std
::
map
<
std
::
string
,
paddle
::
framework
::
Channel
<
DataItem
>>
backup_input_map
;
for
(
auto
&
executor
:
_executors
)
{
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
VLOG
(
2
)
<<
"Start executor:"
<<
executor
->
train_exe_name
();
auto
data_name
=
executor
->
train_data_name
();
paddle
::
framework
::
Channel
<
DataItem
>
input_channel
;
if
(
backup_input_map
.
count
(
data_name
))
{
input_channel
=
backup_input_map
[
data_name
];
}
else
{
input_channel
=
dataset
->
fetch_data
(
data_name
,
epoch_id
);
}
input_channel
=
executor
->
run
(
input_channel
,
dataset
->
data_parser
(
data_name
));
VLOG
(
2
)
<<
"End executor:"
<<
executor
->
train_exe_name
();
// 等待异步梯度完成
_context_ptr
->
ps_client
()
->
flush
();
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
if
(
executor
->
is_dump_all_model
())
{
already_dump_inference_model
=
true
;
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveInferenceDelta
);
}
backup_input_map
[
data_name
]
=
input_channel
;
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
}
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
}
if
(
_threads_executor
[
0
][
i
]
->
is_dump_all_model
())
{
//Step3. Dump Model For Delta&&Checkpoint
{
if
(
!
already_dump_inference_model
)
{
already_dump_inference_model
=
true
;
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveInferenceDelta
);
}
}
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveTrainCheckpoint
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
}
//Step3. Dump Model For Delta&&Checkpoint
if
(
!
already_dump_inference_model
)
{
already_dump_inference_model
=
true
;
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveInferenceDelta
);
}
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveTrainCheckpoint
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
epoch_accessor
->
epoch_done
(
epoch_id
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
}
//Step4. Output Monitor && RunStatus
//TODO
}
...
...
@@ -138,11 +186,6 @@ int LearnerProcess::run() {
return
0
;
}
int
LearnerProcess
::
run_executor
(
Executor
*
executor
)
{
//TODO
return
0
;
}
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/process/learner_process.h
浏览文件 @
b410c4e1
...
...
@@ -4,13 +4,11 @@
*/
#pragma once
#include "paddle/fluid/train/custom_trainer/feed/process/process.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/
multi_thread_
executor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
typedef
std
::
vector
<
std
::
shared_ptr
<
Executor
>>
MultiExecutor
;
class
LearnerProcess
:
public
Process
{
public:
LearnerProcess
()
{}
...
...
@@ -20,19 +18,15 @@ public:
virtual
int
initialize
(
std
::
shared_ptr
<
TrainerContext
>
context_ptr
);
protected:
//同步保存所有模型
// 加载所有模型
virtual
int
load_model
(
uint64_t
epoch_id
);
// 同步保存所有模型
virtual
int
wait_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
way
);
//异步保存指定模型
//
异步保存指定模型
virtual
std
::
future
<
int
>
save_model
(
uint64_t
epoch_id
,
int
table_id
,
ModelSaveWay
way
);
//执行指定训练网络
virtual
int
run_executor
(
Executor
*
executor
);
private:
int
_executor_num
=
0
;
//需要执行训练的网络个数
int
_train_thread_num
=
1
;
//并行训练线程数
std
::
vector
<
MultiExecutor
>
_threads_executor
;
std
::
vector
<
std
::
shared_ptr
<
MultiThreadExecutor
>>
_executors
;
};
}
// namespace feed
...
...
paddle/fluid/train/custom_trainer/feed/process/process.cc
浏览文件 @
b410c4e1
...
...
@@ -5,8 +5,8 @@
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
REGIST
ER
_CLASS
(
Process
,
InitEnvProcess
);
REGIST
ER
_CLASS
(
Process
,
LearnerProcess
);
REGIST_CLASS
(
Process
,
InitEnvProcess
);
REGIST_CLASS
(
Process
,
LearnerProcess
);
int
Process
::
run
()
{
return
0
;
}
...
...
paddle/fluid/train/custom_trainer/feed/process/process.h
浏览文件 @
b410c4e1
...
...
@@ -18,7 +18,7 @@ public:
protected:
TrainerContext
*
_context_ptr
=
NULL
;
};
REGIST
ER
_REGISTERER
(
Process
);
REGIST_REGISTERER
(
Process
);
}
// namespace feed
}
// namespace custom_trainer
...
...
paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py
浏览文件 @
b410c4e1
...
...
@@ -95,7 +95,7 @@ class ModelBuilder:
main_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_program
,
startup_program
):
inputs
,
outputs
=
self
.
_inference
()
input
_accessor
,
sparses
,
input
s
,
outputs
=
self
.
_inference
()
test_program
=
main_program
.
clone
(
for_test
=
True
)
loss
,
labels
=
self
.
_loss_function
(
*
outputs
)
...
...
@@ -115,14 +115,34 @@ class ModelBuilder:
f
.
write
(
program
.
desc
.
serialize_to_string
())
params
=
filter
(
fluid
.
io
.
is_parameter
,
main_program
.
list_vars
())
vars
=
[]
sums
=
[]
for
param
in
params
:
if
param
.
name
.
find
(
"bn"
)
==
0
:
sums
.
append
({
"name"
:
param
.
name
,
"shape"
:
param
.
shape
});
else
:
vars
.
append
({
"name"
:
param
.
name
,
"shape"
:
param
.
shape
});
for
accessor
in
input_accessor
:
if
(
accessor
[
"input"
]
==
"sparses"
):
accessor
[
"input"
]
=
sparses
if
(
accessor
[
"input"
]
==
"vars"
):
accessor
[
"input"
]
=
vars
if
(
accessor
[
"input"
]
==
"sums"
):
accessor
[
"input"
]
=
sums
if
(
accessor
[
"input"
]
==
"labels"
):
accessor
[
"input"
]
=
[
{
"label_name"
:
label
.
name
,
"shape"
:
label
.
shape
,
"output_name"
:
output
.
name
}
for
(
label
,
output
)
in
zip
(
labels
,
outputs
)
]
model_desc_path
=
os
.
path
.
join
(
self
.
_save_path
,
'model.yaml'
)
model_desc
=
{
'inputs'
:
[{
"name"
:
var
.
name
,
"shape"
:
var
.
shape
}
for
var
in
inputs
],
'outputs'
:
[{
"name"
:
var
.
name
,
"shape"
:
var
.
shape
}
for
var
in
outputs
],
'labels'
:
[{
"name"
:
var
.
name
,
"shape"
:
var
.
shape
}
for
var
in
labels
],
'vars'
:
[{
"name"
:
var
.
name
,
"shape"
:
var
.
shape
}
for
var
in
params
],
'loss'
:
loss
.
name
,
'input_accessor'
:
input_accessor
}
with
open
(
model_desc_path
,
'w'
)
as
f
:
...
...
paddle/fluid/train/custom_trainer/feed/scripts/example.py
浏览文件 @
b410c4e1
...
...
@@ -18,9 +18,11 @@ def inference():
list<Variable>: outputs
"""
# TODO: build network here
cvm_input
=
fluid
.
layers
.
data
(
name
=
'cvm_input'
,
shape
=
[
4488
],
dtype
=
'float32'
)
cvm_input
=
fluid
.
layers
.
data
(
name
=
'cvm_input'
,
shape
=
[
4488
],
dtype
=
'float32'
,
stop_gradient
=
False
)
net
=
cvm_input
net
=
fluid
.
layers
.
data_norm
(
input
=
net
,
name
=
"bn6048"
,
epsilon
=
1e-4
,
param_attr
=
{
"batch_size"
:
1e4
,
"batch_sum_default"
:
0.0
,
"batch_square"
:
1e4
})
net
=
fluid
.
layers
.
fc
(
net
,
512
,
act
=
'relu'
,
name
=
'fc_1'
)
net
=
fluid
.
layers
.
fc
(
net
,
256
,
act
=
'relu'
,
name
=
'fc_2'
)
net
=
fluid
.
layers
.
fc
(
net
,
256
,
act
=
'relu'
,
name
=
'fc_3'
)
...
...
paddle/fluid/train/custom_trainer/feed/scripts/join.py
0 → 100644
浏览文件 @
b410c4e1
#!/usr/bin/env python
#-*- coding:utf-8 -*-
"""
This is an example of network building
"""
from
__future__
import
print_function
,
division
import
paddle
from
paddle
import
fluid
def
sparse_cvm_dim
(
sparse_info
):
return
sparse_info
[
'slot_dim'
]
*
len
(
sparse_info
[
'slots'
])
def
inference
():
"""Build inference network(without loss and optimizer)
Returns:
list<Dict>: sparse_inputs
and
list<Variable>: inputs
and
list<Variable>: outputs
"""
sparse_cvm
=
{
"name"
:
"cvm_input"
,
"slot_dim"
:
11
,
"slots"
:
[
6048
,
6002
,
6145
,
6202
,
6201
,
6121
,
6738
,
6119
,
6146
,
6120
,
6147
,
6122
,
6123
,
6118
,
6142
,
6143
,
6008
,
6148
,
6151
,
6127
,
6144
,
6094
,
6083
,
6952
,
6739
,
6150
,
6109
,
6003
,
6099
,
6149
,
6129
,
6203
,
6153
,
6152
,
6128
,
6106
,
6251
,
7082
,
7515
,
6951
,
6949
,
7080
,
6066
,
7507
,
6186
,
6007
,
7514
,
6125
,
7506
,
10001
,
6006
,
7023
,
6085
,
10000
,
6098
,
6250
,
6110
,
6124
,
6090
,
6082
,
6067
,
6101
,
6004
,
6191
,
7075
,
6948
,
6157
,
6126
,
6188
,
7077
,
6070
,
6111
,
6087
,
6103
,
6107
,
6194
,
6156
,
6005
,
6247
,
6814
,
6158
,
7122
,
6058
,
6189
,
7058
,
6059
,
6115
,
7079
,
7081
,
6833
,
7024
,
6108
,
13342
,
13345
,
13412
,
13343
,
13350
,
13346
,
13409
,
6009
,
6011
,
6012
,
6013
,
6014
,
6015
,
6019
,
6023
,
6024
,
6027
,
6029
,
6031
,
6050
,
6060
,
6068
,
6069
,
6089
,
6095
,
6105
,
6112
,
6130
,
6131
,
6132
,
6134
,
6161
,
6162
,
6163
,
6166
,
6182
,
6183
,
6185
,
6190
,
6212
,
6213
,
6231
,
6233
,
6234
,
6236
,
6238
,
6239
,
6240
,
6241
,
6242
,
6243
,
6244
,
6245
,
6354
,
7002
,
7005
,
7008
,
7010
,
7012
,
7013
,
7015
,
7016
,
7017
,
7018
,
7019
,
7020
,
7045
,
7046
,
7048
,
7049
,
7052
,
7054
,
7056
,
7064
,
7066
,
7076
,
7078
,
7083
,
7084
,
7085
,
7086
,
7087
,
7088
,
7089
,
7090
,
7099
,
7100
,
7101
,
7102
,
7103
,
7104
,
7105
,
7109
,
7124
,
7126
,
7136
,
7142
,
7143
,
7144
,
7145
,
7146
,
7147
,
7148
,
7150
,
7151
,
7152
,
7153
,
7154
,
7155
,
7156
,
7157
,
7047
,
7050
,
6253
,
6254
,
6255
,
6256
,
6257
,
6259
,
6260
,
6261
,
7170
,
7185
,
7186
,
6751
,
6755
,
6757
,
6759
,
6760
,
6763
,
6764
,
6765
,
6766
,
6767
,
6768
,
6769
,
6770
,
7502
,
7503
,
7504
,
7505
,
7510
,
7511
,
7512
,
7513
,
6806
,
6807
,
6808
,
6809
,
6810
,
6811
,
6812
,
6813
,
6815
,
6816
,
6817
,
6819
,
6823
,
6828
,
6831
,
6840
,
6845
,
6875
,
6879
,
6881
,
6888
,
6889
,
6947
,
6950
,
6956
,
6957
,
6959
,
10006
,
10008
,
10009
,
10010
,
10011
,
10016
,
10017
,
10018
,
10019
,
10020
,
10021
,
10022
,
10023
,
10024
,
10029
,
10030
,
10031
,
10032
,
10033
,
10034
,
10035
,
10036
,
10037
,
10038
,
10039
,
10040
,
10041
,
10042
,
10044
,
10045
,
10046
,
10051
,
10052
,
10053
,
10054
,
10055
,
10056
,
10057
,
10060
,
10066
,
10069
,
6820
,
6821
,
6822
,
13333
,
13334
,
13335
,
13336
,
13337
,
13338
,
13339
,
13340
,
13341
,
13351
,
13352
,
13353
,
13359
,
13361
,
13362
,
13363
,
13366
,
13367
,
13368
,
13369
,
13370
,
13371
,
13375
,
13376
,
5700
,
5702
,
13400
,
13401
,
13402
,
13403
,
13404
,
13406
,
13407
,
13408
,
13410
,
13417
,
13418
,
13419
,
13420
,
13422
,
13425
,
13427
,
13428
,
13429
,
13430
,
13431
,
13433
,
13434
,
13436
,
13437
,
13326
,
13330
,
13331
,
5717
,
13442
,
13451
,
13452
,
13455
,
13456
,
13457
,
13458
,
13459
,
13460
,
13461
,
13462
,
13463
,
13464
,
13465
,
13466
,
13467
,
13468
,
1104
,
1106
,
1107
,
1108
,
1109
,
1110
,
1111
,
1112
,
1113
,
1114
,
1115
,
1116
,
1117
,
1119
,
1120
,
1121
,
1122
,
1123
,
1124
,
1125
,
1126
,
1127
,
1128
,
1129
,
13812
,
13813
,
6740
,
1490
,
1491
]}
# TODO: build network here
cvm_input
=
fluid
.
layers
.
data
(
name
=
'cvm_input'
,
shape
=
[
sparse_cvm_dim
(
sparse_cvm
)],
dtype
=
'float32'
,
stop_gradient
=
False
)
net
=
cvm_input
net
=
fluid
.
layers
.
data_norm
(
input
=
net
,
name
=
"bn6048"
,
epsilon
=
1e-4
,
param_attr
=
{
"batch_size"
:
1e4
,
"batch_sum_default"
:
0.0
,
"batch_square"
:
1e4
})
net
=
fluid
.
layers
.
fc
(
net
,
511
,
act
=
'relu'
,
name
=
'fc_1'
)
net
=
fluid
.
layers
.
fc
(
net
,
255
,
act
=
'relu'
,
name
=
'fc_2'
)
net
=
fluid
.
layers
.
fc
(
net
,
255
,
act
=
'relu'
,
name
=
'fc_3'
)
net
=
fluid
.
layers
.
fc
(
net
,
127
,
act
=
'relu'
,
name
=
'fc_4'
)
net
=
fluid
.
layers
.
fc
(
net
,
127
,
act
=
'relu'
,
name
=
'fc_5'
)
net
=
fluid
.
layers
.
fc
(
net
,
127
,
act
=
'relu'
,
name
=
'fc_6'
)
net
=
fluid
.
layers
.
fc
(
net
,
127
,
act
=
'relu'
,
name
=
'fc_7'
)
ctr_output
=
fluid
.
layers
.
fc
(
net
,
1
,
act
=
'sigmoid'
,
name
=
'ctr'
)
accessors
=
[
{
"class"
:
"AbacusSparseUpdateAccessor"
,
"input"
:
"sparses"
,
"table_id"
:
0
,
"need_gradient"
:
False
},
{
"class"
:
"DenseInputAccessor"
,
"input"
:
"vars"
,
"table_id"
:
1
,
"need_gradient"
:
True
,
"async_pull"
:
True
},
{
"class"
:
"DenseInputAccessor"
,
"input"
:
"sums"
,
"table_id"
:
2
,
"need_gradient"
:
True
,
"async_pull"
:
True
},
{
"class"
:
"LabelInputAccessor"
,
"input"
:
"labels"
}
]
return
accessors
,
[
sparse_cvm
],
[
cvm_input
],
[
ctr_output
]
def
loss_function
(
ctr_output
):
"""
Args:
*outputs: the second result of inference()
Returns:
Variable: loss
and
list<Variable>: labels
"""
# TODO: calc loss here
label
=
fluid
.
layers
.
data
(
name
=
'label_ctr'
,
shape
=
ctr_output
.
shape
,
dtype
=
'float32'
)
loss
=
fluid
.
layers
.
square_error_cost
(
input
=
ctr_output
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
,
name
=
'loss_ctr'
)
return
loss
,
[
label
]
paddle/fluid/train/custom_trainer/feed/scripts/model/example/main_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/example/model.yaml
0 → 100644
浏览文件 @
b410c4e1
inputs
:
-
name
:
cvm_input
shape
:
[
-1
,
4488
]
labels
:
-
name
:
label_ctr
shape
:
[
-1
,
1
]
loss
:
loss_ctr
outputs
:
-
name
:
ctr.tmp_2
shape
:
[
-1
,
1
]
vars
:
-
name
:
bn6048.batch_size
shape
:
[
4488
]
-
name
:
bn6048.batch_sum
shape
:
[
4488
]
-
name
:
bn6048.batch_square_sum
shape
:
[
4488
]
-
name
:
fc_1.w_0
shape
:
[
4488
,
512
]
-
name
:
fc_1.b_0
shape
:
[
512
]
-
name
:
fc_2.w_0
shape
:
[
512
,
256
]
-
name
:
fc_2.b_0
shape
:
[
256
]
-
name
:
fc_3.w_0
shape
:
[
256
,
256
]
-
name
:
fc_3.b_0
shape
:
[
256
]
-
name
:
fc_4.w_0
shape
:
[
256
,
128
]
-
name
:
fc_4.b_0
shape
:
[
128
]
-
name
:
fc_5.w_0
shape
:
[
128
,
128
]
-
name
:
fc_5.b_0
shape
:
[
128
]
-
name
:
fc_6.w_0
shape
:
[
128
,
128
]
-
name
:
fc_6.b_0
shape
:
[
128
]
-
name
:
fc_7.w_0
shape
:
[
128
,
128
]
-
name
:
fc_7.b_0
shape
:
[
128
]
-
name
:
ctr.w_0
shape
:
[
128
,
1
]
-
name
:
ctr.b_0
shape
:
[
1
]
paddle/fluid/train/custom_trainer/feed/scripts/model/example/startup_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/example/test_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/join/main_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/join/model.yaml
0 → 100644
浏览文件 @
b410c4e1
input_accessor
:
-
class
:
AbacusSparseUpdateAccessor
input
:
-
name
:
cvm_input
slot_dim
:
11
slots
:
[
6048
,
6002
,
6145
,
6202
,
6201
,
6121
,
6738
,
6119
,
6146
,
6120
,
6147
,
6122
,
6123
,
6118
,
6142
,
6143
,
6008
,
6148
,
6151
,
6127
,
6144
,
6094
,
6083
,
6952
,
6739
,
6150
,
6109
,
6003
,
6099
,
6149
,
6129
,
6203
,
6153
,
6152
,
6128
,
6106
,
6251
,
7082
,
7515
,
6951
,
6949
,
7080
,
6066
,
7507
,
6186
,
6007
,
7514
,
6125
,
7506
,
10001
,
6006
,
7023
,
6085
,
10000
,
6098
,
6250
,
6110
,
6124
,
6090
,
6082
,
6067
,
6101
,
6004
,
6191
,
7075
,
6948
,
6157
,
6126
,
6188
,
7077
,
6070
,
6111
,
6087
,
6103
,
6107
,
6194
,
6156
,
6005
,
6247
,
6814
,
6158
,
7122
,
6058
,
6189
,
7058
,
6059
,
6115
,
7079
,
7081
,
6833
,
7024
,
6108
,
13342
,
13345
,
13412
,
13343
,
13350
,
13346
,
13409
,
6009
,
6011
,
6012
,
6013
,
6014
,
6015
,
6019
,
6023
,
6024
,
6027
,
6029
,
6031
,
6050
,
6060
,
6068
,
6069
,
6089
,
6095
,
6105
,
6112
,
6130
,
6131
,
6132
,
6134
,
6161
,
6162
,
6163
,
6166
,
6182
,
6183
,
6185
,
6190
,
6212
,
6213
,
6231
,
6233
,
6234
,
6236
,
6238
,
6239
,
6240
,
6241
,
6242
,
6243
,
6244
,
6245
,
6354
,
7002
,
7005
,
7008
,
7010
,
7012
,
7013
,
7015
,
7016
,
7017
,
7018
,
7019
,
7020
,
7045
,
7046
,
7048
,
7049
,
7052
,
7054
,
7056
,
7064
,
7066
,
7076
,
7078
,
7083
,
7084
,
7085
,
7086
,
7087
,
7088
,
7089
,
7090
,
7099
,
7100
,
7101
,
7102
,
7103
,
7104
,
7105
,
7109
,
7124
,
7126
,
7136
,
7142
,
7143
,
7144
,
7145
,
7146
,
7147
,
7148
,
7150
,
7151
,
7152
,
7153
,
7154
,
7155
,
7156
,
7157
,
7047
,
7050
,
6253
,
6254
,
6255
,
6256
,
6257
,
6259
,
6260
,
6261
,
7170
,
7185
,
7186
,
6751
,
6755
,
6757
,
6759
,
6760
,
6763
,
6764
,
6765
,
6766
,
6767
,
6768
,
6769
,
6770
,
7502
,
7503
,
7504
,
7505
,
7510
,
7511
,
7512
,
7513
,
6806
,
6807
,
6808
,
6809
,
6810
,
6811
,
6812
,
6813
,
6815
,
6816
,
6817
,
6819
,
6823
,
6828
,
6831
,
6840
,
6845
,
6875
,
6879
,
6881
,
6888
,
6889
,
6947
,
6950
,
6956
,
6957
,
6959
,
10006
,
10008
,
10009
,
10010
,
10011
,
10016
,
10017
,
10018
,
10019
,
10020
,
10021
,
10022
,
10023
,
10024
,
10029
,
10030
,
10031
,
10032
,
10033
,
10034
,
10035
,
10036
,
10037
,
10038
,
10039
,
10040
,
10041
,
10042
,
10044
,
10045
,
10046
,
10051
,
10052
,
10053
,
10054
,
10055
,
10056
,
10057
,
10060
,
10066
,
10069
,
6820
,
6821
,
6822
,
13333
,
13334
,
13335
,
13336
,
13337
,
13338
,
13339
,
13340
,
13341
,
13351
,
13352
,
13353
,
13359
,
13361
,
13362
,
13363
,
13366
,
13367
,
13368
,
13369
,
13370
,
13371
,
13375
,
13376
,
5700
,
5702
,
13400
,
13401
,
13402
,
13403
,
13404
,
13406
,
13407
,
13408
,
13410
,
13417
,
13418
,
13419
,
13420
,
13422
,
13425
,
13427
,
13428
,
13429
,
13430
,
13431
,
13433
,
13434
,
13436
,
13437
,
13326
,
13330
,
13331
,
5717
,
13442
,
13451
,
13452
,
13455
,
13456
,
13457
,
13458
,
13459
,
13460
,
13461
,
13462
,
13463
,
13464
,
13465
,
13466
,
13467
,
13468
,
1104
,
1106
,
1107
,
1108
,
1109
,
1110
,
1111
,
1112
,
1113
,
1114
,
1115
,
1116
,
1117
,
1119
,
1120
,
1121
,
1122
,
1123
,
1124
,
1125
,
1126
,
1127
,
1128
,
1129
,
13812
,
13813
,
6740
,
1490
,
1491
]
need_gradient
:
false
table_id
:
0
-
async_pull
:
true
class
:
DenseInputAccessor
input
:
-
name
:
fc_1.w_0
shape
:
[
4488
,
511
]
-
name
:
fc_1.b_0
shape
:
[
511
]
-
name
:
fc_2.w_0
shape
:
[
511
,
255
]
-
name
:
fc_2.b_0
shape
:
[
255
]
-
name
:
fc_3.w_0
shape
:
[
255
,
255
]
-
name
:
fc_3.b_0
shape
:
[
255
]
-
name
:
fc_4.w_0
shape
:
[
255
,
127
]
-
name
:
fc_4.b_0
shape
:
[
127
]
-
name
:
fc_5.w_0
shape
:
[
127
,
127
]
-
name
:
fc_5.b_0
shape
:
[
127
]
-
name
:
fc_6.w_0
shape
:
[
127
,
127
]
-
name
:
fc_6.b_0
shape
:
[
127
]
-
name
:
fc_7.w_0
shape
:
[
127
,
127
]
-
name
:
fc_7.b_0
shape
:
[
127
]
-
name
:
ctr.w_0
shape
:
[
127
,
1
]
-
name
:
ctr.b_0
shape
:
[
1
]
need_gradient
:
true
table_id
:
1
-
async_pull
:
true
class
:
DenseInputAccessor
input
:
-
name
:
bn6048.batch_size
shape
:
[
4488
]
-
name
:
bn6048.batch_sum
shape
:
[
4488
]
-
name
:
bn6048.batch_square_sum
shape
:
[
4488
]
need_gradient
:
true
table_id
:
2
-
class
:
LabelInputAccessor
input
:
-
label_name
:
label_ctr
output_name
:
ctr.tmp_2
shape
:
[
-1
,
1
]
inputs
:
-
name
:
cvm_input
shape
:
[
-1
,
4488
]
labels
:
-
name
:
label_ctr
shape
:
[
-1
,
1
]
loss
:
loss_ctr
outputs
:
-
name
:
ctr.tmp_2
shape
:
[
-1
,
1
]
paddle/fluid/train/custom_trainer/feed/scripts/model/join/startup_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/join/test_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/update/main_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/update/model.yaml
0 → 100644
浏览文件 @
b410c4e1
input_accessor
:
-
class
:
AbacusSparseUpdateAccessor
input
:
-
name
:
cvm_input
slot_dim
:
9
slots
:
[
6048
,
6002
,
6145
,
6202
,
6201
,
6121
,
6738
,
6119
,
6146
,
6120
,
6147
,
6122
,
6123
,
6118
,
6142
,
6143
,
6008
,
6148
,
6151
,
6127
,
6144
,
6094
,
6083
,
6952
,
6739
,
6150
,
6109
,
6003
,
6099
,
6149
,
6129
,
6203
,
6153
,
6152
,
6128
,
6106
,
6251
,
7082
,
7515
,
6951
,
6949
,
7080
,
6066
,
7507
,
6186
,
6007
,
7514
,
6125
,
7506
,
10001
,
6006
,
7023
,
6085
,
10000
,
6098
,
6250
,
6110
,
6124
,
6090
,
6082
,
6067
,
6101
,
6004
,
6191
,
7075
,
6948
,
6157
,
6126
,
6188
,
7077
,
6070
,
6111
,
6087
,
6103
,
6107
,
6194
,
6156
,
6005
,
6247
,
6814
,
6158
,
7122
,
6058
,
6189
,
7058
,
6059
,
6115
,
7079
,
7081
,
6833
,
7024
,
6108
,
13342
,
13345
,
13412
,
13343
,
13350
,
13346
,
13409
,
6009
,
6011
,
6012
,
6013
,
6014
,
6015
,
6019
,
6023
,
6024
,
6027
,
6029
,
6031
,
6050
,
6060
,
6068
,
6069
,
6089
,
6095
,
6105
,
6112
,
6130
,
6131
,
6132
,
6134
,
6161
,
6162
,
6163
,
6166
,
6182
,
6183
,
6185
,
6190
,
6212
,
6213
,
6231
,
6233
,
6234
,
6236
,
6238
,
6239
,
6240
,
6241
,
6242
,
6243
,
6244
,
6245
,
6354
,
7002
,
7005
,
7008
,
7010
,
7012
,
7013
,
7015
,
7016
,
7017
,
7018
,
7019
,
7020
,
7045
,
7046
,
7048
,
7049
,
7052
,
7054
,
7056
,
7064
,
7066
,
7076
,
7078
,
7083
,
7084
,
7085
,
7086
,
7087
,
7088
,
7089
,
7090
,
7099
,
7100
,
7101
,
7102
,
7103
,
7104
,
7105
,
7109
,
7124
,
7126
,
7136
,
7142
,
7143
,
7144
,
7145
,
7146
,
7147
,
7148
,
7150
,
7151
,
7152
,
7153
,
7154
,
7155
,
7156
,
7157
,
7047
,
7050
,
6253
,
6254
,
6255
,
6256
,
6257
,
6259
,
6260
,
6261
,
7170
,
7185
,
7186
,
6751
,
6755
,
6757
,
6759
,
6760
,
6763
,
6764
,
6765
,
6766
,
6767
,
6768
,
6769
,
6770
,
7502
,
7503
,
7504
,
7505
,
7510
,
7511
,
7512
,
7513
,
6806
,
6807
,
6808
,
6809
,
6810
,
6811
,
6812
,
6813
,
6815
,
6816
,
6817
,
6819
,
6823
,
6828
,
6831
,
6840
,
6845
,
6875
,
6879
,
6881
,
6888
,
6889
,
6947
,
6950
,
6956
,
6957
,
6959
,
10006
,
10008
,
10009
,
10010
,
10011
,
10016
,
10017
,
10018
,
10019
,
10020
,
10021
,
10022
,
10023
,
10024
,
10029
,
10030
,
10031
,
10032
,
10033
,
10034
,
10035
,
10036
,
10037
,
10038
,
10039
,
10040
,
10041
,
10042
,
10044
,
10045
,
10046
,
10051
,
10052
,
10053
,
10054
,
10055
,
10056
,
10057
,
10060
,
10066
,
10069
,
6820
,
6821
,
6822
,
13333
,
13334
,
13335
,
13336
,
13337
,
13338
,
13339
,
13340
,
13341
,
13351
,
13352
,
13353
,
13359
,
13361
,
13362
,
13363
,
13366
,
13367
,
13368
,
13369
,
13370
,
13371
,
13375
,
13376
,
5700
,
5702
,
13400
,
13401
,
13402
,
13403
,
13404
,
13406
,
13407
,
13408
,
13410
,
13417
,
13418
,
13419
,
13420
,
13422
,
13425
,
13427
,
13428
,
13429
,
13430
,
13431
,
13433
,
13434
,
13436
,
13437
,
13326
,
13330
,
13331
,
5717
,
13442
,
13451
,
13452
,
13455
,
13456
,
13457
,
13458
,
13459
,
13460
,
13461
,
13462
,
13463
,
13464
,
13465
,
13466
,
13467
,
13468
,
1104
,
1106
,
1107
,
1108
,
1109
,
1110
,
1111
,
1112
,
1113
,
1114
,
1115
,
1116
,
1117
,
1119
,
1120
,
1121
,
1122
,
1123
,
1124
,
1125
,
1126
,
1127
,
1128
,
1129
,
13812
,
13813
,
6740
,
1490
,
1491
]
need_gradient
:
true
table_id
:
0
-
async_pull
:
true
class
:
DenseInputAccessor
input
:
-
name
:
fc_1.w_0
shape
:
[
3672
,
511
]
-
name
:
fc_1.b_0
shape
:
[
511
]
-
name
:
fc_2.w_0
shape
:
[
511
,
255
]
-
name
:
fc_2.b_0
shape
:
[
255
]
-
name
:
fc_3.w_0
shape
:
[
255
,
127
]
-
name
:
fc_3.b_0
shape
:
[
127
]
-
name
:
fc_4.w_0
shape
:
[
127
,
127
]
-
name
:
fc_4.b_0
shape
:
[
127
]
-
name
:
fc_5.w_0
shape
:
[
127
,
127
]
-
name
:
fc_5.b_0
shape
:
[
127
]
-
name
:
ctr.w_0
shape
:
[
127
,
1
]
-
name
:
ctr.b_0
shape
:
[
1
]
need_gradient
:
true
table_id
:
3
-
class
:
LabelInputAccessor
input
:
-
label_name
:
label_ctr
output_name
:
ctr.tmp_2
shape
:
[
-1
,
1
]
inputs
:
-
name
:
cvm_input
shape
:
[
-1
,
3672
]
labels
:
-
name
:
label_ctr
shape
:
[
-1
,
1
]
loss
:
loss_ctr
outputs
:
-
name
:
ctr.tmp_2
shape
:
[
-1
,
1
]
paddle/fluid/train/custom_trainer/feed/scripts/model/update/startup_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/model/update/test_program
0 → 100644
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/scripts/update.py
0 → 100644
浏览文件 @
b410c4e1
#!/usr/bin/env python
#-*- coding:utf-8 -*-
"""
This is an example of network building
"""
from
__future__
import
print_function
,
division
import
paddle
from
paddle
import
fluid
def
sparse_cvm_dim
(
sparse_info
):
return
sparse_info
[
'slot_dim'
]
*
len
(
sparse_info
[
'slots'
])
def
inference
():
"""Build inference network(without loss and optimizer)
Returns:
list<Variable>: inputs
and
list<Variable>: outputs
"""
sparse_cvm
=
{
"name"
:
"cvm_input"
,
"slot_dim"
:
9
,
"slots"
:
[
6048
,
6002
,
6145
,
6202
,
6201
,
6121
,
6738
,
6119
,
6146
,
6120
,
6147
,
6122
,
6123
,
6118
,
6142
,
6143
,
6008
,
6148
,
6151
,
6127
,
6144
,
6094
,
6083
,
6952
,
6739
,
6150
,
6109
,
6003
,
6099
,
6149
,
6129
,
6203
,
6153
,
6152
,
6128
,
6106
,
6251
,
7082
,
7515
,
6951
,
6949
,
7080
,
6066
,
7507
,
6186
,
6007
,
7514
,
6125
,
7506
,
10001
,
6006
,
7023
,
6085
,
10000
,
6098
,
6250
,
6110
,
6124
,
6090
,
6082
,
6067
,
6101
,
6004
,
6191
,
7075
,
6948
,
6157
,
6126
,
6188
,
7077
,
6070
,
6111
,
6087
,
6103
,
6107
,
6194
,
6156
,
6005
,
6247
,
6814
,
6158
,
7122
,
6058
,
6189
,
7058
,
6059
,
6115
,
7079
,
7081
,
6833
,
7024
,
6108
,
13342
,
13345
,
13412
,
13343
,
13350
,
13346
,
13409
,
6009
,
6011
,
6012
,
6013
,
6014
,
6015
,
6019
,
6023
,
6024
,
6027
,
6029
,
6031
,
6050
,
6060
,
6068
,
6069
,
6089
,
6095
,
6105
,
6112
,
6130
,
6131
,
6132
,
6134
,
6161
,
6162
,
6163
,
6166
,
6182
,
6183
,
6185
,
6190
,
6212
,
6213
,
6231
,
6233
,
6234
,
6236
,
6238
,
6239
,
6240
,
6241
,
6242
,
6243
,
6244
,
6245
,
6354
,
7002
,
7005
,
7008
,
7010
,
7012
,
7013
,
7015
,
7016
,
7017
,
7018
,
7019
,
7020
,
7045
,
7046
,
7048
,
7049
,
7052
,
7054
,
7056
,
7064
,
7066
,
7076
,
7078
,
7083
,
7084
,
7085
,
7086
,
7087
,
7088
,
7089
,
7090
,
7099
,
7100
,
7101
,
7102
,
7103
,
7104
,
7105
,
7109
,
7124
,
7126
,
7136
,
7142
,
7143
,
7144
,
7145
,
7146
,
7147
,
7148
,
7150
,
7151
,
7152
,
7153
,
7154
,
7155
,
7156
,
7157
,
7047
,
7050
,
6253
,
6254
,
6255
,
6256
,
6257
,
6259
,
6260
,
6261
,
7170
,
7185
,
7186
,
6751
,
6755
,
6757
,
6759
,
6760
,
6763
,
6764
,
6765
,
6766
,
6767
,
6768
,
6769
,
6770
,
7502
,
7503
,
7504
,
7505
,
7510
,
7511
,
7512
,
7513
,
6806
,
6807
,
6808
,
6809
,
6810
,
6811
,
6812
,
6813
,
6815
,
6816
,
6817
,
6819
,
6823
,
6828
,
6831
,
6840
,
6845
,
6875
,
6879
,
6881
,
6888
,
6889
,
6947
,
6950
,
6956
,
6957
,
6959
,
10006
,
10008
,
10009
,
10010
,
10011
,
10016
,
10017
,
10018
,
10019
,
10020
,
10021
,
10022
,
10023
,
10024
,
10029
,
10030
,
10031
,
10032
,
10033
,
10034
,
10035
,
10036
,
10037
,
10038
,
10039
,
10040
,
10041
,
10042
,
10044
,
10045
,
10046
,
10051
,
10052
,
10053
,
10054
,
10055
,
10056
,
10057
,
10060
,
10066
,
10069
,
6820
,
6821
,
6822
,
13333
,
13334
,
13335
,
13336
,
13337
,
13338
,
13339
,
13340
,
13341
,
13351
,
13352
,
13353
,
13359
,
13361
,
13362
,
13363
,
13366
,
13367
,
13368
,
13369
,
13370
,
13371
,
13375
,
13376
,
5700
,
5702
,
13400
,
13401
,
13402
,
13403
,
13404
,
13406
,
13407
,
13408
,
13410
,
13417
,
13418
,
13419
,
13420
,
13422
,
13425
,
13427
,
13428
,
13429
,
13430
,
13431
,
13433
,
13434
,
13436
,
13437
,
13326
,
13330
,
13331
,
5717
,
13442
,
13451
,
13452
,
13455
,
13456
,
13457
,
13458
,
13459
,
13460
,
13461
,
13462
,
13463
,
13464
,
13465
,
13466
,
13467
,
13468
,
1104
,
1106
,
1107
,
1108
,
1109
,
1110
,
1111
,
1112
,
1113
,
1114
,
1115
,
1116
,
1117
,
1119
,
1120
,
1121
,
1122
,
1123
,
1124
,
1125
,
1126
,
1127
,
1128
,
1129
,
13812
,
13813
,
6740
,
1490
,
1491
]}
# TODO: build network here
cvm_input
=
fluid
.
layers
.
data
(
name
=
'cvm_input'
,
shape
=
[
sparse_cvm_dim
(
sparse_cvm
)],
dtype
=
'float32'
,
stop_gradient
=
False
)
net
=
cvm_input
net
=
fluid
.
layers
.
fc
(
net
,
511
,
act
=
'relu'
,
name
=
'fc_1'
)
net
=
fluid
.
layers
.
fc
(
net
,
255
,
act
=
'relu'
,
name
=
'fc_2'
)
net
=
fluid
.
layers
.
fc
(
net
,
127
,
act
=
'relu'
,
name
=
'fc_3'
)
net
=
fluid
.
layers
.
fc
(
net
,
127
,
act
=
'relu'
,
name
=
'fc_4'
)
net
=
fluid
.
layers
.
fc
(
net
,
127
,
act
=
'relu'
,
name
=
'fc_5'
)
ctr_output
=
fluid
.
layers
.
fc
(
net
,
1
,
act
=
'sigmoid'
,
name
=
'ctr'
)
accessors
=
[
{
"class"
:
"AbacusSparseUpdateAccessor"
,
"input"
:
"sparses"
,
"table_id"
:
0
,
"need_gradient"
:
True
},
{
"class"
:
"DenseInputAccessor"
,
"input"
:
"vars"
,
"table_id"
:
3
,
"need_gradient"
:
True
,
"async_pull"
:
True
},
{
"class"
:
"LabelInputAccessor"
,
"input"
:
"labels"
}
]
return
accessors
,
[
sparse_cvm
],
[
cvm_input
],
[
ctr_output
]
def
loss_function
(
ctr_output
):
"""
Args:
*outputs: the second result of inference()
Returns:
Variable: loss
and
list<Variable>: labels
"""
# TODO: calc loss here
label
=
fluid
.
layers
.
data
(
name
=
'label_ctr'
,
shape
=
ctr_output
.
shape
,
dtype
=
'float32'
)
loss
=
fluid
.
layers
.
square_error_cost
(
input
=
ctr_output
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
,
name
=
'loss_ctr'
)
return
loss
,
[
label
]
paddle/fluid/train/custom_trainer/feed/tool/format_newcate_hotnews.awk
0 → 100755
浏览文件 @
b410c4e1
#!/bin/awk -f
{
if
(
$1
!~
/^
([
0-9a-zA-Z
])
+$/
||
$2
!~
/^
([
0-9
])
+$/
||
$3
!~
/^
([
0-9
])
+$/
)
{
next
;
}
show
=
$2
;
clk
=
$3
;
if
(
clk
>
show
)
{
clk
=
show
;
}
for
(
i
=
0
;
i
<
clk
;
i
++
)
{
$2
=
"1"
;
$3
=
"1"
;
print
$0
;
}
for
(
i
=
0
;
i
<
show
-
clk
;
i
++
)
{
$2
=
"1"
;
$3
=
"0"
;
print
$0
;
}
}
paddle/fluid/train/custom_trainer/feed/tool/gdbinit
0 → 100644
浏览文件 @
b410c4e1
#
# STL GDB evaluators/views/utilities - 1.03
#
# The new GDB commands:
# are entirely non instrumental
# do not depend on any "inline"(s) - e.g. size(), [], etc
# are extremely tolerant to debugger settings
#
# This file should be "included" in .gdbinit as following:
# source stl-views.gdb or just paste it into your .gdbinit file
#
# The following STL containers are currently supported:
#
# std::vector<T> -- via pvector command
# std::list<T> -- via plist or plist_member command
# std::map<T,T> -- via pmap or pmap_member command
# std::multimap<T,T> -- via pmap or pmap_member command
# std::set<T> -- via pset command
# std::multiset<T> -- via pset command
# std::deque<T> -- via pdequeue command
# std::stack<T> -- via pstack command
# std::queue<T> -- via pqueue command
# std::priority_queue<T> -- via ppqueue command
# std::bitset<n> -- via pbitset command
# std::string -- via pstring command
# std::widestring -- via pwstring command
#
# The end of this file contains (optional) C++ beautifiers
# Make sure your debugger supports $argc
#
# Simple GDB Macros writen by Dan Marinescu (H-PhD) - License GPL
# Inspired by intial work of Tom Malnar,
# Tony Novac (PhD) / Cornell / Stanford,
# Gilad Mishne (PhD) and Many Many Others.
# Contact: dan_c_marinescu@yahoo.com (Subject: STL)
#
# Modified to work with g++ 4.3 by Anders Elton
# Also added _member functions, that instead of printing the entire class in map, prints a member.
#
# std::vector<>
#
define pvector
if $argc == 0
help pvector
else
set $size = $arg0._M_impl._M_finish - $arg0._M_impl._M_start
set $capacity = $arg0._M_impl._M_end_of_storage - $arg0._M_impl._M_start
set $size_max = $size - 1
end
if $argc == 1
set $i = 0
while $i < $size
printf "elem[%u]: ", $i
p *($arg0._M_impl._M_start + $i)
set $i++
end
end
if $argc == 2
set $idx = $arg1
if $idx < 0 || $idx > $size_max
printf "idx1, idx2 are not in acceptable range: [0..%u].\n", $size_max
else
printf "elem[%u]: ", $idx
p *($arg0._M_impl._M_start + $idx)
end
end
if $argc == 3
set $start_idx = $arg1
set $stop_idx = $arg2
if $start_idx > $stop_idx
set $tmp_idx = $start_idx
set $start_idx = $stop_idx
set $stop_idx = $tmp_idx
end
if $start_idx < 0 || $stop_idx < 0 || $start_idx > $size_max || $stop_idx > $size_max
printf "idx1, idx2 are not in acceptable range: [0..%u].\n", $size_max
else
set $i = $start_idx
while $i <= $stop_idx
printf "elem[%u]: ", $i
p *($arg0._M_impl._M_start + $i)
set $i++
end
end
end
if $argc > 0
printf "Vector size = %u\n", $size
printf "Vector capacity = %u\n", $capacity
printf "Element "
whatis $arg0._M_impl._M_start
end
end
document pvector
Prints std::vector<T> information.
Syntax: pvector <vector> <idx1> <idx2>
Note: idx, idx1 and idx2 must be in acceptable range [0..<vector>.size()-1].
Examples:
pvector v - Prints vector content, size, capacity and T typedef
pvector v 0 - Prints element[idx] from vector
pvector v 1 2 - Prints elements in range [idx1..idx2] from vector
end
#
# std::list<>
#
define plist
if $argc == 0
help plist
else
set $head = &$arg0._M_impl._M_node
set $current = $arg0._M_impl._M_node._M_next
set $size = 0
while $current != $head
if $argc == 2
printf "elem[%u]: ", $size
p *($arg1*)($current + 1)
end
if $argc == 3
if $size == $arg2
printf "elem[%u]: ", $size
p *($arg1*)($current + 1)
end
end
set $current = $current._M_next
set $size++
end
printf "List size = %u \n", $size
if $argc == 1
printf "List "
whatis $arg0
printf "Use plist <variable_name> <element_type> to see the elements in the list.\n"
end
end
end
document plist
Prints std::list<T> information.
Syntax: plist <list> <T> <idx>: Prints list size, if T defined all elements or just element at idx
Examples:
plist l - prints list size and definition
plist l int - prints all elements and list size
plist l int 2 - prints the third element in the list (if exists) and list size
end
define plist_member
if $argc == 0
help plist_member
else
set $head = &$arg0._M_impl._M_node
set $current = $arg0._M_impl._M_node._M_next
set $size = 0
while $current != $head
if $argc == 3
printf "elem[%u]: ", $size
p (*($arg1*)($current + 1)).$arg2
end
if $argc == 4
if $size == $arg3
printf "elem[%u]: ", $size
p (*($arg1*)($current + 1)).$arg2
end
end
set $current = $current._M_next
set $size++
end
printf "List size = %u \n", $size
if $argc == 1
printf "List "
whatis $arg0
printf "Use plist_member <variable_name> <element_type> <member> to see the elements in the list.\n"
end
end
end
document plist_member
Prints std::list<T> information.
Syntax: plist <list> <T> <idx>: Prints list size, if T defined all elements or just element at idx
Examples:
plist_member l int member - prints all elements and list size
plist_member l int member 2 - prints the third element in the list (if exists) and list size
end
#
# std::map and std::multimap
#
define pmap
if $argc == 0
help pmap
else
set $tree = $arg0
set $i = 0
set $node = $tree._M_t._M_impl._M_header._M_left
set $end = $tree._M_t._M_impl._M_header
set $tree_size = $tree._M_t._M_impl._M_node_count
if $argc == 1
printf "Map "
whatis $tree
printf "Use pmap <variable_name> <left_element_type> <right_element_type> to see the elements in the map.\n"
end
if $argc == 3
while $i < $tree_size
set $value = (void *)($node + 1)
printf "elem[%u].left: ", $i
p *($arg1*)$value
set $value = $value + sizeof($arg1)
printf "elem[%u].right: ", $i
p *($arg2*)$value
if $node._M_right != 0
set $node = $node._M_right
while $node._M_left != 0
set $node = $node._M_left
end
else
set $tmp_node = $node._M_parent
while $node == $tmp_node._M_right
set $node = $tmp_node
set $tmp_node = $tmp_node._M_parent
end
if $node._M_right != $tmp_node
set $node = $tmp_node
end
end
set $i++
end
end
if $argc == 4
set $idx = $arg3
set $ElementsFound = 0
while $i < $tree_size
set $value = (void *)($node + 1)
if *($arg1*)$value == $idx
printf "elem[%u].left: ", $i
p *($arg1*)$value
set $value = $value + sizeof($arg1)
printf "elem[%u].right: ", $i
p *($arg2*)$value
set $ElementsFound++
end
if $node._M_right != 0
set $node = $node._M_right
while $node._M_left != 0
set $node = $node._M_left
end
else
set $tmp_node = $node._M_parent
while $node == $tmp_node._M_right
set $node = $tmp_node
set $tmp_node = $tmp_node._M_parent
end
if $node._M_right != $tmp_node
set $node = $tmp_node
end
end
set $i++
end
printf "Number of elements found = %u\n", $ElementsFound
end
if $argc == 5
set $idx1 = $arg3
set $idx2 = $arg4
set $ElementsFound = 0
while $i < $tree_size
set $value = (void *)($node + 1)
set $valueLeft = *($arg1*)$value
set $valueRight = *($arg2*)($value + sizeof($arg1))
if $valueLeft == $idx1 && $valueRight == $idx2
printf "elem[%u].left: ", $i
p $valueLeft
printf "elem[%u].right: ", $i
p $valueRight
set $ElementsFound++
end
if $node._M_right != 0
set $node = $node._M_right
while $node._M_left != 0
set $node = $node._M_left
end
else
set $tmp_node = $node._M_parent
while $node == $tmp_node._M_right
set $node = $tmp_node
set $tmp_node = $tmp_node._M_parent
end
if $node._M_right != $tmp_node
set $node = $tmp_node
end
end
set $i++
end
printf "Number of elements found = %u\n", $ElementsFound
end
printf "Map size = %u\n", $tree_size
end
end
document pmap
Prints std::map<TLeft and TRight> or std::multimap<TLeft and TRight> information. Works for std::multimap as well.
Syntax: pmap <map> <TtypeLeft> <TypeRight> <valLeft> <valRight>: Prints map size, if T defined all elements or just element(s) with val(s)
Examples:
pmap m - prints map size and definition
pmap m int int - prints all elements and map size
pmap m int int 20 - prints the element(s) with left-value = 20 (if any) and map size
pmap m int int 20 200 - prints the element(s) with left-value = 20 and right-value = 200 (if any) and map size
end
define pmap_member
if $argc == 0
help pmap_member
else
set $tree = $arg0
set $i = 0
set $node = $tree._M_t._M_impl._M_header._M_left
set $end = $tree._M_t._M_impl._M_header
set $tree_size = $tree._M_t._M_impl._M_node_count
if $argc == 1
printf "Map "
whatis $tree
printf "Use pmap <variable_name> <left_element_type> <right_element_type> to see the elements in the map.\n"
end
if $argc == 5
while $i < $tree_size
set $value = (void *)($node + 1)
printf "elem[%u].left: ", $i
p (*($arg1*)$value).$arg2
set $value = $value + sizeof($arg1)
printf "elem[%u].right: ", $i
p (*($arg3*)$value).$arg4
if $node._M_right != 0
set $node = $node._M_right
while $node._M_left != 0
set $node = $node._M_left
end
else
set $tmp_node = $node._M_parent
while $node == $tmp_node._M_right
set $node = $tmp_node
set $tmp_node = $tmp_node._M_parent
end
if $node._M_right != $tmp_node
set $node = $tmp_node
end
end
set $i++
end
end
if $argc == 6
set $idx = $arg5
set $ElementsFound = 0
while $i < $tree_size
set $value = (void *)($node + 1)
if *($arg1*)$value == $idx
printf "elem[%u].left: ", $i
p (*($arg1*)$value).$arg2
set $value = $value + sizeof($arg1)
printf "elem[%u].right: ", $i
p (*($arg3*)$value).$arg4
set $ElementsFound++
end
if $node._M_right != 0
set $node = $node._M_right
while $node._M_left != 0
set $node = $node._M_left
end
else
set $tmp_node = $node._M_parent
while $node == $tmp_node._M_right
set $node = $tmp_node
set $tmp_node = $tmp_node._M_parent
end
if $node._M_right != $tmp_node
set $node = $tmp_node
end
end
set $i++
end
printf "Number of elements found = %u\n", $ElementsFound
end
printf "Map size = %u\n", $tree_size
end
end
document pmap_member
Prints std::map<TLeft and TRight> or std::multimap<TLeft and TRight> information. Works for std::multimap as well.
Syntax: pmap <map> <TtypeLeft> <TypeRight> <valLeft> <valRight>: Prints map size, if T defined all elements or just element(s) with val(s)
Examples:
pmap_member m class1 member1 class2 member2 - prints class1.member1 : class2.member2
pmap_member m class1 member1 class2 member2 lvalue - prints class1.member1 : class2.member2 where class1 == lvalue
end
#
# std::set and std::multiset
#
# pset <set> [<T> [<val>]]
# Prints a libstdc++ std::set / std::multiset by walking its red-black
# tree in order (leftmost node first, in-order successor each step).
define pset
if $argc == 0
help pset
else
set $tree = $arg0
set $i = 0
# _M_header._M_left points at the leftmost (smallest) node
set $node = $tree._M_t._M_impl._M_header._M_left
set $end = $tree._M_t._M_impl._M_header
set $tree_size = $tree._M_t._M_impl._M_node_count
if $argc == 1
printf "Set "
whatis $tree
printf "Use pset <variable_name> <element_type> to see the elements in the set.\n"
end
if $argc == 2
# print every element in sorted order
while $i < $tree_size
# the element is stored immediately after the _Rb_tree_node_base header
set $value = (void *)($node + 1)
printf "elem[%u]: ", $i
p *($arg1*)$value
# in-order successor: leftmost node of the right subtree, ...
if $node._M_right != 0
set $node = $node._M_right
while $node._M_left != 0
set $node = $node._M_left
end
else
# ... otherwise climb until we arrive from a left child
set $tmp_node = $node._M_parent
while $node == $tmp_node._M_right
set $node = $tmp_node
set $tmp_node = $tmp_node._M_parent
end
if $node._M_right != $tmp_node
set $node = $tmp_node
end
end
set $i++
end
end
if $argc == 3
# print only the element(s) equal to $arg2, counting matches
set $idx = $arg2
set $ElementsFound = 0
while $i < $tree_size
set $value = (void *)($node + 1)
if *($arg1*)$value == $idx
printf "elem[%u]: ", $i
p *($arg1*)$value
set $ElementsFound++
end
# same in-order traversal as above
if $node._M_right != 0
set $node = $node._M_right
while $node._M_left != 0
set $node = $node._M_left
end
else
set $tmp_node = $node._M_parent
while $node == $tmp_node._M_right
set $node = $tmp_node
set $tmp_node = $tmp_node._M_parent
end
if $node._M_right != $tmp_node
set $node = $tmp_node
end
end
set $i++
end
printf "Number of elements found = %u\n", $ElementsFound
end
printf "Set size = %u\n", $tree_size
end
end
document pset
Prints std::set<T> or std::multiset<T> information. Works for std::multiset as well.
Syntax: pset <set> <T> <val>: Prints set size, if T defined all elements or just element(s) having val
Examples:
pset s - prints set size and definition
pset s int - prints all elements and the size of s
pset s int 20 - prints the element(s) with value = 20 (if any) and the size of s
end
#
# std::dequeue
#
# pdequeue <deque>
# Prints every element of a libstdc++ std::deque front-to-back by
# walking all chunks of its map. The previous version only visited the
# first and last chunk: it printed uninitialized slots past
# _M_finish._M_cur for single-chunk deques and skipped every middle
# chunk of larger ones.
define pdequeue
if $argc == 0
help pdequeue
else
set $size = 0
# all chunks share one buffer size; derive it from the start iterator
set $bufsize = $arg0._M_impl._M_start._M_last - $arg0._M_impl._M_start._M_first
set $node = $arg0._M_impl._M_start._M_node
set $finish_node = $arg0._M_impl._M_finish._M_node
set $cur = $arg0._M_impl._M_start._M_cur
set $last = $arg0._M_impl._M_start._M_last
while $node != $finish_node
# print the rest of the current chunk, then hop to the next one
while $cur != $last
p *$cur
set $cur++
set $size++
end
set $node = $node + 1
set $cur = *$node
set $last = $cur + $bufsize
end
# final chunk: stop at the finish cursor, not at the chunk end
set $stop = $arg0._M_impl._M_finish._M_cur
while $cur != $stop
p *$cur
set $cur++
set $size++
end
printf "Dequeue size = %u\n", $size
end
end
document pdequeue
Prints std::deque<T> information.
Syntax: pdequeue <deque>: Prints deque size and all elements
Deque elements are listed "left to right" (left-most stands for front and right-most stands for back)
Example:
pdequeue d - prints all elements and size of d
end
#
# std::stack
#
# pstack <stack>
# Prints a libstdc++ std::stack (deque-backed) top-first.
# Fixes the "buttom" typo in the help text.
define pstack
if $argc == 0
help pstack
else
# cursors of the underlying std::deque (assumes a single chunk)
set $start_cur = $arg0.c._M_impl._M_start._M_cur
set $finish_cur = $arg0.c._M_impl._M_finish._M_cur
set $size = $finish_cur - $start_cur
# print from the top of the stack (last pushed) down to the bottom
set $i = $size - 1
while $i >= 0
p *($start_cur + $i)
set $i--
end
printf "Stack size = %u\n", $size
end
end
document pstack
Prints std::stack<T> information.
Syntax: pstack <stack>: Prints all elements and size of the stack
Stack elements are listed "top to bottom" (top-most element is the first to come on pop)
Example:
pstack s - prints all elements and the size of s
end
#
# std::queue
#
# pqueue <queue>
# Prints a libstdc++ std::queue (deque-backed) front-first.
define pqueue
if $argc == 0
help pqueue
else
# front and back cursors of the underlying std::deque chunk
set $front = $arg0.c._M_impl._M_start._M_cur
set $back = $arg0.c._M_impl._M_finish._M_cur
set $elem = $front
while $elem != $back
p *$elem
set $elem++
end
printf "Queue size = %u\n", $back - $front
end
end
document pqueue
Prints std::queue<T> information.
Syntax: pqueue <queue>: Prints all elements and the size of the queue
Queue elements are listed "top to bottom" (top-most element is the first to come on pop)
Example:
pqueue q - prints all elements and the size of q
end
#
# std::priority_queue
#
# ppqueue <priority_queue>
# Prints a libstdc++ std::priority_queue (vector-backed heap) with the
# top element first. Fixes the "buttom" typo in the help text.
define ppqueue
if $argc == 0
help ppqueue
else
# the adapter's container is a contiguous std::vector
set $size = $arg0.c._M_impl._M_finish - $arg0.c._M_impl._M_start
set $capacity = $arg0.c._M_impl._M_end_of_storage - $arg0.c._M_impl._M_start
# print storage back-to-front so the heap top comes out first
set $i = $size - 1
while $i >= 0
p *($arg0.c._M_impl._M_start + $i)
set $i--
end
printf "Priority queue size = %u\n", $size
printf "Priority queue capacity = %u\n", $capacity
end
end
document ppqueue
Prints std::priority_queue<T> information.
Syntax: ppqueue <priority_queue>: Prints all elements, size and capacity of the priority_queue
Priority_queue elements are listed "top to bottom" (top-most element is the first to come on pop)
Example:
ppqueue pq - prints all elements, size and capacity of pq
end
#
# std::bitset
#
# pbitset <bitset>
# Dumps a libstdc++ std::bitset<n> in binary.
define pbitset
if $argc > 0
# _M_w is the array of machine words backing the bitset; /t = binary
p /t $arg0._M_w
else
help pbitset
end
end
document pbitset
Prints std::bitset<n> information.
Syntax: pbitset <bitset>: Prints all bits in bitset
Example:
pbitset b - prints all bits in b
end
#
# std::string
#
# pstring <string>
# Prints a pre-C++11 (COW) libstdc++ std::string: content plus the
# length/capacity/refcount stored in its _Rep header.
define pstring
if $argc > 0
printf "String \t\t\t= \"%s\"\n", $arg0._M_data()
printf "String size/length \t= %u\n", $arg0._M_rep()._M_length
printf "String capacity \t= %u\n", $arg0._M_rep()._M_capacity
printf "String ref-count \t= %d\n", $arg0._M_rep()._M_refcount
else
help pstring
end
end
document pstring
Prints std::string information.
Syntax: pstring <string>
Example:
pstring s - Prints content, size/length, capacity and ref-count of string s
end
#
# std::wstring
#
# pwstring <wstring>
# Prints a pre-C++11 (COW) libstdc++ std::wstring; the content line is
# emitted through the inferior's printf so %ls is handled correctly.
define pwstring
if $argc > 0
call printf("WString \t\t= \"%ls\"\n", $arg0._M_data())
printf "WString size/length \t= %u\n", $arg0._M_rep()._M_length
printf "WString capacity \t= %u\n", $arg0._M_rep()._M_capacity
printf "WString ref-count \t= %d\n", $arg0._M_rep()._M_refcount
else
help pwstring
end
end
document pwstring
Prints std::wstring information.
Syntax: pwstring <wstring>
Example:
pwstring s - Prints content, size/length, capacity and ref-count of wstring s
end
#
# C++ related beautifiers (optional)
#
# pretty-print structures one member per line
set print pretty on
# print the actual (derived) type of polymorphic objects
set print object on
# include static members when printing structs/classes
set print static-members on
# show virtual-table information when printing objects
set print vtbl on
# demangle C++ symbol names in output
set print demangle on
# use the GNU v3 C++ ABI demangling style
set demangle-style gnu-v3
# print 8-bit characters literally instead of as escape sequences
set print sevenbit-strings off
# on fork, follow the child process and keep the parent attached
set follow-fork-mode child
set detach-on-fork off
paddle/fluid/train/custom_trainer/feed/tool/ins_weight.py
0 → 100755
浏览文件 @
b410c4e1
#!/usr/bin/python
"""Instance-weight adjuster: reads training samples from stdin and
appends a per-instance weight (see main())."""
import sys
import re
import math

# When True, non-whitelisted text slots may be dropped (toggled by argv[1]).
del_text_slot = True
# Blends the computed weight back towards 1 for negative (label "0") samples.
g_ratio = 1
# Scale factor inside the log() of the inactive-user weight formula.
w_ratio = 0.01
# Space-separated slot ids allowed through filter_whitelist_slot().
slots_str = "6048 6145 6202 6201 6121 6119 6146 6120 6147 6122 6123 6118 6142 6143 6008 6148 6151 6127 6144 6150 6109 6003 6096 6149 6129 6203 6153 6152 6128 6106 6251 7082 7515 7080 6066 7507 6186 6007 7514 6054 6125 7506 10001 6006 6080 7023 6085 10000 6250 6110 6124 6090 6082 6067 7516 6101 6004 6191 6188 6070 6194 6247 6814 7512 10007 6058 6189 6059 7517 10005 7510 7024 7502 7503 6183 7511 6060 6806 7504 6185 6810 6248 10004 6815 6182 10068 6069 6073 6196 6816 7513 6071 6809 6072 6817 6190 7505 6813 6192 6807 6808 6195 6826 6184 6197 6068 6812 7107 6811 6823 6824 6819 6818 6821 6822 6820 6094 6083 6952 6099 6951 6949 6098 7075 6948 6157 6126 7077 6111 6087 6103 6107 6156 6005 6158 7122 6155 7058 6115 7079 7081 6833 6108 6840 6837 7147 7129 6097 6231 6957 7145 6956 7143 6130 7149 7142 6212 6827 7144 6089 6161 7055 6233 6105 7057 6237 6828 6850 6163 7124 6354 6162 7146 6830 7123 6160 6235 7056 6081 6841 6132 6954 6131 6236 6831 6845 6832 6953 6839 6950 7125 7054 6138 6166 6076 6851 6353 7076 7148 6858 6842 6860 7126 6829 6835 7078 6866 6869 6871 7052 6134 6855 6947 6862 6215 6852 7128 6092 6112 6213 6232 6863 6113 6165 6214 6216 6873 6865 6870 6077 6234 6861 6164 6217 7127 6218 6962 7053 7051 6961 6002 6738 6739 10105 7064 6751 6770 7100 6014 6765 6755 10021 10022 6010 10056 6011 6756 10055 6768 10024 6023 10003 6769 10002 6767 6759 10018 6024 6064 6012 6050 10042 6168 6253 10010 10020 6015 6018 10033 10041 10039 10031 10016 6764 7083 7152 7066 6171 7150 7085 6255 10044 10008 7102 6167 6240 6238 6095 10017 10046 6019 6031 6763 6256 6169 6254 10034 7108 7186 6257 10019 6757 10040 6025 7019 7086 10029 10011 7104 6261 6013 6766 10106 7105 7153 7089 6057 7134 7151 7045 7005 7008 7101 6035 7137 10023 6036 6172 7099 7087 6239 7185 6170 10006 6243 6350 7103 7090 7157 6259 7171 6875 7084 7154 6242 6260 7155 7017 7048 7156 6959 7047 10053 7135 6244 7136 10030 7063 6760 7016 7065 7179 6881 7018 6876 10081 10052 10054 10038 6886 10069 7004 10051 7007 7109 10057 6029 6888 10009 6889 7021 10047 6245 6878 10067 6879 6884 7180 7182 10071 7002 6880 6890 6887 10061 6027 6877 6892 10060 6893 7050 10036 7049 10012 10025 7012 7183 10058 7181 10086 6891 6258 6894 6883 7046 6037 7106 10043 10048 10045 10087 6885 10013 10028 7187 10037 10035 10050 6895 7011 7170 7172 10026 10063 10095 10082 10084 6960 10092 10075 6038 7010 7015 10015 10027 10064 7184 10014 10059 7013 7020 10072 10066 10080 6896 10083 10090 6039 10049 7164 7165 10091 10099 6963 7166 10079 10103 7006 7009 7169 6034 7028 7029 7030 7034 7035 7036 7040 7041 7042 10032 6009 6241 7003 7014 7088 13326 13330 13331 13352 13353 6198"
slot_whitelist = slots_str.split(" ")
def calc_ins_weight(params, label):
    """Compute an instance weight from (slot_name, slot_value) pairs.

    Instances of inactive users (activity in [0, 0.4) and fewer than 20
    session shows) are up-weighted by a log formula; negative samples
    (label == "0") have that boost blended back towards 1 by g_ratio.

    Removed the locals of the original that were assigned but never
    read (slots, s_clk_num, attclk_num, attshow_num, attclk_avg).

    Args:
        params: iterable of (slot_name, slot_value) pairs; entries whose
            length is not exactly 2 are ignored.
        label: sample label as a string; "0" marks a negative sample.
    Returns:
        The instance weight (1 when no up-weighting applies).
    """
    global g_ratio
    global w_ratio
    s_show_num = 0
    active = 0
    for items in params:
        if len(items) != 2:
            continue
        slot_name = items[0]
        slot_val = items[1]
        if slot_name == "session_show_num":
            s_show_num = int(slot_val)
        if slot_name == "activity":
            # raw activity is scaled to [0, 1]
            active = float(slot_val) / 10000.0
    w = 1
    # up-weight instances of inactive users (low activity, few shows)
    if active >= 0 and active < 0.4 and s_show_num >= 0 and s_show_num < 20:
        w = math.log(w_ratio * (420 - (active * 50 + 1) * (s_show_num + 1)) + math.e)
    if label == "0":
        # soften the boost for negative samples
        w = 1 + (w - 1) * g_ratio
    return w
def filter_whitelist_slot(tmp_line):
    """Keep only feasigns whose slot id is in slot_whitelist.

    The first three space-separated fields are copied through untouched;
    each following "sign:slot" token is kept only when its slot part
    appears in the module-level slot_whitelist.
    """
    terms = tmp_line.split()
    kept = "%s %s %s" % (terms[0], terms[1], terms[2])
    for token in terms[3:]:
        feasign = token.split(':')
        if len(feasign) == 2 and feasign[1] in slot_whitelist:
            kept = "%s %s" % (kept, token)
    return kept
def get_sample_type(line):
    """Return 20 when the line carries one of the known NEWS feasigns,
    otherwise -1.

    Note: matching uses find(...) > 0, so a feasign at the very start of
    the line (offset 0) is intentionally NOT counted, exactly as in the
    original chained-or implementation.
    """
    # vertical_type = 20
    # if line.find("13038012583501790:6738") > 0:
    #     return 30
    # vertical_type = 0/5/1/2/9/11/13/16/29/-1
    news_signs = (
        "7408512894065610:6738",
        "8815887816424655:6738",
        "7689987878537419:6738",
        "7971462863009228:6738",
        "9941787754311891:6738",
        "10504737723255509:6738",
        "11067687692199127:6738",
        "11912112645614554:6738",
        "15571287443748071:6738",
        "7127025017546227:6738",
    )
    for sign in news_signs:
        if line.find(sign) > 0:
            return 20
    return -1
def
main
():
"""ins adjust"""
global
del_text_slot
for
l
in
sys
.
stdin
:
l
=
l
.
rstrip
(
"
\n
"
)
items
=
l
.
split
(
" "
)
if
len
(
items
)
<
3
:
continue
label
=
items
[
2
]
lines
=
l
.
split
(
"
\t
"
)
line
=
lines
[
0
]
# streaming ins include all ins, sample_type only handle NEWS ins
sample_type
=
-
1
if
'NEWS'
in
l
:
sample_type
=
get_sample_type
(
line
)
#line = filter_whitelist_slot(tmp_line)
if
len
(
lines
)
>=
4
:
if
'VIDEO'
in
lines
[
3
]:
continue
params
=
lines
[
2
]
params
=
params
.
split
(
" "
)
m
=
[
tuple
(
i
.
split
(
":"
))
for
i
in
params
]
if
m
is
None
or
len
(
m
)
==
0
:
if
sample_type
>
0
:
print
"%s $%s *1"
%
(
line
,
sample_type
)
else
:
print
"%s *1"
%
line
sys
.
stdout
.
flush
()
continue
weight
=
calc_ins_weight
(
m
,
label
)
if
sample_type
>
0
:
print
"%s $%s *%s"
%
(
line
,
sample_type
,
weight
)
else
:
print
"%s *%s"
%
(
line
,
weight
)
sys
.
stdout
.
flush
()
else
:
if
sample_type
>
0
:
print
"%s $%s *1"
%
(
line
,
sample_type
)
else
:
print
"%s *1"
%
line
sys
.
stdout
.
flush
()
if __name__ == "__main__":
    # argv[1] == "0" disables text-slot deletion; argv[2] / argv[3]
    # override g_ratio / w_ratio respectively.
    if len(sys.argv) > 1:
        if sys.argv[1] == "0":
            del_text_slot = False
    if len(sys.argv) > 2:
        g_ratio = float(sys.argv[2])
    if len(sys.argv) > 3:
        w_ratio = float(sys.argv[3])
    main()
paddle/fluid/train/custom_trainer/feed/tool/xbox_compressor_mf.py
0 → 100755
浏览文件 @
b410c4e1
#!/usr/bin/python
"""
xbox model compressor
"""
import sys
import math
import time
import re

# Historical compression-ratio presets, kept for reference:
#WISE
#SHOW_COMPRESS_RATIO : 8192
#CLICK_COMPRESS_RATIO : 8192
#LR_COMPRESS_RATIO : 1048576
#MIO_COMPRESS_RATIO:8192
#PC
#MIO_COMPRESS_RATIO : 1024
#SHOW_COMPRESS_RATIO : 128
#CLICK_COMPRESS_RATIO : 1024
#LR_COMPRESS_RATIO : 8192

# Column indexes into the whitespace-split model dump.
#STAMP_COL = 2
SHOW_COL = 3
CLICK_COL = 4
LR_W_COL = 5
LR_G2SUM_COL = 6
FM_COL = 9
#DAY_SPAN = 300

# Quantization ratios: show/clk/lr are floats scaled by these factors.
SHOW_RATIO = 1
#SHOW_RATIO = 1024
CLK_RATIO = 8
#CLK_RATIO = 1024
LR_RATIO = 1024
MF_RATIO = 1024

base_update_threshold = 0.965
base_xbox_clk_cof = 1
base_xbox_nonclk_cof = 0.2
def as_num(x):
    """Format x with exactly five digits after the decimal point."""
    return '{:.5f}'.format(x)
def compress_show(xx):
    """
    compress show
    """
    # quantize: scale by SHOW_RATIO and round half up to an integer
    scaled = float(xx) * SHOW_RATIO + 0.5
    return str(int(math.floor(scaled)))
def compress_clk(xx):
    """
    compress clk
    """
    # quantize; a zero click count is emitted as the empty string
    clk = int(math.floor(float(xx) * CLK_RATIO + 0.5))
    return "" if clk == 0 else str(clk)
def compress_lr(xx):
    """
    compress lr
    """
    # quantize; a zero LR weight is emitted as the empty string
    lr = int(math.floor(float(xx) * LR_RATIO + 0.5))
    return "" if lr == 0 else str(lr)
def compress_mf(xx):
    """
    compress mf
    """
    # NOTE: returns an int, unlike the other compress_* helpers (str)
    return int(math.floor(float(xx) * MF_RATIO + 0.5))
def show_clk_score(show, clk):
    """
    calculate show_clk score
    """
    # non-clicked impressions weigh 0.2 each, clicks weigh 1.0
    nonclk = show - clk
    return nonclk * 0.2 + clk
# Read one model row per stdin line and write the compressed row:
# key, show, clk, pred, lr weight, then either the quantized MF weights
# or the "[\t]" placeholder when there are none.
for l in sys.stdin:
    cols = re.split(r'\s+', l.strip())
    key = cols[0].strip()
    #day = int(cols[STAMP_COL].strip())
    #cur_day = int(time.time()/3600/24)
    #if (day + DAY_SPAN) <= cur_day:
    #    continue
    # cvm features
    show = cols[SHOW_COL]
    click = cols[CLICK_COL]
    pred = ""
    f_show = float(show)
    f_clk = float(click)
    # log-compression of show/click is disabled; raw counts pass through
    # (the original conditional math.log variant is kept disabled below)
    #if f_show != 0:
    #    show_log = math.log(f_show)
    #else:
    #    show_log = 0
    #if f_clk != 0:
    #    click_log = math.log(f_clk) - show_log
    #else:
    #    click_log = 0
    show_log = f_show
    click_log = f_clk
    #print f_show, f_clk
    #if show_clk_score(f_show, f_clk) < base_update_threshold:
    #    continue
    show = compress_show(show_log)
    clk = compress_clk(click_log)
    # personal lr weight
    lr_w = cols[LR_W_COL].strip()
    lr_wei = compress_lr(lr_w)
    # fm weight
    fm_wei = []
    fm_sum = 0
    if len(cols) > 7:
        #fm_dim = int(cols[FM_COL].strip())
        #if fm_dim != 0:
        for v in xrange(FM_COL, len(cols), 1):
            mf_v = compress_mf(cols[v])
            #print mf_v
            fm_wei.append(str(mf_v))
            fm_sum += (mf_v * mf_v)
    sys.stdout.write("%s\t%s\t%s\t%s" % (key, show, clk, pred))
    sys.stdout.write("\t")
    sys.stdout.write("%s" % lr_wei)
    if len(fm_wei) > 0 and fm_sum > 0:
        sys.stdout.write("\t%s" % "\t".join(fm_wei))
    else:
        # no (or all-zero) MF weights: emit the bracket placeholder
        sys.stdout.write("\t[\t]")
    sys.stdout.write("\n")
paddle/fluid/train/custom_trainer/feed/tool/xbox_decompressor_mf.awk
0 → 100755
浏览文件 @
b410c4e1
#!/bin/awk -f
# Reverses xbox_compressor_mf.py: reads the compressed tab-separated
# rows and emits the de-quantized model row.
{
    OFS = "\t";
    SHOW_RATIO = 1;
    CLK_RATIO = 8;
    LR_RATIO = 1024;
    MF_RATIO = 1024;
}
# de-quantize a show count
function decompress_show(x)
{
    x = x * 1.0 / SHOW_RATIO;
    return x;
}
# de-quantize a click count (empty field means zero clicks)
function decompress_clk(x)
{
    if (x == "") {
        x = 0;
    }
    x = x * 1.0 / CLK_RATIO;
    return x;
}
# de-quantize an LR weight
function decompress_lr(x)
{
    return x * 1.0 / LR_RATIO;
}
# de-quantize an MF weight
function decompress_mf(x)
{
    return x * 1.0 / MF_RATIO;
}
# weighted show/click score: non-clicks * nonclk_coeff + clicks * clk_coeff
function show_clk_sore(show, clk, nonclk_coeff, clk_coeff)
{
    return (show - clk) * nonclk_coeff + clk * clk_coeff;
}
#key, show, clk, pred, lr_w, mf_w or [\t]
{
    l = split($0, a, "\t");
    show = decompress_show(a[2]);
    click = decompress_clk(a[3]);
    lr = decompress_lr(a[5]);
    printf("%s\t0\t0\t%s\t%s\t%s\t0\t", a[1], show, click, lr);
    if (l == 7) {
        printf("0\n");
    } else {
        printf("%d", l - 5)
        for (i = 6; i <= l; i++) {
            printf("\t%s", decompress_mf(a[i]));
        }
        printf("\t0\n");
    }
}
paddle/fluid/train/custom_trainer/feed/tool/xbox_pb_converter
0 → 100755
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/tool/xbox_pb_deconverter
0 → 100755
浏览文件 @
b410c4e1
文件已添加
paddle/fluid/train/custom_trainer/feed/trainer_context.h
浏览文件 @
b410c4e1
...
...
@@ -2,8 +2,9 @@
#include <string>
#include <memory>
#include <vector>
#include <yaml-cpp/yaml.h>
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/train/custom_trainer/feed/common/yaml_helper.h"
#include "paddle/fluid/train/custom_trainer/feed/common/pslib_warpper.h"
#include "paddle/fluid/train/custom_trainer/feed/common/runtime_environment.h"
...
...
@@ -16,23 +17,16 @@ class Dataset;
class
FileSystem
;
class
EpochAccessor
;
const
uint32_t
SecondsPerMin
=
60
;
const
uint32_t
SecondsPerHour
=
3600
;
const
uint32_t
SecondsPerDay
=
24
*
3600
;
enum
class
ModelSaveWay
{
ModelSaveTrainCheckpoint
=
0
,
ModelSaveInferenceDelta
=
1
,
ModelSaveInferenceBase
=
2
};
class
TableMeta
{
public:
TableMeta
()
{}
~
TableMeta
()
{}
int
table_id
()
{
return
_id
;
}
private:
int
_id
;
};
class
SignCacheDict
{
public:
int32_t
sign2index
(
uint64_t
sign
)
{
...
...
@@ -46,16 +40,20 @@ public:
class
TrainerContext
{
public:
YAML
::
Node
trainer_config
;
paddle
::
platform
::
CPUPlace
cpu_place
;
std
::
shared_ptr
<
Dataset
>
dataset
;
//训练样本
std
::
shared_ptr
<
FileSystem
>
file_system
;
//文件操作辅助类
std
::
vector
<
TableMeta
>
params_table_list
;
//参数表
std
::
shared_ptr
<
EpochAccessor
>
epoch_accessor
;
//训练轮次控制
std
::
shared_ptr
<
RuntimeEnvironment
>
environment
;
//运行环境
std
::
vector
<
std
::
shared_ptr
<
Process
>>
process_list
;
//训练流程
std
::
shared_ptr
<
SignCacheDict
>
cache_dict
;
//大模型cache词典
inline
paddle
::
ps
::
PSClient
*
ps_client
()
{
return
pslib
->
ps_client
();
}
YAML
::
Node
trainer_config
;
paddle
::
platform
::
CPUPlace
cpu_place
;
std
::
shared_ptr
<
PSlib
>
pslib
;
std
::
shared_ptr
<
Dataset
>
dataset
;
//训练样本
std
::
shared_ptr
<
FileSystem
>
file_system
;
//文件操作辅助类
std
::
shared_ptr
<
EpochAccessor
>
epoch_accessor
;
//训练轮次控制
std
::
shared_ptr
<
RuntimeEnvironment
>
environment
;
//运行环境
std
::
vector
<
std
::
shared_ptr
<
Process
>>
process_list
;
//训练流程
std
::
shared_ptr
<
SignCacheDict
>
cache_dict
;
//大模型cache词典
};
}
// namespace feed
...
...
paddle/fluid/train/custom_trainer/feed/unit_test/test_create_programs.cc
浏览文件 @
b410c4e1
...
...
@@ -9,6 +9,7 @@
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
#include "paddle/fluid/train/custom_trainer/feed/io/shell.h"
#include "paddle/fluid/train/custom_trainer/feed/common/scope_helper.h"
#include "paddle/fluid/string/string_helper.h"
namespace
paddle
{
...
...
@@ -28,7 +29,7 @@ class CreateProgramsTest : public testing::Test
public:
static
void
SetUpTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
if
(
fs
->
exists
(
"./scripts/create_programs.py"
))
{
shell_execute
(
string
::
format_string
(
"python ./scripts/create_programs.py ./scripts/example.py %s"
,
test_data_dir
));
}
else
if
(
fs
->
exists
(
string
::
format_string
(
"%s/scripts/create_programs.py"
,
feed_path
)))
{
...
...
@@ -38,7 +39,7 @@ public:
static
void
TearDownTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
fs
->
remove
(
test_data_dir
);
}
...
...
@@ -61,7 +62,7 @@ public:
};
TEST_F
(
CreateProgramsTest
,
example_network
)
{
std
::
unique_ptr
<
Executor
>
executor
(
CREATE_
CLASS
(
Executor
,
"SimpleExecutor"
));
std
::
unique_ptr
<
Executor
>
executor
(
CREATE_
INSTANCE
(
Executor
,
"SimpleExecutor"
));
ASSERT_NE
(
nullptr
,
executor
);
auto
config
=
YAML
::
Load
(
string
::
format_string
(
"{thread_num: 2, startup_program: %s, main_program: %s}"
,
startup_program_path
,
main_program_path
));
...
...
@@ -108,8 +109,10 @@ TEST_F(CreateProgramsTest, example_network) {
ASSERT_EQ
(
-
1
,
output_shape
[
0
]);
ASSERT_EQ
(
1
,
output_shape
[
1
]);
auto
input_var
=
executor
->
mutable_var
<::
paddle
::
framework
::
LoDTensor
>
(
input_name
);
auto
label_var
=
executor
->
mutable_var
<::
paddle
::
framework
::
LoDTensor
>
(
label_name
);
paddle
::
framework
::
Scope
scope
;
executor
->
initialize_scope
(
&
scope
);
auto
input_var
=
ScopeHelper
::
mutable_var
<::
paddle
::
framework
::
LoDTensor
>
(
&
scope
,
input_name
);
auto
label_var
=
ScopeHelper
::
mutable_var
<::
paddle
::
framework
::
LoDTensor
>
(
&
scope
,
label_name
);
ASSERT_NE
(
nullptr
,
input_var
);
ASSERT_NE
(
nullptr
,
label_var
);
...
...
@@ -125,12 +128,12 @@ TEST_F(CreateProgramsTest, example_network) {
ASSERT_NE
(
nullptr
,
label_data
);
label_data
[
0
]
=
random
();
ASSERT_EQ
(
0
,
executor
->
run
());
ASSERT_EQ
(
0
,
executor
->
run
(
&
scope
));
auto
loss_var
=
executor
->
var
<::
paddle
::
framework
::
LoDTensor
>
(
loss_name
);
auto
loss_var
=
ScopeHelper
::
var
<::
paddle
::
framework
::
LoDTensor
>
(
&
scope
,
loss_name
);
auto
loss
=
loss_var
.
data
<
float
>
()[
0
];
auto
output_var
=
executor
->
var
<::
paddle
::
framework
::
LoDTensor
>
(
output_name
);
auto
output_var
=
ScopeHelper
::
var
<::
paddle
::
framework
::
LoDTensor
>
(
&
scope
,
output_name
);
auto
output
=
output_var
.
data
<
float
>
()[
0
];
LOG
(
INFO
)
<<
"loss: "
<<
loss
<<
std
::
endl
;
...
...
paddle/fluid/train/custom_trainer/feed/unit_test/test_datareader.cc
浏览文件 @
b410c4e1
...
...
@@ -3,6 +3,7 @@
#include <gtest/gtest.h>
#include <omp.h>
#include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/program_desc.h"
...
...
@@ -22,7 +23,7 @@ const char test_data_dir[] = "test_data";
class
DataReaderTest
:
public
testing
::
Test
{
public:
static
void
SetUpTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
fs
->
mkdir
(
test_data_dir
);
shell_set_verbose
(
true
);
...
...
@@ -42,14 +43,14 @@ public:
}
static
void
TearDownTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
fs
->
remove
(
test_data_dir
);
}
virtual
void
SetUp
()
{
thread_num
=
omp_get_max_threads
();
omp_set_num_threads
(
1
);
fs
.
reset
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
fs
.
reset
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
context_ptr
.
reset
(
new
TrainerContext
());
}
...
...
@@ -65,7 +66,7 @@ public:
};
TEST_F
(
DataReaderTest
,
LineDataParser
)
{
std
::
unique_ptr
<
DataParser
>
data_parser
(
CREATE_
CLASS
(
DataParser
,
"LineDataParser"
));
std
::
unique_ptr
<
DataParser
>
data_parser
(
CREATE_
INSTANCE
(
DataParser
,
"LineDataParser"
));
ASSERT_NE
(
nullptr
,
data_parser
);
auto
config
=
YAML
::
Load
(
""
);
...
...
@@ -94,7 +95,7 @@ TEST_F(DataReaderTest, LineDataParser) {
}
TEST_F
(
DataReaderTest
,
LineDataReader
)
{
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
CLASS
(
DataReader
,
"LineDataReader"
));
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
INSTANCE
(
DataReader
,
"LineDataReader"
));
ASSERT_NE
(
nullptr
,
data_reader
);
auto
config
=
YAML
::
Load
(
...
...
@@ -147,7 +148,7 @@ TEST_F(DataReaderTest, LineDataReader) {
}
TEST_F
(
DataReaderTest
,
LineDataReader_filename_prefix
)
{
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
CLASS
(
DataReader
,
"LineDataReader"
));
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
INSTANCE
(
DataReader
,
"LineDataReader"
));
ASSERT_NE
(
nullptr
,
data_reader
);
auto
config
=
YAML
::
Load
(
"parser:
\n
"
...
...
@@ -182,7 +183,7 @@ TEST_F(DataReaderTest, LineDataReader_filename_prefix) {
}
TEST_F
(
DataReaderTest
,
LineDataReader_FileSystem
)
{
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
CLASS
(
DataReader
,
"LineDataReader"
));
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
INSTANCE
(
DataReader
,
"LineDataReader"
));
ASSERT_NE
(
nullptr
,
data_reader
);
auto
config
=
YAML
::
Load
(
"parser:
\n
"
...
...
paddle/fluid/train/custom_trainer/feed/unit_test/test_datareader_omp.cc
浏览文件 @
b410c4e1
...
...
@@ -4,6 +4,7 @@
#include <gtest/gtest.h>
#include <omp.h>
#include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/program_desc.h"
...
...
@@ -23,7 +24,7 @@ const char test_data_dir[] = "test_data";
class
DataReaderOmpTest
:
public
testing
::
Test
{
public:
static
void
SetUpTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
if
(
fs
->
exists
(
test_data_dir
))
{
fs
->
remove
(
test_data_dir
);
}
...
...
@@ -50,14 +51,14 @@ public:
}
static
void
TearDownTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
fs
->
remove
(
test_data_dir
);
}
virtual
void
SetUp
()
{
thread_num
=
omp_get_max_threads
();
omp_set_num_threads
(
1
);
fs
.
reset
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
fs
.
reset
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
context_ptr
.
reset
(
new
TrainerContext
());
}
...
...
@@ -117,7 +118,7 @@ std::vector<DataItem> DataReaderOmpTest::std_items;
std
::
vector
<
DataItem
>
DataReaderOmpTest
::
sorted_std_items
;
TEST_F
(
DataReaderOmpTest
,
LineDataReaderSingleThread
)
{
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
CLASS
(
DataReader
,
"LineDataReader"
));
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
INSTANCE
(
DataReader
,
"LineDataReader"
));
ASSERT_NE
(
nullptr
,
data_reader
);
auto
config
=
YAML
::
Load
(
...
...
@@ -148,7 +149,7 @@ TEST_F(DataReaderOmpTest, LineDataReaderSingleThread) {
}
TEST_F
(
DataReaderOmpTest
,
LineDataReaderMuiltThread
)
{
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
CLASS
(
DataReader
,
"LineDataReader"
));
std
::
unique_ptr
<
DataReader
>
data_reader
(
CREATE_
INSTANCE
(
DataReader
,
"LineDataReader"
));
ASSERT_NE
(
nullptr
,
data_reader
);
auto
config
=
YAML
::
Load
(
...
...
paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc
浏览文件 @
b410c4e1
...
...
@@ -2,7 +2,9 @@
#include <fstream>
#include <gtest/gtest.h>
#include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
#include "paddle/fluid/train/custom_trainer/feed/common/scope_helper.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
...
...
@@ -24,7 +26,7 @@ class SimpleExecutorTest : public testing::Test
public:
static
void
SetUpTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
fs
->
mkdir
(
test_data_dir
);
shell_set_verbose
(
true
);
...
...
@@ -57,7 +59,7 @@ public:
static
void
TearDownTestCase
()
{
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
CLASS
(
FileSystem
,
"LocalFileSystem"
));
std
::
unique_ptr
<
FileSystem
>
fs
(
CREATE_
INSTANCE
(
FileSystem
,
"LocalFileSystem"
));
fs
->
remove
(
test_data_dir
);
}
...
...
@@ -75,7 +77,7 @@ public:
};
TEST_F
(
SimpleExecutorTest
,
initialize
)
{
std
::
unique_ptr
<
Executor
>
executor
(
CREATE_
CLASS
(
Executor
,
"SimpleExecutor"
));
std
::
unique_ptr
<
Executor
>
executor
(
CREATE_
INSTANCE
(
Executor
,
"SimpleExecutor"
));
ASSERT_NE
(
nullptr
,
executor
);
YAML
::
Node
config
=
YAML
::
Load
(
"[1, 2, 3]"
);
ASSERT_NE
(
0
,
executor
->
initialize
(
config
,
context_ptr
));
...
...
@@ -86,13 +88,14 @@ TEST_F(SimpleExecutorTest, initialize) {
}
TEST_F
(
SimpleExecutorTest
,
run
)
{
std
::
unique_ptr
<
Executor
>
executor
(
CREATE_
CLASS
(
Executor
,
"SimpleExecutor"
));
std
::
unique_ptr
<
Executor
>
executor
(
CREATE_
INSTANCE
(
Executor
,
"SimpleExecutor"
));
ASSERT_NE
(
nullptr
,
executor
);
auto
config
=
YAML
::
Load
(
string
::
format_string
(
"{thread_num: 2, startup_program: %s, main_program: %s}"
,
startup_program_path
,
main_program_path
));
ASSERT_EQ
(
0
,
executor
->
initialize
(
config
,
context_ptr
));
auto
x_var
=
executor
->
mutable_var
<::
paddle
::
framework
::
LoDTensor
>
(
"x"
);
paddle
::
framework
::
Scope
scope
;
executor
->
initialize_scope
(
&
scope
);
auto
x_var
=
ScopeHelper
::
mutable_var
<::
paddle
::
framework
::
LoDTensor
>
(
&
scope
,
std
::
string
(
"x"
));
ASSERT_NE
(
nullptr
,
x_var
);
int
x_len
=
10
;
...
...
@@ -106,9 +109,9 @@ TEST_F(SimpleExecutorTest, run) {
}
std
::
cout
<<
std
::
endl
;
ASSERT_EQ
(
0
,
executor
->
run
());
ASSERT_EQ
(
0
,
executor
->
run
(
&
scope
));
auto
mean_var
=
executor
->
var
<::
paddle
::
framework
::
LoDTensor
>
(
"mean"
);
auto
mean_var
=
ScopeHelper
::
var
<::
paddle
::
framework
::
LoDTensor
>
(
&
scope
,
std
::
string
(
"mean"
)
);
auto
mean
=
mean_var
.
data
<
float
>
()[
0
];
std
::
cout
<<
"mean: "
<<
mean
<<
std
::
endl
;
ASSERT_NEAR
(
4.5
,
mean
,
1e-9
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录