Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
53619a79
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
53619a79
编写于
12月 21, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into accelerate_lstm
上级
679d1a9e
95cbe07c
变更
44
显示空白变更内容
内联
并排
Showing
44 changed file
with
1745 addition
and
554 deletion
+1745
-554
paddle/fluid/API.spec
paddle/fluid/API.spec
+17
-0
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+1
-6
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+7
-8
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+0
-5
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+0
-1
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+108
-24
paddle/fluid/framework/op_desc.h
paddle/fluid/framework/op_desc.h
+2
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+142
-46
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+4
-5
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+0
-1
paddle/fluid/framework/shape_inference.cc
paddle/fluid/framework/shape_inference.cc
+0
-98
paddle/fluid/framework/shape_inference.h
paddle/fluid/framework/shape_inference.h
+18
-27
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+11
-0
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+11
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+22
-1
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+5
-2
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+29
-14
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+14
-8
paddle/fluid/operators/merge_selected_rows_op.cc
paddle/fluid/operators/merge_selected_rows_op.cc
+29
-1
paddle/fluid/operators/py_func_op.cc
paddle/fluid/operators/py_func_op.cc
+313
-0
paddle/fluid/operators/py_func_op.h
paddle/fluid/operators/py_func_op.h
+25
-0
paddle/fluid/operators/transpose_mkldnn_op.cc
paddle/fluid/operators/transpose_mkldnn_op.cc
+14
-59
paddle/fluid/platform/mkldnn_reuse.h
paddle/fluid/platform/mkldnn_reuse.h
+124
-0
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+3
-0
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+7
-1
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+3
-0
python/paddle/fluid/contrib/utils/__init__.py
python/paddle/fluid/contrib/utils/__init__.py
+5
-4
python/paddle/fluid/contrib/utils/hdfs_utils.py
python/paddle/fluid/contrib/utils/hdfs_utils.py
+163
-138
python/paddle/fluid/contrib/utils/lookup_table_utils.py
python/paddle/fluid/contrib/utils/lookup_table_utils.py
+125
-58
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+221
-0
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+38
-41
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
+19
-1
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py
.../tests/unittests/test_get_tensor_from_selected_rows_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py
...addle/fluid/tests/unittests/test_merge_selectedrows_op.py
+2
-2
python/paddle/fluid/tests/unittests/test_py_func_op.py
python/paddle/fluid/tests/unittests/test_py_func_op.py
+183
-0
python/setup.py.in
python/setup.py.in
+1
-1
未找到文件。
paddle/fluid/API.spec
浏览文件 @
53619a79
...
...
@@ -208,6 +208,7 @@ paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act
paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1))
paddle.fluid.layers.py_func ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.psroi_pool ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.huber_loss ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
...
...
@@ -350,6 +351,22 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.__init__ ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.delete ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.download ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.HDFSClient.is_dir ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.is_exist ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.ls ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.lsr ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True))
paddle.fluid.contrib.HDFSClient.make_local_dirs ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.makedirs ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.rename ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.contrib.HDFSClient.upload ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5))
paddle.fluid.contrib.multi_download ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,))
paddle.fluid.contrib.multi_upload ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
53619a79
...
...
@@ -131,9 +131,7 @@ std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy(
std
::
unique_ptr
<
ir
::
Graph
>
BuildStrategy
::
Apply
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
string
&
loss_var_name
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
#else
...
...
@@ -149,9 +147,6 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
pass
->
Erase
(
"loss_var_name"
);
pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
pass
->
Erase
(
"params"
);
pass
->
SetNotOwned
<
const
std
::
unordered_set
<
std
::
string
>>
(
"params"
,
&
param_names
);
pass
->
Erase
(
"local_scopes"
);
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
53619a79
...
...
@@ -106,14 +106,13 @@ struct BuildStrategy {
// Apply the passes built by the pass_builder_. The passes will be
// applied to the Program and output an ir::Graph.
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
const
ProgramDesc
&
main_program
,
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
#else
const
bool
use_cuda
)
const
;
#endif
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
53619a79
...
...
@@ -130,7 +130,6 @@ void AddOutputToLeafOps(ir::Graph *graph) {
static
const
char
kLossVarName
[]
=
"loss_var_name"
;
static
const
char
kPlaces
[]
=
"places"
;
static
const
char
kParams
[]
=
"params"
;
static
const
char
kLocalScopes
[]
=
"local_scopes"
;
static
const
char
kStrategy
[]
=
"strategy"
;
static
const
char
kNumTrainers
[]
=
"num_trainers"
;
...
...
@@ -147,9 +146,6 @@ void MultiDevSSAGraphBuilder::Init() const {
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
#endif
for
(
auto
&
p
:
Get
<
const
std
::
unordered_set
<
std
::
string
>>
(
kParams
))
{
grad_names_
.
insert
(
GradVarName
(
p
));
}
balance_vars_
.
resize
(
places_
.
size
(),
0
);
if
(
strategy_
.
enable_data_balance_
&&
places_
.
size
()
==
1
)
{
LOG
(
WARNING
)
<<
"It is no need to enable data balance when there is only "
...
...
@@ -896,7 +892,6 @@ REGISTER_PASS(multi_devices_pass,
paddle
::
framework
::
details
::
MultiDevSSAGraphBuilder
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLossVarName
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kPlaces
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kParams
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLocalScopes
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kStrategy
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kNumTrainers
);
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
53619a79
...
...
@@ -102,7 +102,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
mutable
std
::
string
loss_var_name_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
mutable
std
::
unordered_set
<
std
::
string
>
grad_names_
;
mutable
BuildStrategy
strategy_
;
mutable
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars_
;
...
...
paddle/fluid/framework/op_desc.cc
浏览文件 @
53619a79
...
...
@@ -110,22 +110,125 @@ class CompileTimeInferShapeContext : public InferShapeContext {
}
}
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Inputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
});
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Outputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
});
return
res
;
}
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
return
this
->
GetDim
(
arg_names
[
0
]);
}
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
return
GetDims
(
arg_names
);
}
bool
IsRuntime
()
const
override
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
Inputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
Outputs
(
name
));
}
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
auto
&
arg_names
=
Outputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
SetDim
(
arg_names
[
0
],
dim
);
}
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
override
{
auto
&
names
=
Outputs
(
name
);
SetDims
(
names
,
dims
);
}
protected:
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
override
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
CompileTimeInferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
DDim
GetDim
(
const
std
::
string
&
name
)
const
override
;
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
;
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
;
DDim
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
DDim
res
;
try
{
auto
shape
=
var
->
GetShape
();
res
=
shape
.
empty
()
?
make_ddim
({
0UL
})
:
make_ddim
(
shape
);
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
return
res
;
}
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetDim
(
name
);
});
return
ret
;
}
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
);
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
)
{
size_t
length
=
names
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
SetDim
(
names
[
i
],
dims
[
i
]);
}
}
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
;
void
SetRepeatedDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
override
;
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
override
;
const
OpDesc
&
op_
;
const
BlockDesc
&
block_
;
};
...
...
@@ -644,20 +747,6 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
return
op_
.
Output
(
name
);
}
DDim
CompileTimeInferShapeContext
::
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
DDim
res
;
try
{
auto
shape
=
var
->
GetShape
();
res
=
shape
.
empty
()
?
make_ddim
({
0UL
})
:
make_ddim
(
shape
);
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
return
res
;
}
std
::
vector
<
DDim
>
CompileTimeInferShapeContext
::
GetRepeatedDims
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
...
...
@@ -696,10 +785,5 @@ proto::VarType::Type CompileTimeInferShapeContext::GetVarType(
return
block_
.
FindVarRecursive
(
name
)
->
GetType
();
}
InferShapeVarPtr
CompileTimeInferShapeContext
::
GetVarPtr
(
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/op_desc.h
浏览文件 @
53619a79
...
...
@@ -123,6 +123,8 @@ class OpDesc {
BlockDesc
*
Block
()
{
return
this
->
block_
;
}
const
BlockDesc
*
Block
()
const
{
return
this
->
block_
;
}
private:
template
<
typename
MapType
>
static
std
::
vector
<
typename
MapType
::
key_type
>
MapKeys
(
const
MapType
&
map
)
{
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
53619a79
...
...
@@ -142,12 +142,14 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
const
Scope
&
scope
)
{
for
(
auto
&
var_name_item
:
innames
)
{
std
::
vector
<
Variable
*>&
input_vars
=
inputs
[
var_name_item
.
first
];
input_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
input_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
}
for
(
auto
&
var_name_item
:
outnames
)
{
std
::
vector
<
Variable
*>&
output_vars
=
outputs
[
var_name_item
.
first
];
output_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
output_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
...
...
@@ -552,30 +554,28 @@ class RuntimeInferShapeContext : public InferShapeContext {
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
// has only one output
const
auto
&
outs
=
op_
.
Outputs
()
;
const
auto
&
outs
=
ctx_
.
outputs
;
auto
it
=
outs
.
find
(
name
);
if
(
it
==
outs
.
end
())
{
return
false
;
}
const
auto
&
out
=
it
->
second
;
if
(
out
.
size
()
==
0
||
out
[
0
]
==
kEmptyVarName
)
{
if
(
out
.
size
()
==
0
)
{
return
false
;
}
PADDLE_ENFORCE_EQ
(
out
.
size
(),
1UL
,
"Output %s should not have more than one outputs"
,
name
);
return
scope_
.
FindVar
(
out
[
0
])
!=
nullptr
;
return
out
[
0
]
!=
nullptr
;
}
bool
HasInputs
(
const
std
::
string
&
name
)
const
override
{
if
(
!
op_
.
HasInputs
(
name
))
{
return
false
;
}
auto
inputs
=
op_
.
Inputs
(
name
);
if
(
inputs
.
empty
())
{
const
auto
&
ins
=
ctx_
.
inputs
;
auto
it
=
ins
.
find
(
name
);
if
(
it
==
ins
.
end
()
||
it
->
second
.
empty
())
{
return
false
;
}
for
(
auto
&
input
:
i
nputs
)
{
if
(
scope_
.
FindVar
(
input
)
==
nullptr
)
{
for
(
auto
&
input
:
i
t
->
second
)
{
if
(
input
==
nullptr
)
{
return
false
;
}
}
...
...
@@ -583,15 +583,13 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
bool
HasOutputs
(
const
std
::
string
&
name
)
const
override
{
if
(
!
op_
.
HasOutputs
(
name
))
{
return
false
;
}
auto
outputs
=
op_
.
Outputs
(
name
);
if
(
outputs
.
empty
())
{
const
auto
&
outs
=
ctx_
.
outputs
;
auto
it
=
outs
.
find
(
name
);
if
(
it
==
outs
.
end
()
||
it
->
second
.
empty
())
{
return
false
;
}
for
(
auto
&
output
:
outputs
)
{
if
(
scope_
.
FindVar
(
output
)
==
nullptr
)
{
for
(
auto
&
output
:
it
->
second
)
{
if
(
output
==
nullptr
)
{
return
false
;
}
}
...
...
@@ -612,16 +610,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
ShareDim
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
size_t
j
=
0
)
override
{
PADDLE_ENFORCE_LT
(
i
,
Inputs
(
in
).
size
());
PADDLE_ENFORCE_LT
(
j
,
Outputs
(
out
).
size
());
const
std
::
string
&
input_n
=
Inputs
(
in
)[
i
];
const
std
::
string
&
output_n
=
Outputs
(
out
)[
j
];
auto
in_it
=
ctx_
.
inputs
.
find
(
in
);
auto
out_it
=
ctx_
.
outputs
.
find
(
out
);
PADDLE_ENFORCE
(
in_it
!=
ctx_
.
inputs
.
end
()
&&
in_it
->
second
.
size
()
>
i
,
"Inputs %s should have %llu argument"
,
in
,
i
);
PADDLE_ENFORCE
(
out_it
!=
ctx_
.
outputs
.
end
()
&&
out_it
->
second
.
size
()
>
j
,
"Outputs %s should have %llu argument"
,
out
,
j
);
Variable
*
in_var
=
in_it
->
second
[
i
];
Variable
*
out_var
=
out_it
->
second
[
j
];
Variable
*
in_var
=
scope_
.
FindVar
(
input_n
);
Variable
*
out_var
=
scope_
.
FindVar
(
output_n
);
PADDLE_ENFORCE
(
in_var
->
Type
()
==
out_var
->
Type
(),
"The type of %s and %s is not the same."
,
output_n
,
GetDim
(
input_n
));
"The type of %s and %s is not the same."
,
in
,
out
);
if
(
in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
&
in_sele_rows
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
...
...
@@ -642,13 +642,16 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
ShareLoD
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
size_t
j
=
0
)
const
override
{
const
std
::
vector
<
std
::
string
>&
inputs
=
Inputs
(
in
);
const
std
::
vector
<
std
::
string
>&
outputs
=
Outputs
(
out
);
PADDLE_ENFORCE_LT
(
i
,
inputs
.
size
());
PADDLE_ENFORCE_LT
(
j
,
outputs
.
size
());
Variable
*
in_var
=
scope_
.
FindVar
(
inputs
.
at
(
i
));
auto
in_it
=
ctx_
.
inputs
.
find
(
in
);
auto
out_it
=
ctx_
.
outputs
.
find
(
out
);
PADDLE_ENFORCE
(
in_it
!=
ctx_
.
inputs
.
end
()
&&
in_it
->
second
.
size
()
>
i
,
"Inputs %s should have %llu argument"
,
in
,
i
);
PADDLE_ENFORCE
(
out_it
!=
ctx_
.
outputs
.
end
()
&&
out_it
->
second
.
size
()
>
j
,
"Outputs %s should have %llu argument"
,
out
,
j
);
Variable
*
in_var
=
in_it
->
second
.
at
(
i
);
if
(
!
in_var
->
IsType
<
LoDTensor
>
())
return
;
Variable
*
out_var
=
scope_
.
FindVar
(
outputs
.
at
(
j
)
);
Variable
*
out_var
=
out_it
->
second
.
at
(
j
);
PADDLE_ENFORCE
(
out_var
->
IsType
<
LoDTensor
>
(),
"The %d-th output of Output(%s) must be LoDTensor."
,
j
,
out
);
auto
in_tensor
=
in_var
->
Get
<
LoDTensor
>
();
...
...
@@ -683,9 +686,64 @@ class RuntimeInferShapeContext : public InferShapeContext {
bool
IsRuntime
()
const
override
{
return
true
;
}
// TODO(paddle-dev): Can this be template?
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
vars
.
size
());
res
.
insert
(
res
.
begin
(),
vars
.
begin
(),
vars
.
end
());
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
Variable
*>&
vars
=
OutputVars
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
vars
.
size
());
res
.
insert
(
res
.
begin
(),
vars
.
begin
(),
vars
.
end
());
return
res
;
}
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
PADDLE_ENFORCE_EQ
(
vars
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
vars
.
size
());
return
this
->
GetDim
(
vars
[
0
]);
}
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
return
GetDims
(
vars
);
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
InputVars
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
OutputVars
(
name
));
}
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
auto
&
vars
=
OutputVars
(
name
);
PADDLE_ENFORCE_EQ
(
vars
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
vars
.
size
());
SetDim
(
vars
[
0
],
dim
);
}
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>&
dims
)
override
{
auto
&
vars
=
OutputVars
(
name
);
SetDims
(
vars
,
dims
);
}
protected:
DDim
GetDim
(
const
std
::
string
&
name
)
const
override
{
Variable
*
var
=
scope_
.
FindVar
(
name
);
DDim
GetDim
(
Variable
*
var
)
const
{
PADDLE_ENFORCE_NOT_NULL
(
var
);
if
(
var
->
IsType
<
LoDTensor
>
())
{
return
var
->
Get
<
LoDTensor
>
().
dims
();
...
...
@@ -693,25 +751,44 @@ class RuntimeInferShapeContext : public InferShapeContext {
return
var
->
Get
<
SelectedRows
>
().
GetCompleteDims
();
}
else
{
PADDLE_THROW
(
"Only LoDTensor/SelectedRows support 'GetDim', but Variable
%s'
s "
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"type_id is %s."
,
name
,
var
->
Type
().
name
());
var
->
Type
().
name
());
}
}
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
Variable
*>&
vars
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
vars
.
size
());
std
::
transform
(
vars
.
begin
(),
vars
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
Variable
*
var
)
{
return
this
->
GetDim
(
var
);
});
return
ret
;
}
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
{
PADDLE_THROW
(
"Only compile time support this method"
);
}
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
Variable
*
var
=
scope_
.
FindVar
(
name
);
void
SetDim
(
Variable
*
var
,
const
DDim
&
dim
)
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
dim
);
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
}
else
{
PADDLE_THROW
(
"Variable %s type_id %s, expect LoDTensor/SelectedRows."
,
name
,
var
->
Type
().
name
());
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
());
}
}
void
SetDims
(
const
std
::
vector
<
Variable
*>&
vars
,
const
std
::
vector
<
DDim
>&
dims
)
{
size_t
length
=
vars
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
vars
[
i
]
==
nullptr
)
{
continue
;
}
SetDim
(
vars
[
i
],
dims
[
i
]);
}
}
...
...
@@ -720,16 +797,36 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_THROW
(
"Only compile time support this method"
);
}
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
override
{
auto
*
var
=
scope_
.
FindVar
(
name
);
return
ToVarType
(
var
->
Type
());
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
const
std
::
vector
<
Variable
*>&
vars
)
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
vars
.
size
());
std
::
transform
(
vars
.
begin
(),
vars
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
RuntimeInferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
override
{
return
scope_
.
FindVar
(
name
);
proto
::
VarType
::
Type
GetVarType
(
Variable
*
var
)
const
{
return
ToVarType
(
var
->
Type
()
);
}
private:
const
std
::
vector
<
Variable
*>&
InputVars
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
inputs
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
ctx_
.
inputs
.
end
(),
"Operator %s does not have the input %s."
,
op_
.
Type
(),
name
);
return
it
->
second
;
}
const
std
::
vector
<
Variable
*>&
OutputVars
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
outputs
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
ctx_
.
outputs
.
end
(),
"Operator %s does not have the outputs %s."
,
op_
.
Type
(),
name
);
return
it
->
second
;
}
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
const
RuntimeContext
&
ctx_
;
...
...
@@ -860,8 +957,7 @@ Scope* OperatorWithKernel::PrepareData(
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
auto
&
var_name
=
var_name_item
.
second
[
i
];
auto
*
var
=
scope
.
FindVar
(
var_name
);
input_vars
[
i
]
=
var
;
auto
*
var
=
input_vars
[
i
];
// Only tensor can be tranfer to another device.
if
(
var
==
nullptr
||
!
VarIsTensor
(
*
var
))
{
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
53619a79
...
...
@@ -190,7 +190,6 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
ParallelExecutor
::
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
...
...
@@ -209,7 +208,7 @@ ParallelExecutor::ParallelExecutor(
"the number of places must be greater than 1."
);
}
// Step 1. Bcast the
param
s to devs.
// Step 1. Bcast the
bcast_var
s to devs.
// Create local scopes
if
(
local_scopes
.
empty
())
{
member_
->
own_local_scope_
=
true
;
...
...
@@ -249,12 +248,12 @@ ParallelExecutor::ParallelExecutor(
// ncclOp
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
main_program
,
member_
->
places_
,
loss_var_name
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
#else
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
);
member_
->
local_scopes_
,
member_
->
use_cuda_
);
#endif
auto
max_memory_size
=
GetEagerDeletionThreshold
();
if
(
max_memory_size
>=
0
)
{
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
53619a79
...
...
@@ -41,7 +41,6 @@ class ParallelExecutor {
public:
explicit
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
...
...
paddle/fluid/framework/shape_inference.cc
浏览文件 @
53619a79
...
...
@@ -22,20 +22,6 @@ limitations under the License. */
namespace
paddle
{
namespace
framework
{
DDim
InferShapeContext
::
GetInputDim
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
return
this
->
GetDim
(
arg_names
[
0
]);
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetInputsDim
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
return
GetDims
(
arg_names
);
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetReaderDims
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
...
...
@@ -46,26 +32,6 @@ std::vector<DDim> InferShapeContext::GetReaderDims(
return
this
->
GetRepeatedDims
(
arg_names
[
0
]);
}
DDim
InferShapeContext
::
GetInputsElementDim
(
const
std
::
string
&
name
,
int
idx
)
const
{
const
std
::
vector
<
std
::
string
>
&
names
=
Inputs
(
name
);
return
this
->
GetDim
(
names
[
idx
]);
}
void
InferShapeContext
::
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
{
auto
&
arg_names
=
Outputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
SetDim
(
arg_names
[
0
],
dim
);
}
void
InferShapeContext
::
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
{
auto
&
names
=
Outputs
(
name
);
SetDims
(
names
,
dims
);
}
void
InferShapeContext
::
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Outputs
(
name
);
...
...
@@ -76,69 +42,5 @@ void InferShapeContext::SetReaderDims(const std::string &name,
return
this
->
SetRepeatedDims
(
arg_names
[
0
],
dims
);
}
std
::
vector
<
InferShapeVarPtr
>
InferShapeContext
::
GetInputVarPtrs
(
const
std
::
string
&
name
)
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Inputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetVarPtr
(
name
);
});
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
InferShapeContext
::
GetOutputVarPtrs
(
const
std
::
string
&
name
)
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Outputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetVarPtr
(
name
);
});
return
res
;
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetDim
(
name
);
});
return
ret
;
}
void
InferShapeContext
::
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
)
{
size_t
length
=
names
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
SetDim
(
names
[
i
],
dims
[
i
]);
}
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetInputsVarType
(
const
std
::
string
&
name
)
const
{
return
GetVarTypes
(
Inputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetOutputsVarType
(
const
std
::
string
&
name
)
const
{
return
GetVarTypes
(
Outputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
InferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/shape_inference.h
浏览文件 @
53619a79
...
...
@@ -25,6 +25,8 @@ limitations under the License. */
namespace
paddle
{
namespace
framework
{
class
OperatorBase
;
using
InferShapeVarPtr
=
boost
::
variant
<
VarDesc
*
,
Variable
*>
;
class
InferShapeContext
{
...
...
@@ -33,22 +35,23 @@ class InferShapeContext {
virtual
bool
HasInput
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutput
(
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
;
virtual
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
=
0
;
virtual
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasInputs
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutputs
(
const
std
::
string
&
name
)
const
=
0
;
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
;
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
;
std
::
vector
<
DDim
>
GetReaderDims
(
const
std
::
string
&
name
)
const
;
DDim
GetInputsElementDim
(
const
std
::
string
&
name
,
int
idx
)
const
;
virtual
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
std
::
vector
<
DDim
>
GetReaderDims
(
const
std
::
string
&
name
)
const
;
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
);
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
void
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
virtual
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
=
0
;
virtual
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
virtual
void
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
virtual
AttrReader
Attrs
()
const
=
0
;
virtual
const
std
::
vector
<
std
::
string
>
&
Inputs
(
...
...
@@ -67,27 +70,15 @@ class InferShapeContext {
virtual
bool
IsRuntime
()
const
=
0
;
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
);
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
);
virtual
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
=
0
;
// Note: In while op, we need this to be public
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
);
virtual
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
)
=
0
;
virtual
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
=
0
;
protected:
virtual
DDim
GetDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
=
0
;
virtual
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetRepeatedDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
virtual
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
=
0
;
};
}
// namespace framework
...
...
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
浏览文件 @
53619a79
...
...
@@ -254,5 +254,16 @@ TEST(Analyzer_dam, compare) { compare(); }
TEST
(
Analyzer_dam
,
compare_mkldnn
)
{
compare
(
true
/* use_mkldnn */
);
}
#endif
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_dam, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
53619a79
...
...
@@ -180,6 +180,17 @@ TEST(Analyzer_LAC, compare) {
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_LAC, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
53619a79
...
...
@@ -179,5 +179,16 @@ TEST(Analyzer_Chinese_ner, compare) {
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_Chinese_ner, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
浏览文件 @
53619a79
...
...
@@ -85,6 +85,17 @@ TEST(Analyzer_resnet50, compare) { compare(); }
TEST
(
Analyzer_resnet50
,
compare_mkldnn
)
{
compare
(
true
/* use_mkldnn */
);
}
#endif
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_resnet50, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
53619a79
...
...
@@ -265,6 +265,17 @@ TEST(Analyzer_rnn1, compare) {
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_rnn1, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
// Test Multi-Thread.
TEST
(
Analyzer_rnn1
,
multi_thread
)
{
contrib
::
AnalysisConfig
cfg
;
...
...
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
浏览文件 @
53619a79
...
...
@@ -158,5 +158,16 @@ TEST(Analyzer_rnn2, compare) {
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_rnn2, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
53619a79
...
...
@@ -204,5 +204,16 @@ TEST(Analyzer_seq_conv1, compare) {
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_seq_conv1, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
53619a79
...
...
@@ -106,6 +106,17 @@ TEST(Analyzer_Text_Classification, compare) {
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_Text_Classification, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
TEST
(
Analyzer_Text_Classification
,
compare_against_embedding_fc_lstm_fused
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
53619a79
...
...
@@ -145,6 +145,17 @@ TEST(Analyzer_vis, compare) { compare(); }
TEST
(
Analyzer_vis
,
compare_mkldnn
)
{
compare
(
true
/* use_mkldnn */
);
}
#endif
// Determinism check: fills the analysis config via SetConfig and the input
// batches via SetInput, then hands both to CompareDeterministic.
TEST(Analyzer_vis, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // CompareDeterministic takes the generic predictor config pointer.
  const auto *predictor_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg);
  CompareDeterministic(predictor_config, input_slots_all);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
53619a79
...
...
@@ -45,6 +45,7 @@ DEFINE_bool(use_analysis, true,
"Running the inference program in analysis mode."
);
DEFINE_bool
(
record_benchmark
,
false
,
"Record benchmark after profiling the model"
);
DEFINE_double
(
accuracy
,
1e-3
,
"Result Accuracy."
);
DECLARE_bool
(
profile
);
DECLARE_int32
(
paddle_num_threads
);
...
...
@@ -85,7 +86,7 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
float
*
pdata
=
static_cast
<
float
*>
(
out
.
data
.
data
());
float
*
pdata_ref
=
static_cast
<
float
*>
(
ref_out
.
data
.
data
());
for
(
size_t
j
=
0
;
j
<
size
;
++
j
)
{
EXPECT_NEAR
(
pdata_ref
[
j
],
pdata
[
j
],
1e-3
);
EXPECT_NEAR
(
pdata_ref
[
j
],
pdata
[
j
],
FLAGS_accuracy
);
}
break
;
}
...
...
@@ -283,6 +284,26 @@ void TestPrediction(const PaddlePredictor::Config *config,
}
}
// Checks that a predictor is deterministic: every input batch is run once to
// record a reference result, then re-run FLAGS_repeat times, and each re-run
// must reproduce the reference recorded for that same batch.
//
// config: predictor configuration handed to CreateTestPredictor.
// inputs: one entry per input batch; each entry holds the tensors fed to a
//         single Run call. An empty list performs no runs and no checks.
void CompareDeterministic(
    const PaddlePredictor::Config *config,
    const std::vector<std::vector<PaddleTensor>> &inputs) {
  int batch_size = FLAGS_batch_size;
  int num_times = FLAGS_repeat;
  auto predictor = CreateTestPredictor(config, FLAGS_use_analysis);

  // Warm-up pass. One reference result is kept per batch: comparing the
  // output of inputs[j] against the output of inputs[0] would flag different
  // inputs as "non-deterministic" whenever more than one batch is supplied.
  // Iterating over the list also avoids indexing inputs[0] when it is empty.
  std::vector<std::vector<PaddleTensor>> reference_outputs(inputs.size());
  for (size_t j = 0; j < inputs.size(); j++) {
    predictor->Run(inputs[j], &reference_outputs[j], batch_size);
  }

  // Run num_times more and compare each result with its own reference.
  std::vector<PaddleTensor> outputs;
  for (int i = 0; i < num_times; i++) {
    for (size_t j = 0; j < inputs.size(); j++) {
      predictor->Run(inputs[j], &outputs, batch_size);
      CompareResult(outputs, reference_outputs[j]);
    }
  }
}
void
CompareNativeAndAnalysis
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
53619a79
...
...
@@ -42,8 +42,7 @@ if (WITH_DISTRIBUTE)
SET
(
OP_PREFETCH_DEPS
${
OP_PREFETCH_DEPS
}
parameter_prefetch
)
endif
()
register_operators
(
EXCLUDES warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
register_operators
(
EXCLUDES py_func_op warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
# warpctc_op needs cuDNN 7 or above
if
(
WITH_GPU AND NOT WIN32
)
...
...
@@ -92,4 +91,8 @@ cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
cc_test
(
save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op
)
nv_test
(
dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor
)
if
(
WITH_PYTHON
)
cc_library
(
py_func_op SRCS py_func_op.cc DEPS op_registry python pybind
)
endif
()
set
(
GLOB_OP_LIB
${
OP_LIBRARY
}
CACHE INTERNAL
"Global OP library"
)
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
53619a79
...
...
@@ -399,26 +399,41 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
ctx
->
HasInputs
(
kOutputs
);
ctx
->
HasInputs
(
framework
::
GradVarName
(
kOutputs
));
auto
p_names
=
ctx
->
Inputs
(
kX
);
auto
pg_ig_names
=
ctx
->
Outputs
(
kXGRAD
);
auto
var_types
=
ctx
->
GetInputsVarType
(
kX
);
std
::
vector
<
std
::
string
>
names_to_set
;
std
::
vector
<
framework
::
DDim
>
dims_to_set
;
for
(
size_t
i
=
0
;
i
<
p_names
.
size
();
++
i
)
{
std
::
vector
<
framework
::
InferShapeVarPtr
>
in_var_ptrs
=
ctx
->
GetInputVarPtrs
(
kX
);
std
::
vector
<
framework
::
InferShapeVarPtr
>
out_var_ptrs
=
ctx
->
GetOutputVarPtrs
(
kXGRAD
);
PADDLE_ENFORCE
(
in_var_ptrs
.
size
()
==
out_var_ptrs
.
size
());
for
(
size_t
i
=
0
;
i
<
in_var_ptrs
.
size
();
++
i
)
{
if
(
pg_ig_names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
auto
dims
=
ctx
->
GetInputsElementDim
(
kX
,
i
);
if
(
var_types
[
i
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
names_to_set
.
push_back
(
pg_ig_names
[
i
]);
dims_to_set
.
push_back
(
dims
);
}
else
if
(
var_types
[
i
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
// not sure how to set the dim of LOD_TENSOR_ARRAY
names_to_set
.
push_back
(
pg_ig_names
[
i
]);
dims_to_set
.
push_back
(
dims
);
if
(
ctx
->
IsRuntime
())
{
framework
::
Variable
*
in_var
=
boost
::
get
<
framework
::
Variable
*>
(
in_var_ptrs
[
i
]);
framework
::
Variable
*
out_var
=
boost
::
get
<
framework
::
Variable
*>
(
out_var_ptrs
[
i
]);
auto
type
=
framework
::
ToVarType
(
in_var
->
Type
());
if
(
type
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
out_var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
in_var
->
Get
<
framework
::
LoDTensor
>
().
dims
());
}
else
if
(
type
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
out_var
->
GetMutable
<
framework
::
SelectedRows
>
()
->
set_height
(
in_var
->
Get
<
framework
::
SelectedRows
>
().
GetCompleteDims
()[
0
]);
}
else
if
(
type
==
framework
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
PADDLE_THROW
(
"WhileGradOp doesn't support type %d"
,
static_cast
<
int
>
(
type
));
}
}
else
{
framework
::
VarDesc
*
in_var
=
boost
::
get
<
framework
::
VarDesc
*>
(
in_var_ptrs
[
i
]);
boost
::
get
<
framework
::
VarDesc
*>
(
out_var_ptrs
[
i
])
->
SetShape
(
in_var
->
GetShape
());
}
}
ctx
->
SetDims
(
names_to_set
,
dims_to_set
);
}
};
...
...
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
53619a79
...
...
@@ -155,11 +155,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
chosen_memory_format
=
platform
::
data_format_to_memory_format
(
data_format
);
weights_format
=
mkldnn
::
memory
::
format
::
any
;
// Check the format for user's special output
if
(
chosen_memory_format
!=
mkldnn
::
memory
::
format
::
any
)
{
if
(
is_conv3d
)
{
chosen_memory_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
}
weights_format
=
GetWeightsFormat
(
chosen_memory_format
,
g
,
is_conv3d
);
}
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
...
...
@@ -435,11 +438,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto
chosen_memory_format
=
platform
::
data_format_to_memory_format
(
data_format
);
weights_format
=
mkldnn
::
memory
::
format
::
any
;
// Check the format for user's special output
if
(
chosen_memory_format
!=
mkldnn
::
memory
::
format
::
any
)
{
if
(
is_conv3d
)
{
chosen_memory_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
}
weights_format
=
GetWeightsFormat
(
chosen_memory_format
,
g
,
is_conv3d
);
}
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
...
...
paddle/fluid/operators/merge_selected_rows_op.cc
浏览文件 @
53619a79
...
...
@@ -26,6 +26,13 @@ class MergeSelectedRowsOp : public framework::OperatorWithKernel {
"Input(X) of MergeSelectedRowsOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of MergeSelectedRowsOp should not be null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"X"
).
front
(),
framework
::
proto
::
VarType
::
SELECTED_ROWS
,
"Input X only should be SelectedRows."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetOutputsVarType
(
"Out"
).
front
(),
framework
::
proto
::
VarType
::
SELECTED_ROWS
,
"Output Y only should be SelectedRows."
);
ctx
->
ShareDim
(
"X"
,
/*->*/
"Out"
);
}
};
...
...
@@ -43,7 +50,28 @@ class MergeSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
R"DOC(
MergeSelectedRows Operator.
MergeSelectedRows is used to merge the duplicated rows of the input.
MergeSelectedRows is used to merge the duplicated rows of the input. The
output's row has no duplicated, and it's order is incremental.
Example:
Input:
X.rows is [0, 5, 5, 4, 19]
X.height is 20
X.value is:
[[1, 1]
[2, 2]
[3, 3]
[4, 4]
[6, 6]]
Output:
Out.row is [0, 4, 5, 19]
Out.height is 20
Out.value is:
[[1, 1]
[4, 4]
[5, 5]
[6, 6]]
)DOC"
);
}
};
...
...
paddle/fluid/operators/py_func_op.cc
0 → 100644
浏览文件 @
53619a79
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/py_func_op.h"
#include <set>
#include <string>
#include <vector>
#include "Python.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
namespace
py
=
::
pybind11
;
static
std
::
vector
<
py
::
object
>
g_py_callables
;
const
char
kForwardPythonCallableId
[]
=
"forward_callable_id"
;
const
char
kBackwardPythonCallableId
[]
=
"backward_callable_id"
;
const
char
kPyFuncBackwardSkipVars
[]
=
"backward_skip_vars"
;
// Registers a Python callable in the global callable table and returns the
// index at which it was stored. That index is the id later passed to
// GetPythonCallableObject.
size_t AppendPythonCallableObjectAndReturnId(const py::object &py_obj) {
  // The new entry lands at the current end of the table.
  const size_t new_id = g_py_callables.size();
  g_py_callables.emplace_back(py_obj);
  return new_id;
}
// Looks up a registered Python callable by id.
//
// A pointer into the table is returned rather than a py::object copy:
// copying a py::object bumps the Python reference count, and doing that
// without holding the GIL may not be safe.
// Raises through PADDLE_ENFORCE_LT when the id is out of range.
static py::object *GetPythonCallableObject(size_t i) {
  PADDLE_ENFORCE_LT(i, g_py_callables.size(), "Invalid python callable id");
  return g_py_callables.data() + i;
}
// Builds a human-readable description of a registered callable in the form
// "<str(callable._func)> wrapped by <str(callable)>".
// The GIL is held for the duration of the call because Python objects are
// stringified here.
static std::string PythonFuncDebugString(const py::object &py_callable) {
  py::gil_scoped_acquire guard;

  // Stringify the wrapper first, then the function stored in its "_func"
  // attribute.
  const std::string wrapper_repr = py::str(py_callable);
  auto wrapped_func = py_callable.attr("_func");
  const std::string inner_repr = py::str(wrapped_func);

  std::string description(inner_repr);
  description.append(" wrapped by ");
  description.append(wrapper_repr);
  return description;
}
// Invokes a Python callable with the given input tensors and stores its
// results into the output tensors.
//
// callable: the Python object to call.
// ins:      positional arguments; an uninitialized tensor is passed to
//           Python as the cast of nullptr instead of a tensor object.
// outs:     destinations for the returned values; a null entry is skipped.
//
// The GIL is held for the whole function. Raises (via PADDLE_ENFORCE /
// PADDLE_THROW) when the number of returned values does not fit the number
// of outputs, or when a returned value cannot be cast to LoDTensor.
static void CallPythonFunc(py::object *callable,
                           const std::vector<framework::LoDTensor> &ins,
                           std::vector<framework::LoDTensor *> *outs) {
  py::gil_scoped_acquire guard;

  // Pack the inputs into a positional-argument tuple.
  py::tuple in_args(ins.size());
  size_t arg_pos = 0;
  for (const auto &in_tensor : ins) {
    if (in_tensor.IsInitialized()) {
      in_args[arg_pos] = py::cast(in_tensor);
    } else {
      in_args[arg_pos] = py::cast(nullptr);
    }
    ++arg_pos;
  }

  auto ret = (*callable)(*in_args);
  auto ret_tuple = py::cast<py::tuple>(ret);
  size_t ret_num = py::len(ret_tuple);
  size_t out_num = outs->size();
  if (UNLIKELY(ret_num != out_num)) {
    // Python function has no return values or returns None
    // In this case, ret_num = 1 && ret[0] == None && out_num should be 0
    // Otherwise, ret_num must be equal to out_num
    PADDLE_ENFORCE(
        ret_num == 1 && out_num == 0 &&
            py::cast<framework::LoDTensor *>(ret_tuple[0]) == nullptr,
        "Output number not match. Expected %d, actual %d", out_num, ret_num);
  }

  // Hand each returned tensor (LoD and data) over to its output slot.
  for (size_t i = 0; i < out_num; ++i) {
    auto *out = (*outs)[i];
    if (out != nullptr) {
      try {
        auto *py_out_tensor = py::cast<framework::LoDTensor *>(ret_tuple[i]);
        PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
                                "Output tensor %d should not be nullptr", i);
        out->set_lod(py_out_tensor->lod());
        out->ShareDataWith(*py_out_tensor);
      } catch (py::cast_error &) {
        PADDLE_THROW("The %d-th output must be LoDTensor", i);
      }
    }
  }
}
// Variable-type inference for py_func.
//
// Validates that the op has at least one of Input(X) / Output(Out) and a
// non-negative forward callable id. Then, for every output whose name ends
// with the gradient suffix, copies shape, data type, LoD level and variable
// type from the variable named by stripping that suffix.
class PyFuncOpVarTypInference : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc &op,
                  framework::BlockDesc *block) const override {
    auto &outs = op.Outputs();
    auto out_iter = outs.find("Out");
    bool has_out = (out_iter != outs.end() && !out_iter->second.empty());

    auto &ins = op.Inputs();
    auto in_iter = ins.find("X");
    bool has_in = (in_iter != ins.end() && !in_iter->second.empty());

    // X or Out may be empty so that py_func can wrap Python functions with
    // no input or no output, but not both at once.
    PADDLE_ENFORCE(has_in || has_out, "Input(X) or Output(Out) must exist");

    PADDLE_ENFORCE_GE(boost::get<int>(op.GetAttr(kForwardPythonCallableId)), 0,
                      "Function id cannot be less than 0");

    if (!has_out) {
      return;
    }

    // Walk all outputs looking for names that end with the gradient suffix.
    const std::string grad_suffix = framework::kGradVarSuffix;
    for (auto &out_var_name : out_iter->second) {
      if (out_var_name == framework::kEmptyVarName) {
        continue;
      }
      if (out_var_name.size() < grad_suffix.size()) {
        continue;
      }

      const size_t stem_len = out_var_name.size() - grad_suffix.size();
      if (out_var_name.compare(stem_len, std::string::npos, grad_suffix) !=
          0) {
        continue;
      }

      auto fwd_var_name = out_var_name.substr(0, stem_len);
      auto *out_var_desc = block->FindVarRecursive(out_var_name);
      auto *fwd_var_desc = block->FindVarRecursive(fwd_var_name);
      PADDLE_ENFORCE_NOT_NULL(out_var_desc, "Backward variable %s not found",
                              out_var_name);
      PADDLE_ENFORCE_NOT_NULL(fwd_var_desc, "Forward variable %s not found",
                              fwd_var_name);
      VLOG(10) << "Infer var_desc of Output(" << out_var_name << ") as Input("
               << fwd_var_name << ")";

      // Mirror the forward variable's description onto the gradient.
      out_var_desc->SetShape(fwd_var_desc->GetShape());
      out_var_desc->SetDataType(fwd_var_desc->GetDataType());
      out_var_desc->SetLoDLevel(fwd_var_desc->GetLoDLevel());
      out_var_desc->SetType(fwd_var_desc->GetType());
    }
  }
};
// Shape inference for py_func. It infers nothing; it only rejects being
// invoked with a runtime context.
class PyFuncOpShapeInference : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    const bool is_runtime = ctx->IsRuntime();
    PADDLE_ENFORCE(!is_runtime, "Infer shape cannot be called in runtime.");
  }
};
// Declares the proto of py_func: duplicable input X, duplicable output Out,
// the forward/backward callable ids and the list of variables the backward
// op does not need.
class PyFuncOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "Inputs of py_func op.").AsDuplicable();
    AddOutput("Out", "Outputs of py_func op").AsDuplicable();

    // Defaults: forward id 0, backward id -1, nothing skipped in backward.
    const int default_forward_id = 0;
    const int default_backward_id = -1;
    const std::vector<std::string> no_skip_vars;

    AddAttr<int>(kForwardPythonCallableId,
                 "Index of registered forward Python function.")
        .SetDefault(default_forward_id);
    AddAttr<int>(kBackwardPythonCallableId,
                 "Index of registered backward Python function.")
        .SetDefault(default_backward_id);
    AddAttr<std::vector<std::string>>(kPyFuncBackwardSkipVars,
                                      "Unused forward in/out in backward op")
        .SetDefault(no_skip_vars);
    AddComment(R"DOC("PyFunc Op")DOC");
  }
};
/**
 * Builds the backward op of py_func, which is itself a py_func op.
 *
 * Keeping the backward op a py_func has two benefits:
 *
 *  - Less code, since backward handling is almost the same as forward.
 *
 *  - Higher-order derivatives are supported, so py_func is
 *    infinite-order differentiable.
 */
class PyFuncOpGradDescMaker : public framework::GradOpDescMakerBase {
 private:
  // Joins the given strings with single spaces; empty input gives "".
  static std::string DebugString(const std::vector<std::string> &strs) {
    std::string joined;
    for (size_t i = 0; i < strs.size(); ++i) {
      if (i != 0) {
        joined += " ";
      }
      joined += strs[i];
    }
    return joined;
  }

 public:
  using framework::GradOpDescMakerBase::GradOpDescMakerBase;

  // Returns a single py_func grad op, or no op at all when the forward op
  // has a negative backward callable id.
  std::vector<std::unique_ptr<framework::OpDesc>> operator()() const override {
    auto &fwd_attrs = Attrs();
    const int backward_id =
        boost::get<int>(fwd_attrs.at(kBackwardPythonCallableId));
    if (backward_id < 0) {
      return {};
    }

    // The backward callable becomes the "forward" callable of the grad op,
    // and the grad op itself gets backward id -1.
    framework::AttributeMap bwd_attrs;
    bwd_attrs[kForwardPythonCallableId] =
        fwd_attrs.at(kBackwardPythonCallableId);
    bwd_attrs[kBackwardPythonCallableId] = -1;

    std::unique_ptr<framework::OpDesc> grad_op(new framework::OpDesc());
    grad_op->SetType("py_func");
    grad_op->SetAttrMap(bwd_attrs);

    auto fwd_ins = Input("X");
    auto fwd_outs = Output("Out");

    // Forward inputs/outputs listed in the skip attribute are left out of
    // the backward inputs; per the original note this helps memory reuse.
    auto &backward_skip_var_list = boost::get<std::vector<std::string>>(
        fwd_attrs.at(kPyFuncBackwardSkipVars));
    std::unordered_set<std::string> skipped_vars(
        backward_skip_var_list.begin(), backward_skip_var_list.end());

    std::vector<std::string> bwd_ins;
    bwd_ins.reserve(fwd_ins.size() + fwd_outs.size());
    auto append_unskipped = [&](const std::vector<std::string> &names) {
      for (auto &name : names) {
        if (skipped_vars.count(name) == 0) {
          bwd_ins.emplace_back(name);
        }
      }
    };
    append_unskipped(fwd_ins);
    append_unskipped(fwd_outs);

    // Output gradients are always appended, never skipped.
    auto fwd_out_grads = OutputGrad("Out");
    bwd_ins.reserve(bwd_ins.size() + fwd_out_grads.size());
    bwd_ins.insert(bwd_ins.end(), fwd_out_grads.begin(), fwd_out_grads.end());

    // Input gradients are always produced as outputs. Original note: on the
    // Python side users can return None for gradients that are not needed.
    auto bwd_outs = InputGrad("X", false);

    VLOG(10) << "PyFunc Grad Input: " << DebugString(bwd_ins);
    VLOG(10) << "PyFunc Grad Output: " << DebugString(bwd_outs);

    grad_op->SetInput("X", bwd_ins);
    grad_op->SetOutput("Out", bwd_outs);

    std::vector<std::unique_ptr<framework::OpDesc>> grad_ops;
    grad_ops.emplace_back(std::move(grad_op));
    return grad_ops;
  }
};
// The py_func operator: gathers its input tensors on the CPU, calls the
// registered Python callable, and stores the results into its outputs.
class PyFuncOp : public framework::OperatorBase {
 public:
  using framework::OperatorBase::OperatorBase;

 protected:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &in_arg_names = Inputs("X");
    auto &out_arg_names = Outputs("Out");

    // Collect the inputs. A slot stays default-constructed when its variable
    // is missing from the scope or its tensor is uninitialized.
    const size_t in_num = in_arg_names.size();
    std::vector<framework::LoDTensor> inputs(in_num);
    for (size_t idx = 0; idx < in_num; ++idx) {
      auto *src_var = scope.FindVar(in_arg_names[idx]);
      // Original note: in backward, the input variable may be null.
      if (src_var == nullptr) {
        continue;
      }
      auto &src_tensor = src_var->Get<framework::LoDTensor>();
      if (!src_tensor.IsInitialized()) {
        continue;
      }

      // GPU tensors are copied to the CPU; others share their data.
      auto &dst_tensor = inputs[idx];
      if (platform::is_gpu_place(src_tensor.place())) {
        framework::TensorCopySync(src_tensor, platform::CPUPlace(),
                                  &dst_tensor);
      } else {
        dst_tensor.ShareDataWith(src_tensor);
      }
      dst_tensor.set_lod(src_tensor.lod());
    }

    // Collect the outputs; a variable missing from the scope yields nullptr.
    std::vector<framework::LoDTensor *> outputs;
    outputs.reserve(out_arg_names.size());
    for (auto &out_name : out_arg_names) {
      auto *dst_var = scope.FindVar(out_name);
      if (dst_var != nullptr) {
        outputs.push_back(dst_var->GetMutable<framework::LoDTensor>());
      } else {
        outputs.push_back(nullptr);
      }
    }

    auto callable_id = static_cast<size_t>(Attr<int>(kForwardPythonCallableId));
    auto *py_callable = GetPythonCallableObject(callable_id);
    VLOG(10) << "Call Python function with id " << callable_id << ": "
             << PythonFuncDebugString(*py_callable);
    CallPythonFunc(py_callable, inputs, &outputs);
  }
};
}
// namespace operators
}
// namespace paddle
// Short alias for the operator namespace, used by the registration below.
namespace ops = paddle::operators;
// Registers the operator under the name `py_func`, passing its operator
// class together with the maker, var-type inference, shape inference and
// gradient-desc maker classes to the REGISTER_OPERATOR macro.
REGISTER_OPERATOR(py_func, ops::PyFuncOp, ops::PyFuncOpMaker,
                  ops::PyFuncOpVarTypInference, ops::PyFuncOpShapeInference,
                  ops::PyFuncOpGradDescMaker);
paddle/fluid/operators/py_func_op.h
0 → 100644
浏览文件 @
53619a79
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
namespace paddle {
namespace operators {

// Takes a Python object by const reference and returns a size_t id.
// NOTE(review): judging by the name, the object is appended to a registry of
// Python callables and the returned value is its id; the definition is not
// part of this header -- confirm against py_func_op.cc.
size_t AppendPythonCallableObjectAndReturnId(const ::pybind11::object &py_obj);

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/transpose_mkldnn_op.cc
浏览文件 @
53619a79
...
...
@@ -32,7 +32,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const
bool
is_test
=
ctx
.
Attr
<
bool
>
(
"is_test"
);
PADDLE_ENFORCE
(
is_test
==
true
,
"
Conv
TransposeMKLDNN works only for inference!. Set is_test = True"
);
"TransposeMKLDNN works only for inference!. Set is_test = True"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
...
...
@@ -47,69 +47,24 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
return
;
}
std
::
vector
<
int
>
nchw_axis
(
ndims
,
0
);
for
(
size_t
i
=
0
;
i
<
nchw_axis
.
size
();
++
i
)
{
nchw_axis
[
i
]
=
i
;
}
std
::
vector
<
int
>
nchw_tz
=
paddle
::
framework
::
vectorize2int
(
input
->
dims
());
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
auto
src_md
=
input
->
format
()
!=
mkldnn
::
memory
::
format
::
nchw
?
platform
::
MKLDNNMemDesc
(
nchw_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
input
->
format
())
:
Axis2MemoryDesc
(
nchw_tz
,
nchw_axis
);
this
->
TransposeKernel
(
ctx
.
GetPlace
(),
Axis2MemoryDesc
(
nchw_tz
,
axis
),
src_md
,
output
,
input_data
,
nchw_tz
,
mkldnn_engine
);
}
protected:
mkldnn
::
memory
::
desc
Axis2MemoryDesc
(
std
::
vector
<
int
>&
nchw_tz
,
std
::
vector
<
int
>&
axis
)
const
{
mkldnn_memory_desc_t
mem_fmt
;
mem_fmt
.
primitive_kind
=
mkldnn_memory
;
mem_fmt
.
ndims
=
axis
.
size
();
for
(
unsigned
int
i
=
0
;
i
<
nchw_tz
.
size
();
++
i
)
{
mem_fmt
.
dims
[
i
]
=
nchw_tz
[
i
];
// logical dimensions (nchw format,
// regardless physical layout)
}
mem_fmt
.
data_type
=
mkldnn_f32
;
mem_fmt
.
format
=
mkldnn_blocked
;
unsigned
int
total_stride
=
1
;
for
(
int
i
=
nchw_tz
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
mem_fmt
.
layout_desc
.
blocking
.
padding_dims
[
i
]
=
nchw_tz
[
i
];
// logical dimensions (nchw format, regardless physical
// layout)
mem_fmt
.
layout_desc
.
blocking
.
block_dims
[
i
]
=
1
;
mem_fmt
.
layout_desc
.
blocking
.
offset_padding_to_data
[
i
]
=
0
;
// no offset
mem_fmt
.
layout_desc
.
blocking
.
strides
[
0
][
axis
[
i
]]
=
total_stride
;
mem_fmt
.
layout_desc
.
blocking
.
strides
[
1
][
axis
[
i
]]
=
1
;
total_stride
*=
nchw_tz
[
axis
[
i
]];
}
mem_fmt
.
layout_desc
.
blocking
.
offset_padding
=
0
;
// no initial offset
return
mem_fmt
;
}
void
TransposeKernel
(
platform
::
Place
place
,
mkldnn
::
memory
::
desc
md_o
,
mkldnn
::
memory
::
desc
md_i
,
Tensor
*
output
,
const
T
*
data_i
,
std
::
vector
<
int
>&
nchw_dims
,
const
mkldnn
::
engine
&
eng
)
const
{
// Make Memory primitive descriptors
auto
mpd_o
=
mkldnn
::
memory
::
primitive_desc
(
md_o
,
eng
);
auto
mpd_i
=
mkldnn
::
memory
::
primitive_desc
(
md_i
,
eng
);
const
std
::
string
key
=
platform
::
TransposeMKLDNNHandler
::
GetHash
(
nchw_tz
,
axis
,
ctx
.
op
().
Output
(
"Out"
));
auto
data_o
=
output
->
mutable_data
<
T
>
(
place
,
paddle
::
memory
::
Allocator
::
kDefault
,
mpd_o
.
get_size
()
);
platform
::
TransposeMKLDNNHandler
handler
(
nchw_tz
,
axis
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
src
=
mkldnn
::
memory
(
mpd_i
,
(
T
*
)(
data_i
));
auto
dst
=
mkldnn
::
memory
(
mpd_o
,
data_o
);
auto
transpose_src_memory_p
=
handler
.
AcquireSrcMemory
(
input
->
format
(),
platform
::
to_void_cast
<
T
>
(
input_data
));
auto
transpose_dst_memory_p
=
handler
.
AcquireDstMemory
(
output
,
ctx
.
GetPlace
());
auto
transpose_p
=
handler
.
AcquireTranspose
(
transpose_dst_memory_p
,
transpose_src_memory_p
);
auto
r
=
mkldnn
::
reorder
(
src
,
dst
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
({
r
}).
wait
();
std
::
vector
<
mkldnn
::
primitive
>
pipeline
;
pipeline
.
push_back
(
*
transpose_p
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
}
};
...
...
paddle/fluid/platform/mkldnn_reuse.h
浏览文件 @
53619a79
...
...
@@ -197,6 +197,130 @@ class MKLDNNHandler {
bool
is_reusing_
;
};
// Handler that creates, caches (through the device context's blob store) and
// reuses the MKLDNN objects needed to run a transpose as a reorder: the
// source memory, the destination memory and the reorder primitive itself.
// Every cached object is stored under `key_` plus a fixed suffix.
class TransposeMKLDNNHandler : public MKLDNNHandler {
 public:
  // dims: logical dimensions of the tensor being transposed.
  // axis: permutation used to lay out the destination memory.
  // dev_ctx / engine / base_key: forwarded to MKLDNNHandler.
  // Both vectors are copied into the handler.
  TransposeMKLDNNHandler(std::vector<int>& dims,  // NOLINT
                         std::vector<int>& axis,  // NOLINT
                         const platform::MKLDNNDeviceContext& dev_ctx,
                         mkldnn::engine engine, const std::string& base_key)
      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
        dims_(dims),
        axis_(axis),
        logical_axis_(dims.size(), 0) {}

  // Returns the source memory object bound to `ptr`.
  // On first use the memory is created and cached; afterwards the cached
  // object is reused and only its data handle is updated.
  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
      const mkldnn::memory::format& fmt, void* ptr) {
    auto local_key = key_ + "@user_src_mem_p";
    auto mem_p =
        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
    // Once any object of this handler was reused, all of them must be found.
    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
                   "Fail to find mem primitive in device context");
    if (mem_p == nullptr) {
      // Make memory descriptor using input format, unless it
      // cannot be trusted (nchw) then make up memory fmt manually
      for (size_t i = 0; i < logical_axis_.size(); ++i) {
        logical_axis_[i] = i;
      }
      auto src_md = fmt != mkldnn::memory::format::nchw
                        ? platform::MKLDNNMemDesc(
                              dims_, platform::MKLDNNGetDataType<float>(), fmt)
                        : Axis2MemoryDesc(dims_, logical_axis_);
      mem_p = std::make_shared<mkldnn::memory>(
          mkldnn::memory::primitive_desc{src_md, engine_}, ptr);
      dev_ctx_.SetBlob(local_key, mem_p);
    } else {
      mem_p->set_data_handle(ptr);
      // Mark that reusing happened. All primitives from operator instance
      // should be reused or none of them. So we check consistency
      is_reusing_ = true;
    }
    return mem_p;
  }

  // Returns the destination memory object backed by `output`'s buffer.
  // On first use the buffer is allocated with the size reported by the
  // permuted memory descriptor and the memory object is cached; afterwards
  // the cached object is re-pointed at `output`'s (re)allocated buffer.
  std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output,
                                                   platform::Place place) {
    auto local_key = key_ + "@user_dst_mem_p";
    auto mem_p =
        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
                   "Fail to find mem primitive in device context");
    if (mem_p == nullptr) {
      auto dst_mdp = mkldnn::memory::primitive_desc{
          Axis2MemoryDesc(dims_, axis_), engine_};

      auto dst_data = output->mutable_data<float>(
          place, paddle::memory::Allocator::kDefault, dst_mdp.get_size());

      mem_p = std::make_shared<mkldnn::memory>(dst_mdp, dst_data);
      dev_ctx_.SetBlob(local_key, mem_p);
    } else {
      auto dst_data = output->mutable_data<float>(place);
      mem_p->set_data_handle(dst_data);
      // Mark that reusing happened. All primitives from operator instance
      // should be reused or none of them. So we check consistency
      is_reusing_ = true;
    }
    return mem_p;
  }

  // Returns the reorder primitive from `src_memory_p` to `dst_memory_p`,
  // creating and caching it on first use.
  std::shared_ptr<mkldnn::reorder> AcquireTranspose(
      std::shared_ptr<mkldnn::memory> dst_memory_p,
      std::shared_ptr<mkldnn::memory> src_memory_p) {
    auto prim_key = key_ + "@transpose_p";
    auto transpose_p =
        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
    PADDLE_ENFORCE((transpose_p != nullptr) || (is_reusing_ == false),
                   "Fail to find transpose primitive in device context");
    if (transpose_p == nullptr) {
      transpose_p =
          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
      dev_ctx_.SetBlob(prim_key, transpose_p);
    } else {
      is_reusing_ = true;
    }
    return transpose_p;
  }

  // Builds the cache key: string form of `shape`, then of `axis`, then
  // `suffix`.
  static std::string GetHash(std::vector<int>& shape,  // NOLINT
                             std::vector<int>& axis,   // NOLINT
                             const std::string& suffix) {
    return dims2str(shape) + dims2str(axis) + suffix;
  }

 protected:
  // Hand-builds a blocked-format memory descriptor whose logical dimensions
  // are `nchw_tz` and whose strides follow the permutation `axis`.
  // The data type is hard-coded to f32.
  // NOTE(review): `mem_fmt` is not zero-initialized, so fields and array
  // slots not assigned below keep indeterminate values -- confirm that is
  // acceptable to the MKLDNN calls consuming the descriptor.
  mkldnn_memory_desc_t Axis2MemoryDesc(std::vector<int>& nchw_tz,  // NOLINT
                                       std::vector<int>& axis      // NOLINT
                                       ) {
    mkldnn_memory_desc_t mem_fmt;

    mem_fmt.primitive_kind = mkldnn_memory;
    mem_fmt.ndims = axis.size();
    for (unsigned int i = 0; i < nchw_tz.size(); ++i) {
      mem_fmt.dims[i] = nchw_tz[i];  // logical dimensions (nchw format,
                                     // regardless physical layout)
    }
    mem_fmt.data_type = mkldnn_f32;
    mem_fmt.format = mkldnn_blocked;

    // Walk the positions from last to first: the dimension named by axis[i]
    // receives the stride accumulated so far, then the running stride is
    // multiplied by that dimension's extent.
    unsigned int total_stride = 1;
    for (int i = nchw_tz.size() - 1; i >= 0; --i) {
      mem_fmt.layout_desc.blocking.padding_dims[i] =
          nchw_tz[i];  // logical dimensions (nchw format, regardless physical
                       // layout)
      mem_fmt.layout_desc.blocking.block_dims[i] = 1;
      mem_fmt.layout_desc.blocking.offset_padding_to_data[i] = 0;  // no offset
      mem_fmt.layout_desc.blocking.strides[0][axis[i]] = total_stride;
      mem_fmt.layout_desc.blocking.strides[1][axis[i]] = 1;
      total_stride *= nchw_tz[axis[i]];
    }
    mem_fmt.layout_desc.blocking.offset_padding = 0;  // no initial offset
    return mem_fmt;
  }

 private:
  std::vector<int> dims_;          // logical tensor dimensions
  std::vector<int> axis_;          // permutation for the destination layout
  std::vector<int> logical_axis_;  // identity permutation, filled on demand
};
template
<
class
forward_t
,
class
backward_data_t
,
class
backward_weights_t
>
class
ConvMKLDNNTemplateHandler
:
public
MKLDNNHandler
{
public:
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
53619a79
set
(
PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer
)
if
(
WITH_PYTHON
)
list
(
APPEND PYBIND_DEPS py_func_op
)
endif
()
set
(
PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc imperative.cc
)
if
(
WITH_PYTHON
)
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
53619a79
...
...
@@ -328,7 +328,7 @@ void BindOpDesc(pybind11::module *m) {
.
def
(
"infer_var_type"
,
&
pd
::
OpDesc
::
InferVarType
)
.
def
(
"set_is_target"
,
&
pd
::
OpDesc
::
SetIsTarget
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
pd
::
OpDesc
>
)
.
def
(
"block"
,
&
pd
::
OpDesc
::
Block
,
.
def
(
"block"
,
[](
pd
::
OpDesc
&
self
)
{
return
self
.
Block
();
}
,
pybind11
::
return_value_policy
::
reference
);
}
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
53619a79
...
...
@@ -37,6 +37,7 @@ limitations under the License. */
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -110,6 +111,12 @@ PYBIND11_MODULE(core, m) {
BindException
(
&
m
);
m
.
def
(
"_append_python_callable_object_and_return_id"
,
[](
py
::
object
py_obj
)
->
size_t
{
return
paddle
::
operators
::
AppendPythonCallableObjectAndReturnId
(
py_obj
);
});
py
::
class_
<
imperative
::
VarBase
,
PyVarBase
>
(
m
,
"VarBase"
,
R"DOC()DOC"
)
.
def
(
py
::
init
<>
())
.
def
(
"_run_backward"
,
...
...
@@ -977,7 +984,6 @@ All parameter, weight, gradient are variables in Paddle.
cannot be updated after being finalized.)DOC"
);
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
ProgramDesc
&
,
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
ExecutionStrategy
&
,
const
BuildStrategy
&
,
size_t
,
...
...
python/paddle/fluid/contrib/__init__.py
浏览文件 @
53619a79
...
...
@@ -22,9 +22,12 @@ from . import op_frequence
from
.op_frequence
import
*
from
.
import
quantize
from
.quantize
import
*
from
.
import
utils
from
.utils
import
*
__all__
=
[]
__all__
+=
decoder
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
quantize
.
__all__
__all__
+=
utils
.
__all__
python/paddle/fluid/contrib/utils/__init__.py
浏览文件 @
53619a79
...
...
@@ -13,10 +13,11 @@
# limitations under the License.
from
__future__
import
print_function
#
from . import lookup_table_utils
#
from .lookup_table_utils import *
from
.
import
lookup_table_utils
from
.lookup_table_utils
import
*
from
.
import
hdfs_utils
from
.hdfs_utils
import
*
#__all__ = lookup_table_utils.__all__
__all__
=
hdfs_utils
.
__all__
__all__
=
[]
__all__
+=
lookup_table_utils
.
__all__
__all__
+=
hdfs_utils
.
__all__
python/paddle/fluid/contrib/utils/hdfs_utils.py
浏览文件 @
53619a79
...
...
@@ -14,6 +14,7 @@
"""HDFS Utils"""
import
os
import
sys
import
subprocess
import
multiprocessing
from
datetime
import
datetime
...
...
@@ -24,7 +25,7 @@ import errno
import
logging
__all__
=
[
"HDFSClient"
,
"multi_download"
]
__all__
=
[
"HDFSClient"
,
"multi_download"
,
"multi_upload"
]
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(message)s'
)
_logger
=
logging
.
getLogger
(
"hdfs_utils"
)
...
...
@@ -94,11 +95,13 @@ class HDFSClient(object):
def
upload
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
retry_times
=
5
):
"""
upload the local file to hdfs
Args:
hdfs_path: hdfs path, target path
local_path: local file path, source path
overwrite: will overwrite the original file
retry_times: max times retry to upload
hdfs_path(str): the hdfs file path
local_path(str): the local file path
overwrite(bool|None): will overwrite the file on HDFS or not
retry_times(int|5): retry times
Returns:
True or False
"""
...
...
@@ -109,7 +112,7 @@ class HDFSClient(object):
_logger
.
warn
(
"The Local path: {} is dir and I will support it later, return"
.
format
(
local_path
))
return
return
False
base
=
os
.
path
.
basename
(
local_path
)
if
not
self
.
is_exist
(
hdfs_path
):
...
...
@@ -141,13 +144,15 @@ class HDFSClient(object):
def
download
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
unzip
=
False
):
"""
download from hdfs
download file from HDFS
Args:
hdfs_path: hdfs path, target path
local_path: local file path, source path
overwrite: will remove original file and overwrite it.
unzip: ignore this param
Returns
hdfs_path(str): the hdfs file path
local_path(str): the local file path
overwrite(bool|None): will overwrite the file on HDFS or not
unzip(bool|False): if the download file is compressed by zip, unzip it or not.
Returns:
True or False
"""
_logger
.
info
(
'Downloading %r to %r.'
,
hdfs_path
,
local_path
)
...
...
@@ -188,11 +193,11 @@ class HDFSClient(object):
def
is_exist
(
self
,
hdfs_path
=
None
):
"""
whether the remote hdfs path exists?
whether the remote HDFS path exists
Args:
hdfs_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
hdfs_path(str): the hdfs file path
Returns:
True or False
"""
...
...
@@ -211,11 +216,11 @@ class HDFSClient(object):
def
is_dir
(
self
,
hdfs_path
=
None
):
"""
whether the remote hdfs path exists?
whether the remote HDFS path is directory
Args:
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
hdfs_path(str): the hdfs file path
Returns:
True or False
"""
...
...
@@ -239,15 +244,15 @@ class HDFSClient(object):
"""
Remove a file or directory from HDFS.
whether the remote HDFS path exists
Args:
param hdfs_path: HDFS path.
param recursive: Recursively delete files and directories. By default,
this method will raise an :class:`HdfsError` if trying to delete a
non-empty directory.
hdfs_path: HDFS path.
Returns:
True or False
This function returns `True` if the deletion was successful and `False` if
no file or directory previously existed at `hdfs_path`.
"""
_logger
.
info
(
'Deleting %r.'
,
hdfs_path
)
...
...
@@ -273,16 +278,14 @@ class HDFSClient(object):
def
rename
(
self
,
hdfs_src_path
,
hdfs_dst_path
,
overwrite
=
False
):
"""
Rename a file or folder.
Move a file or folder on HDFS.
Args:
:param hdfs_src_path: Source path.
:param hdfs_dst_path: Destination path. If the path already exists and is
a directory, the source will be moved into it. If the path exists and is
a file, or if a parent destination directory is missing, this method will
raise an :class:`HdfsError`.
hdfs_path(str): HDFS path.
overwrite(bool|False): If the path already exists and overwrite is False, will return False.
Returns:
This function returns `True` if the rename was successful and `False` if
rename was faild.
True or False
"""
assert
hdfs_src_path
is
not
None
assert
hdfs_dst_path
is
not
None
...
...
@@ -320,17 +323,20 @@ class HDFSClient(object):
raise
def
makedirs
(
self
,
hdfs_path
):
"""Create a remote directory, recursively if necessary.
"""
Create a remote directory, recursively if necessary.
Args:
:param hdfs_path: Remote path. Intermediate directories will be created
appropriately.
hdfs_path(str): Remote path. Intermediate directories will be created appropriately.
Returns:
True
if make a directories was successful, False when make a directiries was failed.
True
or False
"""
_logger
.
info
(
'Creating directories to %r.'
,
hdfs_path
)
assert
hdfs_path
is
not
None
if
self
.
is_exist
(
hdfs_path
):
_logger
.
error
(
"HDFS path is exist: {}"
.
format
(
hdfs_path
))
return
mkdirs_commands
=
[
'-mkdir'
,
hdfs_path
]
...
...
@@ -346,11 +352,13 @@ class HDFSClient(object):
def
ls
(
self
,
hdfs_path
):
"""
ls a hdfs_path.
ls directory contents about HDFS hdfs_path
Args:
:param hdfs_path: hdfs_path will be ls.
hdfs_path(str): Remote HDFS path will be ls.
Returns:
This function returns a `list` that contaion all files in the hdfs_path.
List: a contents list about hdfs_path.
"""
assert
hdfs_path
is
not
None
...
...
@@ -378,11 +386,15 @@ class HDFSClient(object):
def
lsr
(
self
,
hdfs_path
,
only_file
=
True
,
sort
=
True
):
"""
ls a hdfs_path sort by time.
list directory contents about HDFS hdfs_path recursively
Args:
:param hdfs_path: hdfs_path will be ls.
hdfs_path(str): Remote HDFS path.
only_file(bool|True): will discard folders.
sort(bool|True): will be sorted by create time.
Returns:
This function returns a `list` that contaion all files sorted by time in the hdfs_path.
List: a contents list about hdfs_path.
"""
def
sort_by_time
(
v1
,
v2
):
...
...
@@ -422,61 +434,54 @@ class HDFSClient(object):
return
ret_lines
def
multi_
up
load
(
client
,
def
multi_
down
load
(
client
,
hdfs_path
,
local_path
,
multi_processes
=
5
,
overwrite
=
False
):
trainer_id
,
trainers
,
multi_processes
=
5
):
"""
Upload file to hdfs.
Download files from HDFS using multi process.
Args:
:param overwrite: will overwrite hdfs file or no
t
:param multi_processes: the upload data process at the same time, default=5
:param client: instance of HDFSClient
:param hdfs_path: path on hdfs
:param local_path: path on local
Returns:
client(HDFSClient): instance of HDFSClien
t
hdfs_path(str): path on hdfs
local_path(str): path on local
trainer_id(int): current trainer id
trainers(int): all trainers number
multi_processes(int|5): the download data process at the same time, default=5
Returns:
List:
Download files in local folder.
"""
def
__subprocess_
up
load
(
datas
):
def
__subprocess_
down
load
(
datas
):
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
local_path
)
hdfs_re_path
=
os
.
path
.
join
(
hdfs_path
,
re_path
)
client
.
upload
(
hdfs_re_path
,
data
,
overwrite
,
retry_times
=
5
)
def
get_local_files
(
path
):
"""
Get all local files
Args:
path: local file path
Returns:
A list that contation all files in the path.
"""
rlist
=
[]
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
if
re_path
==
os
.
curdir
:
sub_local_re_path
=
local_path
else
:
sub_local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
)
client
.
download
(
data
,
sub_local_re_path
)
if
not
os
.
path
.
isdir
(
path
):
return
rlist
assert
isinstance
(
client
,
HDFSClient
)
for
dirname
,
folder
,
files
in
os
.
walk
(
path
):
for
i
in
files
:
t
=
os
.
path
.
join
(
dirname
,
i
)
rlist
.
append
(
t
)
return
rlist
client
.
make_local_dirs
(
local_path
)
_logger
.
info
(
"Make local dir {} successfully"
.
format
(
local_path
))
assert
isinstance
(
client
,
HDFSClient
)
all_need_download
=
client
.
lsr
(
hdfs_path
,
sort
=
True
)
need_download
=
all_need_download
[
trainer_id
::
trainers
]
_logger
.
info
(
"Get {} files From all {} files need to be download from {}"
.
format
(
len
(
need_download
),
len
(
all_need_download
),
hdfs_path
))
all_files
=
get_local_files
(
local_path
)
if
not
all_files
:
_logger
.
info
(
"there are nothing need to upload, exit"
)
return
_logger
.
info
(
"Start {} multi process to upload datas"
.
format
(
_logger
.
info
(
"Start {} multi process to download datas"
.
format
(
multi_processes
))
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
all_files
[
i
::
multi_processes
]
process_datas
=
need_download
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
target
=
__subprocess_
up
load
,
args
=
(
process_datas
,
))
target
=
__subprocess_
down
load
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
p
.
start
()
...
...
@@ -484,55 +489,84 @@ def multi_upload(client,
for
proc
in
procs
:
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to
up
load datas"
.
format
(
_logger
.
info
(
"Finish {} multi process to
down
load datas"
.
format
(
multi_processes
))
local_downloads
=
[]
for
data
in
need_download
:
data_name
=
os
.
path
.
basename
(
data
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
if
re_path
==
os
.
curdir
:
local_re_path
=
os
.
path
.
join
(
local_path
,
data_name
)
else
:
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
,
data_name
)
local_downloads
.
append
(
local_re_path
)
return
local_downloads
def
multi_download
(
client
,
def
getfilelist
(
path
):
rlist
=
[]
for
dir
,
folder
,
file
in
os
.
walk
(
path
):
for
i
in
file
:
t
=
os
.
path
.
join
(
dir
,
i
)
rlist
.
append
(
t
)
for
r
in
rlist
:
print
(
r
)
def
multi_upload
(
client
,
hdfs_path
,
local_path
,
trainer_id
,
trainers
,
file_cnt
,
multi_processes
=
5
):
multi_processes
=
5
,
overwrite
=
False
,
sync
=
True
):
"""
multi_download
Upload files to HDFS using multi process.
Args:
:param client: instance of HDFSClient
:param hdfs_path: path on hdfs
:param local_path: path on local
:param trainer_id: current trainer id
:param trainers: all trainers number
:param file_cnt: all file number
:param multi_processes: the download data process at the same time, default=5
:return: None
client(HDFSClient): instance of HDFSClient
hdfs_path(str): path on hdfs
local_path(str): path on local
multi_processes(int|5): the upload data process at the same time, default=5
overwrite(bool|False): will overwrite file on HDFS or not
sync(bool|True): upload files sync or not.
Returns:
A list that be downloaded.
None
"""
def
__subprocess_
down
load
(
datas
):
def
__subprocess_
up
load
(
datas
):
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs
_path
)
local_re_path
=
os
.
path
.
join
(
local
_path
,
re_path
)
client
.
download
(
data
,
local_re_path
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
local
_path
)
hdfs_re_path
=
os
.
path
.
join
(
hdfs
_path
,
re_path
)
client
.
upload
(
hdfs_re_path
,
data
,
overwrite
,
retry_times
=
5
)
assert
isinstance
(
client
,
HDFSClient
)
def
get_local_files
(
path
):
rlist
=
[]
client
.
make_local_dirs
(
local_path
)
_logger
.
info
(
"Make local dir {} successfully"
.
format
(
local_path
))
if
not
os
.
path
.
isdir
(
path
):
return
rlist
all_need_download
=
client
.
lsr
(
hdfs_path
,
sort
=
True
)[:
file_cnt
]
need_download
=
all_need_download
[
trainer_id
::
trainers
]
_logger
.
info
(
"Get {} files From all {} files need to be download from {}"
.
format
(
len
(
need_download
),
len
(
all_need_download
),
hdfs_path
))
for
dirname
,
folder
,
files
in
os
.
walk
(
path
):
for
i
in
files
:
t
=
os
.
path
.
join
(
dirname
,
i
)
rlist
.
append
(
t
)
return
rlist
_logger
.
info
(
"Start {} multi process to download datas"
.
format
(
assert
isinstance
(
client
,
HDFSClient
)
all_files
=
get_local_files
(
local_path
)
if
not
all_files
:
_logger
.
info
(
"there are nothing need to upload, exit"
)
return
_logger
.
info
(
"Start {} multi process to upload datas"
.
format
(
multi_processes
))
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
need_download
[
i
::
multi_processes
]
process_datas
=
all_files
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
target
=
__subprocess_
down
load
,
args
=
(
process_datas
,
))
target
=
__subprocess_
up
load
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
p
.
start
()
...
...
@@ -540,18 +574,9 @@ def multi_download(client,
for
proc
in
procs
:
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to
down
load datas"
.
format
(
_logger
.
info
(
"Finish {} multi process to
up
load datas"
.
format
(
multi_processes
))
local_downloads
=
[]
for
data
in
need_download
:
data_name
=
os
.
path
.
basename
(
data
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
,
data_name
)
local_downloads
.
append
(
local_re_path
)
return
local_downloads
if
__name__
==
"__main__"
:
hadoop_home
=
"/home/client/hadoop-client/hadoop/"
...
...
python/paddle/fluid/contrib/utils/lookup_table_utils.py
浏览文件 @
53619a79
...
...
@@ -18,14 +18,12 @@ import os
import
time
import
logging
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid
import
io
from
paddle.fluid
import
Program
__all__
=
[
"load_
inference_model"
,
"load_persistable_vars
"
,
"load_
persistables_for_increment"
,
"load_persistables_for_inference
"
,
"convert_dist_to_sparse_program"
]
...
...
@@ -80,19 +78,28 @@ def __get_prefetch_op_tuples(main_program):
return
prefetch_op_tuples
def
convert_dist_to_sparse_program
(
main_program
):
if
not
main_program
.
_distributed_lookup_table
:
def
convert_dist_to_sparse_program
(
program
):
"""
WARNING: this function will only be used for distributed training with distributed lookup table.
when we train model with distributed lookup table but want to do the local inference, we can use
this function to convert the train program with distributed lookup table to sparse lookup table.
:param program(Program): the program must be the trainer program, which will be get by the distribute transpiler.
:return:
program: The `program` is a Program, it's the program replace distributed lookup table to sparse lookup table.
"""
if
not
program
.
_distributed_lookup_table
:
_logger
.
warn
(
"There are no distributed lookup tables need to be converted"
)
return
# create table param and grad var in pserver program
origin_emb_var
=
"{}.origin"
.
format
(
main_
program
.
_distributed_lookup_table
)
emb_var
=
main_
program
.
_distributed_lookup_table
main_
program
.
global_block
().
_rename_var
(
emb_var
,
origin_emb_var
)
origin_param_var
=
main_
program
.
global_block
().
vars
[
origin_emb_var
]
origin_emb_var
=
"{}.origin"
.
format
(
program
.
_distributed_lookup_table
)
emb_var
=
program
.
_distributed_lookup_table
program
.
global_block
().
_rename_var
(
emb_var
,
origin_emb_var
)
origin_param_var
=
program
.
global_block
().
vars
[
origin_emb_var
]
param_var
=
main_
program
.
global_block
().
create_var
(
param_var
=
program
.
global_block
().
create_var
(
name
=
emb_var
,
shape
=
origin_param_var
.
shape
,
dtype
=
origin_param_var
.
dtype
,
...
...
@@ -100,28 +107,28 @@ def convert_dist_to_sparse_program(main_program):
persistable
=
True
)
# parameter must be selected rows
param_var
.
desc
.
set_type
(
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
main_
program
.
_sync_with_cpp
()
program
.
_sync_with_cpp
()
prefetch_op_tuples
=
__get_prefetch_op_tuples
(
main_
program
)
prefetch_op_tuples
=
__get_prefetch_op_tuples
(
program
)
split_ids_id
=
prefetch_op_tuples
[
0
]
for
idx
in
range
(
split_ids_id
+
2
,
split_ids_id
-
1
,
-
1
):
main_
program
.
global_block
().
_remove_op
(
idx
)
main_
program
.
desc
.
flush
()
program
.
global_block
().
_remove_op
(
idx
)
program
.
desc
.
flush
()
in_out_pairs
=
zip
(
prefetch_op_tuples
[
1
],
prefetch_op_tuples
[
2
])
for
in_out_pair
in
in_out_pairs
:
idx
=
split_ids_id
ids
=
main_
program
.
global_block
().
vars
[
in_out_pair
[
0
]]
out
=
main_
program
.
global_block
().
vars
[
in_out_pair
[
1
]]
__insert_lookup_sparse_table_op
(
main_
program
,
idx
,
ids
,
param_var
,
out
)
main_
program
.
desc
.
flush
()
return
main_
program
ids
=
program
.
global_block
().
vars
[
in_out_pair
[
0
]]
out
=
program
.
global_block
().
vars
[
in_out_pair
[
1
]]
__insert_lookup_sparse_table_op
(
program
,
idx
,
ids
,
param_var
,
out
)
program
.
desc
.
flush
()
return
program
def
load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_var
):
def
_load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_vars
):
def
_is_checkpoint_var
(
exclude_fluid_vars
=
None
):
"""
the checkpoint will not save or load all the variables.
...
...
@@ -159,7 +166,81 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
return
is_valid
def
_load_lookup_table_vars
(
executor
,
dirname
,
main_program
,
io
.
load_vars
(
executor
,
dirname
=
dirname
,
main_program
=
program
,
predicate
=
_is_checkpoint_var
(
lookup_table_vars
),
filename
=
None
)
def load_persistables_for_increment(dirname, executor, program,
                                    lookup_table_var, lookup_table_var_path):
    """
    WARNING: this function will only be used for distributed training with distributed lookup table.
    For incremental training, the pserver will not only load dense variables,
    but also load the suitable lookup table var. Because the lookup table
    var is sliced with HASH, we must load the correct slice var.

    :param dirname(str): The directory path
    :param executor(Executor): The executor to run for loading inference model.
    :param program(Program): The parameter server program, which will run on Pserver.
    :param lookup_table_var: the distributed lookup tables var name.
    :param lookup_table_var_path: the distributed lookup tables var location.
    :return: None
    :raises ValueError: if `dirname` is not a directory, if
        `lookup_table_var_path` does not exist, or if `program` is not a
        Program instance.
    """

    def __load_lookup_table_vars(executor, main_program, lookup_table_var,
                                 lookup_table_var_path):
        # Run a one-op program that loads the file straight into the
        # lookup table variable of `main_program`.
        emb_var = main_program.global_block().var(lookup_table_var)

        load_program = Program()
        load_block = load_program.global_block()
        load_block.append_op(
            type='load',
            inputs={},
            outputs={'Out': [emb_var]},
            attrs={'file_path': lookup_table_var_path})
        executor.run(load_program)

    # Format the path into the message; passing it as a second exception
    # argument would leave the '%s' placeholder unfilled.
    if not os.path.isdir(dirname):
        raise ValueError("There is no directory named '{}'".format(dirname))

    if not os.path.exists(lookup_table_var_path):
        raise ValueError(
            "There is no file named '{}'".format(lookup_table_var_path))

    if not isinstance(program, Program):
        raise ValueError("program must be an instance of fluid.Program")

    _logger.info("Start Load Sparse Program With "
                 "Distributed Lookup Table Vars from {}, time = {}".format(
                     dirname, time.ctime()))

    # Load every other persistable first (the lookup table var is passed in
    # the list handed to _load_persistable_vars), then the table slice
    # itself from its own file.
    _load_persistable_vars(executor, dirname, program, [lookup_table_var])
    __load_lookup_table_vars(executor, program, lookup_table_var,
                             lookup_table_var_path)

    _logger.info("Finish Load Sparse Program With "
                 "Distributed Lookup Table Vars from {}, time = {}".format(
                     dirname, time.ctime()))
def
load_persistables_for_inference
(
dirname
,
executor
,
program
,
lookup_table_var_name
):
"""
WARNING: this function will only be used for inference with distributed lookup table.
Inference with distributed lookup table is a little funky, this function will load distributed
lookup table vars into sparse var, can be used in local inference mode.
:param dirname(str): The directory path
:param executor(Executor): The executor to run for loading inference model.
:param program(Program): The parameter server program, which will run on Pserver.
:param lookup_table_var_name: the distributed lookup tables var name.
:return: None
"""
def
__load_lookup_table_vars
(
executor
,
dirname
,
main_program
,
lookup_table_vars
):
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
...
...
@@ -209,30 +290,13 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
global_block
.
append_op
(
type
=
'delete_var'
,
inputs
=
{
'X'
:
sums
})
executor
.
run
(
convert_program
)
_logger
.
info
(
"Start Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
lookup_table_vars
=
[
lookup_table_var
]
io
.
load_vars
(
executor
,
dirname
=
dirname
,
main_program
=
program
,
predicate
=
_is_checkpoint_var
(
lookup_table_vars
),
filename
=
None
)
_load_lookup_table_vars
(
executor
,
dirname
,
program
,
lookup_table_vars
)
_logger
.
info
(
"Finish Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
def
load_inference_model
(
dirname
,
executor
,
lookup_table_var_name
):
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
if
program
:
if
not
isinstance
(
program
,
Program
):
raise
ValueError
(
"program must be an instance of fluid.Program"
)
else
:
local_model
=
os
.
path
.
join
(
dirname
,
model_filename
)
with
open
(
local_model
,
"rb"
)
as
f
:
...
...
@@ -244,13 +308,16 @@ def load_inference_model(dirname, executor, lookup_table_var_name):
raise
ValueError
(
"Unsupported program version: %d
\n
"
%
program
.
_version
())
# Binary data also need version.
load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_var_name
)
_logger
.
info
(
"Start Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
_load_persistable_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var_name
])
__load_lookup_table_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var_name
])
feed_target_names
=
program
.
desc
.
get_feed_target_names
()
fetch_target_names
=
program
.
desc
.
get_fetch_target_names
()
fetch_targets
=
[
program
.
global_block
().
var
(
name
)
for
name
in
fetch_target_names
]
_logger
.
info
(
"Finish Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
return
[
program
,
feed_target_names
,
fetch_targets
]
return
program
python/paddle/fluid/layers/nn.py
浏览文件 @
53619a79
...
...
@@ -18,7 +18,9 @@ All layers just related to the neural network.
from
__future__
import
print_function
import
numpy
as
np
import
six
import
os
import
inspect
from
..layer_helper
import
LayerHelper
from
..initializer
import
Normal
,
Constant
from
..framework
import
Variable
,
OpProtoHolder
...
...
@@ -176,6 +178,7 @@ __all__ = [
'merge_selected_rows'
,
'get_tensor_from_selected_rows'
,
'lstm'
,
'py_func'
,
'psroi_pool'
,
'huber_loss'
,
]
...
...
@@ -9327,6 +9330,224 @@ def get_tensor_from_selected_rows(x, name=None):
return
out
class PyFuncRegistry(object):
    """
    Wrap a Python callable so it can be invoked through the `py_func` op.

    Each instance registers itself with
    `core._append_python_callable_object_and_return_id` and keeps the
    returned id, which is exposed through the `id` property.
    """

    # Every registered wrapper, in registration order (shared by all instances).
    _register_funcs = []

    def __init__(self, func):
        """
        Register `func`.

        Args:
            func (callable): the Python function to wrap.

        Raises:
            TypeError: if `func` is None or not callable.
        """
        if func is None or not callable(func):
            raise TypeError('func must be a Python function')

        self._func = func
        # Find named args using reflection. `inspect.getargspec` no longer
        # exists on Python 3.11+, so prefer `getfullargspec` when available;
        # indices 0/1/2 (args, varargs, varkw) have the same meaning in both.
        get_arg_spec = getattr(inspect, 'getfullargspec',
                               None) or inspect.getargspec
        args = get_arg_spec(self._func)
        if len(args[0]) == 0 and args[1] is None and args[2] is None:
            # Function with no inputs
            self._named_args = None
        else:
            self._named_args = args[0]
        self._id = core._append_python_callable_object_and_return_id(self)

        # Why record self here?
        #
        # 1. For debug usage. Users can call
        #    :code:`py_func.registered_func(idx)` method
        #    to find the registered function corresponding
        #    to :code:`idx`.
        #
        # 2. For increasing reference count of self.
        #    It seems that to release Python object
        #    whose reference count is 1 would cause
        #    segmentation fault error in C++ side.
        #    May be lack of Python GC in C++ side?
        PyFuncRegistry._register_funcs.append(self)

    @classmethod
    def registered_func(cls, idx):
        """Return the raw function wrapped by the `idx`-th registered entry."""
        return cls._register_funcs[idx]._func

    @classmethod
    def registered_func_num(cls):
        """Return how many wrappers have been registered so far."""
        return len(cls._register_funcs)

    @property
    def id(self):
        """Id returned by the core registration call for this wrapper."""
        return self._id

    def __call__(self, *args):
        """
        Call the wrapped function and normalize its outputs.

        The leading positional `args` are bound by keyword to the wrapped
        function's named arguments; any remaining ones are passed
        positionally. A function without inputs is called with no arguments.

        Returns:
            tuple: one entry per returned value. None and `core.LoDTensor`
            values are passed through unchanged; anything else is converted
            to a numpy array and copied into a new `core.LoDTensor` on
            `core.CPUPlace()`.
        """
        if self._named_args is None:
            func_ret = self._func()
        else:
            kwargs = dict()
            idx = 0
            for arg in self._named_args:
                kwargs[arg] = args[idx]
                idx += 1
            func_ret = self._func(*args[idx:], **kwargs)

        # A single return value is treated as a one-element tuple.
        if not isinstance(func_ret, (list, tuple)):
            func_ret = (func_ret, )

        ret = []
        for each_ret in func_ret:
            if each_ret is None or isinstance(each_ret, core.LoDTensor):
                ret.append(each_ret)
                continue

            if not isinstance(each_ret, np.ndarray):
                each_ret = np.array(each_ret)

            tensor = core.LoDTensor()
            tensor.set(each_ret, core.CPUPlace())
            ret.append(tensor)

        return tuple(ret)
@templatedoc()
def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None):
    """
    PyFunc Operator.

    Register a Python callable as an operator. :code:`func` receives
    :code:`LoDTensor` inputs and may return numpy arrays or
    :code:`LoDTensor`. Paddle calls :code:`func` in the forward part and
    :code:`backward_func` in the backward part (if :code:`backward_func`
    is not None).

    The data type and shape of :code:`out` must be set by the user before
    calling this function. The data types and shapes of the gradients of
    :code:`out` and :code:`x` are inferred automatically.

    :code:`backward_func` receives, in this order: the forward inputs
    :code:`x`, the forward outputs :code:`out`, and the gradients of
    :code:`out`. If a variable of :code:`out` has no gradient, the
    corresponding input is None on the Python side. If a variable of
    :code:`x` has no gradient, :code:`backward_func` should return None
    for it.

    This function can also be used to debug a running network: add a
    :code:`py_func` operator without output and print the input
    :code:`x` inside :code:`func`.

    Args:
        func (callable): forward Python function.
        x (Variable|list(Variable)|tuple(Variable)): inputs of :code:`func`.
        out (Variable|list(Variable)|tuple(Variable)): outputs of
            :code:`func`. Paddle cannot infer their shapes and data types,
            so they must be created beforehand.
        backward_func (callable|None): backward Python function.
            None means no backward. Default None.
        skip_vars_in_backward_input (Variable|list(Variable)|tuple(Variable)):
            Variables that are not needed as :code:`backward_func` inputs.
            Each of them must be one of :code:`x` or :code:`out`.
            Only used when :code:`backward_func` is not None. Default None.

    Returns:
        out (Variable|list(Variable)|tuple(Variable)): input :code:`out`

    Examples:

        >>> import numpy as np
        >>> import paddle.fluid as fluid
        >>> import six
        >>>
        >>> def create_tmp_var(name, dtype, shape):
        >>>     return fluid.default_main_program().current_block().create_var(
        >>>         name=name, dtype=dtype, shape=shape)
        >>>
        >>> # tanh activation has been provided by Paddle C++ op
        >>> # Here, we only use tanh to be an example to show the usage
        >>> # of py_func
        >>> def tanh(x):
        >>>     return np.tanh(x)
        >>>
        >>> # forward input x is skipped
        >>> def tanh_grad(y, dy):
        >>>     return np.array(dy) * (1 - np.square(np.array(y)))
        >>>
        >>> def debug_func(x):
        >>>     print(x)
        >>>
        >>> def simple_net(img, label):
        >>>     hidden = img
        >>>     for idx in six.moves.range(4):
        >>>         hidden = fluid.layers.fc(hidden, size=200)
        >>>         new_hidden = create_tmp_var(name='hidden_{}'.format(idx),
        >>>             dtype=hidden.dtype, shape=hidden.shape)
        >>>
        >>>         # user-defined layers with forward and backward
        >>>         hidden = fluid.layers.py_func(func=tanh, x=hidden,
        >>>             out=new_hidden, backward_func=tanh_grad,
        >>>             skip_vars_in_backward_input=hidden)
        >>>
        >>>         # user-defined debug layers to print variables
        >>>         fluid.layers.py_func(func=debug_func, x=hidden, out=None)
        >>>
        >>>     prediction = fluid.layers.fc(hidden, size=10, act='softmax')
        >>>     loss = fluid.layers.cross_entropy(input=prediction, label=label)
        >>>     return fluid.layers.mean(loss)
    """
    # Must stay the first statement: locals() here is exactly the arguments.
    helper = LayerHelper('py_func', **locals())

    # Normalize the inputs to a list/tuple of variables.
    if isinstance(x, Variable):
        x = [x]
    elif x is None:
        x = []
    elif not isinstance(x, (list, tuple)):
        raise TypeError('Input must be Variable/list(Variable)/tuple(Variable)')

    # Normalize the outputs the same way, keeping `out` itself for the return.
    if isinstance(out, Variable):
        out_list = [out]
    elif out is None:
        out_list = []
    elif isinstance(out, (list, tuple)):
        out_list = out
    else:
        raise TypeError(
            'Output must be Variable/list(Variable)/tuple(Variable)')

    # The forward callable is registered before the backward one.
    fwd_func_id = PyFuncRegistry(func).id
    if backward_func is None:
        bwd_func_id = -1
    else:
        bwd_func_id = PyFuncRegistry(backward_func).id

    if any(len(each_out.shape) == 0 for each_out in out_list):
        raise ValueError(
            'Output shapes of py_func op should be provided by users manually')

    backward_skip_vars = set()
    if backward_func is not None and skip_vars_in_backward_input is not None:
        skip_vars = skip_vars_in_backward_input
        if isinstance(skip_vars, Variable):
            skip_vars = [skip_vars]

        # Names of every forward input and output; skipped vars must be here.
        known_names = set(v.name for v in x)
        known_names.update(v.name for v in out_list)

        for skipped in skip_vars:
            if skipped.name not in known_names:
                raise ValueError(
                    'Variable {} is not found in forward inputs and outputs'
                    .format(skipped.name))
            backward_skip_vars.add(skipped.name)

    op_attrs = {
        'forward_callable_id': fwd_func_id,
        'backward_callable_id': bwd_func_id,
        'backward_skip_vars': list(backward_skip_vars)
    }
    helper.append_op(
        type='py_func',
        inputs={'X': x},
        outputs={'Out': out_list},
        attrs=op_attrs)
    return out
# For debug usage: expose the registry's lookup helpers as attributes of
# py_func, so callers can use py_func.registered_func(idx) and
# py_func.registered_func_num().
py_func.registered_func = PyFuncRegistry.registered_func
py_func.registered_func_num = PyFuncRegistry.registered_func_num
@
templatedoc
()
def
psroi_pool
(
input
,
rois
,
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
53619a79
...
...
@@ -92,35 +92,27 @@ class ParallelExecutor(object):
num_trainers
=
1
,
trainer_id
=
0
,
scope
=
None
):
# step1: get places, the places are used in run too.
self
.
_places
=
[]
self
.
_act_places
=
[]
if
use_cuda
:
gpus
=
[]
gpus_env
=
os
.
getenv
(
"FLAGS_selected_gpus"
)
if
gpus_env
:
gpus
=
[
int
(
s
)
for
s
in
gpus_env
.
split
(
","
)]
else
:
for
i
in
six
.
moves
.
range
(
core
.
get_cuda_device_count
()):
gpus
.
append
(
i
)
for
i
in
gpus
:
p
=
core
.
Place
()
self
.
_act_places
.
append
(
core
.
CUDAPlace
(
i
))
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
gpus
=
[
i
for
i
in
six
.
moves
.
range
(
core
.
get_cuda_device_count
())
]
self
.
_places
=
[
core
.
CUDAPlace
(
i
)
for
i
in
gpus
]
else
:
cpu_num
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
for
i
in
six
.
moves
.
range
(
cpu_num
):
p
=
core
.
Place
()
self
.
_act_places
.
append
(
core
.
CPUPlace
())
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
self
.
_places
=
[
core
.
CPUPlace
()
for
_
in
six
.
moves
.
range
(
cpu_num
)]
assert
self
.
_places
,
"no place for execution"
# step2: init exec_strategy
if
exec_strategy
is
None
:
exec_strategy
=
ExecutionStrategy
()
exec_strategy
.
use_cuda
=
use_cuda
if
exec_strategy
.
num_threads
==
0
:
if
use_cuda
:
# Experiments on se-resnext shows that too many threads hurt
...
...
@@ -131,49 +123,54 @@ class ParallelExecutor(object):
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
exec_strategy
.
num_threads
=
cpu_num
*
2
# step3: init build_strategy
if
build_strategy
is
None
:
build_strategy
=
BuildStrategy
()
build_strategy
.
num_trainers
=
num_trainers
build_strategy
.
trainer_id
=
trainer_id
main
=
main_program
main
=
main
if
main
else
framework
.
default_main_program
()
# step4: get main_program, scope, local_scopes
main
=
main_program
if
main_program
\
else
framework
.
default_main_program
()
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
ParallelExecutor
):
raise
TypeError
(
"share_vars_from must be ParallelExecutor."
)
local_scopes
=
share_vars_from
.
executor
.
local_scopes
()
\
if
share_vars_from
else
[]
# step5: check trainers_endpoints, it is used for distribution.
trainers_endpoints
=
main
.
_trainers_endpoints
if
num_trainers
>
1
and
trainers_endpoints
:
assert
num_trainers
==
len
(
trainers_endpoints
),
"num_trainers == len(end_points)"
build_strategy
.
trainers_endpoints
=
trainers_endpoints
if
scope
==
None
:
scope
=
executor
.
global_scope
()
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
ParallelExecutor
):
raise
TypeError
(
"share_vars_from must be ParallelExecutor."
)
local_scopes
=
share_vars_from
.
executor
.
local_scopes
(
)
if
share_vars_from
else
[]
self
.
persistable_vars
=
[
v
.
name
for
v
in
[
# step5: get persistable_vars, parameter_vars, places. persistable_vars
# need be broadcast to other local_scope.
persistable_vars
=
set
([
cpt
.
to_text
(
v
.
name
)
for
v
in
[
var
for
var
in
main
.
list_vars
()
if
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
]
]
])
def
place_obj
(
place
):
p
=
core
.
Place
()
p
.
set_place
(
place
)
return
p
places
=
list
(
map
(
place_obj
,
self
.
_places
))
# step6: init ParallelExecutor
self
.
executor
=
core
.
ParallelExecutor
(
self
.
_places
,
set
([
cpt
.
to_text
(
p
.
name
)
for
p
in
main
.
global_block
().
iter_parameters
()
if
not
p
.
stop_gradient
]),
set
(
cpt
.
to_text
(
var
)
for
var
in
self
.
persistable_vars
),
main
.
desc
,
places
,
persistable_vars
,
main
.
desc
,
cpt
.
to_text
(
loss_name
)
if
loss_name
else
six
.
u
(
''
),
scope
,
local_scopes
,
exec_strategy
,
build_strategy
,
num_trainers
,
trainer_id
)
self
.
scope
=
scope
def
run
(
self
,
fetch_list
,
feed
=
None
,
feed_dict
=
None
,
return_numpy
=
True
):
...
...
@@ -261,7 +258,7 @@ class ParallelExecutor(object):
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
feed_tensor_dict
)
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
if
len
(
feed
)
!=
len
(
self
.
_
act_
places
):
if
len
(
feed
)
!=
len
(
self
.
_places
):
raise
ValueError
(
"Feed a list of tensor, the list should be the same size as places"
)
...
...
@@ -277,7 +274,7 @@ class ParallelExecutor(object):
tensor
=
each
[
feed_name
]
if
not
isinstance
(
tensor
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
tensor
,
self
.
_
act_
places
[
i
])
tmp
.
set
(
tensor
,
self
.
_places
[
i
])
tensor
=
tmp
res_dict
[
feed_name
]
=
tensor
res
.
append
(
res_dict
)
...
...
@@ -294,4 +291,4 @@ class ParallelExecutor(object):
@
property
def
device_count
(
self
):
return
len
(
self
.
_
act_
places
)
return
len
(
self
.
_places
)
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
浏览文件 @
53619a79
...
...
@@ -16,7 +16,7 @@ from __future__ import print_function
import
unittest
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
,
TestWithGroup
,
TestWith1x1
,
TestWithInput1x1Filter1x1
class
TestMKLDNN
(
TestConv2dOp
):
...
...
@@ -37,5 +37,23 @@ class TestMKLDNNWithStride(TestWithStride):
self
.
data_format
=
"NCHW"
class TestMKLDNNWithGroup(TestWithGroup):
    """Re-run the TestWithGroup cases with `use_mkldnn` enabled."""

    def init_kernel_type(self):
        # Turn on the MKLDNN flag and set the data format to NCHW.
        self.use_mkldnn, self.data_format = True, "NCHW"
class TestMKLDNNWith1x1(TestWith1x1):
    """Re-run the TestWith1x1 cases with `use_mkldnn` enabled."""

    def init_kernel_type(self):
        # Turn on the MKLDNN flag and set the data format to NCHW.
        self.use_mkldnn, self.data_format = True, "NCHW"
class TestMKLDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
    """Re-run the TestWithInput1x1Filter1x1 cases with `use_mkldnn` enabled."""

    def init_kernel_type(self):
        # Turn on the MKLDNN flag and set the data format to NCHW.
        self.use_mkldnn, self.data_format = True, "NCHW"
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py
浏览文件 @
53619a79
...
...
@@ -29,7 +29,7 @@ class TestGetTensorFromSelectedRows(unittest.TestCase):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
x_rows
=
[
0
,
5
,
5
,
4
,
20
]
x_rows
=
[
0
,
5
,
5
,
4
,
19
]
height
=
20
row_numel
=
2
...
...
python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py
浏览文件 @
53619a79
...
...
@@ -29,8 +29,8 @@ class TestMergeSelectedRows(unittest.TestCase):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
x_rows
=
[
0
,
5
,
5
,
4
,
20
]
out_rows
=
[
0
,
4
,
5
,
20
]
x_rows
=
[
0
,
5
,
5
,
4
,
19
]
out_rows
=
[
0
,
4
,
5
,
19
]
height
=
20
row_numel
=
2
...
...
python/paddle/fluid/tests/unittests/test_py_func_op.py
0 → 100644
浏览文件 @
53619a79
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
paddle.fluid
as
fluid
import
paddle
import
unittest
import
six
import
numpy
as
np
# Device count used to size the reader's data: 2 by default, or the CUDA
# device count when the build has CUDA support.
dev_cnt = 2
if fluid.core.is_compiled_with_cuda():
    dev_cnt = fluid.core.get_cuda_device_count()
# Export the same count as CPU_NUM.
# NOTE(review): presumably so CPU runs of ParallelExecutor use the same
# number of places — confirm against ParallelExecutor.
os.environ['CPU_NUM'] = str(dev_cnt)
def dummy_func_with_no_input():
    """Take no arguments and return the Python float 1.0."""
    constant = 1.0
    return constant
def dummy_func_with_no_output(x):
    """Accept one argument, ignore it and return nothing."""
    return None
def tanh(x):
    """Return the element-wise hyperbolic tangent of `x` via `np.tanh`."""
    activated = np.tanh(x)
    return activated
def tanh_grad(y, dy):
    """
    Backward function of tanh: return dy * (1 - y ** 2), element-wise.

    Both arguments are converted with `np.array` first.
    """
    upstream = np.array(dy)
    local_slope = 1 - np.square(np.array(y))
    return upstream * local_slope
def cross_entropy(logits, labels):
    """
    Return the per-row negative log of the entry selected by each label.

    Args:
        logits: 2-D array-like; row i holds the values for sample i.
        labels: 2-D array-like of integer column indices; only column 0 of
            the first `logits.shape[0]` rows is used.

    Returns:
        numpy.ndarray: array of shape [M, 1] with the dtype of `logits`,
        where M is `logits.shape[0]` and entry [i, 0] is
        -log(logits[i][labels[i][0]]).
    """
    logits = np.array(logits)
    labels = np.array(labels)
    num_rows = logits.shape[0]
    # One (row, column) pair per sample, gathered with integer-array
    # indexing instead of a Python loop.
    row_index = np.arange(num_rows)
    picked = logits[row_index, labels[:num_rows, 0]]
    losses = -np.log(picked)
    return losses.reshape([num_rows, 1]).astype(logits.dtype, copy=False)
def cross_entropy_grad(logits, labels, bwd_dout):
    """
    Backward function matching `cross_entropy`.

    Args:
        logits: 2-D array-like; the forward input values.
        labels: 2-D array-like of integer column indices; only column 0 of
            the first `logits.shape[0]` rows is used.
        bwd_dout: gradient of the forward output, one value per row.

    Returns:
        tuple: `(dlogits, None)`. `dlogits` has the shape and dtype of
        `logits`; it is zero everywhere except at [i, labels[i][0]], which
        holds -bwd_dout[i] / logits[i][labels[i][0]]. The second entry is
        None because no gradient is produced for `labels`.
    """
    logits = np.array(logits)
    labels = np.array(labels)
    bwd_dout = np.array(bwd_dout)
    num_rows = logits.shape[0]
    num_cols = logits.shape[1]
    row_index = np.arange(num_rows)
    col_index = labels[:num_rows, 0]
    # Flatten the upstream gradient to one scalar per row so no size-1 array
    # has to be converted to a scalar on assignment.
    upstream = bwd_dout[:num_rows].reshape([num_rows])
    dlogits = np.zeros([num_rows, num_cols]).astype(logits.dtype)
    dlogits[row_index, col_index] = -upstream / logits[row_index, col_index]
    return dlogits, None
def simple_fc_net(img, label, use_py_func_op):
    """
    Build a small fully connected network and return its mean loss.

    The network has four fc layers of size 200, each followed by tanh, then
    an fc layer of size 10 with softmax and a cross-entropy loss.

    Args:
        img: input of the first fc layer.
        label: labels used by the cross-entropy loss.
        use_py_func_op (bool): when False, tanh and cross entropy use the
            built-in layers; when True, they are implemented with
            `fluid.layers.py_func` and the Python functions defined in this
            file.

    Returns:
        The result of `fluid.layers.mean` applied to the loss.
    """
    hidden = img
    for idx in range(4):
        hidden = fluid.layers.fc(
            hidden,
            size=200,
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=1.0)))
        if not use_py_func_op:
            hidden = fluid.layers.tanh(hidden)
        else:
            # py_func needs its output variable created up front.
            new_hidden = fluid.default_main_program().current_block(
            ).create_var(
                name='hidden_{}'.format(idx),
                dtype='float32',
                shape=hidden.shape)
            # The forward input is skipped in backward: tanh_grad takes
            # only (y, dy).
            hidden = fluid.layers.py_func(
                func=tanh,
                x=hidden,
                out=new_hidden,
                backward_func=tanh_grad,
                skip_vars_in_backward_input=hidden)

    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
    if not use_py_func_op:
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
    else:
        loss = fluid.default_main_program().current_block().create_var(
            name='loss', dtype='float32', shape=[-1, 1])
        # The forward output is skipped in backward: cross_entropy_grad
        # takes (logits, labels, bwd_dout).
        loss = fluid.layers.py_func(
            func=cross_entropy,
            x=[prediction, label],
            out=loss,
            backward_func=cross_entropy_grad,
            skip_vars_in_backward_input=loss)

        # Also exercise py_func with no input ...
        dummy_var = fluid.default_main_program().current_block().create_var(
            name='test_tmp_var', dtype='float32', shape=[1])
        fluid.layers.py_func(
            func=dummy_func_with_no_input, x=None, out=dummy_var)

        # ... and with no output.
        fluid.layers.py_func(func=dummy_func_with_no_output, x=loss, out=None)

    loss = fluid.layers.mean(loss)
    return loss
def reader():
    """
    Yield `dev_cnt * 100` random samples.

    Each sample is a pair: an array of 784 random floats from
    `np.random.random`, and an array holding one random integer in [0, 9].
    """
    for _ in six.moves.range(dev_cnt * 100):
        # randint's upper bound is exclusive, so high=10 covers 0..9; this
        # replaces the deprecated random_integers(low=0, high=9).
        yield np.random.random([784]), np.random.randint(
            low=0, high=10, size=[1])
def test_main(use_cuda, use_py_func_op, use_parallel_executor):
    """
    Train `simple_fc_net` for two epochs and return the fetched losses.

    Args:
        use_cuda (bool): run on `fluid.CUDAPlace(0)` instead of
            `fluid.CPUPlace()`.
        use_py_func_op (bool): forwarded to `simple_fc_net`.
        use_parallel_executor (bool): run the training steps with
            `fluid.ParallelExecutor` instead of `fluid.Executor`.

    Returns:
        numpy.ndarray of the loss fetched at every step, or None when
        `use_cuda` is set but the build has no CUDA support.
    """
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return None

    # Fresh programs and scope for every run.
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        with fluid.scope_guard(fluid.core.Scope()):
            # Fix all seeds to the same value on every call.
            fluid.default_main_program().random_seed = 1
            fluid.default_startup_program().random_seed = 1
            np.random.seed(1)

            img = fluid.layers.data(name='image', shape=[784], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            loss = simple_fc_net(img, label, use_py_func_op)
            optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
            optimizer.minimize(loss)

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
            r = paddle.batch(reader, batch_size=10)

            # The startup program always runs on the plain Executor.
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            if use_parallel_executor:
                exe = fluid.ParallelExecutor(
                    use_cuda=use_cuda, loss_name=loss.name)
                # ParallelExecutor is given the variable name to fetch.
                fetch_list = [loss.name]
            else:
                fetch_list = [loss]

            ret = []
            for epoch_id in six.moves.range(2):
                for d in r():
                    L, = exe.run(feed=feeder.feed(d), fetch_list=fetch_list)
                    ret.append(L)
            return np.array(ret)
class TestPyFuncOpUseExecutor(unittest.TestCase):
    """Check that every `test_main` configuration produces the same losses."""

    def setUp(self):
        self.use_parallel_executor = False

    def test_loss_diff(self):
        """
        Run `test_main` for each (use_cuda, use_py_func_op) combination and
        assert that every run's losses match the first run's within 1e-3.

        Combinations for which `test_main` returns None are left out.
        """
        losses = []
        for use_cuda in [True, False]:
            for use_py_func_op in [True, False]:
                L = test_main(use_cuda, use_py_func_op,
                              self.use_parallel_executor)
                if L is not None:
                    losses.append(L)

        # Compare every later run against the first one. Starting at index 1
        # avoids comparing the first run with itself and makes sure the last
        # run is checked as well.
        for idx in six.moves.range(1, len(losses)):
            max_diff = np.max(np.abs(losses[idx] - losses[0]))
            self.assertAlmostEqual(max_diff, 0, delta=1e-3)


class TestPyFuncOpUseParallelExecutor(TestPyFuncOpUseExecutor):
    """Same check as the base class, run with the parallel executor.

    Deriving from TestPyFuncOpUseExecutor is what makes `test_loss_diff`
    run with `use_parallel_executor=True`; on its own this class defines no
    test method.
    """

    def setUp(self):
        self.use_parallel_executor = True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/setup.py.in
浏览文件 @
53619a79
...
...
@@ -107,9 +107,9 @@ packages=['paddle',
'paddle.fluid.distributed',
'paddle.fluid.layers',
'paddle.fluid.contrib',
'paddle.fluid.contrib.utils',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.utils',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler.details']
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录