Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
d94920ce
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d94920ce
编写于
9月 26, 2018
作者:
D
Dang Qingqing
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into quantize_transpiler_update
上级
44791f42
01fda934
变更
46
隐藏空白更改
内联
并排
Showing
46 changed file
with
839 addition
and
315 deletion
+839
-315
cmake/configure.cmake
cmake/configure.cmake
+19
-1
paddle/fluid/API.spec
paddle/fluid/API.spec
+10
-10
paddle/fluid/CMakeLists.txt
paddle/fluid/CMakeLists.txt
+2
-0
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
+8
-26
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
...e/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
+7
-10
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+1
-7
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+1
-2
paddle/fluid/framework/op_info.h
paddle/fluid/framework/op_info.h
+11
-6
paddle/fluid/framework/op_proto_maker.cc
paddle/fluid/framework/op_proto_maker.cc
+3
-1
paddle/fluid/framework/op_proto_maker.h
paddle/fluid/framework/op_proto_maker.h
+1
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+44
-13
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-1
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+15
-51
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+11
-8
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+5
-0
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+199
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+3
-9
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+19
-18
paddle/fluid/operators/pool_mkldnn_op.cc
paddle/fluid/operators/pool_mkldnn_op.cc
+35
-6
paddle/fluid/operators/read_op.cc
paddle/fluid/operators/read_op.cc
+2
-0
paddle/fluid/operators/sequence_slice_op.h
paddle/fluid/operators/sequence_slice_op.h
+2
-2
paddle/fluid/operators/sgd_op.cu
paddle/fluid/operators/sgd_op.cu
+21
-20
paddle/fluid/operators/shrink_rnn_memory_op.cc
paddle/fluid/operators/shrink_rnn_memory_op.cc
+21
-8
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+1
-1
paddle/fluid/operators/top_k_op.cc
paddle/fluid/operators/top_k_op.cc
+2
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+5
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+5
-0
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+33
-10
paddle/fluid/platform/for_range.h
paddle/fluid/platform/for_range.h
+29
-10
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+17
-0
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+3
-0
paddle/fluid/pybind/const_value.cc
paddle/fluid/pybind/const_value.cc
+3
-0
paddle/fluid/train/CMakeLists.txt
paddle/fluid/train/CMakeLists.txt
+30
-0
paddle/fluid/train/test_train_recognize_digits.cc
paddle/fluid/train/test_train_recognize_digits.cc
+89
-0
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+4
-2
python/paddle/dataset/wmt14.py
python/paddle/dataset/wmt14.py
+2
-1
python/paddle/dataset/wmt16.py
python/paddle/dataset/wmt16.py
+4
-2
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+10
-1
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+27
-28
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+2
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+72
-19
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+1
-22
python/paddle/fluid/param_attr.py
python/paddle/fluid/param_attr.py
+28
-3
python/paddle/fluid/tests/book/test_recognize_digits.py
python/paddle/fluid/tests/book/test_recognize_digits.py
+11
-0
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+19
-16
python/paddle/fluid/tests/unittests/test_operator_desc.py
python/paddle/fluid/tests/unittests/test_operator_desc.py
+1
-1
未找到文件。
cmake/configure.cmake
浏览文件 @
d94920ce
...
@@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING)
...
@@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING)
endif
()
endif
()
if
(
WIN32
)
if
(
WIN32
)
# windows
stupid compile
option for all targets.
# windows
header
option for all targets.
add_definitions
(
-D_XKEYCHECK_H
)
add_definitions
(
-D_XKEYCHECK_H
)
# Use symbols instead of absolute path, reduce the cmake link command length.
SET
(
CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1
)
SET
(
CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1
)
SET
(
CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1
)
SET
(
CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1
)
SET
(
CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1
)
SET
(
CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1
)
SET
(
CMAKE_C_RESPONSE_FILE_LINK_FLAG
"@"
)
SET
(
CMAKE_CXX_RESPONSE_FILE_LINK_FLAG
"@"
)
# Specify the program to use when building static libraries
SET
(
CMAKE_C_CREATE_STATIC_LIBRARY
"<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>"
)
SET
(
CMAKE_CXX_CREATE_STATIC_LIBRARY
"<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>"
)
# set defination for the dll export
if
(
NOT MSVC
)
message
(
FATAL
"Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA."
)
endif
(
NOT MSVC
)
endif
(
WIN32
)
endif
(
WIN32
)
if
(
NOT WITH_GOLANG
)
if
(
NOT WITH_GOLANG
)
...
...
paddle/fluid/API.spec
浏览文件 @
d94920ce
...
@@ -41,7 +41,7 @@ paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id',
...
@@ -41,7 +41,7 @@ paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id',
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=
'kwargs'
, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=
None
, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.GradientScaleStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.GradientScaleStrategy, arg0: int) -> None
...
@@ -162,14 +162,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
...
@@ -162,14 +162,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', '
act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, Tru
e, None, None))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', '
out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, Non
e, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
...
@@ -378,7 +378,7 @@ paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> Non
...
@@ -378,7 +378,7 @@ paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> Non
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False))
paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim'
], varargs=None, keywords='kwargs', defaults=(None,
))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim'
, 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False
))
paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/CMakeLists.txt
浏览文件 @
d94920ce
...
@@ -13,3 +13,5 @@ if(WITH_INFERENCE)
...
@@ -13,3 +13,5 @@ if(WITH_INFERENCE)
# NOTE: please add subdirectory inference at last.
# NOTE: please add subdirectory inference at last.
add_subdirectory
(
inference
)
add_subdirectory
(
inference
)
endif
()
endif
()
add_subdirectory
(
train
)
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
浏览文件 @
d94920ce
...
@@ -26,8 +26,6 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
...
@@ -26,8 +26,6 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
PADDLE_ENFORCE
(
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
"conv_relu_mkldnn_fuse"
,
graph
.
get
());
FusePassBase
::
Init
(
"conv_relu_mkldnn_fuse"
,
graph
.
get
());
std
::
unordered_set
<
Node
*>
nodes2delete
;
GraphPatternDetector
gpd
;
GraphPatternDetector
gpd
;
auto
*
conv_input
=
gpd
.
mutable_pattern
()
auto
*
conv_input
=
gpd
.
mutable_pattern
()
->
NewNode
(
"conv_relu_mkldnn_fuse/conv_input"
)
->
NewNode
(
"conv_relu_mkldnn_fuse/conv_input"
)
...
@@ -42,36 +40,20 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
...
@@ -42,36 +40,20 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
Graph
*
g
)
{
Graph
*
g
)
{
VLOG
(
4
)
<<
"handle ConvReLU fuse"
;
VLOG
(
4
)
<<
"handle ConvReLU fuse"
;
GET_IR_NODE_FROM_SUBGRAPH
(
conv_weight
,
conv_weight
,
GET_IR_NODE_FROM_SUBGRAPH
(
conv_weight
,
conv_weight
,
conv_relu_pattern
);
// Filter
conv_relu_pattern
);
// Filter
GET_IR_NODE_FROM_SUBGRAPH
(
conv_bias
,
conv_bias
,
conv_relu_pattern
);
// Bias
GET_IR_NODE_FROM_SUBGRAPH
(
conv_out
,
conv_out
,
conv_relu_pattern
);
// tmp
GET_IR_NODE_FROM_SUBGRAPH
(
conv_out
,
conv_out
,
conv_relu_pattern
);
// tmp
GET_IR_NODE_FROM_SUBGRAPH
(
conv
,
conv
,
conv_relu_pattern
);
// CONV op
GET_IR_NODE_FROM_SUBGRAPH
(
conv
,
conv
,
conv_relu_pattern
);
// CONV op
GET_IR_NODE_FROM_SUBGRAPH
(
relu_out
,
relu_out
,
conv_relu_pattern
);
// Out
GET_IR_NODE_FROM_SUBGRAPH
(
relu_out
,
relu_out
,
conv_relu_pattern
);
// Out
GET_IR_NODE_FROM_SUBGRAPH
(
relu
,
relu
,
conv_relu_pattern
);
// ReLU op
GET_IR_NODE_FROM_SUBGRAPH
(
relu
,
relu
,
conv_relu_pattern
);
// ReLU op
// Create an ConvReLU Node.
// Transform Conv node into ConvReLU node.
OpDesc
desc
;
OpDesc
*
desc
=
conv
->
Op
();
std
::
string
conv_relu_i_in
=
subgraph
.
at
(
conv_input
)
->
Name
();
desc
->
SetOutput
(
"Output"
,
std
::
vector
<
std
::
string
>
({
relu_out
->
Name
()}));
std
::
string
conv_relu_w_in
=
conv_weight
->
Name
();
desc
->
SetAttr
(
"fuse_relu"
,
true
);
std
::
string
conv_relu_b_in
=
conv_bias
->
Name
();
GraphSafeRemoveNodes
(
graph
.
get
(),
{
relu
,
conv_out
});
std
::
string
conv_relu_out
=
relu_out
->
Name
();
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
conv_relu_i_in
}));
desc
.
SetInput
(
"Filter"
,
std
::
vector
<
std
::
string
>
({
conv_relu_w_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
conv_relu_b_in
}));
desc
.
SetOutput
(
"Output"
,
std
::
vector
<
std
::
string
>
({
conv_relu_out
}));
desc
.
SetType
(
"conv2d"
);
for
(
auto
&
attr
:
conv
->
Op
()
->
GetAttrMap
())
{
desc
.
SetAttr
(
attr
.
first
,
attr
.
second
);
}
desc
.
SetAttr
(
"fuse_relu"
,
true
);
auto
conv_relu_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
GraphSafeRemoveNodes
(
graph
.
get
(),
{
conv
,
relu
,
conv_out
});
PADDLE_ENFORCE
(
subgraph
.
count
(
conv_input
));
PADDLE_ENFORCE
(
subgraph
.
count
(
conv_input
));
IR_NODE_LINK_TO
(
subgraph
.
at
(
conv_input
),
conv_relu_node
);
IR_NODE_LINK_TO
(
conv
,
relu_out
);
IR_NODE_LINK_TO
(
conv_weight
,
conv_relu_node
);
IR_NODE_LINK_TO
(
conv_bias
,
conv_relu_node
);
IR_NODE_LINK_TO
(
conv_relu_node
,
relu_out
);
found_conv_relu_count
++
;
found_conv_relu_count
++
;
};
};
...
...
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
浏览文件 @
d94920ce
...
@@ -85,16 +85,13 @@ TEST(ConvReLUFusePass, basic) {
...
@@ -85,16 +85,13 @@ TEST(ConvReLUFusePass, basic) {
for
(
auto
*
node
:
graph
->
Nodes
())
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsOp
()
&&
node
->
Op
()
->
Type
()
==
"conv2d"
)
{
if
(
node
->
IsOp
()
&&
node
->
Op
()
->
Type
()
==
"conv2d"
)
{
if
(
node
->
Op
()
->
HasAttr
(
"use_mkldnn"
))
{
auto
*
op
=
node
->
Op
();
bool
use_mkldnn
=
boost
::
get
<
bool
>
(
node
->
Op
()
->
GetAttr
(
"use_mkldnn"
));
ASSERT_TRUE
(
op
->
HasAttr
(
"use_mkldnn"
));
if
(
use_mkldnn
)
{
EXPECT_TRUE
(
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"use_mkldnn"
)));
if
(
node
->
Op
()
->
HasAttr
(
"fuse_relu"
))
{
ASSERT_TRUE
(
op
->
HasAttr
(
"fuse_relu"
));
bool
fuse_relu
=
boost
::
get
<
bool
>
(
node
->
Op
()
->
GetAttr
(
"fuse_relu"
));
bool
fuse_relu
=
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"fuse_relu"
));
if
(
fuse_relu
)
{
if
(
fuse_relu
)
{
++
conv_relu_count
;
++
conv_relu_count
;
}
}
}
}
}
}
}
}
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
d94920ce
...
@@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()(
...
@@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()(
->
AsInput
()
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"conv2d"
,
"Filter"
);
->
assert_is_op_input
(
"conv2d"
,
"Filter"
);
// Bias
auto
*
conv_bias_var
=
pattern
->
NewNode
(
conv_bias_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"conv2d"
,
"Bias"
);
// intermediate variable, will be removed in the IR after fuse.
// intermediate variable, will be removed in the IR after fuse.
auto
*
conv_out_var
=
pattern
->
NewNode
(
conv_out_repr
())
auto
*
conv_out_var
=
pattern
->
NewNode
(
conv_out_repr
())
->
AsIntermediate
()
->
AsIntermediate
()
...
@@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()(
...
@@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()(
->
AsOutput
()
->
AsOutput
()
->
assert_is_op_output
(
"relu"
);
->
assert_is_op_output
(
"relu"
);
conv_op
->
LinksFrom
({
conv_input
,
conv_weight_var
,
conv_bias_var
})
conv_op
->
LinksFrom
({
conv_input
,
conv_weight_var
}).
LinksTo
({
conv_out_var
});
.
LinksTo
({
conv_out_var
});
relu_op
->
LinksFrom
({
conv_out_var
}).
LinksTo
({
relu_out_var
});
relu_op
->
LinksFrom
({
conv_out_var
}).
LinksTo
({
relu_out_var
});
return
relu_out_var
;
return
relu_out_var
;
}
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
d94920ce
...
@@ -379,7 +379,7 @@ struct PatternBase {
...
@@ -379,7 +379,7 @@ struct PatternBase {
// op: conv + relu
// op: conv + relu
// named nodes:
// named nodes:
// conv_input, conv_weight,
// conv_input, conv_weight,
// conv_
bias, conv_
out, conv,
// conv_out, conv,
// relu_out, relu
// relu_out, relu
struct
ConvReLU
:
public
PatternBase
{
struct
ConvReLU
:
public
PatternBase
{
ConvReLU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
ConvReLU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
...
@@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase {
...
@@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase {
PATTERN_DECL_NODE
(
relu
);
PATTERN_DECL_NODE
(
relu
);
// declare variable node's name
// declare variable node's name
PATTERN_DECL_NODE
(
conv_weight
);
PATTERN_DECL_NODE
(
conv_weight
);
PATTERN_DECL_NODE
(
conv_bias
);
PATTERN_DECL_NODE
(
conv_out
);
PATTERN_DECL_NODE
(
conv_out
);
PATTERN_DECL_NODE
(
relu_out
);
PATTERN_DECL_NODE
(
relu_out
);
};
};
...
...
paddle/fluid/framework/op_info.h
浏览文件 @
d94920ce
...
@@ -38,27 +38,31 @@ struct OpInfo {
...
@@ -38,27 +38,31 @@ struct OpInfo {
OpAttrChecker
*
checker_
{
nullptr
};
OpAttrChecker
*
checker_
{
nullptr
};
InferVarTypeFN
infer_var_type_
;
InferVarTypeFN
infer_var_type_
;
InferShapeFN
infer_shape_
;
InferShapeFN
infer_shape_
;
std
::
string
op_type_
;
bool
HasOpProtoAndChecker
()
const
{
bool
HasOpProtoAndChecker
()
const
{
return
proto_
!=
nullptr
&&
checker_
!=
nullptr
;
return
proto_
!=
nullptr
&&
checker_
!=
nullptr
;
}
}
const
proto
::
OpProto
&
Proto
()
const
{
const
proto
::
OpProto
&
Proto
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
proto_
,
"Operator Proto has not been registered"
);
PADDLE_ENFORCE_NOT_NULL
(
proto_
,
"Operator %s Proto has not been registered"
,
op_type_
);
PADDLE_ENFORCE
(
proto_
->
IsInitialized
(),
PADDLE_ENFORCE
(
proto_
->
IsInitialized
(),
"Operator Proto must be initialized in op info"
);
"Operator %s Proto must be initialized in op info"
,
op_type_
);
return
*
proto_
;
return
*
proto_
;
}
}
const
OpCreator
&
Creator
()
const
{
const
OpCreator
&
Creator
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
creator_
,
PADDLE_ENFORCE_NOT_NULL
(
"Operator Creator has not been registered"
);
creator_
,
"Operator %s Creator has not been registered"
,
op_type_
);
return
creator_
;
return
creator_
;
}
}
const
GradOpMakerFN
&
GradOpMaker
()
const
{
const
GradOpMakerFN
&
GradOpMaker
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
grad_op_maker_
,
PADDLE_ENFORCE_NOT_NULL
(
grad_op_maker_
,
"Operator GradOpMaker has not been registered."
);
"Operator %s GradOpMaker has not been registered."
,
op_type_
);
return
grad_op_maker_
;
return
grad_op_maker_
;
}
}
...
@@ -73,8 +77,9 @@ class OpInfoMap {
...
@@ -73,8 +77,9 @@ class OpInfoMap {
return
map_
.
find
(
op_type
)
!=
map_
.
end
();
return
map_
.
find
(
op_type
)
!=
map_
.
end
();
}
}
void
Insert
(
const
std
::
string
&
type
,
const
OpInfo
&
info
)
{
void
Insert
(
const
std
::
string
&
type
,
OpInfo
info
)
{
PADDLE_ENFORCE
(
!
Has
(
type
),
"Operator %s has been registered"
,
type
);
PADDLE_ENFORCE
(
!
Has
(
type
),
"Operator %s has been registered"
,
type
);
info
.
op_type_
=
type
;
map_
.
insert
({
type
,
info
});
map_
.
insert
({
type
,
info
});
}
}
...
...
paddle/fluid/framework/op_proto_maker.cc
浏览文件 @
d94920ce
...
@@ -132,7 +132,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
...
@@ -132,7 +132,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
.
SetDefault
(
""
);
.
SetDefault
(
""
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
OpCreationCallstackAttrName
(),
"Callstack for Op Creatation."
)
.
SetDefault
({});
Validate
();
Validate
();
}
}
...
...
paddle/fluid/framework/op_proto_maker.h
浏览文件 @
d94920ce
...
@@ -46,6 +46,7 @@ class OpProtoAndCheckerMaker {
...
@@ -46,6 +46,7 @@ class OpProtoAndCheckerMaker {
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpCreationCallstackAttrName
()
{
return
"op_callstack"
;
}
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
d94920ce
...
@@ -14,15 +14,17 @@ limitations under the License. */
...
@@ -14,15 +14,17 @@ limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/operator.h"
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <glog/logging.h>
#include <algorithm>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op
erato
r.h"
#include "paddle/fluid/framework/op
_proto_make
r.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -140,19 +142,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
...
@@ -140,19 +142,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
try
{
if
(
platform
::
is_gpu_place
(
place
))
{
if
(
VLOG_IS_ON
(
4
))
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
#else
#else
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
platform
::
SetDeviceId
(
dev_id
);
platform
::
SetDeviceId
(
dev_id
);
#endif
#endif
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
if
(
VLOG_IS_ON
(
3
))
{
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
catch
(
platform
::
EnforceNotMet
exception
)
{
if
(
Attrs
().
count
(
"sub_block"
)
!=
0
)
{
throw
exception
;
}
auto
&
callstack
=
Attr
<
std
::
vector
<
std
::
string
>>
(
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
());
if
(
callstack
.
empty
())
{
throw
exception
;
}
std
::
ostringstream
sout
;
sout
<<
"Invoke operator "
<<
Type
()
<<
" error.
\n
"
;
sout
<<
"Python Callstacks:
\n
"
;
for
(
auto
&
line
:
callstack
)
{
sout
<<
line
;
}
sout
<<
"C++ Callstacks:
\n
"
;
sout
<<
exception
.
err_str_
;
exception
.
err_str_
=
sout
.
str
();
throw
exception
;
}
catch
(...)
{
std
::
rethrow_exception
(
std
::
current_exception
());
}
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
...
@@ -180,7 +211,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
...
@@ -180,7 +211,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
}
}
bool
OperatorBase
::
HasOutputs
(
const
std
::
string
&
name
)
const
{
bool
OperatorBase
::
HasOutputs
(
const
std
::
string
&
name
)
const
{
if
(
outputs_
.
find
(
name
)
!=
outputs_
.
end
(
))
{
if
(
outputs_
.
end
()
!=
outputs_
.
find
(
name
))
{
return
true
;
return
true
;
}
else
{
}
else
{
return
false
;
return
false
;
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
d94920ce
...
@@ -76,10 +76,10 @@ bool AnalysisPredictor::Init(
...
@@ -76,10 +76,10 @@ bool AnalysisPredictor::Init(
}
}
OptimizeInferenceProgram
();
OptimizeInferenceProgram
();
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
if
(
config_
.
_use_mkldnn
)
{
if
(
config_
.
_use_mkldnn
)
{
executor_
->
EnableMKLDNN
(
*
inference_program_
);
executor_
->
EnableMKLDNN
(
*
inference_program_
);
}
}
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
VLOG
(
5
)
<<
"to create variables"
;
VLOG
(
5
)
<<
"to create variables"
;
PADDLE_ENFORCE
(
scope_
.
get
());
PADDLE_ENFORCE
(
scope_
.
get
());
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
d94920ce
...
@@ -22,6 +22,7 @@ limitations under the License. */
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -215,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
...
@@ -215,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
template
<
typename
T
>
template
<
typename
T
>
void
NativePaddlePredictor
::
GetFetchOne
(
const
framework
::
LoDTensor
&
fetch
,
void
NativePaddlePredictor
::
GetFetchOne
(
const
framework
::
LoDTensor
&
fetch
,
PaddleTensor
*
output
)
{
PaddleTensor
*
output
)
{
std
::
vector
<
int
>
shape
;
// set shape.
auto
dims_i
=
fetch
.
dims
();
auto
shape
=
framework
::
vectorize
(
fetch
.
dims
());
auto
lod
=
fetch
.
lod
();
output
->
shape
.
assign
(
shape
.
begin
(),
shape
.
end
());
const
T
*
output_ptr
=
fetch
.
data
<
T
>
();
// set data.
auto
num
=
fetch
.
numel
();
const
T
*
data
=
fetch
.
data
<
T
>
();
std
::
vector
<
T
>
data
;
int
num_elems
=
inference
::
VecReduceToInt
(
shape
);
if
(
0
==
lod
.
size
())
{
output
->
data
.
Resize
(
num_elems
*
sizeof
(
T
));
std
::
copy
(
output_ptr
,
output_ptr
+
num
,
std
::
back_inserter
(
data
));
// The fetched tensor output by fetch op, should always in CPU memory, so just
for
(
int
j
=
0
;
j
<
dims_i
.
size
();
++
j
)
{
// copy.
shape
.
push_back
(
dims_i
[
j
]);
memcpy
(
output
->
data
.
data
(),
data
,
num_elems
*
sizeof
(
T
));
}
// set lod
}
else
{
output
->
lod
.
clear
();
// for batch detection
for
(
auto
&
level
:
fetch
.
lod
())
{
// image[0] -> output[0] shape {145, 6}
output
->
lod
.
emplace_back
(
level
.
begin
(),
level
.
end
());
// image[1] -> output[1] shape {176, 6}
// then,
// the batch output shape {321, 6}
// the lod {{0, 145, 321}}
// so we should append output[0] to {176, 6}
size_t
max_dim
=
0
;
for
(
size_t
j
=
1
;
j
<
lod
[
0
].
size
();
j
++
)
{
max_dim
=
std
::
max
(
max_dim
,
lod
[
0
][
j
]
-
lod
[
0
][
j
-
1
]);
}
size_t
common_dim
=
lod
[
0
].
back
()
==
0
?
0
:
num
/
lod
[
0
].
back
();
if
(
max_dim
>
0
)
{
data
.
resize
((
lod
[
0
].
size
()
-
1
)
*
max_dim
*
common_dim
,
0
);
}
for
(
size_t
j
=
1
;
j
<
lod
[
0
].
size
();
j
++
)
{
size_t
start
=
lod
[
0
][
j
-
1
]
*
common_dim
;
size_t
end
=
lod
[
0
][
j
]
*
common_dim
;
if
(
end
>
start
)
{
std
::
copy
(
output_ptr
+
start
,
output_ptr
+
end
,
data
.
begin
()
+
(
j
-
1
)
*
max_dim
*
common_dim
);
}
}
shape
.
push_back
(
lod
[
0
].
size
()
-
1
);
shape
.
push_back
(
max_dim
);
for
(
int
j
=
1
;
j
<
dims_i
.
size
();
++
j
)
{
shape
.
push_back
(
dims_i
[
j
]);
}
}
output
->
shape
=
shape
;
auto
&
buffer
=
output
->
data
;
if
(
buffer
.
empty
()
||
buffer
.
length
()
<
sizeof
(
T
)
*
data
.
size
())
{
buffer
.
Resize
(
sizeof
(
T
)
*
data
.
size
());
}
std
::
memcpy
(
buffer
.
data
(),
data
.
data
(),
sizeof
(
T
)
*
data
.
size
());
// copy LoD
for
(
const
auto
&
level
:
fetch
.
lod
())
{
output
->
lod
.
emplace_back
(
level
);
}
}
}
}
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
d94920ce
...
@@ -74,13 +74,17 @@ template <>
...
@@ -74,13 +74,17 @@ template <>
std
::
string
to_string
<
std
::
vector
<
std
::
vector
<
float
>>>
(
std
::
string
to_string
<
std
::
vector
<
std
::
vector
<
float
>>>
(
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
&
vec
);
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
&
vec
);
// Multiplies all elements of `v` together and returns the product as an int.
//
// The running product is stored in an int and converted to T before every
// multiplication, so for non-int element types the intermediate result is
// narrowed back to int after each step (e.g. {1.5f, 2.0f} yields 2).
// An empty vector yields 1.
template <typename T>
int VecReduceToInt(const std::vector<T> &v) {
  int product = 1;
  for (auto it = v.begin(); it != v.end(); ++it) {
    const T lhs = product;  // same int -> T conversion the accumulator had
    const T rhs = *it;
    product = lhs * rhs;
  }
  return product;
}
template
<
typename
T
>
template
<
typename
T
>
static
void
TensorAssignData
(
PaddleTensor
*
tensor
,
static
void
TensorAssignData
(
PaddleTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
// Assign buffer
// Assign buffer
int
dim
=
std
::
accumulate
(
tensor
->
shape
.
begin
(),
tensor
->
shape
.
end
(),
1
,
int
num_elems
=
VecReduceToInt
(
tensor
->
shape
);
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
tensor
->
data
.
Resize
(
sizeof
(
T
)
*
num_elems
);
tensor
->
data
.
Resize
(
sizeof
(
T
)
*
dim
);
int
c
=
0
;
int
c
=
0
;
for
(
const
auto
&
f
:
data
)
{
for
(
const
auto
&
f
:
data
)
{
for
(
T
v
:
f
)
{
for
(
T
v
:
f
)
{
...
@@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor,
...
@@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
}
}
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
st
atic
st
d
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
" - type: "
;
os
<<
" - type: "
;
...
@@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
...
@@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
os
<<
"
\n
"
;
os
<<
"
\n
"
;
os
<<
" - data: "
;
os
<<
" - data: "
;
int
dim
=
std
::
accumulate
(
tensor
.
shape
.
begin
(),
tensor
.
shape
.
end
(),
1
,
int
dim
=
VecReduceToInt
(
tensor
.
shape
);
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
{
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
{
os
<<
static_cast
<
float
*>
(
tensor
.
data
.
data
())[
i
]
<<
" "
;
os
<<
static_cast
<
float
*>
(
tensor
.
data
.
data
())[
i
]
<<
" "
;
}
}
...
@@ -122,8 +125,8 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
...
@@ -122,8 +125,8 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
return
os
.
str
();
return
os
.
str
();
}
}
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
static
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
,
int
epoch
=
1
)
{
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms ======"
;
<<
", latency: "
<<
latency
<<
"ms ======"
;
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
d94920ce
...
@@ -58,6 +58,11 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi
...
@@ -58,6 +58,11 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi
download_model_and_data
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
"text-classification-Senta.tar.gz"
"text_classification_data.txt.tar.gz"
)
download_model_and_data
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
"text-classification-Senta.tar.gz"
"text_classification_data.txt.tar.gz"
)
inference_analysis_api_test
(
test_analyzer_text_classification
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
analyzer_text_classification_tester.cc
)
inference_analysis_api_test
(
test_analyzer_text_classification
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
analyzer_text_classification_tester.cc
)
# seq_conv1
set
(
SEQ_CONV1_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/seq_conv1"
)
download_model_and_data
(
${
SEQ_CONV1_INSTALL_DIR
}
"seq_conv1_model.tar.gz"
"seq_conv1_data.txt.tar.gz"
)
inference_analysis_api_test
(
test_analyzer_seq_conv1
${
SEQ_CONV1_INSTALL_DIR
}
analyzer_seq_conv1_tester.cc
)
# ocr
# ocr
set
(
OCR_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/ocr"
)
set
(
OCR_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/ocr"
)
if
(
NOT EXISTS
${
OCR_INSTALL_DIR
}
)
if
(
NOT EXISTS
${
OCR_INSTALL_DIR
}
)
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
0 → 100644
浏览文件 @
d94920ce
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
title1_all
,
title2_all
,
title3_all
,
l1_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
title1
,
title2
,
title3
,
l1
;
std
::
vector
<
size_t
>
title1_lod
,
title2_lod
,
title3_lod
,
l1_lod
;
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
Load
(
path
);
}
DataRecord
NextBatch
()
{
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
title1_all
.
size
())
{
data
.
title1_all
.
assign
(
title1_all
.
begin
()
+
batch_iter
,
title1_all
.
begin
()
+
batch_end
);
data
.
title2_all
.
assign
(
title2_all
.
begin
()
+
batch_iter
,
title2_all
.
begin
()
+
batch_end
);
data
.
title3_all
.
assign
(
title3_all
.
begin
()
+
batch_iter
,
title3_all
.
begin
()
+
batch_end
);
data
.
l1_all
.
assign
(
l1_all
.
begin
()
+
batch_iter
,
l1_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
title1_lod
.
push_back
(
0
);
data
.
title2_lod
.
push_back
(
0
);
data
.
title3_lod
.
push_back
(
0
);
data
.
l1_lod
.
push_back
(
0
);
CHECK
(
!
data
.
title1_all
.
empty
());
CHECK
(
!
data
.
title2_all
.
empty
());
CHECK
(
!
data
.
title3_all
.
empty
());
CHECK
(
!
data
.
l1_all
.
empty
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title2_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title3_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
l1_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
title1_all
.
size
();
j
++
)
{
data
.
title1
.
push_back
(
data
.
title1_all
[
j
]);
data
.
title2
.
push_back
(
data
.
title2_all
[
j
]);
data
.
title3
.
push_back
(
data
.
title3_all
[
j
]);
data
.
l1
.
push_back
(
data
.
l1_all
[
j
]);
// calculate lod
data
.
title1_lod
.
push_back
(
data
.
title1_lod
.
back
()
+
data
.
title1_all
[
j
].
size
());
data
.
title2_lod
.
push_back
(
data
.
title2_lod
.
back
()
+
data
.
title2_all
[
j
].
size
());
data
.
title3_lod
.
push_back
(
data
.
title3_lod
.
back
()
+
data
.
title3_all
[
j
].
size
());
data
.
l1_lod
.
push_back
(
data
.
l1_lod
.
back
()
+
data
.
l1_all
[
j
].
size
());
}
}
batch_iter
+=
batch_size
;
return
data
;
}
void
Load
(
const
std
::
string
&
path
)
{
std
::
ifstream
file
(
path
);
std
::
string
line
;
int
num_lines
=
0
;
while
(
std
::
getline
(
file
,
line
))
{
num_lines
++
;
std
::
vector
<
std
::
string
>
data
;
split
(
line
,
'\t'
,
&
data
);
// load title1 data
std
::
vector
<
int64_t
>
title1_data
;
split_to_int64
(
data
[
0
],
' '
,
&
title1_data
);
// load title2 data
std
::
vector
<
int64_t
>
title2_data
;
split_to_int64
(
data
[
1
],
' '
,
&
title2_data
);
// load title3 data
std
::
vector
<
int64_t
>
title3_data
;
split_to_int64
(
data
[
2
],
' '
,
&
title3_data
);
// load l1 data
std
::
vector
<
int64_t
>
l1_data
;
split_to_int64
(
data
[
3
],
' '
,
&
l1_data
);
title1_all
.
push_back
(
std
::
move
(
title1_data
));
title2_all
.
push_back
(
std
::
move
(
title2_data
));
title3_all
.
push_back
(
std
::
move
(
title3_data
));
l1_all
.
push_back
(
std
::
move
(
l1_data
));
}
num_samples
=
num_lines
;
}
};
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
int
batch_size
)
{
PaddleTensor
title1_tensor
,
title2_tensor
,
title3_tensor
,
l1_tensor
;
title1_tensor
.
name
=
"title1"
;
title2_tensor
.
name
=
"title2"
;
title3_tensor
.
name
=
"title3"
;
l1_tensor
.
name
=
"l1"
;
auto
one_batch
=
data
->
NextBatch
();
int
title1_size
=
one_batch
.
title1_lod
[
one_batch
.
title1_lod
.
size
()
-
1
];
title1_tensor
.
shape
.
assign
({
title1_size
,
1
});
title1_tensor
.
lod
.
assign
({
one_batch
.
title1_lod
});
int
title2_size
=
one_batch
.
title2_lod
[
one_batch
.
title2_lod
.
size
()
-
1
];
title2_tensor
.
shape
.
assign
({
title2_size
,
1
});
title2_tensor
.
lod
.
assign
({
one_batch
.
title2_lod
});
int
title3_size
=
one_batch
.
title3_lod
[
one_batch
.
title3_lod
.
size
()
-
1
];
title3_tensor
.
shape
.
assign
({
title3_size
,
1
});
title3_tensor
.
lod
.
assign
({
one_batch
.
title3_lod
});
int
l1_size
=
one_batch
.
l1_lod
[
one_batch
.
l1_lod
.
size
()
-
1
];
l1_tensor
.
shape
.
assign
({
l1_size
,
1
});
l1_tensor
.
lod
.
assign
({
one_batch
.
l1_lod
});
// assign data
TensorAssignData
<
int64_t
>
(
&
title1_tensor
,
one_batch
.
title1
);
TensorAssignData
<
int64_t
>
(
&
title2_tensor
,
one_batch
.
title2
);
TensorAssignData
<
int64_t
>
(
&
title3_tensor
,
one_batch
.
title3
);
TensorAssignData
<
int64_t
>
(
&
l1_tensor
,
one_batch
.
l1
);
// Set inputs.
input_slots
->
assign
({
title1_tensor
,
title2_tensor
,
title3_tensor
,
l1_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
tensor
.
dtype
=
PaddleDType
::
INT64
;
}
}
// Fills `cfg` for a CPU run of the model found at FLAGS_infer_model, with
// inputs addressed by name and IR optimization switched on.
void SetConfig(AnalysisConfig *cfg) {
  // Analysis-specific switches.
  cfg->enable_ir_optim = true;
  cfg->specify_input_name = true;
  // Device selection: CPU, device id 0.
  cfg->device = 0;
  cfg->use_gpu = false;
  // Model location.
  cfg->model_dir = FLAGS_infer_model;
}
// Loads FLAGS_infer_data and appends one vector of input tensors per batch
// to `inputs`. With FLAGS_test_all_data every full batch of the file is
// emitted; otherwise only a single batch is.
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);

  // Number of batches to emit.
  int epoch = 1;
  if (FLAGS_test_all_data) {
    epoch = data.num_samples / FLAGS_batch_size;
  }
  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;

  // Reused across iterations; PrepareInputs overwrites its contents.
  std::vector<PaddleTensor> input_slots;
  int bid = 0;
  while (bid < epoch) {
    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
    inputs->emplace_back(input_slots);
    ++bid;
  }
}
// Easy for profiling independently.
TEST(Analyzer_seq_conv1, profile) {
  // Gather the inputs first, then configure and run the predictor.
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<PaddleTensor> outputs;
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);

  // The output is only inspected for a single-threaded, single-batch run.
  if (FLAGS_num_threads != 1 || FLAGS_test_all_data) {
    return;
  }

  // the first inference result
  PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
  size_t size = GetSize(outputs[0]);
  PADDLE_ENFORCE_GT(size, 0);
  float *result = static_cast<float *>(outputs[0].data.data());
  // output is probability, which is in (0, 1).
  size_t i = 0;
  while (i < size) {
    EXPECT_GT(result[i], 0);
    EXPECT_LT(result[i], 1);
    ++i;
  }
}
// Check the fuse status
TEST(Analyzer_seq_conv1, fuse_statis) {
  AnalysisConfig analysis_cfg;
  SetConfig(&analysis_cfg);

  // Out-parameter handed to GetFuseStatis; neither it nor the returned map
  // is inspected, so this test only verifies that the call completes.
  int op_count;
  auto statis = GetFuseStatis(analysis_cfg, &op_count);
}
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_seq_conv1, compare) {
  // Build the inputs once; they are handed to CompareNativeAndAnalysis
  // together with the analysis config.
  std::vector<std::vector<PaddleTensor>> all_batches;
  SetInput(&all_batches);

  AnalysisConfig analysis_cfg;
  SetConfig(&analysis_cfg);

  CompareNativeAndAnalysis(analysis_cfg, all_batches);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
d94920ce
...
@@ -47,11 +47,8 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
...
@@ -47,11 +47,8 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
out
=
outputs
[
i
];
auto
&
ref_out
=
ref_outputs
[
i
];
auto
&
ref_out
=
ref_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
size_t
size
=
VecReduceToInt
(
out
.
shape
);
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
ref_size
=
VecReduceToInt
(
ref_out
.
shape
);
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_GT
(
size
,
0
);
EXPECT_GT
(
size
,
0
);
EXPECT_EQ
(
size
,
ref_size
);
EXPECT_EQ
(
size
,
ref_size
);
EXPECT_EQ
(
out
.
dtype
,
ref_out
.
dtype
);
EXPECT_EQ
(
out
.
dtype
,
ref_out
.
dtype
);
...
@@ -87,10 +84,7 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
...
@@ -87,10 +84,7 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
}
}
}
}
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
VecReduceToInt
(
out
.
shape
);
}
return
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
}
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
AnalysisConfig
config
,
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
AnalysisConfig
config
,
int
*
num_ops
)
{
int
*
num_ops
)
{
...
...
paddle/fluid/operators/activation_op.cc
浏览文件 @
d94920ce
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include <string>
#include "paddle/fluid/operators/mkldnn_activation_op.h"
#include "paddle/fluid/operators/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -105,105 +106,105 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
...
@@ -105,105 +106,105 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
}
}
};
};
__attribute__
((
unused
))
constexpr
char
SigmoidDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SigmoidDoc
[]
=
R"DOC(
Sigmoid Activation Operator
Sigmoid Activation Operator
$$out = \frac{1}{1 + e^{-x}}$$
$$out = \frac{1}{1 + e^{-x}}$$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
LogSigmoidDoc
[]
=
R"DOC(
UNUSED
constexpr
char
LogSigmoidDoc
[]
=
R"DOC(
Logsigmoid Activation Operator
Logsigmoid Activation Operator
$$out = \\log \\frac{1}{1 + e^{-x}}$$
$$out = \\log \\frac{1}{1 + e^{-x}}$$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
ExpDoc
[]
=
R"DOC(
UNUSED
constexpr
char
ExpDoc
[]
=
R"DOC(
Exp Activation Operator.
Exp Activation Operator.
$out = e^x$
$out = e^x$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
ReluDoc
[]
=
R"DOC(
UNUSED
constexpr
char
ReluDoc
[]
=
R"DOC(
Relu Activation Operator.
Relu Activation Operator.
$out = \max(x, 0)$
$out = \max(x, 0)$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
TanhDoc
[]
=
R"DOC(
UNUSED
constexpr
char
TanhDoc
[]
=
R"DOC(
Tanh Activation Operator.
Tanh Activation Operator.
$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
TanhShrinkDoc
[]
=
R"DOC(
UNUSED
constexpr
char
TanhShrinkDoc
[]
=
R"DOC(
TanhShrink Activation Operator.
TanhShrink Activation Operator.
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SqrtDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SqrtDoc
[]
=
R"DOC(
Sqrt Activation Operator.
Sqrt Activation Operator.
$out = \sqrt{x}$
$out = \sqrt{x}$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
AbsDoc
[]
=
R"DOC(
UNUSED
constexpr
char
AbsDoc
[]
=
R"DOC(
Abs Activation Operator.
Abs Activation Operator.
$out = |x|$
$out = |x|$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
CeilDoc
[]
=
R"DOC(
UNUSED
constexpr
char
CeilDoc
[]
=
R"DOC(
Ceil Activation Operator.
Ceil Activation Operator.
$out = ceil(x)$
$out = ceil(x)$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
FloorDoc
[]
=
R"DOC(
UNUSED
constexpr
char
FloorDoc
[]
=
R"DOC(
Floor Activation Operator.
Floor Activation Operator.
$out = floor(x)$
$out = floor(x)$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
CosDoc
[]
=
R"DOC(
UNUSED
constexpr
char
CosDoc
[]
=
R"DOC(
Cosine Activation Operator.
Cosine Activation Operator.
$out = cos(x)$
$out = cos(x)$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SinDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SinDoc
[]
=
R"DOC(
Sine Activation Operator.
Sine Activation Operator.
$out = sin(x)$
$out = sin(x)$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
RoundDoc
[]
=
R"DOC(
UNUSED
constexpr
char
RoundDoc
[]
=
R"DOC(
Round Activation Operator.
Round Activation Operator.
$out = [x]$
$out = [x]$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
ReciprocalDoc
[]
=
R"DOC(
UNUSED
constexpr
char
ReciprocalDoc
[]
=
R"DOC(
Reciprocal Activation Operator.
Reciprocal Activation Operator.
$$out = \\frac{1}{x}$$
$$out = \\frac{1}{x}$$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
LogDoc
[]
=
R"DOC(
UNUSED
constexpr
char
LogDoc
[]
=
R"DOC(
Log Activation Operator.
Log Activation Operator.
$out = \ln(x)$
$out = \ln(x)$
...
@@ -212,21 +213,21 @@ Natural logarithm of x.
...
@@ -212,21 +213,21 @@ Natural logarithm of x.
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SquareDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SquareDoc
[]
=
R"DOC(
Square Activation Operator.
Square Activation Operator.
$out = x^2$
$out = x^2$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SoftplusDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SoftplusDoc
[]
=
R"DOC(
Softplus Activation Operator.
Softplus Activation Operator.
$out = \ln(1 + e^{x})$
$out = \ln(1 + e^{x})$
)DOC"
;
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SoftsignDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SoftsignDoc
[]
=
R"DOC(
Softsign Activation Operator.
Softsign Activation Operator.
$$out = \frac{x}{1 + |x|}$$
$$out = \frac{x}{1 + |x|}$$
...
...
paddle/fluid/operators/pool_mkldnn_op.cc
浏览文件 @
d94920ce
...
@@ -46,6 +46,25 @@ static std::string gethash(const memory::dims& input_dims,
...
@@ -46,6 +46,25 @@ static std::string gethash(const memory::dims& input_dims,
dims2str
(
paddings
)
+
pooling_type
+
suffix
;
dims2str
(
paddings
)
+
pooling_type
+
suffix
;
}
}
// Returns (input_size - kernel_size + 2 * padding) / stride + 1.
//
// NOTE(review): despite the "Ceiled" in the name, the division is plain
// integer division, which truncates; the result is compared against the
// real output size by the caller.
static inline int ComputeCeiledOutput(int input_size, int kernel_size,
                                      int padding, int stride) {
  const int span = input_size - kernel_size + 2 * padding;
  const int full_strides = span / stride;
  return full_strides + 1;
}
// For every entry of `right_bot_padding`, recomputes the output size with
// ComputeCeiledOutput and, when it differs from the corresponding entry of
// `dst_tz`, enlarges that padding entry by one stride.
//
// Entry i of the padding/kernel/stride vectors is matched with entry i + 2
// of `src_tz` / `dst_tz` (presumably the two leading dimensions are batch
// and channel -- confirm against the caller).
static inline void CorrectOutputSize(
    const std::vector<int>& src_tz, const std::vector<int>& dst_tz,
    const std::vector<int>& kernel_size, const std::vector<int>& paddings,
    const std::vector<int>& strides,
    std::vector<int>& right_bot_padding) {  // NOLINT
  const size_t num_axes = right_bot_padding.size();
  for (size_t axis = 0; axis != num_axes; ++axis) {
    const size_t tensor_axis = axis + 2;
    const int computed_size =
        ComputeCeiledOutput(src_tz[tensor_axis], kernel_size[axis],
                            paddings[axis], strides[axis]);
    if (computed_size == dst_tz[tensor_axis]) {
      continue;  // sizes already agree, nothing to correct
    }
    right_bot_padding[axis] += strides[axis];
  }
}
template
<
typename
T
>
template
<
typename
T
>
class
PoolMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
class
PoolMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -103,6 +122,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -103,6 +122,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
pool_p
=
auto
pool_p
=
std
::
static_pointer_cast
<
pooling_forward
>
(
dev_ctx
.
GetBlob
(
key_pool_p
));
std
::
static_pointer_cast
<
pooling_forward
>
(
dev_ctx
.
GetBlob
(
key_pool_p
));
if
(
pool_p
==
nullptr
)
{
if
(
pool_p
==
nullptr
)
{
const
std
::
vector
<
int
>&
padding_left_top
(
paddings
);
std
::
vector
<
int
>
padding_right_bottom
(
paddings
);
bool
ceil_mode
=
ctx
.
Attr
<
bool
>
(
"ceil_mode"
);
if
(
ceil_mode
)
{
CorrectOutputSize
(
src_tz
,
dst_tz
,
ksize
,
paddings
,
strides
,
padding_right_bottom
);
}
auto
src_md
=
platform
::
MKLDNNMemDesc
(
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
input_format
);
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
input_format
);
...
@@ -114,8 +140,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -114,8 +140,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
mkldnn
::
memory
::
format
::
any
);
mkldnn
::
memory
::
format
::
any
);
std
::
shared_ptr
<
mkldnn
::
pooling_forward
::
primitive_desc
>
pool_pd
=
std
::
shared_ptr
<
mkldnn
::
pooling_forward
::
primitive_desc
>
pool_pd
=
CreatePrimitiveDesc
(
src_md
,
dst_md
,
strides
,
paddings
,
ksize
,
CreatePrimitiveDesc
(
src_md
,
dst_md
,
strides
,
padding_left_top
,
pooling_type
,
mkldnn_engine
);
padding_right_bottom
,
ksize
,
pooling_type
,
mkldnn_engine
,
ceil_mode
);
// save pool_pd into global device context to be referred in backward path
// save pool_pd into global device context to be referred in backward path
dev_ctx
.
SetBlob
(
key_pool_pd
,
pool_pd
);
dev_ctx
.
SetBlob
(
key_pool_pd
,
pool_pd
);
...
@@ -171,14 +198,16 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -171,14 +198,16 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
private:
private:
std
::
unique_ptr
<
mkldnn
::
pooling_forward
::
primitive_desc
>
CreatePrimitiveDesc
(
std
::
unique_ptr
<
mkldnn
::
pooling_forward
::
primitive_desc
>
CreatePrimitiveDesc
(
const
mkldnn
::
memory
::
desc
&
src
,
const
mkldnn
::
memory
::
desc
&
dst
,
const
mkldnn
::
memory
::
desc
&
src
,
const
mkldnn
::
memory
::
desc
&
dst
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding_left_top
,
const
std
::
vector
<
int
>&
kernel
,
const
std
::
string
&
pooling_type
,
const
std
::
vector
<
int
>&
padding_right_bot
,
const
std
::
vector
<
int
>&
kernel
,
const
mkldnn
::
engine
&
engine
)
const
{
const
std
::
string
&
pooling_type
,
const
mkldnn
::
engine
&
engine
,
bool
ceil_mode
)
const
{
auto
pool_desc
=
mkldnn
::
pooling_forward
::
desc
(
auto
pool_desc
=
mkldnn
::
pooling_forward
::
desc
(
mkldnn
::
prop_kind
::
forward
,
mkldnn
::
prop_kind
::
forward
,
pooling_type
==
"max"
?
mkldnn
::
algorithm
::
pooling_max
pooling_type
==
"max"
?
mkldnn
::
algorithm
::
pooling_max
:
mkldnn
::
algorithm
::
pooling_avg
,
:
mkldnn
::
algorithm
::
pooling_avg
,
src
,
dst
,
stride
,
kernel
,
padding
,
padding
,
mkldnn
::
padding_kind
::
zero
);
src
,
dst
,
stride
,
kernel
,
padding_left_top
,
padding_right_bot
,
mkldnn
::
padding_kind
::
zero
);
auto
p_pool_pd
=
auto
p_pool_pd
=
new
mkldnn
::
pooling_forward
::
primitive_desc
(
pool_desc
,
engine
);
new
mkldnn
::
pooling_forward
::
primitive_desc
(
pool_desc
,
engine
);
...
...
paddle/fluid/operators/read_op.cc
浏览文件 @
d94920ce
...
@@ -45,10 +45,12 @@ class ReadInferVarType : public framework::VarTypeInference {
...
@@ -45,10 +45,12 @@ class ReadInferVarType : public framework::VarTypeInference {
framework
::
VarDesc
*
reader
=
block
->
FindVarRecursive
(
reader_name
);
framework
::
VarDesc
*
reader
=
block
->
FindVarRecursive
(
reader_name
);
auto
dtypes
=
reader
->
GetDataTypes
();
auto
dtypes
=
reader
->
GetDataTypes
();
PADDLE_ENFORCE_EQ
(
dtypes
.
size
(),
out_names
.
size
());
PADDLE_ENFORCE_EQ
(
dtypes
.
size
(),
out_names
.
size
());
auto
lod_levels
=
reader
->
GetLoDLevels
();
for
(
size_t
i
=
0
;
i
<
dtypes
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
dtypes
.
size
();
++
i
)
{
framework
::
VarDesc
&
out
=
block
->
FindRecursiveOrCreateVar
(
out_names
[
i
]);
framework
::
VarDesc
&
out
=
block
->
FindRecursiveOrCreateVar
(
out_names
[
i
]);
out
.
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
out
.
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
out
.
SetDataType
(
dtypes
[
i
]);
out
.
SetDataType
(
dtypes
[
i
]);
out
.
SetLoDLevel
(
lod_levels
[
i
]);
}
}
}
}
};
};
...
...
paddle/fluid/operators/sequence_slice_op.h
浏览文件 @
d94920ce
...
@@ -75,11 +75,11 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
...
@@ -75,11 +75,11 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
}
}
for
(
size_t
i
=
0
;
i
<
n
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
n
;
++
i
)
{
PADDLE_ENFORCE_L
T
(
0
,
offset_data
[
i
],
PADDLE_ENFORCE_L
E
(
0
,
offset_data
[
i
],
"The offset[%d] must greater than zero."
,
i
);
"The offset[%d] must greater than zero."
,
i
);
PADDLE_ENFORCE_LT
(
0
,
length_data
[
i
],
PADDLE_ENFORCE_LT
(
0
,
length_data
[
i
],
"The length[%d] must greater than zero."
,
i
);
"The length[%d] must greater than zero."
,
i
);
PADDLE_ENFORCE_L
T
(
lod
[
0
][
i
]
+
offset_data
[
i
]
+
length_data
[
i
],
PADDLE_ENFORCE_L
E
(
lod
[
0
][
i
]
+
offset_data
[
i
]
+
length_data
[
i
],
lod
[
0
][
i
+
1
],
"The target tensor's length overflow."
);
lod
[
0
][
i
+
1
],
"The target tensor's length overflow."
);
}
}
...
...
paddle/fluid/operators/sgd_op.cu
浏览文件 @
d94920ce
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#
define EIGEN_USE_GPU
#
include <algorithm>
#include "paddle/fluid/operators/sgd_op.h"
#include "paddle/fluid/operators/sgd_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/cuda_primitives.h"
...
@@ -33,22 +33,21 @@ __global__ void SGDKernel(const T* g, const T* p, const T* learning_rate,
...
@@ -33,22 +33,21 @@ __global__ void SGDKernel(const T* g, const T* p, const T* learning_rate,
}
}
}
}
template
<
typename
T
,
int
block_size
>
template
<
typename
T
>
__global__
void
SparseSGDFunctorKernel
(
const
T
*
selected_rows
,
__global__
void
SparseSGDFunctorKernel
(
const
T
*
selected_rows
,
const
int64_t
*
rows
,
const
int64_t
*
rows
,
const
T
*
learning_rate
,
T
*
tensor_out
,
const
T
*
learning_rate
,
T
*
tensor_out
,
int64_t
row_numel
)
{
int64_t
row_numel
,
int64_t
limit
)
{
const
int
ty
=
blockIdx
.
y
;
for
(
int64_t
i
=
blockIdx
.
x
;
i
<
limit
;
i
+=
gridDim
.
x
)
{
int
tid
=
threadIdx
.
x
;
const
T
*
selected_rows_ptr
=
selected_rows
+
i
*
row_numel
;
T
*
tensor_out_ptr
=
tensor_out
+
rows
[
i
]
*
row_numel
;
selected_rows
+=
ty
*
row_numel
;
for
(
int64_t
index
=
threadIdx
.
x
;
index
<
row_numel
;
index
+=
blockDim
.
x
)
{
tensor_out
+=
rows
[
ty
]
*
row_numel
;
// Since index in rows of SelectedRows can be duplicate, we have to use
// Atomic Operation to avoid concurrent write error.
for
(
int
index
=
tid
;
index
<
row_numel
;
index
+=
block_size
)
{
paddle
::
platform
::
CudaAtomicAdd
(
// Since index in rows of SelectedRows can be duplicate, we have to use
tensor_out_ptr
+
index
,
// Atomic Operation to avoid concurrent write error.
-
1.0
*
learning_rate
[
0
]
*
selected_rows_ptr
[
index
]);
paddle
::
platform
::
CudaAtomicAdd
(
}
tensor_out
+
index
,
-
1.0
*
learning_rate
[
0
]
*
selected_rows
[
index
]);
}
}
}
}
}
// namespace
}
// namespace
...
@@ -97,13 +96,15 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -97,13 +96,15 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
const
int
block_size
=
256
;
const
int
kThreadsPerBlock
=
256
;
dim3
threads
(
block_size
,
1
);
int
thread_x
=
kThreadsPerBlock
;
dim3
grid
(
1
,
in_rows
.
size
());
int
max_threads
=
ctx
.
cuda_device_context
().
GetMaxPhysicalThreadCount
();
SparseSGDFunctorKernel
<
int
max_blocks
=
std
::
max
(
max_threads
/
kThreadsPerBlock
,
1
);
T
,
256
><<<
grid
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
SparseSGDFunctorKernel
<<<
max_blocks
,
thread_x
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
in_data
,
in_rows
.
CUDAData
(
ctx
.
GetPlace
()),
learning_rate
->
data
<
T
>
(),
in_data
,
in_rows
.
CUDAData
(
ctx
.
GetPlace
()),
learning_rate
->
data
<
T
>
(),
out_data
,
in_row_numel
);
out_data
,
in_row_numel
,
in_rows
.
size
()
);
}
else
{
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
...
...
paddle/fluid/operators/shrink_rnn_memory_op.cc
浏览文件 @
d94920ce
...
@@ -52,16 +52,26 @@ class ShrinkRNNMemoryOp : public ArrayOp {
...
@@ -52,16 +52,26 @@ class ShrinkRNNMemoryOp : public ArrayOp {
size_t
height
=
dst_num_rows
;
size_t
height
=
dst_num_rows
;
// do shrink for the top level LoD
// do shrink for the top level LoD
if
(
x_tensor
.
lod
().
size
()
>
0
&&
if
(
x_tensor
.
lod
().
size
()
>
0
&&
x_tensor
.
lod
()[
0
].
size
()
>
static_cast
<
size_t
>
(
dst_num_rows
))
{
x_tensor
.
lod
()[
0
].
size
()
>
static_cast
<
size_t
>
(
dst_num_rows
))
{
auto
lod_offset
=
framework
::
GetSubLoDAndAbsoluteOffset
(
x_tensor
.
lod
(),
0
,
if
(
x_tensor
.
lod
().
size
()
>
1
)
{
// MultiLevel LoD
dst_num_rows
,
0
);
auto
lod_offset
=
framework
::
GetSubLoDAndAbsoluteOffset
(
height
=
lod_offset
.
second
.
second
;
x_tensor
.
lod
(),
0
,
dst_num_rows
,
0
);
auto
out_lod
=
out_tensor
.
mutable_lod
();
height
=
lod_offset
.
second
.
second
;
framework
::
AppendLoD
(
out_lod
,
lod_offset
.
first
);
auto
out_lod
=
out_tensor
.
mutable_lod
();
framework
::
AppendLoD
(
out_lod
,
lod_offset
.
first
);
}
else
{
// Shrink LoD
auto
lod_item
=
x_tensor
.
lod
()[
0
];
lod_item
.
resize
(
dst_num_rows
+
1
);
out_tensor
.
set_lod
({
lod_item
});
const
auto
&
const_lod_item
=
lod_item
;
height
=
const_lod_item
.
back
();
}
}
}
if
(
dst_num_rows
!=
0
)
{
if
(
height
!=
0
)
{
out_tensor
.
mutable_data
(
place
,
x_tensor
.
type
());
out_tensor
.
mutable_data
(
place
,
x_tensor
.
type
());
auto
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
);
auto
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
);
framework
::
TensorCopy
(
x_tensor
.
Slice
(
0
,
height
),
place
,
*
dev_ctx
,
framework
::
TensorCopy
(
x_tensor
.
Slice
(
0
,
height
),
place
,
*
dev_ctx
,
...
@@ -134,8 +144,11 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
...
@@ -134,8 +144,11 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
}
else
{
}
else
{
auto
&
dout_tensor
=
dout_var
->
Get
<
framework
::
LoDTensor
>
();
auto
&
dout_tensor
=
dout_var
->
Get
<
framework
::
LoDTensor
>
();
auto
height
=
dout_tensor
.
dims
()[
0
];
auto
height
=
dout_tensor
.
dims
()[
0
];
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
if
(
height
!=
0
)
{
framework
::
TensorCopy
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
framework
::
TensorCopy
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
}
if
(
dx_tensor
.
dims
()[
0
]
>
height
)
{
if
(
dx_tensor
.
dims
()[
0
]
>
height
)
{
auto
rest_tensor
=
dx_tensor
.
Slice
(
auto
rest_tensor
=
dx_tensor
.
Slice
(
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
dx_tensor
.
dims
()[
0
]));
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
dx_tensor
.
dims
()[
0
]));
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
d94920ce
...
@@ -36,7 +36,7 @@ namespace operators {
...
@@ -36,7 +36,7 @@ namespace operators {
using
FluidDT
=
framework
::
proto
::
VarType_Type
;
using
FluidDT
=
framework
::
proto
::
VarType_Type
;
using
TRT_DT
=
nvinfer1
::
DataType
;
using
TRT_DT
=
nvinfer1
::
DataType
;
namespace
{
namespace
{
// NOLINT
TRT_DT
FluidDataType2TRT
(
FluidDT
type
)
{
TRT_DT
FluidDataType2TRT
(
FluidDT
type
)
{
switch
(
type
)
{
switch
(
type
)
{
...
...
paddle/fluid/operators/top_k_op.cc
浏览文件 @
d94920ce
...
@@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel {
...
@@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel {
"Output(Indices) of TopkOp should not be null."
);
"Output(Indices) of TopkOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
input_dims
.
size
(),
2
,
"Rank of TopK op's input must be 2."
);
const
int
k
=
static_cast
<
int
>
(
ctx
->
Attrs
().
Get
<
int
>
(
"k"
));
const
int
k
=
static_cast
<
int
>
(
ctx
->
Attrs
().
Get
<
int
>
(
"k"
));
PADDLE_ENFORCE_GE
(
k
,
1
,
"k must >= 1"
);
PADDLE_ENFORCE_GE
(
k
,
1
,
"k must >= 1"
);
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
d94920ce
...
@@ -201,6 +201,7 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
...
@@ -201,6 +201,7 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
compute_capability
=
GetCUDAComputeCapability
(
place_
.
device
);
compute_capability
=
GetCUDAComputeCapability
(
place_
.
device
);
multi_process
=
GetCUDAMultiProcessors
(
place_
.
device
);
multi_process
=
GetCUDAMultiProcessors
(
place_
.
device
);
max_threads_per_mp
=
GetCUDAMaxThreadsPerMultiProcessor
(
place_
.
device
);
max_threads_per_mp
=
GetCUDAMaxThreadsPerMultiProcessor
(
place_
.
device
);
grid_max_dims_
=
GpuMaxGridDim
(
place_
.
device
);
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream_
));
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream_
));
eigen_stream_
.
reset
(
new
EigenCudaStreamDevice
());
eigen_stream_
.
reset
(
new
EigenCudaStreamDevice
());
eigen_stream_
->
Reinitialize
(
&
stream_
,
place
);
eigen_stream_
->
Reinitialize
(
&
stream_
,
place
);
...
@@ -239,6 +240,10 @@ int CUDADeviceContext::GetMaxPhysicalThreadCount() const {
...
@@ -239,6 +240,10 @@ int CUDADeviceContext::GetMaxPhysicalThreadCount() const {
return
multi_process
*
max_threads_per_mp
;
return
multi_process
*
max_threads_per_mp
;
}
}
std
::
tuple
<
int
,
int
,
int
>
CUDADeviceContext
::
GetMaxGridDims
()
const
{
return
grid_max_dims_
;
}
Eigen
::
GpuDevice
*
CUDADeviceContext
::
eigen_device
()
const
{
Eigen
::
GpuDevice
*
CUDADeviceContext
::
eigen_device
()
const
{
return
eigen_device_
.
get
();
return
eigen_device_
.
get
();
}
}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
d94920ce
...
@@ -13,6 +13,7 @@ limitations under the License. */
...
@@ -13,6 +13,7 @@ limitations under the License. */
#include <memory>
#include <memory>
#include <mutex> // NOLINT
#include <mutex> // NOLINT
#include <string>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_map>
#include <vector>
#include <vector>
...
@@ -91,6 +92,8 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -91,6 +92,8 @@ class CUDADeviceContext : public DeviceContext {
/*! \brief Return the max physical thread count in the device context */
/*! \brief Return the max physical thread count in the device context */
int
GetMaxPhysicalThreadCount
()
const
;
int
GetMaxPhysicalThreadCount
()
const
;
std
::
tuple
<
int
,
int
,
int
>
GetMaxGridDims
()
const
;
/*! \brief Return eigen device in the device context. */
/*! \brief Return eigen device in the device context. */
Eigen
::
GpuDevice
*
eigen_device
()
const
;
Eigen
::
GpuDevice
*
eigen_device
()
const
;
...
@@ -135,6 +138,8 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -135,6 +138,8 @@ class CUDADeviceContext : public DeviceContext {
cudaStream_t
stream_
;
cudaStream_t
stream_
;
cublasHandle_t
cublas_handle_
;
cublasHandle_t
cublas_handle_
;
std
::
tuple
<
int
,
int
,
int
>
grid_max_dims_
;
int
compute_capability
;
int
compute_capability
;
int
multi_process
;
int
multi_process
;
int
max_threads_per_mp
;
int
max_threads_per_mp
;
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
d94920ce
...
@@ -21,6 +21,7 @@ limitations under the License. */
...
@@ -21,6 +21,7 @@ limitations under the License. */
#if defined(_WIN32)
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#endif
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -47,7 +48,7 @@ limitations under the License. */
...
@@ -47,7 +48,7 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h"
#include "paddle/fluid/platform/dynload/curand.h"
#if !defined(__APPLE__)
and
!defined(_WIN32)
#if !defined(__APPLE__)
&&
!defined(_WIN32)
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#endif // __APPLE__
#endif // __APPLE__
#endif // PADDLE_WITH_CUDA
#endif // PADDLE_WITH_CUDA
...
@@ -216,7 +217,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
...
@@ -216,7 +217,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
#endif
#endif
}
}
#if !defined(__APPLE__)
and
!defined(_WIN32)
#if !defined(__APPLE__)
&&
!defined(_WIN32)
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
ncclResult_t
stat
,
const
Args
&
...
args
)
{
ncclResult_t
stat
,
const
Args
&
...
args
)
{
...
@@ -260,14 +261,8 @@ inline void throw_on_error(T e) {
...
@@ -260,14 +261,8 @@ inline void throw_on_error(T e) {
} \
} \
} while (false)
} while (false)
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
__LINE__); \
} while (false)
#else
#else
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__)
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__)
;
#endif // REPLACE_ENFORCE_GLOG
#endif // REPLACE_ENFORCE_GLOG
#else // !_WIN32
#else // !_WIN32
...
@@ -281,6 +276,12 @@ inline void throw_on_error(T e) {
...
@@ -281,6 +276,12 @@ inline void throw_on_error(T e) {
#define PADDLE_ENFORCE(x, ...) x
#define PADDLE_ENFORCE(x, ...) x
#endif // !_WIN32
#endif // !_WIN32
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
__LINE__); \
} while (false)
/*
/*
* Some enforce helpers here, usage:
* Some enforce helpers here, usage:
* int a = 1;
* int a = 1;
...
@@ -294,7 +295,7 @@ inline void throw_on_error(T e) {
...
@@ -294,7 +295,7 @@ inline void throw_on_error(T e) {
* extra messages is also supported, for example:
* extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/
*/
#if !defined(_WIN32)
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
...
@@ -307,6 +308,7 @@ inline void throw_on_error(T e) {
...
@@ -307,6 +308,7 @@ inline void throw_on_error(T e) {
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
if (UNLIKELY(nullptr == (__VAL))) { \
...
@@ -326,6 +328,27 @@ inline void throw_on_error(T e) {
...
@@ -326,6 +328,27 @@ inline void throw_on_error(T e) {
paddle::string::Sprintf("" __VA_ARGS__)); \
paddle::string::Sprintf("" __VA_ARGS__)); \
} \
} \
} while (0)
} while (0)
#else
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0) == (__VAL1))
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0) != (__VAL1))
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0) > (__VAL1))
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0) >= (__VAL1))
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0) < (__VAL1))
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0) <= (__VAL1))
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (!((__VAL0)__CMP(__VAL1))) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed."); \
} \
} while (0)
#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \
do { \
if (nullptr == (__VAL1)) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed"); \
} \
} while (0)
#endif // !_WIN32
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
paddle/fluid/platform/for_range.h
浏览文件 @
d94920ce
...
@@ -48,35 +48,54 @@ __global__ static void ForRangeElemwiseOpGridIsOne(Function func) {
...
@@ -48,35 +48,54 @@ __global__ static void ForRangeElemwiseOpGridIsOne(Function func) {
}
}
template
<
typename
Function
>
template
<
typename
Function
>
__global__
static
void
ForRangeElemwiseOp
(
Function
func
,
in
t
limit
)
{
__global__
static
void
ForRangeElemwiseOp
(
Function
func
,
size_
t
limit
)
{
size_t
idx
=
static_cast
<
size_t
>
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
size_t
idx
=
static_cast
<
size_t
>
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
if
(
idx
<
limit
)
{
if
(
idx
<
limit
)
{
func
(
idx
);
func
(
idx
);
}
}
}
}
template
<
typename
Function
>
__global__
static
void
ForRangeElemwiseOpGridLarge
(
Function
func
,
size_t
limit
,
int
grid_dim
)
{
size_t
idx
=
static_cast
<
size_t
>
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
idx
<
limit
)
{
func
(
idx
);
idx
+=
grid_dim
;
}
}
template
<
>
template
<
>
struct
ForRange
<
CUDADeviceContext
>
{
struct
ForRange
<
CUDADeviceContext
>
{
ForRange
(
const
CUDADeviceContext
&
dev_ctx
,
size_t
limit
)
ForRange
(
const
CUDADeviceContext
&
dev_ctx
,
size_t
limit
)
:
dev_ctx_
(
dev_ctx
),
limit_
(
static_cast
<
int
>
(
limit
)
)
{}
:
dev_ctx_
(
dev_ctx
),
limit_
(
limit
)
{}
template
<
typename
Function
>
template
<
typename
Function
>
inline
void
operator
()(
Function
func
)
const
{
inline
void
operator
()(
Function
func
)
const
{
constexpr
int
num_threads
=
1024
;
constexpr
int
num_threads
=
1024
;
int
block_size
=
limit_
<=
num_threads
?
limit_
:
num_threads
;
int
block_size
=
limit_
<=
num_threads
?
limit_
:
num_threads
;
int
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
size_t
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
if
(
grid_size
==
1
)
{
int
max_grid_dim
=
std
::
get
<
0
>
(
dev_ctx_
.
GetMaxGridDims
());
ForRangeElemwiseOpGridIsOne
<<<
1
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
);
if
(
grid_size
<
max_grid_dim
)
{
int
grid_size_int
=
static_cast
<
int
>
(
grid_size
);
if
(
grid_size
==
1
)
{
ForRangeElemwiseOpGridIsOne
<<<
1
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
);
}
else
{
ForRangeElemwiseOp
<<<
grid_size_int
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
);
}
}
else
{
}
else
{
ForRangeElemwiseOp
<<<
grid_size
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
ForRangeElemwiseOpGridLarge
<<<
max_grid_dim
,
block_size
,
0
,
func
,
limit_
);
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
,
max_grid_dim
);
}
}
}
}
const
CUDADeviceContext
&
dev_ctx_
;
const
CUDADeviceContext
&
dev_ctx_
;
in
t
limit_
;
size_
t
limit_
;
};
};
#endif
#endif
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
d94920ce
...
@@ -152,5 +152,22 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
...
@@ -152,5 +152,22 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
PADDLE_ENFORCE
(
cudaMemsetAsync
(
dst
,
value
,
count
,
stream
),
PADDLE_ENFORCE
(
cudaMemsetAsync
(
dst
,
value
,
count
,
stream
),
"cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync"
);
"cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync"
);
}
}
std
::
tuple
<
int
,
int
,
int
>
GpuMaxGridDim
(
int
id
)
{
std
::
tuple
<
int
,
int
,
int
>
result
;
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
0
>
(
result
),
cudaDevAttrMaxBlockDimX
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
1
>
(
result
),
cudaDevAttrMaxBlockDimY
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
2
>
(
result
),
cudaDevAttrMaxBlockDimZ
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
return
result
;
}
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
paddle/fluid/platform/gpu_info.h
浏览文件 @
d94920ce
...
@@ -19,6 +19,7 @@ limitations under the License. */
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <cuda_runtime.h>
#include <cuda_runtime.h>
#include <stddef.h>
#include <stddef.h>
#include <string>
#include <string>
#include <tuple>
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
...
@@ -72,6 +73,8 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
...
@@ -72,6 +73,8 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
//! Set memory dst with value count size asynchronously
//! Set memory dst with value count size asynchronously
void
GpuMemsetAsync
(
void
*
dst
,
int
value
,
size_t
count
,
cudaStream_t
stream
);
void
GpuMemsetAsync
(
void
*
dst
,
int
value
,
size_t
count
,
cudaStream_t
stream
);
std
::
tuple
<
int
,
int
,
int
>
GpuMaxGridDim
(
int
id
);
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
...
...
paddle/fluid/pybind/const_value.cc
浏览文件 @
d94920ce
...
@@ -48,6 +48,9 @@ void BindConstValue(pybind11::module* m) {
...
@@ -48,6 +48,9 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker
.
def
(
op_proto_and_checker_maker
.
def
(
"kOpNameScopeAttrName"
,
"kOpNameScopeAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
op_proto_and_checker_maker
.
def
(
"kOpCreationCallstackAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
);
}
}
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/train/CMakeLists.txt
0 → 100644
浏览文件 @
d94920ce
function
(
train_test TARGET_NAME
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs ARGS
)
cmake_parse_arguments
(
train_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
PYTHON_TESTS_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/tests
)
set
(
arg_list
""
)
if
(
train_test_ARGS
)
foreach
(
arg
${
train_test_ARGS
}
)
list
(
APPEND arg_list
"_
${
arg
}
"
)
endforeach
()
else
()
list
(
APPEND arg_list
"_"
)
endif
()
foreach
(
arg
${
arg_list
}
)
string
(
REGEX REPLACE
"^_$"
""
arg
"
${
arg
}
"
)
cc_test
(
test_train_
${
TARGET_NAME
}${
arg
}
SRCS test_train_
${
TARGET_NAME
}
.cc
DEPS paddle_fluid_origin
ARGS --dirname=
${
PYTHON_TESTS_DIR
}
/book/
${
TARGET_NAME
}${
arg
}
.train.model/
)
set_tests_properties
(
test_train_
${
TARGET_NAME
}${
arg
}
PROPERTIES DEPENDS test_
${
TARGET_NAME
}
)
endforeach
()
endfunction
(
train_test
)
if
(
WITH_TESTING
)
train_test
(
recognize_digits ARGS mlp conv
)
endif
()
paddle/fluid/train/test_train_recognize_digits.cc
0 → 100644
浏览文件 @
d94920ce
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <time.h>
#include <fstream>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the train model."
);
namespace
paddle
{
void
Train
()
{
CHECK
(
!
FLAGS_dirname
.
empty
());
framework
::
InitDevices
(
false
);
const
auto
cpu_place
=
platform
::
CPUPlace
();
framework
::
Executor
executor
(
cpu_place
);
framework
::
Scope
scope
;
auto
train_program
=
inference
::
Load
(
&
executor
,
&
scope
,
FLAGS_dirname
+
"__model_combined__.main_program"
,
FLAGS_dirname
+
"__params_combined__"
);
std
::
string
loss_name
=
""
;
for
(
auto
op_desc
:
train_program
->
Block
(
0
).
AllOps
())
{
if
(
op_desc
->
Type
()
==
"mean"
)
{
loss_name
=
op_desc
->
Output
(
"Out"
)[
0
];
break
;
}
}
PADDLE_ENFORCE_NE
(
loss_name
,
""
,
"loss not found"
);
// prepare data
auto
x_var
=
scope
.
Var
(
"img"
);
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
({
64
,
1
,
28
,
28
});
auto
x_data
=
x_tensor
->
mutable_data
<
float
>
(
cpu_place
);
for
(
int
i
=
0
;
i
<
64
*
28
*
28
;
++
i
)
{
x_data
[
i
]
=
1.0
;
}
auto
y_var
=
scope
.
Var
(
"label"
);
auto
y_tensor
=
y_var
->
GetMutable
<
framework
::
LoDTensor
>
();
y_tensor
->
Resize
({
64
,
1
});
auto
y_data
=
y_tensor
->
mutable_data
<
int64_t
>
(
cpu_place
);
for
(
int
i
=
0
;
i
<
64
*
1
;
++
i
)
{
y_data
[
i
]
=
static_cast
<
int64_t
>
(
1
);
}
auto
loss_var
=
scope
.
Var
(
loss_name
);
float
first_loss
=
0.0
;
float
last_loss
=
0.0
;
for
(
int
i
=
0
;
i
<
100
;
++
i
)
{
executor
.
Run
(
*
train_program
.
get
(),
&
scope
,
0
,
false
,
true
);
if
(
i
==
0
)
{
first_loss
=
loss_var
->
Get
<
framework
::
LoDTensor
>
().
data
<
float
>
()[
0
];
}
else
if
(
i
==
99
)
{
last_loss
=
loss_var
->
Get
<
framework
::
LoDTensor
>
().
data
<
float
>
()[
0
];
}
}
EXPECT_LT
(
last_loss
,
first_loss
);
}
TEST
(
train
,
recognize_digits
)
{
Train
();
}
}
// namespace paddle
paddle/scripts/paddle_build.sh
浏览文件 @
d94920ce
...
@@ -147,6 +147,7 @@ function cmake_gen() {
...
@@ -147,6 +147,7 @@ function cmake_gen() {
-DINFERENCE_DEMO_INSTALL_DIR=
${
INFERENCE_DEMO_INSTALL_DIR
}
-DINFERENCE_DEMO_INSTALL_DIR=
${
INFERENCE_DEMO_INSTALL_DIR
}
-DWITH_ANAKIN=
${
WITH_ANAKIN
:-
OFF
}
-DWITH_ANAKIN=
${
WITH_ANAKIN
:-
OFF
}
-DPY_VERSION=
${
PY_VERSION
:-
2
.7
}
-DPY_VERSION=
${
PY_VERSION
:-
2
.7
}
-DCMAKE_INSTALL_PREFIX=
${
INSTALL_PREFIX
:-
/paddle/build
}
========================================
========================================
EOF
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
# Disable UNITTEST_USE_VIRTUALENV in docker because
...
@@ -178,7 +179,8 @@ EOF
...
@@ -178,7 +179,8 @@ EOF
-DWITH_INFERENCE_API_TEST
=
${
WITH_INFERENCE_API_TEST
:-
ON
}
\
-DWITH_INFERENCE_API_TEST
=
${
WITH_INFERENCE_API_TEST
:-
ON
}
\
-DINFERENCE_DEMO_INSTALL_DIR
=
${
INFERENCE_DEMO_INSTALL_DIR
}
\
-DINFERENCE_DEMO_INSTALL_DIR
=
${
INFERENCE_DEMO_INSTALL_DIR
}
\
-DWITH_ANAKIN
=
${
WITH_ANAKIN
:-
OFF
}
\
-DWITH_ANAKIN
=
${
WITH_ANAKIN
:-
OFF
}
\
-DPY_VERSION
=
${
PY_VERSION
:-
2
.7
}
-DPY_VERSION
=
${
PY_VERSION
:-
2
.7
}
\
-DCMAKE_INSTALL_PREFIX
=
${
INSTALL_PREFIX
:-
/paddle/build
}
}
}
...
@@ -361,7 +363,7 @@ EOF
...
@@ -361,7 +363,7 @@ EOF
ctest
--output-on-failure
ctest
--output-on-failure
# make install should also be test when unittest
# make install should also be test when unittest
make
install
-j
`
nproc
`
make
install
-j
`
nproc
`
pip
install
/usr/local
/opt/paddle/share/wheels/
*
.whl
pip
install
${
INSTALL_PREFIX
:-
/paddle/build
}
/opt/paddle/share/wheels/
*
.whl
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
paddle version
paddle version
fi
fi
...
...
python/paddle/dataset/wmt14.py
浏览文件 @
d94920ce
...
@@ -89,7 +89,8 @@ def reader_creator(tar_file, file_name, dict_size):
...
@@ -89,7 +89,8 @@ def reader_creator(tar_file, file_name, dict_size):
]
]
for
name
in
names
:
for
name
in
names
:
for
line
in
f
.
extractfile
(
name
):
for
line
in
f
.
extractfile
(
name
):
line_split
=
line
.
strip
().
split
(
six
.
b
(
'
\t
'
))
line
=
cpt
.
to_text
(
line
)
line_split
=
line
.
strip
().
split
(
'
\t
'
)
if
len
(
line_split
)
!=
2
:
if
len
(
line_split
)
!=
2
:
continue
continue
src_seq
=
line_split
[
0
]
# one source sequence
src_seq
=
line_split
[
0
]
# one source sequence
...
...
python/paddle/dataset/wmt16.py
浏览文件 @
d94920ce
...
@@ -64,7 +64,8 @@ def __build_dict(tar_file, dict_size, save_path, lang):
...
@@ -64,7 +64,8 @@ def __build_dict(tar_file, dict_size, save_path, lang):
word_dict
=
defaultdict
(
int
)
word_dict
=
defaultdict
(
int
)
with
tarfile
.
open
(
tar_file
,
mode
=
"r"
)
as
f
:
with
tarfile
.
open
(
tar_file
,
mode
=
"r"
)
as
f
:
for
line
in
f
.
extractfile
(
"wmt16/train"
):
for
line
in
f
.
extractfile
(
"wmt16/train"
):
line_split
=
line
.
strip
().
split
(
six
.
b
(
"
\t
"
))
line
=
cpt
.
to_text
(
line
)
line_split
=
line
.
strip
().
split
(
"
\t
"
)
if
len
(
line_split
)
!=
2
:
continue
if
len
(
line_split
)
!=
2
:
continue
sen
=
line_split
[
0
]
if
lang
==
"en"
else
line_split
[
1
]
sen
=
line_split
[
0
]
if
lang
==
"en"
else
line_split
[
1
]
for
w
in
sen
.
split
():
for
w
in
sen
.
split
():
...
@@ -123,7 +124,8 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
...
@@ -123,7 +124,8 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
with
tarfile
.
open
(
tar_file
,
mode
=
"r"
)
as
f
:
with
tarfile
.
open
(
tar_file
,
mode
=
"r"
)
as
f
:
for
line
in
f
.
extractfile
(
file_name
):
for
line
in
f
.
extractfile
(
file_name
):
line_split
=
line
.
strip
().
split
(
six
.
b
(
"
\t
"
))
line
=
cpt
.
to_text
(
line
)
line_split
=
line
.
strip
().
split
(
"
\t
"
)
if
len
(
line_split
)
!=
2
:
if
len
(
line_split
)
!=
2
:
continue
continue
src_words
=
line_split
[
src_col
].
split
()
src_words
=
line_split
[
src_col
].
split
()
...
...
python/paddle/fluid/framework.py
浏览文件 @
d94920ce
...
@@ -18,6 +18,7 @@ import collections
...
@@ -18,6 +18,7 @@ import collections
import
contextlib
import
contextlib
import
re
import
re
import
six
import
six
import
traceback
import
numpy
as
np
import
numpy
as
np
...
@@ -34,6 +35,8 @@ except ImportError as e:
...
@@ -34,6 +35,8 @@ except ImportError as e:
except
Exception
as
e
:
except
Exception
as
e
:
raise
e
raise
e
from
.
import
unique_name
from
.
import
unique_name
import
os
PADDLE_ON_MODEL_CE
=
os
.
environ
.
get
(
'PADDLE_ON_MODEL_CE'
,
None
)
is
not
None
__all__
=
[
__all__
=
[
'Program'
,
'Program'
,
...
@@ -489,7 +492,8 @@ class OpProtoHolder(object):
...
@@ -489,7 +492,8 @@ class OpProtoHolder(object):
return
{
return
{
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
()
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpCreationCallstackAttrName
()
}
}
...
@@ -572,6 +576,11 @@ class Operator(object):
...
@@ -572,6 +576,11 @@ class Operator(object):
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
del
op_attrs
[
role_var_name
]
del
op_attrs
[
role_var_name
]
if
not
PADDLE_ON_MODEL_CE
:
callstack_var_name
=
op_maker
.
kOpCreationCallstackAttrName
()
op_attrs
[
callstack_var_name
]
=
list
(
reversed
(
traceback
.
format_stack
()))[
1
:]
if
len
(
self
.
desc
.
type
())
!=
0
:
if
len
(
self
.
desc
.
type
())
!=
0
:
return
return
if
type
is
None
:
if
type
is
None
:
...
...
python/paddle/fluid/io.py
浏览文件 @
d94920ce
...
@@ -600,7 +600,7 @@ def save_inference_model(dirname,
...
@@ -600,7 +600,7 @@ def save_inference_model(dirname,
"""
"""
if
isinstance
(
feeded_var_names
,
six
.
string_types
):
if
isinstance
(
feeded_var_names
,
six
.
string_types
):
feeded_var_names
=
[
feeded_var_names
]
feeded_var_names
=
[
feeded_var_names
]
el
se
:
el
if
export_for_deployment
:
if
len
(
feeded_var_names
)
>
0
:
if
len
(
feeded_var_names
)
>
0
:
# TODO(paddle-dev): polish these code blocks
# TODO(paddle-dev): polish these code blocks
if
not
(
bool
(
feeded_var_names
)
and
all
(
if
not
(
bool
(
feeded_var_names
)
and
all
(
...
@@ -610,61 +610,60 @@ def save_inference_model(dirname,
...
@@ -610,61 +610,60 @@ def save_inference_model(dirname,
if
isinstance
(
target_vars
,
Variable
):
if
isinstance
(
target_vars
,
Variable
):
target_vars
=
[
target_vars
]
target_vars
=
[
target_vars
]
el
se
:
el
if
export_for_deployment
:
if
not
(
bool
(
target_vars
)
and
all
(
if
not
(
bool
(
target_vars
)
and
all
(
isinstance
(
var
,
Variable
)
for
var
in
target_vars
)):
isinstance
(
var
,
Variable
)
for
var
in
target_vars
)):
raise
ValueError
(
"'target_vars' should be a list of Variable."
)
raise
ValueError
(
"'target_vars' should be a list of Variable."
)
if
main_program
is
None
:
if
main_program
is
None
:
main_program
=
default_main_program
()
main_program
=
default_main_program
()
copy_program
=
main_program
.
clone
()
# if there is lookup table, the trainer 0 will notify all pserver to save.
if
main_program
.
_is_distributed
and
main_program
.
_is_chief
and
main_program
.
_distributed_lookup_table
:
lookup_table_filename
=
os
.
path
.
join
(
dirname
,
"__lookup_table__"
)
_save_lookup_tables_by_notify
(
executor
,
lookup_table_filename
,
main_program
.
_distributed_lookup_table
,
main_program
.
_endpoints
)
if
not
os
.
path
.
isdir
(
dirname
):
if
not
os
.
path
.
isdir
(
dirname
):
os
.
makedirs
(
dirname
)
os
.
makedirs
(
dirname
)
if
model_filename
is
not
None
:
model_basename
=
os
.
path
.
basename
(
model_filename
)
else
:
model_basename
=
"__model__"
model_basename
=
os
.
path
.
join
(
dirname
,
model_basename
)
# When export_for_deployment is true, we modify the program online so that
# When export_for_deployment is true, we modify the program online so that
# it can only be loaded for inference directly. If it's false, the whole
# it can only be loaded for inference directly. If it's false, the whole
# original program and related meta are saved so that future usage can be
# original program and related meta are saved so that future usage can be
# more flexible.
# more flexible.
if
export_for_deployment
:
if
export_for_deployment
:
global_block
=
copy_program
.
global_block
()
main_program
=
main_program
.
clone
()
global_block
=
main_program
.
global_block
()
for
i
,
op
in
enumerate
(
global_block
.
ops
):
for
i
,
op
in
enumerate
(
global_block
.
ops
):
op
.
desc
.
set_is_target
(
False
)
op
.
desc
.
set_is_target
(
False
)
if
op
.
type
==
"feed"
or
op
.
type
==
"fetch"
:
if
op
.
type
==
"feed"
or
op
.
type
==
"fetch"
:
global_block
.
_remove_op
(
i
)
global_block
.
_remove_op
(
i
)
copy
_program
.
desc
.
flush
()
main
_program
.
desc
.
flush
()
pruned_program
=
copy
_program
.
_prune
(
targets
=
target_vars
)
main_program
=
main
_program
.
_prune
(
targets
=
target_vars
)
saved_program
=
pruned
_program
.
_inference_optimize
(
prune_read_op
=
True
)
main_program
=
main
_program
.
_inference_optimize
(
prune_read_op
=
True
)
fetch_var_names
=
[
v
.
name
for
v
in
target_vars
]
fetch_var_names
=
[
v
.
name
for
v
in
target_vars
]
prepend_feed_ops
(
saved_program
,
feeded_var_names
)
prepend_feed_ops
(
main_program
,
feeded_var_names
)
append_fetch_ops
(
saved_program
,
fetch_var_names
)
append_fetch_ops
(
main_program
,
fetch_var_names
)
with
open
(
model_basename
,
"wb"
)
as
f
:
f
.
write
(
main_program
.
desc
.
serialize_to_string
())
else
:
else
:
# TODO(panyx0718): Save more information so that it can also be used
# TODO(panyx0718): Save more information so that it can also be used
# for training and more flexible post-processing.
# for training and more flexible post-processing.
saved_program
=
copy_program
with
open
(
model_basename
+
".main_program"
,
"wb"
)
as
f
:
f
.
write
(
main_program
.
desc
.
serialize_to_string
())
if
model_filename
is
not
None
:
model_filename
=
os
.
path
.
basename
(
model_filename
)
else
:
model_filename
=
"__model__"
model_filename
=
os
.
path
.
join
(
dirname
,
model_filename
)
if
params_filename
is
not
None
:
if
params_filename
is
not
None
:
params_filename
=
os
.
path
.
basename
(
params_filename
)
params_filename
=
os
.
path
.
basename
(
params_filename
)
save_persistables
(
executor
,
dirname
,
main_program
,
params_filename
)
with
open
(
model_filename
,
"wb"
)
as
f
:
f
.
write
(
saved_program
.
desc
.
serialize_to_string
())
save_persistables
(
executor
,
dirname
,
saved_program
,
params_filename
)
# if there is lookup table, the trainer 0 will notify all pserver to save.
if
main_program
.
_is_distributed
and
main_program
.
_is_chief
and
main_program
.
_distributed_lookup_table
:
lookup_table_filename
=
os
.
path
.
join
(
dirname
,
"__lookup_table__"
)
_save_lookup_tables_by_notify
(
executor
,
lookup_table_filename
,
main_program
.
_distributed_lookup_table
,
main_program
.
_endpoints
)
def
load_inference_model
(
dirname
,
def
load_inference_model
(
dirname
,
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
d94920ce
...
@@ -311,6 +311,7 @@ def _copy_reader_var_(block, var):
...
@@ -311,6 +311,7 @@ def _copy_reader_var_(block, var):
new_var
=
block
.
create_var
(
name
=
var
.
name
,
type
=
core
.
VarDesc
.
VarType
.
READER
)
new_var
=
block
.
create_var
(
name
=
var
.
name
,
type
=
core
.
VarDesc
.
VarType
.
READER
)
new_var
.
desc
.
set_shapes
(
var
.
desc
.
shapes
())
new_var
.
desc
.
set_shapes
(
var
.
desc
.
shapes
())
new_var
.
desc
.
set_dtypes
(
var
.
desc
.
dtypes
())
new_var
.
desc
.
set_dtypes
(
var
.
desc
.
dtypes
())
new_var
.
desc
.
set_lod_levels
(
var
.
desc
.
lod_levels
())
new_var
.
persistable
=
True
new_var
.
persistable
=
True
return
new_var
return
new_var
...
@@ -632,6 +633,7 @@ def py_reader(capacity,
...
@@ -632,6 +633,7 @@ def py_reader(capacity,
})
})
startup_var
.
desc
.
set_dtypes
(
dtypes
)
startup_var
.
desc
.
set_dtypes
(
dtypes
)
startup_var
.
desc
.
set_lod_levels
(
lod_levels
)
startup_var
.
persistable
=
True
startup_var
.
persistable
=
True
main_prog_var
=
_copy_reader_var_
(
default_main_program
().
current_block
(),
main_prog_var
=
_copy_reader_var_
(
default_main_program
().
current_block
(),
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
d94920ce
...
@@ -6471,12 +6471,14 @@ def _elementwise_op(helper):
...
@@ -6471,12 +6471,14 @@ def _elementwise_op(helper):
assert
y
is
not
None
,
'y cannot be None in {}'
.
format
(
op_type
)
assert
y
is
not
None
,
'y cannot be None in {}'
.
format
(
op_type
)
axis
=
helper
.
kwargs
.
get
(
'axis'
,
-
1
)
axis
=
helper
.
kwargs
.
get
(
'axis'
,
-
1
)
use_mkldnn
=
helper
.
kwargs
.
get
(
'use_mkldnn'
,
False
)
use_mkldnn
=
helper
.
kwargs
.
get
(
'use_mkldnn'
,
False
)
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
out
=
helper
.
kwargs
.
get
(
'out'
,
None
)
if
name
is
None
:
if
out
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
else
:
if
name
is
None
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
helper
.
append_op
(
type
=
op_type
,
type
=
op_type
,
...
@@ -6489,7 +6491,13 @@ def _elementwise_op(helper):
...
@@ -6489,7 +6491,13 @@ def _elementwise_op(helper):
@
templatedoc
()
@
templatedoc
()
def
scale
(
x
,
scale
=
1.0
,
bias
=
0.0
,
bias_after_scale
=
True
,
act
=
None
,
name
=
None
):
def
scale
(
x
,
scale
=
1.0
,
bias
=
0.0
,
bias_after_scale
=
True
,
out
=
None
,
act
=
None
,
name
=
None
):
"""
"""
${comment}
${comment}
...
@@ -6498,6 +6506,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
...
@@ -6498,6 +6506,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
scale(${scale_type}): ${scale_comment}
scale(${scale_type}): ${scale_comment}
bias(${bias_type}): ${bias_comment}
bias(${bias_type}): ${bias_comment}
bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
out(Tensor): Output tensor.
act(basestring|None): Activation applied to the output.
act(basestring|None): Activation applied to the output.
name(basestring|None): Name of the output.
name(basestring|None): Name of the output.
...
@@ -6506,11 +6515,12 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
...
@@ -6506,11 +6515,12 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
"""
"""
helper
=
LayerHelper
(
'scale'
,
**
locals
())
helper
=
LayerHelper
(
'scale'
,
**
locals
())
if
name
is
None
:
if
out
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
if
name
is
None
:
else
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_variable
(
else
:
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'scale'
,
type
=
'scale'
,
...
@@ -6524,31 +6534,73 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
...
@@ -6524,31 +6534,73 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
return
helper
.
append_activation
(
out
)
return
helper
.
append_activation
(
out
)
def
elementwise_add
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_add
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_add'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_add'
,
**
locals
()))
def
elementwise_div
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_div
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_div'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_div'
,
**
locals
()))
def
elementwise_sub
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_sub
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_sub'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_sub'
,
**
locals
()))
def
elementwise_mul
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_mul
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_mul'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_mul'
,
**
locals
()))
def
elementwise_max
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_max
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_max'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_max'
,
**
locals
()))
def
elementwise_min
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_min
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_min'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_min'
,
**
locals
()))
def
elementwise_pow
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_pow
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_pow'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_pow'
,
**
locals
()))
...
@@ -6560,6 +6612,7 @@ for func in [
...
@@ -6560,6 +6612,7 @@ for func in [
func
.
__doc__
=
_generate_doc_string_
(
func
.
__doc__
=
_generate_doc_string_
(
op_proto
,
op_proto
,
additional_args_lines
=
[
additional_args_lines
=
[
"out (Tensor): The output tensor of elementwise op."
,
"act (basestring|None): Activation applied to the output."
,
"act (basestring|None): Activation applied to the output."
,
"name (basestring|None): Name of the output."
"name (basestring|None): Name of the output."
])
])
python/paddle/fluid/parallel_executor.py
浏览文件 @
d94920ce
...
@@ -74,28 +74,7 @@ class ParallelExecutor(object):
...
@@ -74,28 +74,7 @@ class ParallelExecutor(object):
build_strategy
=
None
,
build_strategy
=
None
,
num_trainers
=
1
,
num_trainers
=
1
,
trainer_id
=
0
,
trainer_id
=
0
,
scope
=
None
,
scope
=
None
):
**
kwargs
):
if
len
(
kwargs
)
!=
0
:
err_msg
=
""
for
key
in
kwargs
:
if
key
in
dir
(
ExecutionStrategy
):
err_msg
+=
\
"Setting {0} by constructor is deprecated. Use "
\
"strategy=ExecutionStrategy(); strategy.{0}=xxx; "
\
"pe=ParallelExecutor(exec_strategy=strategy) "
\
"instead.
\n
"
.
format
(
key
)
elif
key
in
dir
(
BuildStrategy
):
err_msg
+=
\
"Setting {0} by constructor is deprecated. Use "
\
"strategy=BuildStrategy(); See help("
\
"paddle.fluid.ParallelExecutor.BuildStrategy)
\n
"
.
format
(
key
)
else
:
err_msg
+=
"Setting {0} by constructor is deprecated. Use strategy.
\n
"
.
format
(
key
)
raise
ValueError
(
err_msg
)
self
.
_places
=
[]
self
.
_places
=
[]
self
.
_act_places
=
[]
self
.
_act_places
=
[]
if
use_cuda
:
if
use_cuda
:
...
...
python/paddle/fluid/param_attr.py
浏览文件 @
d94920ce
...
@@ -185,7 +185,17 @@ class WeightNormParamAttr(ParamAttr):
...
@@ -185,7 +185,17 @@ class WeightNormParamAttr(ParamAttr):
Args:
Args:
dim(list): The parameter's name. Default None.
dim(list): The parameter's name. Default None.
kwargs: Any field in ParamAttr. Default None.
name(str): The parameter's name. Default None.
initializer(Initializer): The method to initial this parameter. Default None.
learning_rate(float): The parameter's learning rate. The learning rate when
optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
Default 1.0.
regularizer(WeightDecayRegularizer): Regularization factor. Default None.
trainable(bool): Whether this parameter is trainable. Default True.
gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
gradient. Default None.
do_model_average(bool): Whether this parameter should do model average.
Default False.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
...
@@ -204,6 +214,21 @@ class WeightNormParamAttr(ParamAttr):
...
@@ -204,6 +214,21 @@ class WeightNormParamAttr(ParamAttr):
# these paramters for inference.
# these paramters for inference.
params_with_weight_norm
=
[]
params_with_weight_norm
=
[]
def
__init__
(
self
,
dim
=
None
,
**
kwargs
):
def
__init__
(
self
,
super
(
WeightNormParamAttr
,
self
).
__init__
(
**
kwargs
)
dim
=
None
,
name
=
None
,
initializer
=
None
,
learning_rate
=
1.0
,
regularizer
=
None
,
trainable
=
True
,
gradient_clip
=
None
,
do_model_average
=
False
):
super
(
WeightNormParamAttr
,
self
).
__init__
(
name
=
name
,
initializer
=
initializer
,
learning_rate
=
learning_rate
,
regularizer
=
regularizer
,
trainable
=
trainable
,
gradient_clip
=
gradient_clip
,
do_model_average
=
do_model_average
)
self
.
dim
=
dim
self
.
dim
=
dim
python/paddle/fluid/tests/book/test_recognize_digits.py
浏览文件 @
d94920ce
...
@@ -67,6 +67,7 @@ def train(nn_type,
...
@@ -67,6 +67,7 @@ def train(nn_type,
use_cuda
,
use_cuda
,
parallel
,
parallel
,
save_dirname
=
None
,
save_dirname
=
None
,
save_full_dirname
=
None
,
model_filename
=
None
,
model_filename
=
None
,
params_filename
=
None
,
params_filename
=
None
,
is_local
=
True
):
is_local
=
True
):
...
@@ -143,6 +144,13 @@ def train(nn_type,
...
@@ -143,6 +144,13 @@ def train(nn_type,
exe
,
exe
,
model_filename
=
model_filename
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
params_filename
=
params_filename
)
if
save_full_dirname
is
not
None
:
fluid
.
io
.
save_inference_model
(
save_full_dirname
,
[],
[],
exe
,
model_filename
=
model_filename
,
params_filename
=
params_filename
,
export_for_deployment
=
False
)
return
return
else
:
else
:
print
(
print
(
...
@@ -214,10 +222,12 @@ def infer(use_cuda,
...
@@ -214,10 +222,12 @@ def infer(use_cuda,
def
main
(
use_cuda
,
parallel
,
nn_type
,
combine
):
def
main
(
use_cuda
,
parallel
,
nn_type
,
combine
):
save_dirname
=
None
save_dirname
=
None
save_full_dirname
=
None
model_filename
=
None
model_filename
=
None
params_filename
=
None
params_filename
=
None
if
not
use_cuda
and
not
parallel
:
if
not
use_cuda
and
not
parallel
:
save_dirname
=
"recognize_digits_"
+
nn_type
+
".inference.model"
save_dirname
=
"recognize_digits_"
+
nn_type
+
".inference.model"
save_full_dirname
=
"recognize_digits_"
+
nn_type
+
".train.model"
if
combine
==
True
:
if
combine
==
True
:
model_filename
=
"__model_combined__"
model_filename
=
"__model_combined__"
params_filename
=
"__params_combined__"
params_filename
=
"__params_combined__"
...
@@ -228,6 +238,7 @@ def main(use_cuda, parallel, nn_type, combine):
...
@@ -228,6 +238,7 @@ def main(use_cuda, parallel, nn_type, combine):
use_cuda
=
use_cuda
,
use_cuda
=
use_cuda
,
parallel
=
parallel
,
parallel
=
parallel
,
save_dirname
=
save_dirname
,
save_dirname
=
save_dirname
,
save_full_dirname
=
save_full_dirname
,
model_filename
=
model_filename
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
params_filename
=
params_filename
)
infer
(
infer
(
...
...
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
浏览文件 @
d94920ce
...
@@ -661,22 +661,25 @@ class TestLoadSliceVar(TranspilerTest):
...
@@ -661,22 +661,25 @@ class TestLoadSliceVar(TranspilerTest):
class
TestNCCL2Transpile
(
TranspilerTest
):
class
TestNCCL2Transpile
(
TranspilerTest
):
def
test_nccl2_transpile
(
self
):
def
test_nccl2_transpile
(
self
):
main
=
fluid
.
Program
()
if
fluid
.
core
.
is_compiled_with_cuda
():
#test nccl2 only with cuda
startup
=
fluid
.
Program
()
main
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
startup
=
fluid
.
Program
()
self
.
net_conf
()
with
fluid
.
program_guard
(
main
,
startup
):
self
.
net_conf
()
config
=
fluid
.
DistributeTranspilerConfig
()
config
.
mode
=
"nccl2"
config
=
fluid
.
DistributeTranspilerConfig
()
t
=
fluid
.
DistributeTranspiler
(
config
=
config
)
config
.
mode
=
"nccl2"
t
.
transpile
(
t
=
fluid
.
DistributeTranspiler
(
config
=
config
)
0
,
t
.
transpile
(
trainers
=
"127.0.0.1:6174,127.0.0.1:6175"
,
0
,
current_endpoint
=
"127.0.0.1:6174"
,
trainers
=
"127.0.0.1:6174,127.0.0.1:6175"
,
startup_program
=
startup
)
current_endpoint
=
"127.0.0.1:6174"
,
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
startup_program
=
startup
)
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
else
:
pass
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_operator_desc.py
浏览文件 @
d94920ce
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set
(
mul_op
.
attr_names
),
set
(
mul_op
.
attr_names
),
set
([
set
([
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"op_namescope"
"op_namescope"
,
"op_callstack"
]))
]))
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录