Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
d94920ce
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d94920ce
编写于
9月 26, 2018
作者:
D
Dang Qingqing
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into quantize_transpiler_update
上级
44791f42
01fda934
变更
46
隐藏空白更改
内联
并排
Showing
46 changed file
with
839 addition
and
315 deletion
+839
-315
cmake/configure.cmake
cmake/configure.cmake
+19
-1
paddle/fluid/API.spec
paddle/fluid/API.spec
+10
-10
paddle/fluid/CMakeLists.txt
paddle/fluid/CMakeLists.txt
+2
-0
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
+8
-26
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
...e/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
+7
-10
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+1
-7
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+1
-2
paddle/fluid/framework/op_info.h
paddle/fluid/framework/op_info.h
+11
-6
paddle/fluid/framework/op_proto_maker.cc
paddle/fluid/framework/op_proto_maker.cc
+3
-1
paddle/fluid/framework/op_proto_maker.h
paddle/fluid/framework/op_proto_maker.h
+1
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+44
-13
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-1
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+15
-51
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+11
-8
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+5
-0
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+199
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+3
-9
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+19
-18
paddle/fluid/operators/pool_mkldnn_op.cc
paddle/fluid/operators/pool_mkldnn_op.cc
+35
-6
paddle/fluid/operators/read_op.cc
paddle/fluid/operators/read_op.cc
+2
-0
paddle/fluid/operators/sequence_slice_op.h
paddle/fluid/operators/sequence_slice_op.h
+2
-2
paddle/fluid/operators/sgd_op.cu
paddle/fluid/operators/sgd_op.cu
+21
-20
paddle/fluid/operators/shrink_rnn_memory_op.cc
paddle/fluid/operators/shrink_rnn_memory_op.cc
+21
-8
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+1
-1
paddle/fluid/operators/top_k_op.cc
paddle/fluid/operators/top_k_op.cc
+2
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+5
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+5
-0
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+33
-10
paddle/fluid/platform/for_range.h
paddle/fluid/platform/for_range.h
+29
-10
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+17
-0
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+3
-0
paddle/fluid/pybind/const_value.cc
paddle/fluid/pybind/const_value.cc
+3
-0
paddle/fluid/train/CMakeLists.txt
paddle/fluid/train/CMakeLists.txt
+30
-0
paddle/fluid/train/test_train_recognize_digits.cc
paddle/fluid/train/test_train_recognize_digits.cc
+89
-0
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+4
-2
python/paddle/dataset/wmt14.py
python/paddle/dataset/wmt14.py
+2
-1
python/paddle/dataset/wmt16.py
python/paddle/dataset/wmt16.py
+4
-2
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+10
-1
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+27
-28
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+2
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+72
-19
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+1
-22
python/paddle/fluid/param_attr.py
python/paddle/fluid/param_attr.py
+28
-3
python/paddle/fluid/tests/book/test_recognize_digits.py
python/paddle/fluid/tests/book/test_recognize_digits.py
+11
-0
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+19
-16
python/paddle/fluid/tests/unittests/test_operator_desc.py
python/paddle/fluid/tests/unittests/test_operator_desc.py
+1
-1
未找到文件。
cmake/configure.cmake
浏览文件 @
d94920ce
...
...
@@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING)
endif
()
if
(
WIN32
)
# windows
stupid compile
option for all targets.
# windows
header
option for all targets.
add_definitions
(
-D_XKEYCHECK_H
)
# Use symbols instead of absolute path, reduce the cmake link command length.
SET
(
CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1
)
SET
(
CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1
)
SET
(
CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1
)
SET
(
CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1
)
SET
(
CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1
)
SET
(
CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1
)
SET
(
CMAKE_C_RESPONSE_FILE_LINK_FLAG
"@"
)
SET
(
CMAKE_CXX_RESPONSE_FILE_LINK_FLAG
"@"
)
# Specify the program to use when building static libraries
SET
(
CMAKE_C_CREATE_STATIC_LIBRARY
"<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>"
)
SET
(
CMAKE_CXX_CREATE_STATIC_LIBRARY
"<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>"
)
# set defination for the dll export
if
(
NOT MSVC
)
message
(
FATAL
"Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA."
)
endif
(
NOT MSVC
)
endif
(
WIN32
)
if
(
NOT WITH_GOLANG
)
...
...
paddle/fluid/API.spec
浏览文件 @
d94920ce
...
...
@@ -41,7 +41,7 @@ paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id',
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=
'kwargs'
, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=
None
, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.GradientScaleStrategy, arg0: int) -> None
...
...
@@ -162,14 +162,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', '
act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, Tru
e, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', '
axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(
-1, False, None, None))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', '
out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, Non
e, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None,
-1, False, None, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
...
...
@@ -378,7 +378,7 @@ paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> Non
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim'
], varargs=None, keywords='kwargs', defaults=(None,
))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim'
, 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False
))
paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/CMakeLists.txt
浏览文件 @
d94920ce
...
...
@@ -13,3 +13,5 @@ if(WITH_INFERENCE)
# NOTE: please add subdirectory inference at last.
add_subdirectory
(
inference
)
endif
()
add_subdirectory
(
train
)
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
浏览文件 @
d94920ce
...
...
@@ -26,8 +26,6 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
"conv_relu_mkldnn_fuse"
,
graph
.
get
());
std
::
unordered_set
<
Node
*>
nodes2delete
;
GraphPatternDetector
gpd
;
auto
*
conv_input
=
gpd
.
mutable_pattern
()
->
NewNode
(
"conv_relu_mkldnn_fuse/conv_input"
)
...
...
@@ -42,36 +40,20 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
Graph
*
g
)
{
VLOG
(
4
)
<<
"handle ConvReLU fuse"
;
GET_IR_NODE_FROM_SUBGRAPH
(
conv_weight
,
conv_weight
,
conv_relu_pattern
);
// Filter
GET_IR_NODE_FROM_SUBGRAPH
(
conv_bias
,
conv_bias
,
conv_relu_pattern
);
// Bias
GET_IR_NODE_FROM_SUBGRAPH
(
conv_out
,
conv_out
,
conv_relu_pattern
);
// tmp
conv_relu_pattern
);
// Filter
GET_IR_NODE_FROM_SUBGRAPH
(
conv_out
,
conv_out
,
conv_relu_pattern
);
// tmp
GET_IR_NODE_FROM_SUBGRAPH
(
conv
,
conv
,
conv_relu_pattern
);
// CONV op
GET_IR_NODE_FROM_SUBGRAPH
(
relu_out
,
relu_out
,
conv_relu_pattern
);
// Out
GET_IR_NODE_FROM_SUBGRAPH
(
relu
,
relu
,
conv_relu_pattern
);
// ReLU op
// Create an ConvReLU Node.
OpDesc
desc
;
std
::
string
conv_relu_i_in
=
subgraph
.
at
(
conv_input
)
->
Name
();
std
::
string
conv_relu_w_in
=
conv_weight
->
Name
();
std
::
string
conv_relu_b_in
=
conv_bias
->
Name
();
std
::
string
conv_relu_out
=
relu_out
->
Name
();
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
conv_relu_i_in
}));
desc
.
SetInput
(
"Filter"
,
std
::
vector
<
std
::
string
>
({
conv_relu_w_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
conv_relu_b_in
}));
desc
.
SetOutput
(
"Output"
,
std
::
vector
<
std
::
string
>
({
conv_relu_out
}));
desc
.
SetType
(
"conv2d"
);
for
(
auto
&
attr
:
conv
->
Op
()
->
GetAttrMap
())
{
desc
.
SetAttr
(
attr
.
first
,
attr
.
second
);
}
desc
.
SetAttr
(
"fuse_relu"
,
true
);
auto
conv_relu_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
GraphSafeRemoveNodes
(
graph
.
get
(),
{
conv
,
relu
,
conv_out
});
// Transform Conv node into ConvReLU node.
OpDesc
*
desc
=
conv
->
Op
();
desc
->
SetOutput
(
"Output"
,
std
::
vector
<
std
::
string
>
({
relu_out
->
Name
()}));
desc
->
SetAttr
(
"fuse_relu"
,
true
);
GraphSafeRemoveNodes
(
graph
.
get
(),
{
relu
,
conv_out
});
PADDLE_ENFORCE
(
subgraph
.
count
(
conv_input
));
IR_NODE_LINK_TO
(
subgraph
.
at
(
conv_input
),
conv_relu_node
);
IR_NODE_LINK_TO
(
conv_weight
,
conv_relu_node
);
IR_NODE_LINK_TO
(
conv_bias
,
conv_relu_node
);
IR_NODE_LINK_TO
(
conv_relu_node
,
relu_out
);
IR_NODE_LINK_TO
(
conv
,
relu_out
);
found_conv_relu_count
++
;
};
...
...
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
浏览文件 @
d94920ce
...
...
@@ -85,16 +85,13 @@ TEST(ConvReLUFusePass, basic) {
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsOp
()
&&
node
->
Op
()
->
Type
()
==
"conv2d"
)
{
if
(
node
->
Op
()
->
HasAttr
(
"use_mkldnn"
))
{
bool
use_mkldnn
=
boost
::
get
<
bool
>
(
node
->
Op
()
->
GetAttr
(
"use_mkldnn"
));
if
(
use_mkldnn
)
{
if
(
node
->
Op
()
->
HasAttr
(
"fuse_relu"
))
{
bool
fuse_relu
=
boost
::
get
<
bool
>
(
node
->
Op
()
->
GetAttr
(
"fuse_relu"
));
if
(
fuse_relu
)
{
++
conv_relu_count
;
}
}
}
auto
*
op
=
node
->
Op
();
ASSERT_TRUE
(
op
->
HasAttr
(
"use_mkldnn"
));
EXPECT_TRUE
(
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"use_mkldnn"
)));
ASSERT_TRUE
(
op
->
HasAttr
(
"fuse_relu"
));
bool
fuse_relu
=
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"fuse_relu"
));
if
(
fuse_relu
)
{
++
conv_relu_count
;
}
}
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
d94920ce
...
...
@@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()(
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"conv2d"
,
"Filter"
);
// Bias
auto
*
conv_bias_var
=
pattern
->
NewNode
(
conv_bias_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"conv2d"
,
"Bias"
);
// intermediate variable, will be removed in the IR after fuse.
auto
*
conv_out_var
=
pattern
->
NewNode
(
conv_out_repr
())
->
AsIntermediate
()
...
...
@@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()(
->
AsOutput
()
->
assert_is_op_output
(
"relu"
);
conv_op
->
LinksFrom
({
conv_input
,
conv_weight_var
,
conv_bias_var
})
.
LinksTo
({
conv_out_var
});
conv_op
->
LinksFrom
({
conv_input
,
conv_weight_var
}).
LinksTo
({
conv_out_var
});
relu_op
->
LinksFrom
({
conv_out_var
}).
LinksTo
({
relu_out_var
});
return
relu_out_var
;
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
d94920ce
...
...
@@ -379,7 +379,7 @@ struct PatternBase {
// op: conv + relu
// named nodes:
// conv_input, conv_weight,
// conv_
bias, conv_
out, conv,
// conv_out, conv,
// relu_out, relu
struct
ConvReLU
:
public
PatternBase
{
ConvReLU
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
...
...
@@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase {
PATTERN_DECL_NODE
(
relu
);
// declare variable node's name
PATTERN_DECL_NODE
(
conv_weight
);
PATTERN_DECL_NODE
(
conv_bias
);
PATTERN_DECL_NODE
(
conv_out
);
PATTERN_DECL_NODE
(
relu_out
);
};
...
...
paddle/fluid/framework/op_info.h
浏览文件 @
d94920ce
...
...
@@ -38,27 +38,31 @@ struct OpInfo {
OpAttrChecker
*
checker_
{
nullptr
};
InferVarTypeFN
infer_var_type_
;
InferShapeFN
infer_shape_
;
std
::
string
op_type_
;
bool
HasOpProtoAndChecker
()
const
{
return
proto_
!=
nullptr
&&
checker_
!=
nullptr
;
}
const
proto
::
OpProto
&
Proto
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
proto_
,
"Operator Proto has not been registered"
);
PADDLE_ENFORCE_NOT_NULL
(
proto_
,
"Operator %s Proto has not been registered"
,
op_type_
);
PADDLE_ENFORCE
(
proto_
->
IsInitialized
(),
"Operator Proto must be initialized in op info"
);
"Operator %s Proto must be initialized in op info"
,
op_type_
);
return
*
proto_
;
}
const
OpCreator
&
Creator
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
creator_
,
"Operator Creator has not been registered"
);
PADDLE_ENFORCE_NOT_NULL
(
creator_
,
"Operator %s Creator has not been registered"
,
op_type_
);
return
creator_
;
}
const
GradOpMakerFN
&
GradOpMaker
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
grad_op_maker_
,
"Operator GradOpMaker has not been registered."
);
"Operator %s GradOpMaker has not been registered."
,
op_type_
);
return
grad_op_maker_
;
}
...
...
@@ -73,8 +77,9 @@ class OpInfoMap {
return
map_
.
find
(
op_type
)
!=
map_
.
end
();
}
void
Insert
(
const
std
::
string
&
type
,
const
OpInfo
&
info
)
{
void
Insert
(
const
std
::
string
&
type
,
OpInfo
info
)
{
PADDLE_ENFORCE
(
!
Has
(
type
),
"Operator %s has been registered"
,
type
);
info
.
op_type_
=
type
;
map_
.
insert
({
type
,
info
});
}
...
...
paddle/fluid/framework/op_proto_maker.cc
浏览文件 @
d94920ce
...
...
@@ -132,7 +132,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
.
SetDefault
(
""
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
OpCreationCallstackAttrName
(),
"Callstack for Op Creatation."
)
.
SetDefault
({});
Validate
();
}
...
...
paddle/fluid/framework/op_proto_maker.h
浏览文件 @
d94920ce
...
...
@@ -46,6 +46,7 @@ class OpProtoAndCheckerMaker {
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpCreationCallstackAttrName
()
{
return
"op_callstack"
;
}
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
d94920ce
...
...
@@ -14,15 +14,17 @@ limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/operator.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op
erato
r.h"
#include "paddle/fluid/framework/op
_proto_make
r.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -140,19 +142,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
if
(
platform
::
is_gpu_place
(
place
))
{
try
{
if
(
VLOG_IS_ON
(
4
))
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
#else
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
platform
::
SetDeviceId
(
dev_id
);
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
platform
::
SetDeviceId
(
dev_id
);
#endif
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
if
(
VLOG_IS_ON
(
3
))
{
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
catch
(
platform
::
EnforceNotMet
exception
)
{
if
(
Attrs
().
count
(
"sub_block"
)
!=
0
)
{
throw
exception
;
}
auto
&
callstack
=
Attr
<
std
::
vector
<
std
::
string
>>
(
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
());
if
(
callstack
.
empty
())
{
throw
exception
;
}
std
::
ostringstream
sout
;
sout
<<
"Invoke operator "
<<
Type
()
<<
" error.
\n
"
;
sout
<<
"Python Callstacks:
\n
"
;
for
(
auto
&
line
:
callstack
)
{
sout
<<
line
;
}
sout
<<
"C++ Callstacks:
\n
"
;
sout
<<
exception
.
err_str_
;
exception
.
err_str_
=
sout
.
str
();
throw
exception
;
}
catch
(...)
{
std
::
rethrow_exception
(
std
::
current_exception
());
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
...
...
@@ -180,7 +211,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
}
bool
OperatorBase
::
HasOutputs
(
const
std
::
string
&
name
)
const
{
if
(
outputs_
.
find
(
name
)
!=
outputs_
.
end
(
))
{
if
(
outputs_
.
end
()
!=
outputs_
.
find
(
name
))
{
return
true
;
}
else
{
return
false
;
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
d94920ce
...
...
@@ -76,10 +76,10 @@ bool AnalysisPredictor::Init(
}
OptimizeInferenceProgram
();
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
if
(
config_
.
_use_mkldnn
)
{
executor_
->
EnableMKLDNN
(
*
inference_program_
);
}
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
VLOG
(
5
)
<<
"to create variables"
;
PADDLE_ENFORCE
(
scope_
.
get
());
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
d94920ce
...
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -215,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
template
<
typename
T
>
void
NativePaddlePredictor
::
GetFetchOne
(
const
framework
::
LoDTensor
&
fetch
,
PaddleTensor
*
output
)
{
std
::
vector
<
int
>
shape
;
auto
dims_i
=
fetch
.
dims
();
auto
lod
=
fetch
.
lod
();
const
T
*
output_ptr
=
fetch
.
data
<
T
>
();
auto
num
=
fetch
.
numel
();
std
::
vector
<
T
>
data
;
if
(
0
==
lod
.
size
())
{
std
::
copy
(
output_ptr
,
output_ptr
+
num
,
std
::
back_inserter
(
data
));
for
(
int
j
=
0
;
j
<
dims_i
.
size
();
++
j
)
{
shape
.
push_back
(
dims_i
[
j
]);
}
}
else
{
// for batch detection
// image[0] -> output[0] shape {145, 6}
// image[1] -> output[1] shape {176, 6}
// then,
// the batch output shape {321, 6}
// the lod {{0, 145, 321}}
// so we should append output[0] to {176, 6}
size_t
max_dim
=
0
;
for
(
size_t
j
=
1
;
j
<
lod
[
0
].
size
();
j
++
)
{
max_dim
=
std
::
max
(
max_dim
,
lod
[
0
][
j
]
-
lod
[
0
][
j
-
1
]);
}
size_t
common_dim
=
lod
[
0
].
back
()
==
0
?
0
:
num
/
lod
[
0
].
back
();
if
(
max_dim
>
0
)
{
data
.
resize
((
lod
[
0
].
size
()
-
1
)
*
max_dim
*
common_dim
,
0
);
}
for
(
size_t
j
=
1
;
j
<
lod
[
0
].
size
();
j
++
)
{
size_t
start
=
lod
[
0
][
j
-
1
]
*
common_dim
;
size_t
end
=
lod
[
0
][
j
]
*
common_dim
;
if
(
end
>
start
)
{
std
::
copy
(
output_ptr
+
start
,
output_ptr
+
end
,
data
.
begin
()
+
(
j
-
1
)
*
max_dim
*
common_dim
);
}
}
shape
.
push_back
(
lod
[
0
].
size
()
-
1
);
shape
.
push_back
(
max_dim
);
for
(
int
j
=
1
;
j
<
dims_i
.
size
();
++
j
)
{
shape
.
push_back
(
dims_i
[
j
]);
}
}
output
->
shape
=
shape
;
auto
&
buffer
=
output
->
data
;
if
(
buffer
.
empty
()
||
buffer
.
length
()
<
sizeof
(
T
)
*
data
.
size
())
{
buffer
.
Resize
(
sizeof
(
T
)
*
data
.
size
());
}
std
::
memcpy
(
buffer
.
data
(),
data
.
data
(),
sizeof
(
T
)
*
data
.
size
());
// copy LoD
for
(
const
auto
&
level
:
fetch
.
lod
())
{
output
->
lod
.
emplace_back
(
level
);
// set shape.
auto
shape
=
framework
::
vectorize
(
fetch
.
dims
());
output
->
shape
.
assign
(
shape
.
begin
(),
shape
.
end
());
// set data.
const
T
*
data
=
fetch
.
data
<
T
>
();
int
num_elems
=
inference
::
VecReduceToInt
(
shape
);
output
->
data
.
Resize
(
num_elems
*
sizeof
(
T
));
// The fetched tensor output by fetch op, should always in CPU memory, so just
// copy.
memcpy
(
output
->
data
.
data
(),
data
,
num_elems
*
sizeof
(
T
));
// set lod
output
->
lod
.
clear
();
for
(
auto
&
level
:
fetch
.
lod
())
{
output
->
lod
.
emplace_back
(
level
.
begin
(),
level
.
end
());
}
}
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
d94920ce
...
...
@@ -74,13 +74,17 @@ template <>
std
::
string
to_string
<
std
::
vector
<
std
::
vector
<
float
>>>
(
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
&
vec
);
template
<
typename
T
>
int
VecReduceToInt
(
const
std
::
vector
<
T
>
&
v
)
{
return
std
::
accumulate
(
v
.
begin
(),
v
.
end
(),
1
,
[](
T
a
,
T
b
)
{
return
a
*
b
;
});
}
template
<
typename
T
>
static
void
TensorAssignData
(
PaddleTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
// Assign buffer
int
dim
=
std
::
accumulate
(
tensor
->
shape
.
begin
(),
tensor
->
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
tensor
->
data
.
Resize
(
sizeof
(
T
)
*
dim
);
int
num_elems
=
VecReduceToInt
(
tensor
->
shape
);
tensor
->
data
.
Resize
(
sizeof
(
T
)
*
num_elems
);
int
c
=
0
;
for
(
const
auto
&
f
:
data
)
{
for
(
T
v
:
f
)
{
...
...
@@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
st
atic
st
d
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
" - type: "
;
...
...
@@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
os
<<
"
\n
"
;
os
<<
" - data: "
;
int
dim
=
std
::
accumulate
(
tensor
.
shape
.
begin
(),
tensor
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
int
dim
=
VecReduceToInt
(
tensor
.
shape
);
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
{
os
<<
static_cast
<
float
*>
(
tensor
.
data
.
data
())[
i
]
<<
" "
;
}
...
...
@@ -122,8 +125,8 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
return
os
.
str
();
}
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
,
int
epoch
=
1
)
{
static
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms ======"
;
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
d94920ce
...
...
@@ -58,6 +58,11 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi
download_model_and_data
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
"text-classification-Senta.tar.gz"
"text_classification_data.txt.tar.gz"
)
inference_analysis_api_test
(
test_analyzer_text_classification
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
analyzer_text_classification_tester.cc
)
# seq_conv1
set
(
SEQ_CONV1_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/seq_conv1"
)
download_model_and_data
(
${
SEQ_CONV1_INSTALL_DIR
}
"seq_conv1_model.tar.gz"
"seq_conv1_data.txt.tar.gz"
)
inference_analysis_api_test
(
test_analyzer_seq_conv1
${
SEQ_CONV1_INSTALL_DIR
}
analyzer_seq_conv1_tester.cc
)
# ocr
set
(
OCR_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/ocr"
)
if
(
NOT EXISTS
${
OCR_INSTALL_DIR
}
)
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
0 → 100644
浏览文件 @
d94920ce
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
title1_all
,
title2_all
,
title3_all
,
l1_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
title1
,
title2
,
title3
,
l1
;
std
::
vector
<
size_t
>
title1_lod
,
title2_lod
,
title3_lod
,
l1_lod
;
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
Load
(
path
);
}
DataRecord
NextBatch
()
{
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
title1_all
.
size
())
{
data
.
title1_all
.
assign
(
title1_all
.
begin
()
+
batch_iter
,
title1_all
.
begin
()
+
batch_end
);
data
.
title2_all
.
assign
(
title2_all
.
begin
()
+
batch_iter
,
title2_all
.
begin
()
+
batch_end
);
data
.
title3_all
.
assign
(
title3_all
.
begin
()
+
batch_iter
,
title3_all
.
begin
()
+
batch_end
);
data
.
l1_all
.
assign
(
l1_all
.
begin
()
+
batch_iter
,
l1_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
title1_lod
.
push_back
(
0
);
data
.
title2_lod
.
push_back
(
0
);
data
.
title3_lod
.
push_back
(
0
);
data
.
l1_lod
.
push_back
(
0
);
CHECK
(
!
data
.
title1_all
.
empty
());
CHECK
(
!
data
.
title2_all
.
empty
());
CHECK
(
!
data
.
title3_all
.
empty
());
CHECK
(
!
data
.
l1_all
.
empty
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title2_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title3_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
l1_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
title1_all
.
size
();
j
++
)
{
data
.
title1
.
push_back
(
data
.
title1_all
[
j
]);
data
.
title2
.
push_back
(
data
.
title2_all
[
j
]);
data
.
title3
.
push_back
(
data
.
title3_all
[
j
]);
data
.
l1
.
push_back
(
data
.
l1_all
[
j
]);
// calculate lod
data
.
title1_lod
.
push_back
(
data
.
title1_lod
.
back
()
+
data
.
title1_all
[
j
].
size
());
data
.
title2_lod
.
push_back
(
data
.
title2_lod
.
back
()
+
data
.
title2_all
[
j
].
size
());
data
.
title3_lod
.
push_back
(
data
.
title3_lod
.
back
()
+
data
.
title3_all
[
j
].
size
());
data
.
l1_lod
.
push_back
(
data
.
l1_lod
.
back
()
+
data
.
l1_all
[
j
].
size
());
}
}
batch_iter
+=
batch_size
;
return
data
;
}
void
Load
(
const
std
::
string
&
path
)
{
std
::
ifstream
file
(
path
);
std
::
string
line
;
int
num_lines
=
0
;
while
(
std
::
getline
(
file
,
line
))
{
num_lines
++
;
std
::
vector
<
std
::
string
>
data
;
split
(
line
,
'\t'
,
&
data
);
// load title1 data
std
::
vector
<
int64_t
>
title1_data
;
split_to_int64
(
data
[
0
],
' '
,
&
title1_data
);
// load title2 data
std
::
vector
<
int64_t
>
title2_data
;
split_to_int64
(
data
[
1
],
' '
,
&
title2_data
);
// load title3 data
std
::
vector
<
int64_t
>
title3_data
;
split_to_int64
(
data
[
2
],
' '
,
&
title3_data
);
// load l1 data
std
::
vector
<
int64_t
>
l1_data
;
split_to_int64
(
data
[
3
],
' '
,
&
l1_data
);
title1_all
.
push_back
(
std
::
move
(
title1_data
));
title2_all
.
push_back
(
std
::
move
(
title2_data
));
title3_all
.
push_back
(
std
::
move
(
title3_data
));
l1_all
.
push_back
(
std
::
move
(
l1_data
));
}
num_samples
=
num_lines
;
}
};
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
int
batch_size
)
{
PaddleTensor
title1_tensor
,
title2_tensor
,
title3_tensor
,
l1_tensor
;
title1_tensor
.
name
=
"title1"
;
title2_tensor
.
name
=
"title2"
;
title3_tensor
.
name
=
"title3"
;
l1_tensor
.
name
=
"l1"
;
auto
one_batch
=
data
->
NextBatch
();
int
title1_size
=
one_batch
.
title1_lod
[
one_batch
.
title1_lod
.
size
()
-
1
];
title1_tensor
.
shape
.
assign
({
title1_size
,
1
});
title1_tensor
.
lod
.
assign
({
one_batch
.
title1_lod
});
int
title2_size
=
one_batch
.
title2_lod
[
one_batch
.
title2_lod
.
size
()
-
1
];
title2_tensor
.
shape
.
assign
({
title2_size
,
1
});
title2_tensor
.
lod
.
assign
({
one_batch
.
title2_lod
});
int
title3_size
=
one_batch
.
title3_lod
[
one_batch
.
title3_lod
.
size
()
-
1
];
title3_tensor
.
shape
.
assign
({
title3_size
,
1
});
title3_tensor
.
lod
.
assign
({
one_batch
.
title3_lod
});
int
l1_size
=
one_batch
.
l1_lod
[
one_batch
.
l1_lod
.
size
()
-
1
];
l1_tensor
.
shape
.
assign
({
l1_size
,
1
});
l1_tensor
.
lod
.
assign
({
one_batch
.
l1_lod
});
// assign data
TensorAssignData
<
int64_t
>
(
&
title1_tensor
,
one_batch
.
title1
);
TensorAssignData
<
int64_t
>
(
&
title2_tensor
,
one_batch
.
title2
);
TensorAssignData
<
int64_t
>
(
&
title3_tensor
,
one_batch
.
title3
);
TensorAssignData
<
int64_t
>
(
&
l1_tensor
,
one_batch
.
l1
);
// Set inputs.
input_slots
->
assign
({
title1_tensor
,
title2_tensor
,
title3_tensor
,
l1_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
tensor
.
dtype
=
PaddleDType
::
INT64
;
}
}
// Fills `cfg` for this test: the model is read from FLAGS_infer_model, GPU
// use is turned off (device 0), inputs are matched by name and IR
// optimisation is enabled.
void SetConfig(AnalysisConfig *cfg) {
  // Where the model comes from.
  cfg->model_dir = FLAGS_infer_model;

  // Device selection.
  cfg->device = 0;
  cfg->use_gpu = false;

  // Analysis behaviour.
  cfg->enable_ir_optim = true;
  cfg->specify_input_name = true;
}
// Loads FLAGS_infer_data and appends one vector of input tensors per batch to
// `inputs`. With FLAGS_test_all_data every full batch in the file is used,
// otherwise only the first batch.
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);

  // Number of batches to prepare.
  int epoch = 1;
  if (FLAGS_test_all_data) {
    epoch = data.num_samples / FLAGS_batch_size;
  }
  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;

  std::vector<PaddleTensor> input_slots;
  int bid = 0;
  while (bid < epoch) {
    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
    // Store a copy; input_slots is overwritten on the next iteration.
    inputs->emplace_back(input_slots);
    ++bid;
  }
}
// Runs the prediction on its own so it can be profiled independently. For a
// single-threaded run on the first batch only, the output values are also
// sanity-checked.
TEST(Analyzer_seq_conv1, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  std::vector<PaddleTensor> outputs;
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);

  // The value check below applies to the first inference result only.
  const bool check_values = FLAGS_num_threads == 1 && !FLAGS_test_all_data;
  if (!check_values) {
    return;
  }

  PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
  size_t size = GetSize(outputs[0]);
  PADDLE_ENFORCE_GT(size, 0);

  // The output is a probability, which lies in (0, 1).
  float *result = static_cast<float *>(outputs[0].data.data());
  for (size_t k = 0; k != size; ++k) {
    EXPECT_GT(result[k], 0);
    EXPECT_LT(result[k], 1);
  }
}
// Queries the fuse statistics for the analysis config. The returned values
// are not asserted on; the test only exercises the call.
TEST(Analyzer_seq_conv1, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int op_count;
  auto fuse_counts = GetFuseStatis(cfg, &op_count);
}
// Feeds the same inputs through CompareNativeAndAnalysis so the results of
// NativeConfig and AnalysisConfig are compared.
TEST(Analyzer_seq_conv1, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> all_inputs;
  SetInput(&all_inputs);

  CompareNativeAndAnalysis(cfg, all_inputs);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
d94920ce
...
...
@@ -47,11 +47,8 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
ref_out
=
ref_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size
=
VecReduceToInt
(
out
.
shape
);
size_t
ref_size
=
VecReduceToInt
(
ref_out
.
shape
);
EXPECT_GT
(
size
,
0
);
EXPECT_EQ
(
size
,
ref_size
);
EXPECT_EQ
(
out
.
dtype
,
ref_out
.
dtype
);
...
...
@@ -87,10 +84,7 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
}
}
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
}
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
VecReduceToInt
(
out
.
shape
);
}
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
AnalysisConfig
config
,
int
*
num_ops
)
{
...
...
paddle/fluid/operators/activation_op.cc
浏览文件 @
d94920ce
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include "paddle/fluid/operators/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -105,105 +106,105 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
}
};
__attribute__
((
unused
))
constexpr
char
SigmoidDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SigmoidDoc
[]
=
R"DOC(
Sigmoid Activation Operator
$$out = \frac{1}{1 + e^{-x}}$$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
LogSigmoidDoc
[]
=
R"DOC(
UNUSED
constexpr
char
LogSigmoidDoc
[]
=
R"DOC(
Logsigmoid Activation Operator
$$out = \\log \\frac{1}{1 + e^{-x}}$$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
ExpDoc
[]
=
R"DOC(
UNUSED
constexpr
char
ExpDoc
[]
=
R"DOC(
Exp Activation Operator.
$out = e^x$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
ReluDoc
[]
=
R"DOC(
UNUSED
constexpr
char
ReluDoc
[]
=
R"DOC(
Relu Activation Operator.
$out = \max(x, 0)$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
TanhDoc
[]
=
R"DOC(
UNUSED
constexpr
char
TanhDoc
[]
=
R"DOC(
Tanh Activation Operator.
$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
TanhShrinkDoc
[]
=
R"DOC(
UNUSED
constexpr
char
TanhShrinkDoc
[]
=
R"DOC(
TanhShrink Activation Operator.
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SqrtDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SqrtDoc
[]
=
R"DOC(
Sqrt Activation Operator.
$out = \sqrt{x}$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
AbsDoc
[]
=
R"DOC(
UNUSED
constexpr
char
AbsDoc
[]
=
R"DOC(
Abs Activation Operator.
$out = |x|$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
CeilDoc
[]
=
R"DOC(
UNUSED
constexpr
char
CeilDoc
[]
=
R"DOC(
Ceil Activation Operator.
$out = ceil(x)$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
FloorDoc
[]
=
R"DOC(
UNUSED
constexpr
char
FloorDoc
[]
=
R"DOC(
Floor Activation Operator.
$out = floor(x)$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
CosDoc
[]
=
R"DOC(
UNUSED
constexpr
char
CosDoc
[]
=
R"DOC(
Cosine Activation Operator.
$out = cos(x)$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SinDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SinDoc
[]
=
R"DOC(
Sine Activation Operator.
$out = sin(x)$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
RoundDoc
[]
=
R"DOC(
UNUSED
constexpr
char
RoundDoc
[]
=
R"DOC(
Round Activation Operator.
$out = [x]$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
ReciprocalDoc
[]
=
R"DOC(
UNUSED
constexpr
char
ReciprocalDoc
[]
=
R"DOC(
Reciprocal Activation Operator.
$$out = \\frac{1}{x}$$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
LogDoc
[]
=
R"DOC(
UNUSED
constexpr
char
LogDoc
[]
=
R"DOC(
Log Activation Operator.
$out = \ln(x)$
...
...
@@ -212,21 +213,21 @@ Natural logarithm of x.
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SquareDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SquareDoc
[]
=
R"DOC(
Square Activation Operator.
$out = x^2$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SoftplusDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SoftplusDoc
[]
=
R"DOC(
Softplus Activation Operator.
$out = \ln(1 + e^{x})$
)DOC"
;
__attribute__
((
unused
))
constexpr
char
SoftsignDoc
[]
=
R"DOC(
UNUSED
constexpr
char
SoftsignDoc
[]
=
R"DOC(
Softsign Activation Operator.
$$out = \frac{x}{1 + |x|}$$
...
...
paddle/fluid/operators/pool_mkldnn_op.cc
浏览文件 @
d94920ce
...
...
@@ -46,6 +46,25 @@ static std::string gethash(const memory::dims& input_dims,
dims2str
(
paddings
)
+
pooling_type
+
suffix
;
}
// Returns (input_size - kernel_size + 2 * padding) / stride + 1.
// NOTE: the division is plain integer division, so the quotient is truncated
// rather than rounded up, despite the function's name.
static inline int ComputeCeiledOutput(int input_size, int kernel_size,
                                      int padding, int stride) {
  const int padded_span = input_size - kernel_size + 2 * padding;
  return padded_span / stride + 1;
}
// For every entry of right_bot_padding, compares the output size given by
// ComputeCeiledOutput() with the corresponding entry of dst_tz and, when they
// differ, enlarges that right/bottom padding by one stride.
//
// src_tz and dst_tz are indexed at [i + 2]; kernel_size, paddings and strides
// are indexed at [i], in step with right_bot_padding.
static inline void CorrectOutputSize(
    const std::vector<int>& src_tz, const std::vector<int>& dst_tz,
    const std::vector<int>& kernel_size, const std::vector<int>& paddings,
    const std::vector<int>& strides,
    std::vector<int>& right_bot_padding) {  // NOLINT
  const size_t num_dims = right_bot_padding.size();
  for (size_t dim = 0; dim < num_dims; ++dim) {
    const size_t tz_index = dim + 2;
    const int desired_size = ComputeCeiledOutput(
        src_tz[tz_index], kernel_size[dim], paddings[dim], strides[dim]);
    if (desired_size == dst_tz[tz_index]) {
      continue;  // sizes already agree, nothing to correct
    }
    right_bot_padding[dim] += strides[dim];
  }
}
template
<
typename
T
>
class
PoolMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -103,6 +122,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
pool_p
=
std
::
static_pointer_cast
<
pooling_forward
>
(
dev_ctx
.
GetBlob
(
key_pool_p
));
if
(
pool_p
==
nullptr
)
{
const
std
::
vector
<
int
>&
padding_left_top
(
paddings
);
std
::
vector
<
int
>
padding_right_bottom
(
paddings
);
bool
ceil_mode
=
ctx
.
Attr
<
bool
>
(
"ceil_mode"
);
if
(
ceil_mode
)
{
CorrectOutputSize
(
src_tz
,
dst_tz
,
ksize
,
paddings
,
strides
,
padding_right_bottom
);
}
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
input_format
);
...
...
@@ -114,8 +140,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
mkldnn
::
memory
::
format
::
any
);
std
::
shared_ptr
<
mkldnn
::
pooling_forward
::
primitive_desc
>
pool_pd
=
CreatePrimitiveDesc
(
src_md
,
dst_md
,
strides
,
paddings
,
ksize
,
pooling_type
,
mkldnn_engine
);
CreatePrimitiveDesc
(
src_md
,
dst_md
,
strides
,
padding_left_top
,
padding_right_bottom
,
ksize
,
pooling_type
,
mkldnn_engine
,
ceil_mode
);
// save pool_pd into global device context to be referred in backward path
dev_ctx
.
SetBlob
(
key_pool_pd
,
pool_pd
);
...
...
@@ -171,14 +198,16 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
private:
std
::
unique_ptr
<
mkldnn
::
pooling_forward
::
primitive_desc
>
CreatePrimitiveDesc
(
const
mkldnn
::
memory
::
desc
&
src
,
const
mkldnn
::
memory
::
desc
&
dst
,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding
,
const
std
::
vector
<
int
>&
kernel
,
const
std
::
string
&
pooling_type
,
const
mkldnn
::
engine
&
engine
)
const
{
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
padding_left_top
,
const
std
::
vector
<
int
>&
padding_right_bot
,
const
std
::
vector
<
int
>&
kernel
,
const
std
::
string
&
pooling_type
,
const
mkldnn
::
engine
&
engine
,
bool
ceil_mode
)
const
{
auto
pool_desc
=
mkldnn
::
pooling_forward
::
desc
(
mkldnn
::
prop_kind
::
forward
,
pooling_type
==
"max"
?
mkldnn
::
algorithm
::
pooling_max
:
mkldnn
::
algorithm
::
pooling_avg
,
src
,
dst
,
stride
,
kernel
,
padding
,
padding
,
mkldnn
::
padding_kind
::
zero
);
src
,
dst
,
stride
,
kernel
,
padding_left_top
,
padding_right_bot
,
mkldnn
::
padding_kind
::
zero
);
auto
p_pool_pd
=
new
mkldnn
::
pooling_forward
::
primitive_desc
(
pool_desc
,
engine
);
...
...
paddle/fluid/operators/read_op.cc
浏览文件 @
d94920ce
...
...
@@ -45,10 +45,12 @@ class ReadInferVarType : public framework::VarTypeInference {
framework
::
VarDesc
*
reader
=
block
->
FindVarRecursive
(
reader_name
);
auto
dtypes
=
reader
->
GetDataTypes
();
PADDLE_ENFORCE_EQ
(
dtypes
.
size
(),
out_names
.
size
());
auto
lod_levels
=
reader
->
GetLoDLevels
();
for
(
size_t
i
=
0
;
i
<
dtypes
.
size
();
++
i
)
{
framework
::
VarDesc
&
out
=
block
->
FindRecursiveOrCreateVar
(
out_names
[
i
]);
out
.
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
out
.
SetDataType
(
dtypes
[
i
]);
out
.
SetLoDLevel
(
lod_levels
[
i
]);
}
}
};
...
...
paddle/fluid/operators/sequence_slice_op.h
浏览文件 @
d94920ce
...
...
@@ -75,11 +75,11 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
}
for
(
size_t
i
=
0
;
i
<
n
;
++
i
)
{
PADDLE_ENFORCE_L
T
(
0
,
offset_data
[
i
],
PADDLE_ENFORCE_L
E
(
0
,
offset_data
[
i
],
"The offset[%d] must greater than zero."
,
i
);
PADDLE_ENFORCE_LT
(
0
,
length_data
[
i
],
"The length[%d] must greater than zero."
,
i
);
PADDLE_ENFORCE_L
T
(
lod
[
0
][
i
]
+
offset_data
[
i
]
+
length_data
[
i
],
PADDLE_ENFORCE_L
E
(
lod
[
0
][
i
]
+
offset_data
[
i
]
+
length_data
[
i
],
lod
[
0
][
i
+
1
],
"The target tensor's length overflow."
);
}
...
...
paddle/fluid/operators/sgd_op.cu
浏览文件 @
d94920ce
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#
define EIGEN_USE_GPU
#
include <algorithm>
#include "paddle/fluid/operators/sgd_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
...
...
@@ -33,22 +33,21 @@ __global__ void SGDKernel(const T* g, const T* p, const T* learning_rate,
}
}
template
<
typename
T
,
int
block_size
>
template
<
typename
T
>
__global__
void
SparseSGDFunctorKernel
(
const
T
*
selected_rows
,
const
int64_t
*
rows
,
const
T
*
learning_rate
,
T
*
tensor_out
,
int64_t
row_numel
)
{
const
int
ty
=
blockIdx
.
y
;
int
tid
=
threadIdx
.
x
;
selected_rows
+=
ty
*
row_numel
;
tensor_out
+=
rows
[
ty
]
*
row_numel
;
for
(
int
index
=
tid
;
index
<
row_numel
;
index
+=
block_size
)
{
// Since index in rows of SelectedRows can be duplicate, we have to use
// Atomic Operation to avoid concurrent write error.
paddle
::
platform
::
CudaAtomicAdd
(
tensor_out
+
index
,
-
1.0
*
learning_rate
[
0
]
*
selected_rows
[
index
]);
int64_t
row_numel
,
int64_t
limit
)
{
for
(
int64_t
i
=
blockIdx
.
x
;
i
<
limit
;
i
+=
gridDim
.
x
)
{
const
T
*
selected_rows_ptr
=
selected_rows
+
i
*
row_numel
;
T
*
tensor_out_ptr
=
tensor_out
+
rows
[
i
]
*
row_numel
;
for
(
int64_t
index
=
threadIdx
.
x
;
index
<
row_numel
;
index
+=
blockDim
.
x
)
{
// Since index in rows of SelectedRows can be duplicate, we have to use
// Atomic Operation to avoid concurrent write error.
paddle
::
platform
::
CudaAtomicAdd
(
tensor_out_ptr
+
index
,
-
1.0
*
learning_rate
[
0
]
*
selected_rows_ptr
[
index
]);
}
}
}
}
// namespace
...
...
@@ -97,13 +96,15 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
const
int
block_size
=
256
;
dim3
threads
(
block_size
,
1
);
dim3
grid
(
1
,
in_rows
.
size
());
SparseSGDFunctorKernel
<
T
,
256
><<<
grid
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
const
int
kThreadsPerBlock
=
256
;
int
thread_x
=
kThreadsPerBlock
;
int
max_threads
=
ctx
.
cuda_device_context
().
GetMaxPhysicalThreadCount
();
int
max_blocks
=
std
::
max
(
max_threads
/
kThreadsPerBlock
,
1
);
SparseSGDFunctorKernel
<<<
max_blocks
,
thread_x
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
in_data
,
in_rows
.
CUDAData
(
ctx
.
GetPlace
()),
learning_rate
->
data
<
T
>
(),
out_data
,
in_row_numel
);
out_data
,
in_row_numel
,
in_rows
.
size
()
);
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
...
...
paddle/fluid/operators/shrink_rnn_memory_op.cc
浏览文件 @
d94920ce
...
...
@@ -52,16 +52,26 @@ class ShrinkRNNMemoryOp : public ArrayOp {
size_t
height
=
dst_num_rows
;
// do shrink for the top level LoD
if
(
x_tensor
.
lod
().
size
()
>
0
&&
x_tensor
.
lod
()[
0
].
size
()
>
static_cast
<
size_t
>
(
dst_num_rows
))
{
auto
lod_offset
=
framework
::
GetSubLoDAndAbsoluteOffset
(
x_tensor
.
lod
(),
0
,
dst_num_rows
,
0
);
height
=
lod_offset
.
second
.
second
;
auto
out_lod
=
out_tensor
.
mutable_lod
();
framework
::
AppendLoD
(
out_lod
,
lod_offset
.
first
);
if
(
x_tensor
.
lod
().
size
()
>
1
)
{
// MultiLevel LoD
auto
lod_offset
=
framework
::
GetSubLoDAndAbsoluteOffset
(
x_tensor
.
lod
(),
0
,
dst_num_rows
,
0
);
height
=
lod_offset
.
second
.
second
;
auto
out_lod
=
out_tensor
.
mutable_lod
();
framework
::
AppendLoD
(
out_lod
,
lod_offset
.
first
);
}
else
{
// Shrink LoD
auto
lod_item
=
x_tensor
.
lod
()[
0
];
lod_item
.
resize
(
dst_num_rows
+
1
);
out_tensor
.
set_lod
({
lod_item
});
const
auto
&
const_lod_item
=
lod_item
;
height
=
const_lod_item
.
back
();
}
}
if
(
dst_num_rows
!=
0
)
{
if
(
height
!=
0
)
{
out_tensor
.
mutable_data
(
place
,
x_tensor
.
type
());
auto
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
);
framework
::
TensorCopy
(
x_tensor
.
Slice
(
0
,
height
),
place
,
*
dev_ctx
,
...
...
@@ -134,8 +144,11 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
}
else
{
auto
&
dout_tensor
=
dout_var
->
Get
<
framework
::
LoDTensor
>
();
auto
height
=
dout_tensor
.
dims
()[
0
];
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
framework
::
TensorCopy
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
if
(
height
!=
0
)
{
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
framework
::
TensorCopy
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
}
if
(
dx_tensor
.
dims
()[
0
]
>
height
)
{
auto
rest_tensor
=
dx_tensor
.
Slice
(
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
dx_tensor
.
dims
()[
0
]));
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
d94920ce
...
...
@@ -36,7 +36,7 @@ namespace operators {
using
FluidDT
=
framework
::
proto
::
VarType_Type
;
using
TRT_DT
=
nvinfer1
::
DataType
;
namespace
{
namespace
{
// NOLINT
TRT_DT
FluidDataType2TRT
(
FluidDT
type
)
{
switch
(
type
)
{
...
...
paddle/fluid/operators/top_k_op.cc
浏览文件 @
d94920ce
...
...
@@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel {
"Output(Indices) of TopkOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
input_dims
.
size
(),
2
,
"Rank of TopK op's input must be 2."
);
const
int
k
=
static_cast
<
int
>
(
ctx
->
Attrs
().
Get
<
int
>
(
"k"
));
PADDLE_ENFORCE_GE
(
k
,
1
,
"k must >= 1"
);
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
d94920ce
...
...
@@ -201,6 +201,7 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
compute_capability
=
GetCUDAComputeCapability
(
place_
.
device
);
multi_process
=
GetCUDAMultiProcessors
(
place_
.
device
);
max_threads_per_mp
=
GetCUDAMaxThreadsPerMultiProcessor
(
place_
.
device
);
grid_max_dims_
=
GpuMaxGridDim
(
place_
.
device
);
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream_
));
eigen_stream_
.
reset
(
new
EigenCudaStreamDevice
());
eigen_stream_
->
Reinitialize
(
&
stream_
,
place
);
...
...
@@ -239,6 +240,10 @@ int CUDADeviceContext::GetMaxPhysicalThreadCount() const {
return
multi_process
*
max_threads_per_mp
;
}
// Returns the grid_max_dims_ tuple that the constructor filled from
// GpuMaxGridDim(place_.device).
std::tuple<int, int, int> CUDADeviceContext::GetMaxGridDims() const {
  return grid_max_dims_;
}
Eigen
::
GpuDevice
*
CUDADeviceContext
::
eigen_device
()
const
{
return
eigen_device_
.
get
();
}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
d94920ce
...
...
@@ -13,6 +13,7 @@ limitations under the License. */
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
...
...
@@ -91,6 +92,8 @@ class CUDADeviceContext : public DeviceContext {
/*! \brief Return the max physical thread count in the device context */
int
GetMaxPhysicalThreadCount
()
const
;
std
::
tuple
<
int
,
int
,
int
>
GetMaxGridDims
()
const
;
/*! \brief Return eigen device in the device context. */
Eigen
::
GpuDevice
*
eigen_device
()
const
;
...
...
@@ -135,6 +138,8 @@ class CUDADeviceContext : public DeviceContext {
cudaStream_t
stream_
;
cublasHandle_t
cublas_handle_
;
std
::
tuple
<
int
,
int
,
int
>
grid_max_dims_
;
int
compute_capability
;
int
multi_process
;
int
max_threads_per_mp
;
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
d94920ce
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#ifdef PADDLE_WITH_CUDA
...
...
@@ -47,7 +48,7 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h"
#if !defined(__APPLE__)
and
!defined(_WIN32)
#if !defined(__APPLE__)
&&
!defined(_WIN32)
#include "paddle/fluid/platform/dynload/nccl.h"
#endif // __APPLE__
#endif // PADDLE_WITH_CUDA
...
...
@@ -216,7 +217,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
#endif
}
#if !defined(__APPLE__)
and
!defined(_WIN32)
#if !defined(__APPLE__)
&&
!defined(_WIN32)
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
ncclResult_t
stat
,
const
Args
&
...
args
)
{
...
...
@@ -260,14 +261,8 @@ inline void throw_on_error(T e) {
} \
} while (false)
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
__LINE__); \
} while (false)
#else
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__)
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__)
;
#endif // REPLACE_ENFORCE_GLOG
#else // !_WIN32
...
...
@@ -281,6 +276,12 @@ inline void throw_on_error(T e) {
#define PADDLE_ENFORCE(x, ...) x
#endif // !_WIN32
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
__LINE__); \
} while (false)
/*
* Some enforce helpers here, usage:
* int a = 1;
...
...
@@ -294,7 +295,7 @@ inline void throw_on_error(T e) {
* extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/
#if !defined(_WIN32)
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
...
...
@@ -307,6 +308,7 @@ inline void throw_on_error(T e) {
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
...
...
@@ -326,6 +328,27 @@ inline void throw_on_error(T e) {
paddle::string::Sprintf("" __VA_ARGS__)); \
} \
} while (0)
#else
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0) == (__VAL1))
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0) != (__VAL1))
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0) > (__VAL1))
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0) >= (__VAL1))
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0) < (__VAL1))
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0) <= (__VAL1))
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (!((__VAL0)__CMP(__VAL1))) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed."); \
} \
} while (0)
#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \
do { \
if (nullptr == (__VAL1)) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed"); \
} \
} while (0)
#endif // !_WIN32
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/for_range.h
浏览文件 @
d94920ce
...
...
@@ -48,35 +48,54 @@ __global__ static void ForRangeElemwiseOpGridIsOne(Function func) {
}
template
<
typename
Function
>
__global__
static
void
ForRangeElemwiseOp
(
Function
func
,
in
t
limit
)
{
__global__
static
void
ForRangeElemwiseOp
(
Function
func
,
size_
t
limit
)
{
size_t
idx
=
static_cast
<
size_t
>
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
if
(
idx
<
limit
)
{
func
(
idx
);
}
}
// Calls func(idx) for every idx in [0, limit) when the range needs more
// blocks than the kernel is launched with.
//
// `grid_dim` is the number of blocks the kernel was launched with. Each
// thread starts at its global index and advances by the total number of
// launched threads (grid_dim * blockDim.x), so every index is visited exactly
// once. Advancing by grid_dim alone would make threads revisit indices owned
// by other threads and invoke func repeatedly for the same idx.
template <typename Function>
__global__ static void ForRangeElemwiseOpGridLarge(Function func, size_t limit,
                                                   int grid_dim) {
  // Widen before multiplying so the product cannot wrap in 32 bits.
  size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t stride = static_cast<size_t>(grid_dim) * blockDim.x;
  while (idx < limit) {
    func(idx);
    idx += stride;
  }
}
template
<
>
struct
ForRange
<
CUDADeviceContext
>
{
ForRange
(
const
CUDADeviceContext
&
dev_ctx
,
size_t
limit
)
:
dev_ctx_
(
dev_ctx
),
limit_
(
static_cast
<
int
>
(
limit
)
)
{}
:
dev_ctx_
(
dev_ctx
),
limit_
(
limit
)
{}
template
<
typename
Function
>
inline
void
operator
()(
Function
func
)
const
{
constexpr
int
num_threads
=
1024
;
int
block_size
=
limit_
<=
num_threads
?
limit_
:
num_threads
;
int
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
if
(
grid_size
==
1
)
{
ForRangeElemwiseOpGridIsOne
<<<
1
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
);
size_t
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
int
max_grid_dim
=
std
::
get
<
0
>
(
dev_ctx_
.
GetMaxGridDims
());
if
(
grid_size
<
max_grid_dim
)
{
int
grid_size_int
=
static_cast
<
int
>
(
grid_size
);
if
(
grid_size
==
1
)
{
ForRangeElemwiseOpGridIsOne
<<<
1
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
);
}
else
{
ForRangeElemwiseOp
<<<
grid_size_int
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
);
}
}
else
{
ForRangeElemwiseOp
<<<
grid_size
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
);
ForRangeElemwiseOpGridLarge
<<<
max_grid_dim
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
,
max_grid_dim
);
}
}
const
CUDADeviceContext
&
dev_ctx_
;
in
t
limit_
;
size_
t
limit_
;
};
#endif
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
d94920ce
...
...
@@ -152,5 +152,22 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
PADDLE_ENFORCE
(
cudaMemsetAsync
(
dst
,
value
,
count
,
stream
),
"cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync"
);
}
std
::
tuple
<
int
,
int
,
int
>
GpuMaxGridDim
(
int
id
)
{
std
::
tuple
<
int
,
int
,
int
>
result
;
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
0
>
(
result
),
cudaDevAttrMaxBlockDimX
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
1
>
(
result
),
cudaDevAttrMaxBlockDimY
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
2
>
(
result
),
cudaDevAttrMaxBlockDimZ
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
return
result
;
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/gpu_info.h
浏览文件 @
d94920ce
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <cuda_runtime.h>
#include <stddef.h>
#include <string>
#include <tuple>
namespace
paddle
{
namespace
platform
{
...
...
@@ -72,6 +73,8 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
//! Set memory dst with value count size asynchronously
void
GpuMemsetAsync
(
void
*
dst
,
int
value
,
size_t
count
,
cudaStream_t
stream
);
std
::
tuple
<
int
,
int
,
int
>
GpuMaxGridDim
(
int
id
);
}
// namespace platform
}
// namespace paddle
...
...
paddle/fluid/pybind/const_value.cc
浏览文件 @
d94920ce
...
...
@@ -48,6 +48,9 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker
.
def
(
"kOpNameScopeAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
op_proto_and_checker_maker
.
def
(
"kOpCreationCallstackAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
);
}
}
// namespace pybind
...
...
paddle/fluid/train/CMakeLists.txt
0 → 100644
浏览文件 @
d94920ce
# train_test(<TARGET_NAME> [ARGS <arg>...])
#
# Registers one cc_test per entry of ARGS, named
# test_train_<TARGET_NAME>_<arg>, all built from test_train_<TARGET_NAME>.cc.
# When ARGS is empty a single test named test_train_<TARGET_NAME> is
# registered. Each test is passed --dirname pointing at
# <PYTHON_TESTS_DIR>/book/<TARGET_NAME><suffix>.train.model/ and is marked as
# depending on test_<TARGET_NAME>.
function(train_test TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs ARGS)
  cmake_parse_arguments(train_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
  # Build the list of name suffixes: "_<arg>" per argument, or a lone "_"
  # placeholder when no arguments were given.
  set(arg_list "")
  if(train_test_ARGS)
    foreach(arg ${train_test_ARGS})
      list(APPEND arg_list "_${arg}")
    endforeach()
  else()
    list(APPEND arg_list "_")
  endif()
  foreach(arg ${arg_list})
    # Turn the lone "_" placeholder back into an empty suffix.
    string(REGEX REPLACE "^_$" "" arg "${arg}")
    cc_test(test_train_${TARGET_NAME}${arg}
            SRCS test_train_${TARGET_NAME}.cc
            DEPS paddle_fluid_origin
            ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.train.model/)
    set_tests_properties(test_train_${TARGET_NAME}${arg}
            PROPERTIES DEPENDS test_${TARGET_NAME})
  endforeach()
endfunction(train_test)
# Register the recognize_digits training tests (mlp and conv variants) only
# when testing is enabled.
if(WITH_TESTING)
  train_test(recognize_digits ARGS mlp conv)
endif()
paddle/fluid/train/test_train_recognize_digits.cc
0 → 100644
浏览文件 @
d94920ce
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <time.h>
#include <fstream>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the train model."
);
namespace
paddle
{
// Loads the combined training program stored under FLAGS_dirname, feeds it a
// constant synthetic batch, executes it repeatedly on the CPU and expects the
// loss observed after the last iteration to be lower than the loss observed
// after the first one.
//
// The loss variable is taken to be the first "Out" output of the first "mean"
// operator found in block 0 of the loaded program.
void Train() {
  // Shape of the synthetic batch written to the "img" / "label" variables.
  constexpr int kBatchSize = 64;
  constexpr int kChannels = 1;
  constexpr int kImageHeight = 28;
  constexpr int kImageWidth = 28;
  constexpr int kImageElements =
      kBatchSize * kChannels * kImageHeight * kImageWidth;
  constexpr int kLabelWidth = 1;
  constexpr int kLabelElements = kBatchSize * kLabelWidth;
  // Number of times the training program is executed.
  constexpr int kIterations = 100;

  CHECK(!FLAGS_dirname.empty());
  framework::InitDevices(false);
  const auto cpu_place = platform::CPUPlace();
  framework::Executor executor(cpu_place);
  framework::Scope scope;

  // The file names are appended to FLAGS_dirname without any separator, so
  // the flag value has to end with a path separator already.
  auto train_program = inference::Load(
      &executor, &scope, FLAGS_dirname + "__model_combined__.main_program",
      FLAGS_dirname + "__params_combined__");

  // Locate the loss: the output of the first "mean" op in the main block.
  std::string loss_name;
  for (auto* op_desc : train_program->Block(0).AllOps()) {
    if (op_desc->Type() == "mean") {
      loss_name = op_desc->Output("Out")[0];
      break;
    }
  }
  PADDLE_ENFORCE_NE(loss_name, "", "loss not found");

  // Prepare data: an all-ones image batch ...
  auto* x_var = scope.Var("img");
  auto* x_tensor = x_var->GetMutable<framework::LoDTensor>();
  x_tensor->Resize({kBatchSize, kChannels, kImageHeight, kImageWidth});
  auto* x_data = x_tensor->mutable_data<float>(cpu_place);
  for (int i = 0; i < kImageElements; ++i) {
    x_data[i] = 1.0;
  }

  // ... and a label column where every entry is 1.
  auto* y_var = scope.Var("label");
  auto* y_tensor = y_var->GetMutable<framework::LoDTensor>();
  y_tensor->Resize({kBatchSize, kLabelWidth});
  auto* y_data = y_tensor->mutable_data<int64_t>(cpu_place);
  for (int i = 0; i < kLabelElements; ++i) {
    y_data[i] = static_cast<int64_t>(1);
  }

  auto* loss_var = scope.Var(loss_name);
  float first_loss = 0.0;
  float last_loss = 0.0;
  for (int i = 0; i < kIterations; ++i) {
    // NOTE(review): the trailing arguments are presumably block id, "create
    // local scope" and "create vars" -- confirm against Executor::Run.
    executor.Run(*train_program, &scope, 0, false, true);
    if (i == 0) {
      first_loss = loss_var->Get<framework::LoDTensor>().data<float>()[0];
    } else if (i == kIterations - 1) {
      last_loss = loss_var->Get<framework::LoDTensor>().data<float>()[0];
    }
  }

  // Training on the same batch repeatedly must reduce the loss.
  EXPECT_LT(last_loss, first_loss);
}
// gtest entry point: runs the training loop once; the loss comparison inside
// Train() is what makes the test pass or fail.
TEST(train, recognize_digits) { Train(); }
}
// namespace paddle
paddle/scripts/paddle_build.sh
浏览文件 @
d94920ce
...
...
@@ -147,6 +147,7 @@ function cmake_gen() {
-DINFERENCE_DEMO_INSTALL_DIR=
${
INFERENCE_DEMO_INSTALL_DIR
}
-DWITH_ANAKIN=
${
WITH_ANAKIN
:-
OFF
}
-DPY_VERSION=
${
PY_VERSION
:-
2
.7
}
-DCMAKE_INSTALL_PREFIX=
${
INSTALL_PREFIX
:-
/paddle/build
}
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
...
...
@@ -178,7 +179,8 @@ EOF
-DWITH_INFERENCE_API_TEST
=
${
WITH_INFERENCE_API_TEST
:-
ON
}
\
-DINFERENCE_DEMO_INSTALL_DIR
=
${
INFERENCE_DEMO_INSTALL_DIR
}
\
-DWITH_ANAKIN
=
${
WITH_ANAKIN
:-
OFF
}
\
-DPY_VERSION
=
${
PY_VERSION
:-
2
.7
}
-DPY_VERSION
=
${
PY_VERSION
:-
2
.7
}
\
-DCMAKE_INSTALL_PREFIX
=
${
INSTALL_PREFIX
:-
/paddle/build
}
}
...
...
@@ -361,7 +363,7 @@ EOF
ctest
--output-on-failure
# make install should also be test when unittest
make
install
-j
`
nproc
`
pip
install
/usr/local
/opt/paddle/share/wheels/
*
.whl
pip
install
${
INSTALL_PREFIX
:-
/paddle/build
}
/opt/paddle/share/wheels/
*
.whl
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
paddle version
fi
...
...
python/paddle/dataset/wmt14.py
浏览文件 @
d94920ce
...
...
@@ -89,7 +89,8 @@ def reader_creator(tar_file, file_name, dict_size):
]
for
name
in
names
:
for
line
in
f
.
extractfile
(
name
):
line_split
=
line
.
strip
().
split
(
six
.
b
(
'
\t
'
))
line
=
cpt
.
to_text
(
line
)
line_split
=
line
.
strip
().
split
(
'
\t
'
)
if
len
(
line_split
)
!=
2
:
continue
src_seq
=
line_split
[
0
]
# one source sequence
...
...
python/paddle/dataset/wmt16.py
浏览文件 @
d94920ce
...
...
@@ -64,7 +64,8 @@ def __build_dict(tar_file, dict_size, save_path, lang):
word_dict
=
defaultdict
(
int
)
with
tarfile
.
open
(
tar_file
,
mode
=
"r"
)
as
f
:
for
line
in
f
.
extractfile
(
"wmt16/train"
):
line_split
=
line
.
strip
().
split
(
six
.
b
(
"
\t
"
))
line
=
cpt
.
to_text
(
line
)
line_split
=
line
.
strip
().
split
(
"
\t
"
)
if
len
(
line_split
)
!=
2
:
continue
sen
=
line_split
[
0
]
if
lang
==
"en"
else
line_split
[
1
]
for
w
in
sen
.
split
():
...
...
@@ -123,7 +124,8 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
with
tarfile
.
open
(
tar_file
,
mode
=
"r"
)
as
f
:
for
line
in
f
.
extractfile
(
file_name
):
line_split
=
line
.
strip
().
split
(
six
.
b
(
"
\t
"
))
line
=
cpt
.
to_text
(
line
)
line_split
=
line
.
strip
().
split
(
"
\t
"
)
if
len
(
line_split
)
!=
2
:
continue
src_words
=
line_split
[
src_col
].
split
()
...
...
python/paddle/fluid/framework.py
浏览文件 @
d94920ce
...
...
@@ -18,6 +18,7 @@ import collections
import
contextlib
import
re
import
six
import
traceback
import
numpy
as
np
...
...
@@ -34,6 +35,8 @@ except ImportError as e:
except
Exception
as
e
:
raise
e
from
.
import
unique_name
import
os
PADDLE_ON_MODEL_CE
=
os
.
environ
.
get
(
'PADDLE_ON_MODEL_CE'
,
None
)
is
not
None
__all__
=
[
'Program'
,
...
...
@@ -489,7 +492,8 @@ class OpProtoHolder(object):
return
{
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
()
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpCreationCallstackAttrName
()
}
...
...
@@ -572,6 +576,11 @@ class Operator(object):
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
del
op_attrs
[
role_var_name
]
if
not
PADDLE_ON_MODEL_CE
:
callstack_var_name
=
op_maker
.
kOpCreationCallstackAttrName
()
op_attrs
[
callstack_var_name
]
=
list
(
reversed
(
traceback
.
format_stack
()))[
1
:]
if
len
(
self
.
desc
.
type
())
!=
0
:
return
if
type
is
None
:
...
...
python/paddle/fluid/io.py
浏览文件 @
d94920ce
...
...
@@ -600,7 +600,7 @@ def save_inference_model(dirname,
"""
if
isinstance
(
feeded_var_names
,
six
.
string_types
):
feeded_var_names
=
[
feeded_var_names
]
el
se
:
el
if
export_for_deployment
:
if
len
(
feeded_var_names
)
>
0
:
# TODO(paddle-dev): polish these code blocks
if
not
(
bool
(
feeded_var_names
)
and
all
(
...
...
@@ -610,61 +610,60 @@ def save_inference_model(dirname,
if
isinstance
(
target_vars
,
Variable
):
target_vars
=
[
target_vars
]
el
se
:
el
if
export_for_deployment
:
if
not
(
bool
(
target_vars
)
and
all
(
isinstance
(
var
,
Variable
)
for
var
in
target_vars
)):
raise
ValueError
(
"'target_vars' should be a list of Variable."
)
if
main_program
is
None
:
main_program
=
default_main_program
()
copy_program
=
main_program
.
clone
()
# if there is lookup table, the trainer 0 will notify all pserver to save.
if
main_program
.
_is_distributed
and
main_program
.
_is_chief
and
main_program
.
_distributed_lookup_table
:
lookup_table_filename
=
os
.
path
.
join
(
dirname
,
"__lookup_table__"
)
_save_lookup_tables_by_notify
(
executor
,
lookup_table_filename
,
main_program
.
_distributed_lookup_table
,
main_program
.
_endpoints
)
if
not
os
.
path
.
isdir
(
dirname
):
os
.
makedirs
(
dirname
)
if
model_filename
is
not
None
:
model_basename
=
os
.
path
.
basename
(
model_filename
)
else
:
model_basename
=
"__model__"
model_basename
=
os
.
path
.
join
(
dirname
,
model_basename
)
# When export_for_deployment is true, we modify the program online so that
# it can only be loaded for inference directly. If it's false, the whole
# original program and related meta are saved so that future usage can be
# more flexible.
if
export_for_deployment
:
global_block
=
copy_program
.
global_block
()
main_program
=
main_program
.
clone
()
global_block
=
main_program
.
global_block
()
for
i
,
op
in
enumerate
(
global_block
.
ops
):
op
.
desc
.
set_is_target
(
False
)
if
op
.
type
==
"feed"
or
op
.
type
==
"fetch"
:
global_block
.
_remove_op
(
i
)
copy
_program
.
desc
.
flush
()
main
_program
.
desc
.
flush
()
pruned_program
=
copy
_program
.
_prune
(
targets
=
target_vars
)
saved_program
=
pruned
_program
.
_inference_optimize
(
prune_read_op
=
True
)
main_program
=
main
_program
.
_prune
(
targets
=
target_vars
)
main_program
=
main
_program
.
_inference_optimize
(
prune_read_op
=
True
)
fetch_var_names
=
[
v
.
name
for
v
in
target_vars
]
prepend_feed_ops
(
saved_program
,
feeded_var_names
)
append_fetch_ops
(
saved_program
,
fetch_var_names
)
prepend_feed_ops
(
main_program
,
feeded_var_names
)
append_fetch_ops
(
main_program
,
fetch_var_names
)
with
open
(
model_basename
,
"wb"
)
as
f
:
f
.
write
(
main_program
.
desc
.
serialize_to_string
())
else
:
# TODO(panyx0718): Save more information so that it can also be used
# for training and more flexible post-processing.
saved_program
=
copy_program
if
model_filename
is
not
None
:
model_filename
=
os
.
path
.
basename
(
model_filename
)
else
:
model_filename
=
"__model__"
model_filename
=
os
.
path
.
join
(
dirname
,
model_filename
)
with
open
(
model_basename
+
".main_program"
,
"wb"
)
as
f
:
f
.
write
(
main_program
.
desc
.
serialize_to_string
())
if
params_filename
is
not
None
:
params_filename
=
os
.
path
.
basename
(
params_filename
)
with
open
(
model_filename
,
"wb"
)
as
f
:
f
.
write
(
saved_program
.
desc
.
serialize_to_string
())
save_persistables
(
executor
,
dirname
,
saved_program
,
params_filename
)
# if there is lookup table, the trainer 0 will notify all pserver to save.
if
main_program
.
_is_distributed
and
main_program
.
_is_chief
and
main_program
.
_distributed_lookup_table
:
lookup_table_filename
=
os
.
path
.
join
(
dirname
,
"__lookup_table__"
)
_save_lookup_tables_by_notify
(
executor
,
lookup_table_filename
,
main_program
.
_distributed_lookup_table
,
main_program
.
_endpoints
)
save_persistables
(
executor
,
dirname
,
main_program
,
params_filename
)
def
load_inference_model
(
dirname
,
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
d94920ce
...
...
@@ -311,6 +311,7 @@ def _copy_reader_var_(block, var):
new_var
=
block
.
create_var
(
name
=
var
.
name
,
type
=
core
.
VarDesc
.
VarType
.
READER
)
new_var
.
desc
.
set_shapes
(
var
.
desc
.
shapes
())
new_var
.
desc
.
set_dtypes
(
var
.
desc
.
dtypes
())
new_var
.
desc
.
set_lod_levels
(
var
.
desc
.
lod_levels
())
new_var
.
persistable
=
True
return
new_var
...
...
@@ -632,6 +633,7 @@ def py_reader(capacity,
})
startup_var
.
desc
.
set_dtypes
(
dtypes
)
startup_var
.
desc
.
set_lod_levels
(
lod_levels
)
startup_var
.
persistable
=
True
main_prog_var
=
_copy_reader_var_
(
default_main_program
().
current_block
(),
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
d94920ce
...
...
@@ -6471,12 +6471,14 @@ def _elementwise_op(helper):
assert
y
is
not
None
,
'y cannot be None in {}'
.
format
(
op_type
)
axis
=
helper
.
kwargs
.
get
(
'axis'
,
-
1
)
use_mkldnn
=
helper
.
kwargs
.
get
(
'use_mkldnn'
,
False
)
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
out
=
helper
.
kwargs
.
get
(
'out'
,
None
)
if
out
is
None
:
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
op_type
,
...
...
@@ -6489,7 +6491,13 @@ def _elementwise_op(helper):
@
templatedoc
()
def
scale
(
x
,
scale
=
1.0
,
bias
=
0.0
,
bias_after_scale
=
True
,
act
=
None
,
name
=
None
):
def
scale
(
x
,
scale
=
1.0
,
bias
=
0.0
,
bias_after_scale
=
True
,
out
=
None
,
act
=
None
,
name
=
None
):
"""
${comment}
...
...
@@ -6498,6 +6506,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
scale(${scale_type}): ${scale_comment}
bias(${bias_type}): ${bias_comment}
bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
out(Tensor): Output tensor.
act(basestring|None): Activation applied to the output.
name(basestring|None): Name of the output.
...
...
@@ -6506,11 +6515,12 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
"""
helper
=
LayerHelper
(
'scale'
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
if
out
is
None
:
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
'scale'
,
...
...
@@ -6524,31 +6534,73 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
return
helper
.
append_activation
(
out
)
def
elementwise_add
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_add
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_add'
,
**
locals
()))
def
elementwise_div
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_div
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_div'
,
**
locals
()))
def
elementwise_sub
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_sub
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_sub'
,
**
locals
()))
def
elementwise_mul
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_mul
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_mul'
,
**
locals
()))
def
elementwise_max
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_max
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_max'
,
**
locals
()))
def
elementwise_min
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_min
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_min'
,
**
locals
()))
def
elementwise_pow
(
x
,
y
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
def
elementwise_pow
(
x
,
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_pow'
,
**
locals
()))
...
...
@@ -6560,6 +6612,7 @@ for func in [
func
.
__doc__
=
_generate_doc_string_
(
op_proto
,
additional_args_lines
=
[
"out (Tensor): The output tensor of elementwise op."
,
"act (basestring|None): Activation applied to the output."
,
"name (basestring|None): Name of the output."
])
python/paddle/fluid/parallel_executor.py
浏览文件 @
d94920ce
...
...
@@ -74,28 +74,7 @@ class ParallelExecutor(object):
build_strategy
=
None
,
num_trainers
=
1
,
trainer_id
=
0
,
scope
=
None
,
**
kwargs
):
if
len
(
kwargs
)
!=
0
:
err_msg
=
""
for
key
in
kwargs
:
if
key
in
dir
(
ExecutionStrategy
):
err_msg
+=
\
"Setting {0} by constructor is deprecated. Use "
\
"strategy=ExecutionStrategy(); strategy.{0}=xxx; "
\
"pe=ParallelExecutor(exec_strategy=strategy) "
\
"instead.
\n
"
.
format
(
key
)
elif
key
in
dir
(
BuildStrategy
):
err_msg
+=
\
"Setting {0} by constructor is deprecated. Use "
\
"strategy=BuildStrategy(); See help("
\
"paddle.fluid.ParallelExecutor.BuildStrategy)
\n
"
.
format
(
key
)
else
:
err_msg
+=
"Setting {0} by constructor is deprecated. Use strategy.
\n
"
.
format
(
key
)
raise
ValueError
(
err_msg
)
scope
=
None
):
self
.
_places
=
[]
self
.
_act_places
=
[]
if
use_cuda
:
...
...
python/paddle/fluid/param_attr.py
浏览文件 @
d94920ce
...
...
@@ -185,7 +185,17 @@ class WeightNormParamAttr(ParamAttr):
Args:
dim(list): The parameter's name. Default None.
kwargs: Any field in ParamAttr. Default None.
name(str): The parameter's name. Default None.
initializer(Initializer): The method to initial this parameter. Default None.
learning_rate(float): The parameter's learning rate. The learning rate when
optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
Default 1.0.
regularizer(WeightDecayRegularizer): Regularization factor. Default None.
trainable(bool): Whether this parameter is trainable. Default True.
gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
gradient. Default None.
do_model_average(bool): Whether this parameter should do model average.
Default False.
Examples:
.. code-block:: python
...
...
@@ -204,6 +214,21 @@ class WeightNormParamAttr(ParamAttr):
# these parameters for inference.
params_with_weight_norm
=
[]
def
__init__
(
self
,
dim
=
None
,
**
kwargs
):
super
(
WeightNormParamAttr
,
self
).
__init__
(
**
kwargs
)
def
__init__
(
self
,
dim
=
None
,
name
=
None
,
initializer
=
None
,
learning_rate
=
1.0
,
regularizer
=
None
,
trainable
=
True
,
gradient_clip
=
None
,
do_model_average
=
False
):
super
(
WeightNormParamAttr
,
self
).
__init__
(
name
=
name
,
initializer
=
initializer
,
learning_rate
=
learning_rate
,
regularizer
=
regularizer
,
trainable
=
trainable
,
gradient_clip
=
gradient_clip
,
do_model_average
=
do_model_average
)
self
.
dim
=
dim
python/paddle/fluid/tests/book/test_recognize_digits.py
浏览文件 @
d94920ce
...
...
@@ -67,6 +67,7 @@ def train(nn_type,
use_cuda
,
parallel
,
save_dirname
=
None
,
save_full_dirname
=
None
,
model_filename
=
None
,
params_filename
=
None
,
is_local
=
True
):
...
...
@@ -143,6 +144,13 @@ def train(nn_type,
exe
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
if
save_full_dirname
is
not
None
:
fluid
.
io
.
save_inference_model
(
save_full_dirname
,
[],
[],
exe
,
model_filename
=
model_filename
,
params_filename
=
params_filename
,
export_for_deployment
=
False
)
return
else
:
print
(
...
...
@@ -214,10 +222,12 @@ def infer(use_cuda,
def
main
(
use_cuda
,
parallel
,
nn_type
,
combine
):
save_dirname
=
None
save_full_dirname
=
None
model_filename
=
None
params_filename
=
None
if
not
use_cuda
and
not
parallel
:
save_dirname
=
"recognize_digits_"
+
nn_type
+
".inference.model"
save_full_dirname
=
"recognize_digits_"
+
nn_type
+
".train.model"
if
combine
==
True
:
model_filename
=
"__model_combined__"
params_filename
=
"__params_combined__"
...
...
@@ -228,6 +238,7 @@ def main(use_cuda, parallel, nn_type, combine):
use_cuda
=
use_cuda
,
parallel
=
parallel
,
save_dirname
=
save_dirname
,
save_full_dirname
=
save_full_dirname
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
infer
(
...
...
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
浏览文件 @
d94920ce
...
...
@@ -661,22 +661,25 @@ class TestLoadSliceVar(TranspilerTest):
class
TestNCCL2Transpile
(
TranspilerTest
):
def
test_nccl2_transpile
(
self
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
self
.
net_conf
()
config
=
fluid
.
DistributeTranspilerConfig
()
config
.
mode
=
"nccl2"
t
=
fluid
.
DistributeTranspiler
(
config
=
config
)
t
.
transpile
(
0
,
trainers
=
"127.0.0.1:6174,127.0.0.1:6175"
,
current_endpoint
=
"127.0.0.1:6174"
,
startup_program
=
startup
)
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
if
fluid
.
core
.
is_compiled_with_cuda
():
#test nccl2 only with cuda
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
self
.
net_conf
()
config
=
fluid
.
DistributeTranspilerConfig
()
config
.
mode
=
"nccl2"
t
=
fluid
.
DistributeTranspiler
(
config
=
config
)
t
.
transpile
(
0
,
trainers
=
"127.0.0.1:6174,127.0.0.1:6175"
,
current_endpoint
=
"127.0.0.1:6174"
,
startup_program
=
startup
)
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
else
:
pass
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_operator_desc.py
浏览文件 @
d94920ce
...
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set
(
mul_op
.
attr_names
),
set
([
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"op_namescope"
"op_namescope"
,
"op_callstack"
]))
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录