Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
b5670d4c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b5670d4c
编写于
11月 15, 2018
作者:
W
wangguibao
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into async_executor
上级
eb6a941f
9e6b1c5f
变更
56
隐藏空白更改
内联
并排
Showing
56 changed file
with
1424 addition
and
219 deletion
+1424
-219
CMakeLists.txt
CMakeLists.txt
+2
-0
cmake/external/anakin.cmake
cmake/external/anakin.cmake
+5
-3
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+6
-2
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+10
-6
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+10
-1
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+21
-1
paddle/fluid/framework/ngraph_bridge.cc
paddle/fluid/framework/ngraph_bridge.cc
+39
-0
paddle/fluid/framework/ngraph_bridge.h
paddle/fluid/framework/ngraph_bridge.h
+58
-0
paddle/fluid/framework/ngraph_operator.cc
paddle/fluid/framework/ngraph_operator.cc
+220
-0
paddle/fluid/framework/ngraph_operator.h
paddle/fluid/framework/ngraph_operator.h
+72
-0
paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc
...uid/inference/analysis/passes/ir_analysis_compose_pass.cc
+1
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-0
paddle/fluid/inference/api/analysis_predictor_tester.cc
paddle/fluid/inference/api/analysis_predictor_tester.cc
+1
-1
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
+1
-1
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
+1
-1
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+2
-0
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+2
-2
paddle/fluid/inference/tensorrt/CMakeLists.txt
paddle/fluid/inference/tensorrt/CMakeLists.txt
+1
-0
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+6
-3
paddle/fluid/inference/tensorrt/convert/concat_op.cc
paddle/fluid/inference/tensorrt/convert/concat_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/split_op.cc
paddle/fluid/inference/tensorrt/convert/split_op.cc
+75
-0
paddle/fluid/inference/tensorrt/convert/test_split_op.cc
paddle/fluid/inference/tensorrt/convert/test_split_op.cc
+53
-0
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+6
-0
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+5
-0
paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
+1
-0
paddle/fluid/inference/tensorrt/plugin/serialize.h
paddle/fluid/inference/tensorrt/plugin/serialize.h
+111
-0
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
+81
-0
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
+74
-0
paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc
paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc
+61
-0
paddle/fluid/inference/tensorrt/plugin/trt_plugin.h
paddle/fluid/inference/tensorrt/plugin/trt_plugin.h
+80
-0
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+1
-2
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+5
-2
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+4
-2
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+4
-2
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+4
-2
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+4
-2
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+4
-2
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+6
-3
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+4
-2
paddle/fluid/inference/tests/api/config_printer.h
paddle/fluid/inference/tests/api/config_printer.h
+79
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+64
-23
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+123
-122
paddle/fluid/operators/conv_cudnn_op.cu.cc
paddle/fluid/operators/conv_cudnn_op.cu.cc
+8
-2
paddle/fluid/operators/math/softmax.cc
paddle/fluid/operators/math/softmax.cc
+4
-2
paddle/fluid/operators/math/softmax.cu
paddle/fluid/operators/math/softmax.cu
+8
-3
paddle/fluid/operators/math/softmax.h
paddle/fluid/operators/math/softmax.h
+1
-1
paddle/fluid/operators/math/softmax_impl.h
paddle/fluid/operators/math/softmax_impl.h
+37
-4
paddle/fluid/operators/softmax_op.h
paddle/fluid/operators/softmax_op.h
+6
-1
paddle/fluid/operators/softmax_with_cross_entropy_op.h
paddle/fluid/operators/softmax_with_cross_entropy_op.h
+2
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+16
-6
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+15
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+4
-4
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+1
-1
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+1
-1
python/paddle/fluid/tests/unittests/test_pass_builder.py
python/paddle/fluid/tests/unittests/test_pass_builder.py
+6
-1
未找到文件。
CMakeLists.txt
浏览文件 @
b5670d4c
...
...
@@ -66,6 +66,8 @@ option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option
(
WITH_CONTRIB
"Compile the third-party contributation"
OFF
)
option
(
REPLACE_ENFORCE_GLOG
"Replace PADDLE_ENFORCE with glog/CHECK for better debug."
OFF
)
option
(
WITH_ANAKIN
"Compile with Anakin library"
OFF
)
option
(
ANAKIN_BUILD_FAT_BIN
"Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF"
OFF
)
option
(
ANAKIN_BUILD_CROSS_PLANTFORM
"Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF"
ON
)
option
(
WITH_GRPC
"Use grpc as the default rpc framework"
${
WITH_DISTRIBUTE
}
)
option
(
WITH_BRPC_RDMA
"Use brpc rdma as the rpc protocal"
OFF
)
option
(
ON_INFER
"Turn on inference optimization."
OFF
)
...
...
cmake/external/anakin.cmake
浏览文件 @
b5670d4c
...
...
@@ -58,19 +58,21 @@ ExternalProject_Add(
-DPROTOBUF_ROOT=
${
THIRD_PARTY_PATH
}
/install/protobuf
-DMKLML_ROOT=
${
THIRD_PARTY_PATH
}
/install/mklml
-DENABLE_OP_TIMER=
${
ANAKIN_ENABLE_OP_TIMER
}
-DBUILD_FAT_BIN=
${
ANAKIN_BUILD_FAT_BIN
}
-DBUILD_CROSS_PLANTFORM=
${
ANAKIN_BUILD_CROSS_PLANTFORM
}
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
ANAKIN_INSTALL_DIR
}
)
message
(
STATUS
"Anakin for inference is enabled"
)
message
(
STATUS
"Anakin is set INCLUDE:
${
ANAKIN_INCLUDE
}
LIBRARY:
${
ANAKIN_LIBRARY
}
"
)
add_dependencies
(
extern_anakin protobuf mklml
)
add_library
(
anakin_shared SHARED IMPORTED GLOBAL
)
set_property
(
TARGET anakin_shared PROPERTY IMPORTED_LOCATION
${
ANAKIN_SHARED_LIB
}
)
add_dependencies
(
anakin_shared extern_anakin
protobuf mklml
)
add_dependencies
(
anakin_shared extern_anakin
)
add_library
(
anakin_saber SHARED IMPORTED GLOBAL
)
set_property
(
TARGET anakin_saber PROPERTY IMPORTED_LOCATION
${
ANAKIN_SABER_LIB
}
)
add_dependencies
(
anakin_saber extern_anakin
protobuf mklml
)
add_dependencies
(
anakin_saber extern_anakin
)
list
(
APPEND external_project_dependencies anakin_shared anakin_saber
)
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
b5670d4c
...
...
@@ -137,6 +137,10 @@ cc_library(version SRCS version.cc)
cc_test
(
version_test SRCS version_test.cc DEPS version
)
cc_library
(
proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version
)
cc_library
(
ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto
)
cc_library
(
ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler
)
cc_library
(
op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc
)
nv_test
(
op_registry_test SRCS op_registry_test.cc DEPS op_registry
)
...
...
@@ -164,10 +168,10 @@ if(WITH_DISTRIBUTE)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
else
()
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
ngraph_operator
)
cc_test
(
test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op
)
endif
()
if
(
NOT WIN32
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
b5670d4c
...
...
@@ -79,9 +79,15 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
BuildStrategy
strategy_
;
};
std
::
shared_ptr
<
ir
::
PassBuilder
>
BuildStrategy
::
CreatePassesFromStrategy
()
const
{
std
::
shared_ptr
<
ir
::
PassBuilder
>
BuildStrategy
::
CreatePassesFromStrategy
(
bool
finalize_strategy
)
const
{
if
(
is_finalized_
)
{
return
pass_builder_
;
}
pass_builder_
.
reset
(
new
ParallelExecutorPassBuilder
(
*
this
));
if
(
finalize_strategy
)
{
is_finalized_
=
true
;
}
return
pass_builder_
;
}
...
...
@@ -95,10 +101,8 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
#else
const
bool
use_cuda
)
const
{
#endif
// Create a default one if not initialized by user.
if
(
!
pass_builder_
)
{
CreatePassesFromStrategy
();
}
// Create a default one if not finalized by user.
CreatePassesFromStrategy
(
false
);
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
main_program
));
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
b5670d4c
...
...
@@ -75,12 +75,20 @@ struct BuildStrategy {
bool
remove_unnecessary_lock_
{
false
};
// NOTE:
// Before you add new options, think if it's a general strategy that works
// with other strategy. If not, the strategy should be created through
// CreatePassesFromStrategy and the pass can be managed separately.
// User normally doesn't need to call this API.
// The PassBuilder allows for more customized insert, remove of passes
// from python side.
// A new PassBuilder is created based on configs defined above and
// passes are owned by the PassBuilder.
std
::
shared_ptr
<
ir
::
PassBuilder
>
CreatePassesFromStrategy
()
const
;
std
::
shared_ptr
<
ir
::
PassBuilder
>
CreatePassesFromStrategy
(
bool
finalize_strategy
)
const
;
bool
IsFinalized
()
const
{
return
is_finalized_
;
}
// Apply the passes built by the pass_builder_. The passes will be
// applied to the Program and output an ir::Graph.
...
...
@@ -97,6 +105,7 @@ struct BuildStrategy {
#endif
private:
mutable
bool
is_finalized_
=
false
;
mutable
std
::
shared_ptr
<
ir
::
PassBuilder
>
pass_builder_
;
};
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
b5670d4c
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/ngraph_operator.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/operators/detail/macros.h"
...
...
@@ -25,6 +26,7 @@ limitations under the License. */
DECLARE_bool
(
benchmark
);
DEFINE_bool
(
use_mkldnn
,
false
,
"Use MKLDNN to run"
);
DEFINE_bool
(
use_ngraph
,
false
,
"Use NGRAPH to run"
);
namespace
paddle
{
namespace
framework
{
...
...
@@ -81,6 +83,24 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
}
}
static
void
EnableFusedOp
(
ExecutorPrepareContext
*
ctx
)
{
#ifdef PADDLE_WITH_NGRAPH
VLOG
(
3
)
<<
"use_ngraph=True"
;
auto
intervals
=
FusedOperator
::
FusedOpIntervals
(
&
ctx
->
ops_
);
for
(
auto
&
interval
:
intervals
)
{
auto
*
fused_op
=
new
FusedOperator
(
ctx
->
prog_
,
ctx
->
block_id_
,
interval
.
at
(
0
),
interval
.
at
(
1
));
*
interval
[
0
]
=
std
::
unique_ptr
<
OperatorBase
>
(
fused_op
);
}
for
(
auto
it
=
intervals
.
rbegin
();
it
!=
intervals
.
rend
();
++
it
)
{
ctx
->
ops_
.
erase
(
it
->
at
(
0
)
+
1
,
it
->
at
(
1
));
}
#else
LOG
(
WARNING
)
<<
"'NGRAPH' is not supported, Please re-compile with WITH_NGRAPH option"
;
#endif
}
Executor
::
Executor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
void
Executor
::
Close
()
{
...
...
@@ -338,6 +358,7 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
}
if
(
FLAGS_use_ngraph
)
EnableFusedOp
(
ctx
.
get
());
return
ctx
;
}
...
...
@@ -486,6 +507,5 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) {
<<
"'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option"
;
#endif
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ngraph_bridge.cc
0 → 100644
浏览文件 @
b5670d4c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <functional>
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "ngraph/ngraph.hpp"
namespace
paddle
{
namespace
framework
{
std
::
map
<
std
::
string
,
std
::
function
<
void
(
const
std
::
shared_ptr
<
OperatorBase
>&
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
)
>>
NgraphBridge
::
NG_NODE_MAP
=
{};
void
NgraphBridge
::
build_graph
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
)
{
auto
&
op_type
=
op
->
Type
();
NG_NODE_MAP
[
op_type
](
op
,
ngb_node_map
);
}
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/framework/ngraph_bridge.h
0 → 100644
浏览文件 @
b5670d4c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/enforce.h"
#include "ngraph/ngraph.hpp"
namespace
paddle
{
namespace
framework
{
class
NgraphBridge
{
public:
static
std
::
map
<
std
::
string
,
std
::
function
<
void
(
const
std
::
shared_ptr
<
OperatorBase
>&
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
)
>>
NG_NODE_MAP
;
explicit
NgraphBridge
(
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
var_node_map
)
:
ngb_node_map
(
var_node_map
)
{}
void
build_graph
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
);
private:
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
;
};
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/framework/ngraph_operator.cc
0 → 100644
浏览文件 @
b5670d4c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <glog/logging.h>
#include <algorithm>
#include <map>
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/ngraph_operator.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/framework/var_type.h"
namespace
paddle
{
namespace
framework
{
static
std
::
map
<
proto
::
VarType
::
Type
,
ngraph
::
element
::
Type
>
pd2ng_type_map
=
{
{
proto
::
VarType
::
FP32
,
ngraph
::
element
::
f32
},
{
proto
::
VarType
::
FP64
,
ngraph
::
element
::
f64
},
{
proto
::
VarType
::
INT32
,
ngraph
::
element
::
i32
},
{
proto
::
VarType
::
INT64
,
ngraph
::
element
::
i64
},
{
proto
::
VarType
::
BOOL
,
ngraph
::
element
::
boolean
},
};
typedef
enum
{
/* nGraph support state on ops */
FULL_TRAIN
,
/* Support full ops for train */
PARTIAL_TRAIN
,
/* Support partial ops for train */
FULL_TEST
,
/* Support full list of ops for test */
PARTIAL_TEST
/* Support partial list of ops for test */
}
op_state
;
class
NgraphOperator
{
public:
explicit
NgraphOperator
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
,
const
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>&
ops
,
const
std
::
unordered_map
<
std
::
string
,
ngraph
::
element
::
Type
>&
var_type_map
,
const
std
::
unordered_set
<
std
::
string
>&
persist
,
const
std
::
unordered_set
<
std
::
string
>&
fetches
,
const
std
::
unordered_set
<
std
::
string
>&
post_op_inputs
,
op_state
ng_op_state
)
:
scope_
(
scope
),
place_
(
place
),
fused_ops_
(
ops
),
var_type_map_
(
var_type_map
),
persistables_
(
persist
),
fetches_
(
fetches
),
post_op_inputs_
(
post_op_inputs
),
ng_op_state_
(
ng_op_state
)
{}
void
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
;
private:
static
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Function
>>
func_cache
;
const
Scope
&
scope_
;
const
platform
::
Place
&
place_
;
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>
fused_ops_
;
std
::
unordered_map
<
std
::
string
,
ngraph
::
element
::
Type
>
var_type_map_
;
std
::
unordered_set
<
std
::
string
>
persistables_
;
std
::
unordered_set
<
std
::
string
>
fetches_
;
std
::
unordered_set
<
std
::
string
>
post_op_inputs_
;
op_state
ng_op_state_
;
};
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
>>
FusedOperator
::
FusedOpIntervals
(
std
::
vector
<
std
::
unique_ptr
<
paddle
::
framework
::
OperatorBase
>>*
ops
)
{
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
>>
intervals
;
if
(
ops
->
empty
())
{
return
intervals
;
}
size_t
size
=
ops
->
size
();
size_t
left
=
0
;
while
(
left
<
size
&&
ops
.
at
(
left
)
->
Type
()
!=
kFeedOpType
)
{
++
left
;
}
if
(
left
==
size
)
{
return
intervals
;
}
while
(
left
<
size
&&
ops
->
at
(
left
)
->
Type
()
==
kFeedOpType
)
{
++
left
;
}
size_t
right
=
left
;
while
(
right
<
size
&&
ops
->
at
(
right
)
->
Type
()
!=
kFetchOpType
)
{
++
right
;
}
if
(
right
==
size
)
{
return
intervals
;
}
if
(
left
>=
right
)
return
intervals
;
// (left, right - 1) represents indices between feed and fetch
size_t
pivot
=
left
;
while
(
pivot
<
right
)
{
auto
op_type
=
ops
->
at
(
pivot
)
->
Type
();
if
(
paddle
::
framework
::
NgraphBridge
::
NG_NODE_MAP
.
find
(
op_type
)
==
paddle
::
framework
::
NgraphBridge
::
NG_NODE_MAP
.
end
())
{
++
pivot
;
}
else
{
size_t
start
=
pivot
,
end
=
start
;
while
(
pivot
<
right
&&
(
paddle
::
framework
::
NgraphBridge
::
NG_NODE_MAP
.
find
(
ops
.
at
(
pivot
)
->
Type
())
!=
paddle
::
framework
::
NgraphBridge
::
NG_NODE_MAP
.
end
()))
{
++
pivot
;
++
end
;
}
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
>
interval
=
{
ops
->
begin
()
+
start
,
ops
->
begin
()
+
end
};
intervals
.
push_back
(
interval
);
}
}
// end while
return
intervals
;
}
FusedOperator
::
FusedOperator
(
const
ProgramDesc
&
prog
,
size_t
block_id
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
start
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
end
,
const
std
::
string
&
type
,
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
),
pdesc
(
prog
),
block
(
block_id
)
{
for
(
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
it
=
start
;
it
!=
end
;
++
it
)
{
fused_ops_
.
push_back
(
std
::
move
(
*
it
));
}
for
(
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
it
=
end
;
(
*
it
)
->
Type
()
!=
kFetchOpType
;
++
it
)
{
for
(
auto
&
var_name_item
:
(
*
it
)
->
Inputs
())
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
post_op_inputs_
.
insert
(
var_name
);
}
}
}
if
((
*
(
start
-
1
))
->
Type
()
==
kFeedOpType
&&
(
*
end
)
->
Type
()
==
kFetchOpType
)
{
is_complete
=
true
;
}
Process
();
}
void
FusedOperator
::
Process
()
{
auto
&
bdesc
=
pdesc_
.
Block
(
block_
);
for
(
auto
&
var
:
bdesc
.
AllVars
())
{
if
(
!
(
var
->
GetType
()
==
proto
::
VarType
::
SELECTED_ROWS
||
var
->
GetType
()
==
proto
::
VarType
::
LOD_TENSOR
||
var
->
GetType
()
==
proto
::
VarType
::
LOD_TENSOR_ARRAY
))
{
continue
;
}
auto
var_name
=
var
->
Name
();
if
(
var
->
Name
()
==
framework
::
kEmptyVarName
)
{
continue
;
}
if
(
var_name
!=
"fetch"
&&
var_name
!=
"feed"
)
{
auto
pd_type
=
var
->
GetDataType
();
if
(
pd2ng_type_map
.
find
(
pd_type
)
==
pd2ng_type_map
.
end
())
{
PADDLE_THROW
(
"Data type of var %s not found in pd2ng_type_map"
,
var_name
);
}
var_type_map_
[
var_name
]
=
pd2ng_type_map
[
pd_type
];
}
if
(
var
->
Persistable
())
{
persistables_
.
insert
(
var
->
Name
());
}
}
for
(
auto
*
op
:
bdesc
.
AllOps
())
{
if
(
op
->
Type
()
==
kFetchOpType
)
{
std
::
string
fetch_target_name
=
op
->
Input
(
"X"
)[
0
];
fetches_
.
insert
(
fetch_target_name
);
}
}
}
void
FusedOperator
::
RunImpl
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
op_state
ng_op_state
=
PARTIAL_TEST
;
auto
&
bdesc
=
pdesc_
.
Block
(
block_
);
for
(
auto
*
op
:
bdesc
.
AllOps
())
{
if
(
op
->
Type
().
find
(
"_grad"
)
!=
std
::
string
::
npos
)
{
ng_op_state
=
PARTIAL_TRAIN
;
break
;
}
}
if
(
is_full
)
{
ng_op_state
=
ng_op_state
==
PARTIAL_TEST
?
FULL_TEST
:
FULL_TRAIN
;
}
NgraphOperator
ngraph_op
(
scope
,
place
,
fused_ops_
,
var_type_map_
,
persistables_
,
fetches_
,
post_op_inputs_
,
ng_op_state
);
ngraph_op
.
Run
(
scope
,
place
);
}
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/framework/ngraph_operator.h
0 → 100644
浏览文件 @
b5670d4c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <atomic>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/variant.h"
#include "ngraph/ngraph.hpp"
namespace
paddle
{
namespace
framework
{
class
FusedOperator
:
public
OperatorBase
{
public:
static
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
>>
FusedOpIntervals
(
std
::
vector
<
std
::
unique_ptr
<
paddle
::
framework
::
OperatorBase
>>*
ops
);
explicit
FusedOperator
(
const
ProgramDesc
&
prog
,
size_t
block_id
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
start
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
end
,
const
std
::
string
&
type
=
"fused_op"
,
const
VariableNameMap
&
inputs
=
{},
const
VariableNameMap
&
outputs
=
{},
const
AttributeMap
&
attrs
=
{});
void
RunImpl
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
final
;
private:
const
ProgramDesc
pdesc_
;
size_t
block_
;
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>
fused_ops_
;
std
::
unordered_map
<
std
::
string
,
ngraph
::
element
::
Type
>
var_type_map_
;
std
::
unordered_set
<
std
::
string
>
persistables_
;
std
::
unordered_set
<
std
::
string
>
fetches_
;
std
::
unordered_set
<
std
::
string
>
post_op_inputs_
;
bool
is_full_
=
false
;
void
Process
();
};
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc
浏览文件 @
b5670d4c
...
...
@@ -45,7 +45,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) {
std
::
unordered_set
<
std
::
string
>
teller_set
(
{
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
,
"softmax"
,
"sigmoid"
,
"depthwise_conv2d"
,
"batch_norm"
,
"concat"
,
"tanh"
,
"pad"
,
"elementwise_add"
,
"dropout"
});
"elementwise_add"
,
"dropout"
,
"split"
});
if
(
!
node
->
IsOp
())
return
false
;
if
(
teller_set
.
count
(
node
->
Op
()
->
Type
()))
{
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
b5670d4c
...
...
@@ -548,4 +548,5 @@ USE_TRT_CONVERTER(batch_norm);
USE_TRT_CONVERTER
(
concat
);
USE_TRT_CONVERTER
(
dropout
);
USE_TRT_CONVERTER
(
pad
);
USE_TRT_CONVERTER
(
split
);
#endif
paddle/fluid/inference/api/analysis_predictor_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -15,7 +15,7 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <thread>
#include <thread>
// NOLINT
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
...
...
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
浏览文件 @
b5670d4c
...
...
@@ -23,7 +23,7 @@ limitations under the License. */
#include <memory>
#include <thread> //NOLINT
#include "utils.h"
#include "utils.h"
// NOLINT
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_bool
(
use_gpu
,
false
,
"Whether use gpu."
);
...
...
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
浏览文件 @
b5670d4c
...
...
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
b5670d4c
...
...
@@ -49,6 +49,8 @@ struct AnalysisConfig : public NativeConfig {
void
EnableTensorRtEngine
(
int
workspace_size
=
1
<<
20
,
int
max_batch_size
=
1
);
bool
use_tensorrt
()
const
{
return
use_tensorrt_
;
}
// NOTE this is just for internal development, please not use it.
// NOT stable yet.
void
EnableMKLDNN
();
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
b5670d4c
...
...
@@ -91,7 +91,7 @@ class CpuPassStrategy : public PassStrategy {
virtual
~
CpuPassStrategy
()
=
default
;
v
irtual
v
oid
EnableMKLDNN
()
override
{
void
EnableMKLDNN
()
override
{
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
passes_
.
insert
(
passes_
.
begin
(),
"mkldnn_placement_pass"
);
...
...
@@ -123,7 +123,7 @@ class GpuPassStrategy : public PassStrategy {
GpuPassStrategy
(
const
GpuPassStrategy
&
other
)
:
PassStrategy
(
other
.
AllPasses
())
{}
v
irtual
v
oid
EnableMKLDNN
()
override
;
void
EnableMKLDNN
()
override
;
virtual
~
GpuPassStrategy
()
=
default
;
};
...
...
paddle/fluid/inference/tensorrt/CMakeLists.txt
浏览文件 @
b5670d4c
nv_library
(
tensorrt_engine SRCS engine.cc DEPS framework_proto device_context
)
nv_test
(
test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader
)
nv_test
(
test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine
)
add_subdirectory
(
plugin
)
add_subdirectory
(
convert
)
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
b5670d4c
# Add TRT tests
nv_library
(
tensorrt_converter
SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc pad_op.cc
DEPS tensorrt_engine operator scope framework_proto op_registry
)
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
pad_op.cc split_op.cc
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry
)
nv_test
(
test_op_converter SRCS test_op_converter.cc DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine tensorrt_converter
)
...
...
@@ -28,6 +29,8 @@ nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine concat_op SERIAL
)
nv_test
(
test_trt_dropout_op SRCS test_dropout_op.cc dropout_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine dropout_op SERIAL
)
nv_test
(
test_trt_pad_op SRCS test_pad_op.cc pad_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine pad_op SERIAL
)
nv_test
(
test_trt_split_op SRCS test_split_op.cc split_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine tensorrt_plugin
split_op concat_op SERIAL
)
paddle/fluid/inference/tensorrt/convert/concat_op.cc
浏览文件 @
b5670d4c
...
...
@@ -19,7 +19,7 @@ namespace inference {
namespace
tensorrt
{
/*
*
MulOp, IMatrixMultiplyLayer in TRT. This Layer doesn't has weights.
*
ConcatOp
*/
class
ConcatOpConverter
:
public
OpConverter
{
public:
...
...
paddle/fluid/inference/tensorrt/convert/split_op.cc
0 → 100644
浏览文件 @
b5670d4c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
/*
* SplitOp.
*/
class
SplitOpConverter
:
public
OpConverter
{
public:
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
{
VLOG
(
40
)
<<
"convert a fluid split op to tensorrt split layer"
;
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
input_dims
=
input
->
getDimensions
();
int
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
// Get Attrs
PADDLE_ENFORCE
(
input_num
==
1
);
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
std
::
vector
<
int
>
output_lengths
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"sections"
));
PADDLE_ENFORCE
(
axis
!=
0
);
if
(
axis
<
0
)
{
axis
+=
input_dims
.
nbDims
;
}
else
{
axis
-=
1
;
}
PADDLE_ENFORCE
(
output_lengths
.
size
()
==
output_num
);
//
SplitPlugin
*
plugin
=
new
SplitPlugin
(
axis
,
output_lengths
);
nvinfer1
::
IPluginLayer
*
layer
=
engine_
->
AddPlugin
(
&
input
,
input_num
,
plugin
);
std
::
string
layer_name
=
"split (Output: "
;
for
(
size_t
i
=
0
;
i
<
output_num
;
i
++
)
{
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
i
];
layer
->
getOutput
(
i
)
->
setName
(
output_name
.
c_str
());
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
i
));
layer_name
+=
output_name
;
if
(
test_mode
)
{
engine_
->
DeclareOutput
(
output_name
);
}
}
layer
->
setName
((
layer_name
+
")"
).
c_str
());
}
};
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
REGISTER_TRT_OP_CONVERTER
(
split
,
SplitOpConverter
);
paddle/fluid/inference/tensorrt/convert/test_split_op.cc
0 → 100644
浏览文件 @
b5670d4c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
TEST
(
split_op
,
test
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
""
});
framework
::
Scope
scope
;
TRTConvertValidation
validator
(
10
,
parameters
,
scope
,
1000
);
validator
.
DeclInputVar
(
"split_input"
,
nvinfer1
::
DimsCHW
(
3
,
2
,
2
));
validator
.
DeclOutputVar
(
"split_out1"
,
nvinfer1
::
DimsCHW
(
2
,
2
,
2
));
validator
.
DeclOutputVar
(
"split_out2"
,
nvinfer1
::
DimsCHW
(
1
,
2
,
2
));
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"split"
);
desc
.
SetInput
(
"X"
,
{
"split_input"
});
desc
.
SetOutput
(
"Out"
,
{
"split_out1"
,
"split_out2"
});
int
num
=
0
;
int
axis
=
1
;
std
::
vector
<
int
>
output_lengths
=
{
2
,
1
};
desc
.
SetAttr
(
"axis"
,
axis
);
desc
.
SetAttr
(
"num"
,
num
);
desc
.
SetAttr
(
"sections"
,
output_lengths
);
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
1
);
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
USE_OP
(
split
);
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
b5670d4c
...
...
@@ -255,6 +255,12 @@ void TensorRTEngine::freshDeviceId() {
cudaSetDevice
(
device_
);
}
nvinfer1
::
IPluginLayer
*
TensorRTEngine
::
AddPlugin
(
nvinfer1
::
ITensor
*
const
*
inputs
,
int
nbInputs
,
PluginTensorRT
*
plugin
)
{
owned_plugin_
.
emplace_back
(
plugin
);
return
infer_network_
.
get
()
->
addPluginExt
(
inputs
,
nbInputs
,
*
plugin
);
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
b5670d4c
...
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/tensorrt/helper.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace
paddle
{
...
...
@@ -125,6 +126,8 @@ class TensorRTEngine : public EngineBase {
void
SetRuntimeBatch
(
size_t
batch_size
);
int
GetRuntimeBatch
();
int
GetDevice
()
{
return
device_
;
}
nvinfer1
::
IPluginLayer
*
AddPlugin
(
nvinfer1
::
ITensor
*
const
*
inputs
,
int
nbInputs
,
PluginTensorRT
*
);
// A pointer to CPU memory is needed of the TRT weight.
// Before TRT runs, fluid loads weight into GPU storage.
...
...
@@ -164,8 +167,10 @@ class TensorRTEngine : public EngineBase {
std
::
unordered_map
<
std
::
string
/*name*/
,
size_t
/*max size*/
>
buffer_sizes_
;
std
::
unordered_map
<
std
::
string
/*name*/
,
nvinfer1
::
ITensor
*
/*ITensor*/
>
itensor_map_
;
// The specific GPU id that the TensorRTEngine bounded to.
int
device_
;
std
::
vector
<
std
::
unique_ptr
<
PluginTensorRT
>>
owned_plugin_
;
// TensorRT related internal members
template
<
typename
T
>
...
...
paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
0 → 100644
浏览文件 @
b5670d4c
nv_library
(
tensorrt_plugin SRCS trt_plugin.cc split_op_plugin.cu DEPS enforce
)
paddle/fluid/inference/tensorrt/plugin/serialize.h
0 → 100644
浏览文件 @
b5670d4c
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <cstring>
#include <type_traits>
#include <vector>
template
<
typename
T
>
inline
void
SerializeValue
(
void
**
buffer
,
T
const
&
value
);
template
<
typename
T
>
inline
void
DeserializeValue
(
void
const
**
buffer
,
size_t
*
buffer_size
,
T
*
value
);
namespace
{
template
<
typename
T
,
class
Enable
=
void
>
struct
Serializer
{};
template
<
typename
T
>
struct
Serializer
<
T
,
typename
std
::
enable_if
<
std
::
is_arithmetic
<
T
>::
value
||
std
::
is_enum
<
T
>::
value
||
std
::
is_pod
<
T
>::
value
>::
type
>
{
static
size_t
SerializedSize
(
T
const
&
value
)
{
return
sizeof
(
T
);
}
static
void
Serialize
(
void
**
buffer
,
T
const
&
value
)
{
std
::
memcpy
(
*
buffer
,
&
value
,
sizeof
(
T
));
reinterpret_cast
<
char
*&>
(
*
buffer
)
+=
sizeof
(
T
);
}
static
void
Deserialize
(
void
const
**
buffer
,
size_t
*
buffer_size
,
T
*
value
)
{
assert
(
*
buffer_size
>=
sizeof
(
T
));
std
::
memcpy
(
value
,
*
buffer
,
sizeof
(
T
));
reinterpret_cast
<
char
const
*&>
(
*
buffer
)
+=
sizeof
(
T
);
*
buffer_size
-=
sizeof
(
T
);
}
};
template
<
>
struct
Serializer
<
const
char
*>
{
static
size_t
SerializedSize
(
const
char
*
value
)
{
return
strlen
(
value
)
+
1
;
}
static
void
Serialize
(
void
**
buffer
,
const
char
*
value
)
{
std
::
strcpy
(
static_cast
<
char
*>
(
*
buffer
),
value
);
reinterpret_cast
<
char
*&>
(
*
buffer
)
+=
strlen
(
value
)
+
1
;
}
static
void
Deserialize
(
void
const
**
buffer
,
size_t
*
buffer_size
,
const
char
**
value
)
{
*
value
=
static_cast
<
char
const
*>
(
*
buffer
);
size_t
data_size
=
strnlen
(
*
value
,
*
buffer_size
)
+
1
;
assert
(
*
buffer_size
>=
data_size
);
reinterpret_cast
<
char
const
*&>
(
*
buffer
)
+=
data_size
;
*
buffer_size
-=
data_size
;
}
};
template
<
typename
T
>
struct
Serializer
<
std
::
vector
<
T
>
,
typename
std
::
enable_if
<
std
::
is_arithmetic
<
T
>::
value
||
std
::
is_enum
<
T
>::
value
||
std
::
is_pod
<
T
>::
value
>::
type
>
{
static
size_t
SerializedSize
(
std
::
vector
<
T
>
const
&
value
)
{
return
sizeof
(
value
.
size
())
+
value
.
size
()
*
sizeof
(
T
);
}
static
void
Serialize
(
void
**
buffer
,
std
::
vector
<
T
>
const
&
value
)
{
SerializeValue
(
buffer
,
value
.
size
());
size_t
nbyte
=
value
.
size
()
*
sizeof
(
T
);
std
::
memcpy
(
*
buffer
,
value
.
data
(),
nbyte
);
reinterpret_cast
<
char
*&>
(
*
buffer
)
+=
nbyte
;
}
static
void
Deserialize
(
void
const
**
buffer
,
size_t
*
buffer_size
,
std
::
vector
<
T
>*
value
)
{
size_t
size
;
DeserializeValue
(
buffer
,
buffer_size
,
&
size
);
value
->
resize
(
size
);
size_t
nbyte
=
value
->
size
()
*
sizeof
(
T
);
assert
(
*
buffer_size
>=
nbyte
);
std
::
memcpy
(
value
->
data
(),
*
buffer
,
nbyte
);
reinterpret_cast
<
char
const
*&>
(
*
buffer
)
+=
nbyte
;
*
buffer_size
-=
nbyte
;
}
};
}
// namespace
template
<
typename
T
>
inline
size_t
SerializedSize
(
T
const
&
value
)
{
return
Serializer
<
T
>::
SerializedSize
(
value
);
}
template
<
typename
T
>
inline
void
SerializeValue
(
void
**
buffer
,
T
const
&
value
)
{
return
Serializer
<
T
>::
Serialize
(
buffer
,
value
);
}
template
<
typename
T
>
inline
void
DeserializeValue
(
void
const
**
buffer
,
size_t
*
buffer_size
,
T
*
value
)
{
return
Serializer
<
T
>::
Deserialize
(
buffer
,
buffer_size
,
value
);
}
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
0 → 100644
浏览文件 @
b5670d4c
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <cassert>
#include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
nvinfer1
::
Dims
SplitPlugin
::
getOutputDimensions
(
int
index
,
const
nvinfer1
::
Dims
*
inputDims
,
int
nbInputs
)
{
assert
(
nbInputs
==
1
);
assert
(
index
<
this
->
getNbOutputs
());
nvinfer1
::
Dims
const
&
input_dims
=
inputDims
[
0
];
nvinfer1
::
Dims
output_dims
=
input_dims
;
output_dims
.
d
[
axis_
]
=
output_length_
.
at
(
index
);
return
output_dims
;
}
int
SplitPlugin
::
initialize
()
{
std
::
vector
<
int
>
segment_offsets
(
1
,
0
);
for
(
int
i
=
0
;
i
<
this
->
getNbOutputs
();
++
i
)
{
segment_offsets
.
push_back
(
segment_offsets
.
back
()
+
output_length_
[
i
]);
}
segment_offsets_
=
segment_offsets
;
nvinfer1
::
Dims
dims
=
this
->
getInputDims
(
0
);
nx_
=
1
;
for
(
int
i
=
dims
.
nbDims
-
1
;
i
>
axis_
;
--
i
)
{
nx_
*=
dims
.
d
[
i
];
}
ny_
=
dims
.
d
[
axis_
];
nz_
=
1
;
for
(
int
i
=
axis_
-
1
;
i
>=
0
;
--
i
)
{
nz_
*=
dims
.
d
[
i
];
}
return
0
;
}
int
SplitPlugin
::
enqueue
(
int
batchSize
,
const
void
*
const
*
inputs
,
void
**
outputs
,
void
*
workspace
,
cudaStream_t
stream
)
{
auto
const
&
input_dims
=
this
->
getInputDims
(
0
);
int
input_size
=
0
;
float
const
*
idata
=
reinterpret_cast
<
float
const
*>
(
inputs
[
0
]);
float
**
odatas
=
reinterpret_cast
<
float
**>
(
outputs
);
// kernel impl here.
int
inputBatchOffset
=
nx_
*
ny_
*
nz_
;
for
(
size_t
i
=
0
;
i
<
this
->
getNbOutputs
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
batchSize
;
j
++
)
{
cudaMemcpyAsync
(
odatas
[
i
]
+
j
*
(
segment_offsets_
[
i
+
1
]
-
segment_offsets_
[
i
])
*
nx_
*
sizeof
(
float
),
inputs
[
0
]
+
(
inputBatchOffset
*
j
+
segment_offsets_
[
i
]
*
nx_
)
*
sizeof
(
float
),
(
segment_offsets_
[
i
+
1
]
-
segment_offsets_
[
i
])
*
nx_
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
,
stream
);
}
}
return
cudaGetLastError
()
!=
cudaSuccess
;
}
}
// tensorrt
}
// inference
}
// paddle
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
0 → 100644
浏览文件 @
b5670d4c
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
class
SplitPlugin
:
public
PluginTensorRT
{
int
axis_
;
std
::
vector
<
int
>
output_length_
;
int
nx_
,
ny_
,
nz_
;
std
::
vector
<
int
>
segment_offsets_
;
protected:
virtual
size_t
getSerializationSize
()
override
{
return
SerializedSize
(
axis_
)
+
SerializedSize
(
output_length_
)
+
getBaseSerializationSize
();
}
// TRT will call this func when we need to serialize the configuration of
// tensorrt.
// It should not be called by users.
virtual
void
serialize
(
void
*
buffer
)
override
{
serializeBase
(
buffer
);
SerializeValue
(
&
buffer
,
axis_
);
SerializeValue
(
&
buffer
,
output_length_
);
}
public:
SplitPlugin
(
int
axis
,
std
::
vector
<
int
>
const
&
output_lengths
)
:
axis_
(
axis
),
output_length_
(
output_lengths
)
{
assert
(
axis
<=
nvinfer1
::
Dims
::
MAX_DIMS
);
}
// It was used for tensorrt deserialization.
// It should not be called by users.
SplitPlugin
(
void
const
*
serialData
,
size_t
serialLength
)
{
deserializeBase
(
serialData
,
serialLength
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
axis_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
output_length_
);
}
SplitPlugin
*
clone
()
const
override
{
return
new
SplitPlugin
(
axis_
,
output_length_
);
}
virtual
const
char
*
getPluginType
()
const
override
{
return
"split"
;
}
virtual
int
getNbOutputs
()
const
override
{
return
output_length_
.
size
();
}
virtual
nvinfer1
::
Dims
getOutputDimensions
(
int
index
,
const
nvinfer1
::
Dims
*
inputs
,
int
nbInputDims
)
override
;
virtual
int
initialize
()
override
;
virtual
int
enqueue
(
int
batchSize
,
const
void
*
const
*
inputs
,
void
**
outputs
,
void
*
workspace
,
cudaStream_t
stream
)
override
;
};
}
// tensorrt
}
// inference
}
// paddle
paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc
0 → 100644
浏览文件 @
b5670d4c
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
void
PluginTensorRT
::
serializeBase
(
void
*&
buffer
)
{
SerializeValue
(
&
buffer
,
input_dims_
);
SerializeValue
(
&
buffer
,
max_batch_size_
);
SerializeValue
(
&
buffer
,
data_type_
);
SerializeValue
(
&
buffer
,
data_format_
);
}
void
PluginTensorRT
::
deserializeBase
(
void
const
*&
serialData
,
size_t
&
serialLength
)
{
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
input_dims_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
max_batch_size_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
data_type_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
data_format_
);
}
size_t
PluginTensorRT
::
getBaseSerializationSize
()
{
return
(
SerializedSize
(
input_dims_
)
+
SerializedSize
(
max_batch_size_
)
+
SerializedSize
(
data_type_
)
+
SerializedSize
(
data_format_
));
}
bool
PluginTensorRT
::
supportsFormat
(
nvinfer1
::
DataType
type
,
nvinfer1
::
PluginFormat
format
)
const
{
return
((
type
==
nvinfer1
::
DataType
::
kFLOAT
)
&&
(
format
==
nvinfer1
::
PluginFormat
::
kNCHW
));
}
void
PluginTensorRT
::
configureWithFormat
(
const
nvinfer1
::
Dims
*
inputDims
,
int
nbInputs
,
const
nvinfer1
::
Dims
*
outputDims
,
int
nbOutputs
,
nvinfer1
::
DataType
type
,
nvinfer1
::
PluginFormat
format
,
int
maxBatchSize
)
{
data_type_
=
type
;
data_format_
=
format
;
input_dims_
.
assign
(
inputDims
,
inputDims
+
nbInputs
);
max_batch_size_
=
maxBatchSize
;
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tensorrt/plugin/trt_plugin.h
0 → 100644
浏览文件 @
b5670d4c
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <cstring>
#include <iostream>
#include <unordered_map>
#include <vector>
#include "NvInfer.h"
#include "paddle/fluid/inference/tensorrt/plugin/serialize.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
class
PluginTensorRT
:
public
nvinfer1
::
IPluginExt
{
public:
PluginTensorRT
()
{}
PluginTensorRT
(
const
void
*
serialized_data
,
size_t
length
)
{}
nvinfer1
::
Dims
const
&
getInputDims
(
int
index
)
const
{
return
input_dims_
.
at
(
index
);
}
size_t
getMaxBatchSize
()
const
{
return
max_batch_size_
;
}
nvinfer1
::
DataType
getDataType
()
const
{
return
data_type_
;
}
nvinfer1
::
PluginFormat
getDataFormat
()
const
{
return
data_format_
;
}
virtual
const
char
*
getPluginVersion
()
const
{
return
"1"
;
}
size_t
getWorkspaceSize
(
int
)
const
override
{
return
0
;
}
void
terminate
()
override
{}
virtual
~
PluginTensorRT
()
{}
// Check format support. The default is FLOAT32 and NCHW.
bool
supportsFormat
(
nvinfer1
::
DataType
type
,
nvinfer1
::
PluginFormat
format
)
const
override
;
void
configureWithFormat
(
const
nvinfer1
::
Dims
*
inputDims
,
int
nbInputs
,
const
nvinfer1
::
Dims
*
outputDims
,
int
nbOutputs
,
nvinfer1
::
DataType
type
,
nvinfer1
::
PluginFormat
format
,
int
maxBatchSize
)
override
;
// *NOTE* The following functions need to be overrided in the subclass.
virtual
nvinfer1
::
IPluginExt
*
clone
()
const
=
0
;
virtual
const
char
*
getPluginType
()
const
=
0
;
// Initialize the layer for execution. This is called when the engine is
// created.
int
initialize
()
override
{
return
0
;
}
// Serialize the layer config to buffer.
virtual
void
serialize
(
void
*
buffer
)
=
0
;
virtual
size_t
getSerializationSize
()
=
0
;
virtual
int
enqueue
(
int
batchSize
,
const
void
*
const
*
inputs
,
void
**
outputs
,
void
*
workspace
,
cudaStream_t
stream
)
=
0
;
protected:
// Deserialize input_dims, max_batch_size, data_type, data_format
void
deserializeBase
(
void
const
*&
serialData
,
size_t
&
serialLength
);
size_t
getBaseSerializationSize
();
// Serialize input_dims, max_batch_size, data_type, data_format
void
serializeBase
(
void
*&
buffer
);
std
::
vector
<
nvinfer1
::
Dims
>
input_dims_
;
size_t
max_batch_size_
;
nvinfer1
::
DataType
data_type_
;
nvinfer1
::
PluginFormat
data_format_
;
};
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
b5670d4c
...
...
@@ -108,8 +108,7 @@ if(WITH_GPU AND TENSORRT_FOUND)
if
(
NOT EXISTS
${
TRT_MODEL_INSTALL_DIR
}
)
inference_download_and_uncompress
(
${
TRT_MODEL_INSTALL_DIR
}
${
INFERENCE_URL
}
/tensorrt_test
"trt_test_models.tar.gz"
)
endif
()
inference_analysis_test
(
test_trt_models SRCS trt_models_tester.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
analysis
${
analysis_deps
}
ir_pass_manager analysis_predictor
ARGS --
dirname
=
${
TRT_MODEL_INSTALL_DIR
}
/trt_test_models SERIAL
)
ARGS --
infer_model
=
${
TRT_MODEL_INSTALL_DIR
}
/trt_test_models SERIAL
)
endif
()
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -178,7 +178,8 @@ TEST(Analyzer_dam, profile) {
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
...
...
@@ -216,7 +217,9 @@ TEST(Analyzer_dam, compare) {
SetInput
(
&
input_slots_all
);
if
(
FLAGS_use_analysis
)
{
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
}
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -133,7 +133,8 @@ TEST(Analyzer_LAC, profile) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
...
...
@@ -175,7 +176,8 @@ TEST(Analyzer_LAC, compare) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
}
// namespace analysis
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -121,7 +121,8 @@ TEST(Analyzer_Chinese_ner, profile) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
...
...
@@ -160,7 +161,8 @@ TEST(Analyzer_Chinese_ner, compare) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -45,7 +45,8 @@ void profile(bool use_mkldnn = false) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
TEST
(
Analyzer_resnet50
,
profile
)
{
profile
();
}
...
...
@@ -74,7 +75,8 @@ void compare(bool use_mkldnn = false) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
TEST
(
Analyzer_resnet50
,
compare
)
{
compare
();
}
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -233,8 +233,8 @@ TEST(Analyzer_rnn1, profile) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
LOG
(
INFO
)
<<
"to test prediction"
;
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
// Check the fuse status
...
...
@@ -261,7 +261,8 @@ TEST(Analyzer_rnn1, compare) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
// Test Multi-Thread.
...
...
@@ -272,7 +273,8 @@ TEST(Analyzer_rnn1, multi_thread) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
4
/* multi_thread */
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
4
/* multi_thread */
);
}
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
...
...
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -132,7 +132,8 @@ TEST(Analyzer_rnn2, profile) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
...
...
@@ -153,7 +154,8 @@ TEST(Analyzer_rnn2, compare) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -161,7 +161,8 @@ TEST(Analyzer_seq_conv1, profile) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
...
...
@@ -199,7 +200,8 @@ TEST(Analyzer_seq_conv1, compare) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -74,7 +74,8 @@ TEST(Analyzer_Text_Classification, profile) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
)
{
// Get output
...
...
@@ -101,7 +102,8 @@ TEST(Analyzer_Text_Classification, compare) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
TEST
(
Analyzer_Text_Classification
,
compare_against_embedding_fc_lstm_fused
)
{
...
...
@@ -112,7 +114,8 @@ TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
b5670d4c
...
...
@@ -94,7 +94,8 @@ void profile(bool use_mkldnn = false) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
const
float
ocr_result_data
[]
=
{
...
...
@@ -136,7 +137,8 @@ void compare(bool use_mkldnn = false) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
TEST
(
Analyzer_vis
,
compare
)
{
compare
();
}
...
...
paddle/fluid/inference/tests/api/config_printer.h
0 → 100644
浏览文件 @
b5670d4c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <ostream>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace
paddle
{
namespace
inference
{
thread_local
int
num_spaces
=
0
;
static
std
::
string
GenSpaces
(
int
num_spaces
)
{
std
::
ostringstream
os
;
for
(
int
i
=
0
;
i
<
num_spaces
;
++
i
)
{
os
<<
" "
;
}
return
os
.
str
();
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
PaddlePredictor
::
Config
&
config
)
{
os
<<
GenSpaces
(
num_spaces
)
<<
"PaddlePredictor::Config {
\n
"
;
num_spaces
++
;
os
<<
GenSpaces
(
num_spaces
)
<<
"model_dir: "
<<
config
.
model_dir
<<
"
\n
"
;
num_spaces
--
;
os
<<
GenSpaces
(
num_spaces
)
<<
"}
\n
"
;
return
os
;
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
NativeConfig
&
config
)
{
os
<<
GenSpaces
(
num_spaces
)
<<
"NativeConfig {
\n
"
;
num_spaces
++
;
os
<<
*
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
config
);
os
<<
GenSpaces
(
num_spaces
)
<<
"use_gpu: "
<<
config
.
use_gpu
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"device: "
<<
config
.
device
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"fraction_of_gpu_memory: "
<<
config
.
fraction_of_gpu_memory
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"prog_file: "
<<
config
.
prog_file
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"param_file: "
<<
config
.
param_file
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"specify_input_name: "
<<
config
.
specify_input_name
<<
"
\n
"
;
num_spaces
--
;
os
<<
GenSpaces
(
num_spaces
)
<<
"}
\n
"
;
return
os
;
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
contrib
::
AnalysisConfig
&
config
)
{
os
<<
GenSpaces
(
num_spaces
)
<<
"contrib::AnalysisConfig {
\n
"
;
num_spaces
++
;
os
<<
*
reinterpret_cast
<
const
NativeConfig
*>
(
&
config
);
os
<<
GenSpaces
(
num_spaces
)
<<
"enable_ir_optim: "
<<
config
.
enable_ir_optim
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"use_feed_fetch_ops: "
<<
config
.
use_feed_fetch_ops
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"use_tensorrt: "
<<
config
.
use_tensorrt
()
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"use_mkldnn: "
<<
config
.
use_mkldnn
()
<<
"
\n
"
;
num_spaces
--
;
os
<<
GenSpaces
(
num_spaces
)
<<
"}
\n
"
;
return
os
;
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
b5670d4c
...
...
@@ -19,13 +19,16 @@
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/tests/api/config_printer.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -38,10 +41,18 @@ DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
DEFINE_bool
(
use_analysis
,
true
,
"Running the inference program in analysis mode."
);
DECLARE_bool
(
profile
);
namespace
paddle
{
namespace
inference
{
using
contrib
::
AnalysisConfig
;
void
PrintConfig
(
const
PaddlePredictor
::
Config
*
config
,
bool
use_analysis
)
{
if
(
use_analysis
)
{
LOG
(
INFO
)
<<
*
reinterpret_cast
<
const
contrib
::
AnalysisConfig
*>
(
config
);
return
;
}
LOG
(
INFO
)
<<
*
config
;
}
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
PaddleTensor
>
&
ref_outputs
)
{
...
...
@@ -77,12 +88,13 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
}
std
::
unique_ptr
<
PaddlePredictor
>
CreateTestPredictor
(
const
AnalysisConfig
&
config
,
bool
use_analysis
=
true
)
{
const
PaddlePredictor
::
Config
*
config
,
bool
use_analysis
=
true
)
{
if
(
use_analysis
)
{
return
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
>
(
config
);
}
else
{
return
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
return
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
>
(
*
(
reinterpret_cast
<
const
contrib
::
AnalysisConfig
*>
(
config
)));
}
return
CreatePaddlePredictor
<
NativeConfig
>
(
*
(
reinterpret_cast
<
const
NativeConfig
*>
(
config
)));
}
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
VecReduceToInt
(
out
.
shape
);
}
...
...
@@ -111,11 +123,23 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
}
void
SetFakeImageInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
,
const
std
::
string
&
dirname
)
{
const
std
::
string
&
dirname
,
bool
is_combined
=
true
,
std
::
string
model_filename
=
"model"
,
std
::
string
params_filename
=
"params"
)
{
// Set fake_image_data
PADDLE_ENFORCE_EQ
(
FLAGS_test_all_data
,
0
,
"Only have single batch of data."
);
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
GetFeedTargetShapes
(
dirname
,
true
,
"model"
,
"params"
);
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
GetFeedTargetShapes
(
dirname
,
is_combined
,
model_filename
,
params_filename
);
std
::
ostringstream
os
;
for
(
size_t
i
=
0
;
i
<
feed_target_shapes
.
size
();
++
i
)
{
os
<<
"feed target "
<<
i
<<
": {"
<<
feed_target_shapes
[
i
][
0
];
for
(
size_t
j
=
1
;
j
<
feed_target_shapes
[
i
].
size
();
++
j
)
{
os
<<
", "
<<
feed_target_shapes
[
i
][
j
];
}
os
<<
"}
\n
"
;
}
LOG
(
INFO
)
<<
os
.
str
();
int
dim1
=
feed_target_shapes
[
0
][
1
];
int
dim2
=
feed_target_shapes
[
0
][
2
];
int
dim3
=
feed_target_shapes
[
0
][
3
];
...
...
@@ -139,25 +163,43 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
}
void
TestOneThreadPrediction
(
const
AnalysisConfig
&
config
,
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
bool
use_analysis
=
true
)
{
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
auto
predictor
=
CreateTestPredictor
(
config
,
use_analysis
);
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
inputs
.
size
();
j
++
)
{
predictor
->
Run
(
inputs
[
j
],
outputs
);
// warmup run
LOG
(
INFO
)
<<
"Warm up run..."
;
{
Timer
warmup_timer
;
warmup_timer
.
tic
();
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
PrintTime
(
batch_size
,
1
,
1
,
0
,
warmup_timer
.
toc
(),
1
);
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
paddle
::
platform
::
ResetProfiler
();
}
#endif
}
LOG
(
INFO
)
<<
"Run "
<<
num_times
<<
" times..."
;
{
Timer
run_timer
;
run_timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
inputs
.
size
();
j
++
)
{
predictor
->
Run
(
inputs
[
j
],
outputs
,
batch_size
);
}
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
run_timer
.
toc
()
/
num_times
,
inputs
.
size
());
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
timer
.
toc
()
/
num_times
,
inputs
.
size
());
}
void
TestMultiThreadPrediction
(
const
AnalysisConfig
&
config
,
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
bool
use_analysis
=
true
)
{
...
...
@@ -200,12 +242,11 @@ void TestMultiThreadPrediction(
}
}
void
TestPrediction
(
const
AnalysisConfig
&
config
,
void
TestPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
bool
use_analysis
=
FLAGS_use_analysis
)
{
LOG
(
INFO
)
<<
"use_analysis: "
<<
use_analysis
<<
", use_mkldnn: "
<<
config
.
use_mkldnn
();
PrintConfig
(
config
,
use_analysis
);
if
(
num_threads
==
1
)
{
TestOneThreadPrediction
(
config
,
inputs
,
outputs
,
use_analysis
);
}
else
{
...
...
@@ -215,9 +256,9 @@ void TestPrediction(const AnalysisConfig &config,
}
void
CompareNativeAndAnalysis
(
const
AnalysisConfig
&
config
,
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
LOG
(
INFO
)
<<
"use_mkldnn: "
<<
config
.
use_mkldnn
(
);
PrintConfig
(
config
,
true
);
std
::
vector
<
PaddleTensor
>
native_outputs
,
analysis_outputs
;
TestOneThreadPrediction
(
config
,
inputs
,
&
native_outputs
,
false
);
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
b5670d4c
/
/
Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
//
Licensed under the Apache License, Version 2.0 (the "License");
//
you may not use this file except in compliance with the License.
//
You may obtain a copy of the License at
//
//
http://www.apache.org/licenses/LICENSE-2.0
//
//
Unless required by applicable law or agreed to in writing, software
//
distributed under the License is distributed on an "AS IS" BASIS,
//
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
See the License for the specific language governing permissions and
// limitations under the License.
/
*
Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
using
paddle
::
contrib
::
AnalysisConfig
;
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
NativeConfig
GetConfigNative
()
{
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
;
// LOG(INFO) << "dirname " << config.model_dir;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
return
config
;
}
void
PrepareTRTConfig
(
AnalysisConfig
*
config
)
{
config
->
model_dir
=
FLAGS_dirname
+
"/"
+
"mobilenet"
;
config
->
fraction_of_gpu_memory
=
0.15
;
config
->
EnableTensorRtEngine
(
1
<<
10
,
5
);
config
->
pass_builder
()
->
DeletePass
(
"conv_bn_fuse_pass"
);
config
->
pass_builder
()
->
DeletePass
(
"fc_fuse_pass"
);
config
->
pass_builder
()
->
TurnOnDebug
();
namespace
inference
{
DEFINE_bool
(
use_tensorrt
,
true
,
"Test the performance of TensorRT engine."
);
DEFINE_string
(
prog_filename
,
""
,
"Name of model file."
);
DEFINE_string
(
param_filename
,
""
,
"Name of parameters file."
);
template
<
typename
ConfigType
>
void
SetConfig
(
ConfigType
*
config
,
std
::
string
model_dir
,
bool
use_gpu
,
bool
use_tensorrt
=
false
,
int
batch_size
=
-
1
)
{
if
(
!
FLAGS_prog_filename
.
empty
()
&&
!
FLAGS_param_filename
.
empty
())
{
config
->
prog_file
=
model_dir
+
"/"
+
FLAGS_prog_filename
;
config
->
param_file
=
model_dir
+
"/"
+
FLAGS_param_filename
;
}
else
{
config
->
model_dir
=
model_dir
;
}
if
(
use_gpu
)
{
config
->
use_gpu
=
true
;
config
->
device
=
0
;
config
->
fraction_of_gpu_memory
=
0.15
;
}
}
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
tensors
,
int
batch_size
)
{
PADDLE_ENFORCE_EQ
(
tensors
->
size
(),
1UL
);
auto
&
tensor
=
tensors
->
front
();
int
height
=
224
;
int
width
=
224
;
float
*
data
=
new
float
[
batch_size
*
3
*
height
*
width
];
memset
(
data
,
0
,
sizeof
(
float
)
*
(
batch_size
*
3
*
height
*
width
));
data
[
0
]
=
1.0
f
;
// Prepare inputs
tensor
.
name
=
"input_0"
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
=
PaddleBuf
(
static_cast
<
void
*>
(
data
),
sizeof
(
float
)
*
(
batch_size
*
3
*
height
*
width
));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
template
<
>
void
SetConfig
<
contrib
::
AnalysisConfig
>
(
contrib
::
AnalysisConfig
*
config
,
std
::
string
model_dir
,
bool
use_gpu
,
bool
use_tensorrt
,
int
batch_size
)
{
if
(
!
FLAGS_prog_filename
.
empty
()
&&
!
FLAGS_param_filename
.
empty
())
{
config
->
prog_file
=
model_dir
+
"/"
+
FLAGS_prog_filename
;
config
->
param_file
=
model_dir
+
"/"
+
FLAGS_param_filename
;
}
else
{
config
->
model_dir
=
model_dir
;
}
if
(
use_gpu
)
{
config
->
use_gpu
=
true
;
config
->
device
=
0
;
config
->
fraction_of_gpu_memory
=
0.15
;
if
(
use_tensorrt
)
{
config
->
EnableTensorRtEngine
(
1
<<
10
,
batch_size
);
config
->
pass_builder
()
->
DeletePass
(
"conv_bn_fuse_pass"
);
config
->
pass_builder
()
->
DeletePass
(
"fc_fuse_pass"
);
config
->
pass_builder
()
->
TurnOnDebug
();
}
else
{
config
->
enable_ir_optim
=
true
;
}
}
}
void
CompareTensorRTWithFluid
(
int
batch_size
,
std
::
string
model_dirname
)
{
auto
config0
=
GetConfigNative
();
config0
.
model_dir
=
model_dirname
;
AnalysisConfig
config1
(
true
);
PrepareTRTConfig
(
&
config1
);
config1
.
model_dir
=
model_dirname
;
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
>
(
config0
);
auto
predictor1
=
CreatePaddlePredictor
(
config1
);
// Prepare inputs
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
);
PrepareInputs
(
&
paddle_tensor_feeds
,
batch_size
);
// Prepare outputs
std
::
vector
<
PaddleTensor
>
outputs0
;
std
::
vector
<
PaddleTensor
>
outputs1
;
CHECK
(
predictor0
->
Run
(
paddle_tensor_feeds
,
&
outputs0
));
CHECK
(
predictor1
->
Run
(
paddle_tensor_feeds
,
&
outputs1
,
batch_size
));
const
size_t
num_elements
=
outputs0
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements1
=
outputs1
.
front
().
data
.
length
()
/
sizeof
(
float
);
EXPECT_EQ
(
num_elements
,
num_elements1
);
auto
*
data0
=
static_cast
<
float
*>
(
outputs0
.
front
().
data
.
data
());
auto
*
data1
=
static_cast
<
float
*>
(
outputs1
.
front
().
data
.
data
());
ASSERT_GT
(
num_elements
,
0UL
);
for
(
size_t
i
=
0
;
i
<
std
::
min
(
num_elements
,
num_elements1
);
i
++
)
{
EXPECT_NEAR
(
data0
[
i
],
data1
[
i
],
1e-3
);
void
profile
(
std
::
string
model_dir
,
bool
use_analysis
,
bool
use_tensorrt
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs_all
;
if
(
!
FLAGS_prog_filename
.
empty
()
&&
!
FLAGS_param_filename
.
empty
())
{
SetFakeImageInput
(
&
inputs_all
,
model_dir
,
true
,
FLAGS_prog_filename
,
FLAGS_param_filename
);
}
else
{
SetFakeImageInput
(
&
inputs_all
,
model_dir
,
false
,
"__model__"
,
""
);
}
}
TEST
(
trt_models_test
,
mobilenet
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/"
+
"mobilenet"
);
}
TEST
(
trt_models_test
,
resnet50
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/"
+
"resnet50"
);
}
TEST
(
trt_models_test
,
resnext50
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/"
+
"resnext50"
);
std
::
vector
<
PaddleTensor
>
outputs
;
if
(
use_analysis
||
use_tensorrt
)
{
contrib
::
AnalysisConfig
config
(
true
);
SetConfig
<
contrib
::
AnalysisConfig
>
(
&
config
,
model_dir
,
true
,
use_tensorrt
,
FLAGS_batch_size
);
TestPrediction
(
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
config
),
inputs_all
,
&
outputs
,
FLAGS_num_threads
,
true
);
}
else
{
NativeConfig
config
;
SetConfig
<
NativeConfig
>
(
&
config
,
model_dir
,
true
,
false
);
TestPrediction
(
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
config
),
inputs_all
,
&
outputs
,
FLAGS_num_threads
,
false
);
}
}
TEST
(
trt_models_test
,
raw_gpu
)
{
std
::
string
model_dir
=
FLAGS_dirname
+
"/"
+
"mobilenet"
;
auto
config0
=
GetConfigNative
();
config0
.
model_dir
=
model_dir
;
int
batch_size
=
2
;
AnalysisConfig
config1
(
true
);
config1
.
fraction_of_gpu_memory
=
0.1
;
config1
.
enable_ir_optim
=
true
;
config1
.
model_dir
=
model_dir
;
void
compare
(
std
::
string
model_dir
,
bool
use_tensorrt
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs_all
;
if
(
!
FLAGS_prog_filename
.
empty
()
&&
!
FLAGS_param_filename
.
empty
())
{
SetFakeImageInput
(
&
inputs_all
,
model_dir
,
true
,
FLAGS_prog_filename
,
FLAGS_param_filename
);
}
else
{
SetFakeImageInput
(
&
inputs_all
,
model_dir
,
false
,
"__model__"
,
""
);
}
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
>
(
config0
);
auto
predictor1
=
CreatePaddlePredictor
(
config1
);
std
::
vector
<
PaddleTensor
>
native_outputs
;
NativeConfig
native_config
;
SetConfig
<
NativeConfig
>
(
&
native_config
,
model_dir
,
true
,
false
,
FLAGS_batch_size
);
TestOneThreadPrediction
(
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
native_config
),
inputs_all
,
&
native_outputs
,
false
);
std
::
vector
<
PaddleTensor
>
analysis_outputs
;
contrib
::
AnalysisConfig
analysis_config
(
true
);
SetConfig
<
contrib
::
AnalysisConfig
>
(
&
analysis_config
,
model_dir
,
true
,
use_tensorrt
,
FLAGS_batch_size
);
TestOneThreadPrediction
(
reinterpret_cast
<
PaddlePredictor
::
Config
*>
(
&
analysis_config
),
inputs_all
,
&
analysis_outputs
,
true
);
CompareResult
(
native_outputs
,
analysis_outputs
);
}
// Prepare inputs
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
);
PrepareInputs
(
&
paddle_tensor_feeds
,
batch_size
);
TEST
(
TensorRT_mobilenet
,
compare
)
{
std
::
string
model_dir
=
FLAGS_infer_model
+
"/mobilenet"
;
compare
(
model_dir
,
/* use_tensorrt */
true
);
}
// Prepare outputs
std
::
vector
<
PaddleTensor
>
outputs0
;
std
::
vector
<
PaddleTensor
>
outputs1
;
CHECK
(
predictor0
->
Run
(
paddle_tensor_feeds
,
&
outputs0
));
CHECK
(
predictor1
->
Run
(
paddle_tensor_feeds
,
&
outputs1
,
batch_size
));
TEST
(
TensorRT_resnet50
,
compare
)
{
std
::
string
model_dir
=
FLAGS_infer_model
+
"/resnet50"
;
compare
(
model_dir
,
/* use_tensorrt */
true
);
}
const
size_t
num_elements
=
outputs0
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements1
=
outputs1
.
front
().
data
.
length
()
/
sizeof
(
float
);
EXPECT_EQ
(
num_elements
,
num_elements1
);
TEST
(
TensorRT_resnext50
,
compare
)
{
std
::
string
model_dir
=
FLAGS_infer_model
+
"/resnext50"
;
compare
(
model_dir
,
/* use_tensorrt */
true
);
}
auto
*
data0
=
static_cast
<
float
*>
(
outputs0
.
front
().
data
.
data
());
auto
*
data1
=
static_cast
<
float
*>
(
outputs1
.
front
().
data
.
data
());
TEST
(
TensorRT_resnext50
,
profile
)
{
std
::
string
model_dir
=
FLAGS_infer_model
+
"/resnext50"
;
profile
(
model_dir
,
/* use_analysis */
true
,
FLAGS_use_tensorrt
);
}
ASSERT_GT
(
num_elements
,
0UL
);
for
(
size_t
i
=
0
;
i
<
std
::
min
(
num_elements
,
num_elements1
);
i
++
)
{
EXPECT_NEAR
(
data0
[
i
],
data1
[
i
],
1e-3
);
}
TEST
(
TensorRT_mobilenet
,
analysis
)
{
std
::
string
model_dir
=
FLAGS_infer_model
+
"/"
+
"mobilenet"
;
compare
(
model_dir
,
/* use_tensorrt */
false
);
}
}
// namespace inference
}
// namespace paddle
USE_PASS
(
tensorrt_subgraph_pass
);
paddle/fluid/operators/conv_cudnn_op.cu.cc
浏览文件 @
b5670d4c
...
...
@@ -50,12 +50,18 @@ static constexpr char kCUDNNBwdFilterAlgoCache[] = "kCUDNNBwdFilterAlgoCache";
static
constexpr
size_t
kCONV_CUDNN_WORKSPACE_LIMIT_BYTES
=
static_cast
<
size_t
>
(
1024
)
*
1024
*
1024
;
static
constexpr
size_t
kNUM_CUDNN_FWD_ALGS
=
CUDNN_CONVOLUTION_BWD_FILTER
_ALGO_COUNT
;
#if CUDNN_VERSION_MIN(6, 0, 5)
static
constexpr
size_t
kNUM_CUDNN_FWD_ALGS
=
CUDNN_CONVOLUTION_FWD
_ALGO_COUNT
;
static
constexpr
size_t
kNUM_CUDNN_BWD_FILTER_ALGS
=
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT
;
static
constexpr
size_t
kNUM_CUDNN_BWD_DATA_ALGS
=
CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT
;
#else
// cuDNN v5 has no CUDNN_CONVOLUTION_FWD_ALGO_COUNT etc.
static
constexpr
size_t
kNUM_CUDNN_FWD_ALGS
=
7
;
static
constexpr
size_t
kNUM_CUDNN_BWD_FILTER_ALGS
=
4
;
static
constexpr
size_t
kNUM_CUDNN_BWD_DATA_ALGS
=
5
;
#endif
template
<
typename
T
>
class
CUDNNConvOpKernel
:
public
framework
::
OpKernel
<
T
>
{
...
...
paddle/fluid/operators/math/softmax.cc
浏览文件 @
b5670d4c
...
...
@@ -19,8 +19,10 @@ namespace paddle {
namespace
operators
{
namespace
math
{
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
float
,
true
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
float
,
false
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
double
,
true
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
double
,
false
>;
template
class
SoftmaxGradFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
SoftmaxGradFunctor
<
platform
::
CPUDeviceContext
,
double
>;
...
...
paddle/fluid/operators/math/softmax.cu
浏览文件 @
b5670d4c
...
...
@@ -98,9 +98,14 @@ template class SoftmaxGradCUDNNFunctor<float>;
template
class
SoftmaxGradCUDNNFunctor
<
double
>;
template
class
SoftmaxGradCUDNNFunctor
<
platform
::
float16
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
platform
::
float16
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
platform
::
float16
,
false
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
platform
::
float16
,
true
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
float
,
false
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
double
,
false
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
float
,
true
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
double
,
true
>;
template
class
SoftmaxGradFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxGradFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
class
SoftmaxGradFunctor
<
platform
::
CUDADeviceContext
,
...
...
paddle/fluid/operators/math/softmax.h
浏览文件 @
b5670d4c
...
...
@@ -19,7 +19,7 @@ namespace paddle {
namespace
operators
{
namespace
math
{
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
bool
is_test
>
class
SoftmaxFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
X
,
...
...
paddle/fluid/operators/math/softmax_impl.h
浏览文件 @
b5670d4c
...
...
@@ -32,10 +32,10 @@ struct ValueClip {
}
};
template
<
typename
DeviceContext
,
typename
T
>
void
SoftmaxFunctor
<
DeviceContext
,
T
>::
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
template
<
typename
DeviceContext
,
typename
T
,
bool
is_test
>
void
SoftmaxFunctor
<
DeviceContext
,
T
,
is_test
>::
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
X
);
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
Y
);
...
...
@@ -65,6 +65,39 @@ void SoftmaxFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
.
broadcast
(
one_by_class
));
}
template
<
typename
DeviceContext
,
typename
T
>
class
SoftmaxFunctor
<
DeviceContext
,
T
,
true
>
{
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
X
);
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
Y
);
const
int
kBatchDim
=
0
;
const
int
kClassDim
=
1
;
const
int
batch_size
=
logits
.
dimension
(
kBatchDim
);
const
int
num_classes
=
logits
.
dimension
(
kClassDim
);
Eigen
::
DSizes
<
int
,
1
>
along_class
(
kClassDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
auto
shifted_logits
=
(
logits
-
logits
.
maximum
(
along_class
)
.
eval
()
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
));
softmax
.
device
(
*
context
.
eigen_device
())
=
shifted_logits
.
exp
();
softmax
.
device
(
*
context
.
eigen_device
())
=
(
softmax
*
softmax
.
sum
(
along_class
)
.
inverse
()
.
eval
()
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
));
}
};
template
<
typename
DeviceContext
,
typename
T
>
void
SoftmaxGradFunctor
<
DeviceContext
,
T
>::
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
y
,
...
...
paddle/fluid/operators/softmax_op.h
浏览文件 @
b5670d4c
...
...
@@ -35,8 +35,13 @@ class SoftmaxKernel : public framework::OpKernel<T> {
Tensor
X_2d
=
framework
::
ReshapeToMatrix
(
*
X
,
rank
-
1
);
Tensor
Out_2d
=
framework
::
ReshapeToMatrix
(
*
Out
,
rank
-
1
);
math
::
SoftmaxFunctor
<
DeviceContext
,
T
>
()(
#ifdef ON_INFER
math
::
SoftmaxFunctor
<
DeviceContext
,
T
,
true
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
X_2d
,
&
Out_2d
);
#else
math
::
SoftmaxFunctor
<
DeviceContext
,
T
,
false
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
X_2d
,
&
Out_2d
);
#endif
}
};
...
...
paddle/fluid/operators/softmax_with_cross_entropy_op.h
浏览文件 @
b5670d4c
...
...
@@ -42,8 +42,8 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
CPUDeviceContext
>();
math
::
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
T
>
()(
dev_ctx
,
logits
,
softmax
);
math
::
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
T
,
false
>
()(
dev_ctx
,
logits
,
softmax
);
math
::
CrossEntropyFunctor
<
platform
::
CPUDeviceContext
,
T
>
()(
dev_ctx
,
loss
,
softmax
,
labels
,
context
.
Attr
<
bool
>
(
"soft_label"
),
context
.
Attr
<
int
>
(
"ignore_index"
));
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
b5670d4c
...
...
@@ -651,9 +651,9 @@ All parameter, weight, gradient are variables in Paddle.
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
const
std
::
string
&
attr
)
{
self
.
Set
<
std
::
string
>
(
name
,
new
std
::
string
(
attr
));
})
.
def
(
"set_int"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
}
);
.
def
(
"set_int"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
})
.
def
(
"type"
,
&
ir
::
Pass
::
Type
);
py
::
class_
<
ir
::
PassBuilder
,
std
::
shared_ptr
<
ir
::
PassBuilder
>>
pb
(
m
,
"PassBuilder"
);
...
...
@@ -792,6 +792,7 @@ All parameter, weight, gradient are variables in Paddle.
"reduce_strategy"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
reduce_
;
},
[](
BuildStrategy
&
self
,
BuildStrategy
::
ReduceStrategy
strategy
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
reduce_
=
strategy
;
},
R"DOC(The type is STR, there are two reduce strategies in ParallelExecutor,
...
...
@@ -805,6 +806,7 @@ All parameter, weight, gradient are variables in Paddle.
[](
const
BuildStrategy
&
self
)
{
return
self
.
gradient_scale_
;
},
[](
BuildStrategy
&
self
,
BuildStrategy
::
GradientScaleStrategy
strategy
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
gradient_scale_
=
strategy
;
},
R"DOC(The type is STR, there are three ways of defining :math:`loss@grad` in
...
...
@@ -816,6 +818,7 @@ All parameter, weight, gradient are variables in Paddle.
"debug_graphviz_path"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
debug_graphviz_path_
;
},
[](
BuildStrategy
&
self
,
const
std
::
string
&
path
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
debug_graphviz_path_
=
path
;
},
R"DOC(The type is STR, debug_graphviz_path indicate the path that
...
...
@@ -825,6 +828,7 @@ All parameter, weight, gradient are variables in Paddle.
"enable_data_balance"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
enable_data_balance_
;
},
[](
BuildStrategy
&
self
,
bool
b
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
enable_data_balance_
=
b
;
})
// FIXME(chengudo): enable_data_balance seems not important
.
def_property
(
...
...
@@ -833,6 +837,7 @@ All parameter, weight, gradient are variables in Paddle.
return
self
.
enable_sequential_execution_
;
},
[](
BuildStrategy
&
self
,
bool
b
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
enable_sequential_execution_
=
b
;
},
R"DOC(The type is BOOL. If set True, the execution order of ops would be the same as what is in the program. Default False.)DOC"
)
...
...
@@ -842,6 +847,7 @@ All parameter, weight, gradient are variables in Paddle.
return
self
.
remove_unnecessary_lock_
;
},
[](
BuildStrategy
&
self
,
bool
b
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
remove_unnecessary_lock_
=
b
;
},
R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default False.)DOC"
)
...
...
@@ -851,15 +857,19 @@ All parameter, weight, gradient are variables in Paddle.
return
self
.
fuse_elewise_add_act_ops_
;
},
[](
BuildStrategy
&
self
,
bool
b
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
fuse_elewise_add_act_ops_
=
b
;
},
R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicate whether
to fuse elementwise_add_op and activation_op,
it may make the execution faster. Default False)DOC"
)
.
def
(
"_
create_passes_from_strategy
"
,
.
def
(
"_
finalize_strategy_and_create_passes
"
,
[](
BuildStrategy
&
self
)
->
std
::
shared_ptr
<
ir
::
PassBuilder
>
{
return
self
.
CreatePassesFromStrategy
();
});
return
self
.
CreatePassesFromStrategy
(
true
);
},
R"DOC(Allow user to customized passes. Normally model-specific
optimization passes should be defined in this way. BuildStrategy
cannot be updated after being finalized.)DOC"
);
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
b5670d4c
...
...
@@ -156,6 +156,8 @@ function cmake_gen() {
-DWITH_INFERENCE_API_TEST=
${
WITH_INFERENCE_API_TEST
:-
ON
}
-DINFERENCE_DEMO_INSTALL_DIR=
${
INFERENCE_DEMO_INSTALL_DIR
}
-DWITH_ANAKIN=
${
WITH_ANAKIN
:-
OFF
}
-DANAKIN_BUILD_FAT_BIN=
${
ANAKIN_BUILD_FAT_BIN
:OFF
}
-DANAKIN_BUILD_CROSS_PLANTFORM=
${
ANAKIN_BUILD_CROSS_PLANTFORM
:ON
}
-DPY_VERSION=
${
PY_VERSION
:-
2
.7
}
-DCMAKE_INSTALL_PREFIX=
${
INSTALL_PREFIX
:-
/paddle/build
}
========================================
...
...
@@ -188,6 +190,8 @@ EOF
-DWITH_INFERENCE_API_TEST
=
${
WITH_INFERENCE_API_TEST
:-
ON
}
\
-DINFERENCE_DEMO_INSTALL_DIR
=
${
INFERENCE_DEMO_INSTALL_DIR
}
\
-DWITH_ANAKIN
=
${
WITH_ANAKIN
:-
OFF
}
\
-DANAKIN_BUILD_FAT_BIN
=
${
ANAKIN_BUILD_FAT_BIN
:OFF
}
\
-DANAKIN_BUILD_CROSS_PLANTFORM
=
${
ANAKIN_BUILD_CROSS_PLANTFORM
:ON
}
\
-DPY_VERSION
=
${
PY_VERSION
:-
2
.7
}
\
-DCMAKE_INSTALL_PREFIX
=
${
INSTALL_PREFIX
:-
/paddle/build
}
...
...
@@ -777,6 +781,17 @@ function main() {
test_fluid_lib
assert_api_spec_approvals
;;
assert_api
)
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
;;
test_inference
)
gen_capi_package
gen_fluid_lib
test_fluid_lib
;;
assert_api_approvals
)
assert_api_spec_approvals
;;
maccheck
)
cmake_gen
${
PYTHON_ABI
:-
""
}
build_mac
...
...
python/paddle/fluid/__init__.py
浏览文件 @
b5670d4c
...
...
@@ -117,10 +117,10 @@ def __bootstrap__():
read_env_flags
=
[
'use_pinned_memory'
,
'check_nan_inf'
,
'benchmark'
,
'warpctc_dir'
,
'eager_delete_scope'
,
'use_mkldnn'
,
'
initial_cpu_memory_in_mb
'
,
'init
_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads
'
,
'
dist_threadpool_size'
,
'cpu_deterministic'
,
'eager_delete_tensor_gb
'
,
'reader_queue_speed_test_mode'
'eager_delete_scope'
,
'use_mkldnn'
,
'
use_ngraph
'
,
'init
ial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory
'
,
'
paddle_num_threads'
,
'dist_threadpool_size'
,
'cpu_deterministic
'
,
'
eager_delete_tensor_gb'
,
'
reader_queue_speed_test_mode'
]
if
core
.
is_compiled_with_dist
():
read_env_flags
.
append
(
'rpc_deadline'
)
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
b5670d4c
...
...
@@ -6822,7 +6822,7 @@ def prelu(x, mode, param_attr=None, name=None):
alpha_shape
=
x
.
shape
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
alpha
=
helper
.
create_parameter
(
attr
=
param_attr
,
attr
=
helper
.
param_attr
,
shape
=
alpha_shape
,
dtype
=
'float32'
,
is_bias
=
False
,
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
b5670d4c
...
...
@@ -105,7 +105,7 @@ class TestDistRunnerBase(object):
build_stra
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
if
args
.
batch_merge_repeat
>
1
:
pass_builder
=
build_stra
.
_
create_passes_from_strategy
()
pass_builder
=
build_stra
.
_
finalize_strategy_and_create_passes
()
mypass
=
pass_builder
.
insert_pass
(
len
(
pass_builder
.
all_passes
())
-
2
,
"multi_batch_merge_pass"
)
mypass
.
set_int
(
"num_repeats"
,
args
.
batch_merge_repeat
)
...
...
python/paddle/fluid/tests/unittests/test_pass_builder.py
浏览文件 @
b5670d4c
...
...
@@ -94,7 +94,12 @@ class TestPassBuilder(unittest.TestCase):
def
test_parallel_testing_with_new_strategy
(
self
):
build_strategy
=
fluid
.
BuildStrategy
()
pass_builder
=
build_strategy
.
_create_passes_from_strategy
()
self
.
assertFalse
(
build_strategy
.
fuse_elewise_add_act_ops
)
build_strategy
.
fuse_elewise_add_act_ops
=
True
pass_builder
=
build_strategy
.
_finalize_strategy_and_create_passes
()
self
.
assertTrue
(
"fuse_elewise_add_act_pass"
in
[
p
.
type
()
for
p
in
pass_builder
.
all_passes
()])
origin_len
=
len
(
pass_builder
.
all_passes
())
viz_pass
=
pass_builder
.
append_pass
(
"graph_viz_pass"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录