Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
beb12455
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
beb12455
编写于
5月 03, 2018
作者:
L
Luo Tao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add relu converter and unit-test
上级
9945265f
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
197 addition
and
26 deletion
+197
-26
paddle/fluid/inference/tensorrt/CMakeLists.txt
paddle/fluid/inference/tensorrt/CMakeLists.txt
+1
-0
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+3
-2
paddle/fluid/inference/tensorrt/convert/activation_op.cc
paddle/fluid/inference/tensorrt/convert/activation_op.cc
+40
-0
paddle/fluid/inference/tensorrt/convert/op_converter.h
paddle/fluid/inference/tensorrt/convert/op_converter.h
+21
-19
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+94
-0
paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
+2
-2
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+28
-2
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+8
-0
paddle/fluid/inference/tensorrt/test_engine.cc
paddle/fluid/inference/tensorrt/test_engine.cc
+0
-1
未找到文件。
paddle/fluid/inference/tensorrt/CMakeLists.txt
浏览文件 @
beb12455
nv_test
(
test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader
)
nv_test
(
test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader
)
nv_test
(
test_tensorrt_engine SRCS test_engine.cc engine.cc DEPS dynload_cuda
)
nv_test
(
test_tensorrt_engine SRCS test_engine.cc engine.cc DEPS dynload_cuda
)
set
(
ENGINE_FILE
${
CMAKE_CURRENT_SOURCE_DIR
}
/engine.cc
)
add_subdirectory
(
convert
)
add_subdirectory
(
convert
)
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
beb12455
file
(
GLOB TENSORRT_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"*_op.cc"
)
nv_test
(
test_tensorrt_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS
${
FLUID_CORE_MODULES
}
)
nv_test
(
test_tensorrt_op_converter SRCS test_op_converter.cc
${
TENSORRT_OPS
}
DEPS
${
FLUID_CORE_MODULES
}
)
nv_test
(
test_tensorrt_activation_op SRCS test_activation_op.cc
${
ENGINE_FILE
}
activation_op.cc
DEPS
${
FLUID_CORE_MODULES
}
activation_op
)
paddle/fluid/inference/tensorrt/convert/activation_op.cc
0 → 100644
浏览文件 @
beb12455
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
class
ReluOpConverter
:
public
OpConverter
{
public:
ReluOpConverter
()
{}
void
operator
()(
const
framework
::
OpDesc
&
op
)
override
{
LOG
(
INFO
)
<<
"convert a fluid relu op to tensorrt activation layer whose "
"type is Relu"
;
const
nvinfer1
::
ITensor
*
input_tensor
=
engine_
->
GetITensor
(
op
.
Input
(
"X"
)[
0
]);
nvinfer1
::
IActivationLayer
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Activation
,
*
const_cast
<
nvinfer1
::
ITensor
*>
(
input_tensor
),
nvinfer1
::
ActivationType
::
kRELU
);
engine_
->
SetITensor
(
op
.
Output
(
"Out"
)[
0
],
layer
->
getOutput
(
0
));
}
};
REGISTER_TRT_OP_CONVERTER
(
relu
,
ReluOpConverter
);
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tensorrt/convert/op_converter.h
浏览文件 @
beb12455
...
@@ -30,13 +30,14 @@ namespace tensorrt {
...
@@ -30,13 +30,14 @@ namespace tensorrt {
class
OpConverter
{
class
OpConverter
{
public:
public:
OpConverter
()
{}
OpConverter
()
{}
virtual
void
operator
()(
const
framework
::
OpDesc
&
op
)
{}
virtual
void
operator
()(
const
framework
::
OpDesc
&
op
)
{}
void
Execute
(
const
framework
::
OpDesc
&
op
)
{
void
Execute
(
const
framework
::
OpDesc
&
op
,
TensorRTEngine
*
engine
)
{
std
::
string
type
=
op
.
Type
();
std
::
string
type
=
op
.
Type
();
auto
it
=
converters_
.
find
(
type
);
auto
it
=
converters_
.
find
(
type
);
PADDLE_ENFORCE
(
it
!=
converters_
.
end
(),
"no OpConverter for optype [%s]"
,
PADDLE_ENFORCE
(
it
!=
converters_
.
end
(),
"no OpConverter for optype [%s]"
,
type
);
type
);
it
->
second
->
SetEngine
(
engine
);
(
*
it
->
second
)(
op
);
(
*
it
->
second
)(
op
);
}
}
...
@@ -50,18 +51,31 @@ class OpConverter {
...
@@ -50,18 +51,31 @@ class OpConverter {
converters_
[
key
]
=
new
T
;
converters_
[
key
]
=
new
T
;
}
}
// convert fluid op to tensorrt layer
void
ConvertOp
(
const
framework
::
OpDesc
&
op
,
TensorRTEngine
*
engine
)
{
OpConverter
::
Global
().
Execute
(
op
,
engine
);
}
// convert fluid block to tensorrt network
void
ConvertBlock
(
const
framework
::
BlockDesc
&
block
,
TensorRTEngine
*
engine
)
{
for
(
auto
op
:
block
.
AllOps
())
{
OpConverter
::
Global
().
Execute
(
*
op
,
engine
);
}
}
void
SetEngine
(
TensorRTEngine
*
engine
)
{
engine_
=
engine
;
}
virtual
~
OpConverter
()
{}
virtual
~
OpConverter
()
{}
// TensorRT engine
TensorRTEngine
*
engine_
{
nullptr
};
private:
private:
// registered op converter map, whose key is the fluid op type, and value is
// registered op converter map, whose key is the fluid op type, and value is
// the pointer position of corresponding OpConverter class.
// the pointer position of corresponding OpConverter class.
std
::
unordered_map
<
std
::
string
,
OpConverter
*>
converters_
;
std
::
unordered_map
<
std
::
string
,
OpConverter
*>
converters_
;
// fluid inference scope
// fluid inference scope
framework
::
Scope
*
scope_
;
framework
::
Scope
*
scope_
{
nullptr
};
// tensorrt input/output tensor map, whose key is the fluid variable name,
// and value is the pointer position of tensorrt tensor
std
::
unordered_map
<
std
::
string
,
nvinfer1
::
ITensor
*>
tr_tensors_
;
};
};
#define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \
#define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \
...
@@ -72,18 +86,6 @@ class OpConverter {
...
@@ -72,18 +86,6 @@ class OpConverter {
}; \
}; \
trt_##op_type__##_converter trt_##op_type__##_converter__;
trt_##op_type__##_converter trt_##op_type__##_converter__;
class
BlockConverter
{
public:
BlockConverter
()
{}
// convert fluid block to tensorrt network
void
ConvertBlock
(
const
framework
::
BlockDesc
&
block
)
{
for
(
auto
op
:
block
.
AllOps
())
{
OpConverter
::
Global
().
Execute
(
*
op
);
}
}
};
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
0 → 100644
浏览文件 @
beb12455
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
USE_OP
(
relu
);
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
void
compare
(
float
input
,
float
expect
)
{
framework
::
Scope
scope
;
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
// init fluid op and variable
auto
x_var
=
scope
.
Var
(
"X"
);
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
({
1
,
1
});
std
::
vector
<
float
>
init
;
init
.
push_back
(
input
);
framework
::
TensorFromVector
(
init
,
ctx
,
x_tensor
);
auto
out_var
=
scope
.
Var
(
"Out"
);
auto
out_tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
out_tensor
->
Resize
({
1
,
1
});
out_tensor
->
mutable_data
<
float
>
(
place
);
framework
::
OpDesc
op_desc
;
op_desc
.
SetType
(
"relu"
);
op_desc
.
SetInput
(
"X"
,
{
"X"
});
op_desc
.
SetOutput
(
"Out"
,
{
"Out"
});
auto
relu_op
=
framework
::
OpRegistry
::
CreateOp
(
op_desc
);
// run fluid op
relu_op
->
Run
(
scope
,
place
);
std
::
vector
<
float
>
out1
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out1
);
// init tensorrt op
cudaStream_t
stream
;
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream
));
TensorRTEngine
*
engine
=
new
TensorRTEngine
(
1
,
1
<<
10
,
&
stream
);
engine
->
InitNetwork
();
engine
->
DeclareInput
(
"X"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
OpConverter
op_converter
;
op_converter
.
ConvertOp
(
op_desc
,
engine
);
engine
->
DeclareOutput
(
"Out"
);
engine
->
FreezeNetwork
();
engine
->
SetInputFromCPU
(
"X"
,
&
input
,
1
*
sizeof
(
float
));
// run tensorrt op
engine
->
Execute
(
1
);
float
out2
;
engine
->
GetOutputInCPU
(
"Out"
,
&
out2
,
1
*
sizeof
(
float
));
ASSERT_EQ
(
out1
[
0
],
out2
);
ASSERT_EQ
(
out1
[
0
],
expect
);
delete
engine
;
cudaStreamDestroy
(
stream
);
}
TEST
(
OpConverter
,
ConvertRelu
)
{
compare
(
1
,
1
);
// relu(1) = 1
compare
(
-
5
,
0
);
// relu(-5) = 0
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
浏览文件 @
beb12455
...
@@ -28,8 +28,8 @@ TEST(BlockConverter, ConvertBlock) {
...
@@ -28,8 +28,8 @@ TEST(BlockConverter, ConvertBlock) {
auto
*
conv2d_op
=
block
->
AppendOp
();
auto
*
conv2d_op
=
block
->
AppendOp
();
conv2d_op
->
SetType
(
"conv2d"
);
conv2d_op
->
SetType
(
"conv2d"
);
Block
Converter
converter
;
Op
Converter
converter
;
converter
.
ConvertBlock
(
*
block
);
converter
.
ConvertBlock
(
*
block
,
nullptr
/*TensorRTEngine*/
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
beb12455
...
@@ -80,8 +80,8 @@ nvinfer1::ITensor* TensorRTEngine::DeclareInput(const std::string& name,
...
@@ -80,8 +80,8 @@ nvinfer1::ITensor* TensorRTEngine::DeclareInput(const std::string& name,
PADDLE_ENFORCE
(
infer_network_
!=
nullptr
,
"should initnetwork first"
);
PADDLE_ENFORCE
(
infer_network_
!=
nullptr
,
"should initnetwork first"
);
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dim
);
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dim
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
AccumDims
(
dim
);
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
AccumDims
(
dim
);
TensorRTEngine
::
SetITensor
(
name
,
input
);
return
input
;
return
input
;
}
}
...
@@ -99,6 +99,19 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer* layer, int offset,
...
@@ -99,6 +99,19 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer* layer, int offset,
buffer_sizes_
[
name
]
=
0
;
buffer_sizes_
[
name
]
=
0
;
}
}
void
TensorRTEngine
::
DeclareOutput
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE_EQ
(
0
,
buffer_sizes_
.
count
(
name
),
"duplicate output name %s"
,
name
);
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
PADDLE_ENFORCE
(
output
!=
nullptr
);
output
->
setName
(
name
.
c_str
());
infer_network_
->
markOutput
(
*
output
);
// output buffers' size can only be decided latter, set zero here to mark this
// and will reset latter.
buffer_sizes_
[
name
]
=
0
;
}
void
*
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
)
{
void
*
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
)
{
return
buffer
(
name
);
return
buffer
(
name
);
}
}
...
@@ -110,7 +123,6 @@ void TensorRTEngine::GetOutputInCPU(const std::string& name, void* dst,
...
@@ -110,7 +123,6 @@ void TensorRTEngine::GetOutputInCPU(const std::string& name, void* dst,
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
dst
,
buffer
(
name
),
it
->
second
,
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
dst
,
buffer
(
name
),
it
->
second
,
cudaMemcpyDeviceToHost
,
*
stream_
));
cudaMemcpyDeviceToHost
,
*
stream_
));
}
}
...
@@ -126,10 +138,24 @@ void*& TensorRTEngine::buffer(const std::string& name) {
...
@@ -126,10 +138,24 @@ void*& TensorRTEngine::buffer(const std::string& name) {
void
TensorRTEngine
::
SetInputFromCPU
(
const
std
::
string
&
name
,
void
*
data
,
void
TensorRTEngine
::
SetInputFromCPU
(
const
std
::
string
&
name
,
void
*
data
,
size_t
size
)
{
size_t
size
)
{
void
*
buf
=
buffer
(
name
);
void
*
buf
=
buffer
(
name
);
cudaMemcpyAsync
(
buf
,
data
,
size
,
cudaMemcpyHostToDevice
,
*
stream_
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
buf
,
data
,
size
,
cudaMemcpyHostToDevice
,
*
stream_
));
0
,
cudaMemcpyAsync
(
buf
,
data
,
size
,
cudaMemcpyHostToDevice
,
*
stream_
));
}
}
void
TensorRTEngine
::
SetITensor
(
const
std
::
string
&
name
,
nvinfer1
::
ITensor
*
tensor
)
{
PADDLE_ENFORCE
(
tensor
!=
nullptr
);
PADDLE_ENFORCE_EQ
(
0
,
itensor_map_
.
count
(
name
),
"duplicate itensor name %s"
,
name
);
itensor_map_
[
name
]
=
tensor
;
}
nvinfer1
::
ITensor
*
TensorRTEngine
::
GetITensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
itensor_map_
.
count
(
name
),
"no itensor %s"
,
name
);
return
itensor_map_
[
name
];
}
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
beb12455
...
@@ -80,6 +80,8 @@ class TensorRTEngine : public EngineBase {
...
@@ -80,6 +80,8 @@ class TensorRTEngine : public EngineBase {
// name.
// name.
void
DeclareOutput
(
const
nvinfer1
::
ILayer
*
layer
,
int
offset
,
void
DeclareOutput
(
const
nvinfer1
::
ILayer
*
layer
,
int
offset
,
const
std
::
string
&
name
);
const
std
::
string
&
name
);
// Set the itensor_map_[name] as the network's output, and set its name.
void
DeclareOutput
(
const
std
::
string
&
name
);
// GPU memory address for an ITensor with specific name. One can operate on
// GPU memory address for an ITensor with specific name. One can operate on
// these memory directly for acceleration, for example, output the converted
// these memory directly for acceleration, for example, output the converted
...
@@ -98,6 +100,10 @@ class TensorRTEngine : public EngineBase {
...
@@ -98,6 +100,10 @@ class TensorRTEngine : public EngineBase {
// LOW EFFICENCY! Get output to CPU, this will trigger a memory copy from GPU
// LOW EFFICENCY! Get output to CPU, this will trigger a memory copy from GPU
// to CPU.
// to CPU.
void
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
);
void
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
);
// Fill an ITensor into map itensor_map_.
void
SetITensor
(
const
std
::
string
&
name
,
nvinfer1
::
ITensor
*
tensor
);
// Get an ITensor called name.
nvinfer1
::
ITensor
*
GetITensor
(
const
std
::
string
&
name
);
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
INetworkDefinition
*
network
()
{
return
infer_network_
.
get
();
}
nvinfer1
::
INetworkDefinition
*
network
()
{
return
infer_network_
.
get
();
}
...
@@ -113,6 +119,8 @@ class TensorRTEngine : public EngineBase {
...
@@ -113,6 +119,8 @@ class TensorRTEngine : public EngineBase {
std
::
vector
<
void
*>
buffers_
;
std
::
vector
<
void
*>
buffers_
;
// max data size for the buffers.
// max data size for the buffers.
std
::
unordered_map
<
std
::
string
/*name*/
,
size_t
/*max size*/
>
buffer_sizes_
;
std
::
unordered_map
<
std
::
string
/*name*/
,
size_t
/*max size*/
>
buffer_sizes_
;
std
::
unordered_map
<
std
::
string
/*name*/
,
nvinfer1
::
ITensor
*
/*ITensor*/
>
itensor_map_
;
// TensorRT related internal members
// TensorRT related internal members
template
<
typename
T
>
template
<
typename
T
>
...
...
paddle/fluid/inference/tensorrt/test_engine.cc
浏览文件 @
beb12455
...
@@ -70,7 +70,6 @@ TEST_F(TensorRTEngineTest, add_layer) {
...
@@ -70,7 +70,6 @@ TEST_F(TensorRTEngineTest, add_layer) {
engine_
->
Execute
(
1
);
engine_
->
Execute
(
1
);
LOG
(
INFO
)
<<
"to get output"
;
LOG
(
INFO
)
<<
"to get output"
;
// void* y_v =
float
y_cpu
;
float
y_cpu
;
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
sizeof
(
float
));
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
sizeof
(
float
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录