Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
89dcb0bd
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
89dcb0bd
编写于
5月 08, 2018
作者:
L
Luo Tao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine EngineIOConverter, and use io_convert in test_trt_activation_op
上级
9a98a572
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
131 addition
and
69 deletion
+131
-69
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+1
-1
paddle/fluid/inference/tensorrt/convert/io_converter.cc
paddle/fluid/inference/tensorrt/convert/io_converter.cc
+29
-13
paddle/fluid/inference/tensorrt/convert/io_converter.h
paddle/fluid/inference/tensorrt/convert/io_converter.h
+34
-19
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+24
-15
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
+43
-20
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+0
-1
未找到文件。
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
89dcb0bd
nv_test
(
test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS
${
FLUID_CORE_MODULES
}
)
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc
${
ENGINE_FILE
}
activation_op.cc
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc
io_converter.cc
${
ENGINE_FILE
}
activation_op.cc
DEPS
${
FLUID_CORE_MODULES
}
activation_op
)
nv_test
(
test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor
)
paddle/fluid/inference/tensorrt/convert/io_converter.cc
浏览文件 @
89dcb0bd
...
...
@@ -23,26 +23,42 @@ namespace tensorrt {
using
platform
::
is_gpu_place
;
using
platform
::
is_cpu_place
;
class
DefaultI
nputConverter
:
public
EngineInput
Converter
{
class
DefaultI
OConverter
:
public
EngineIO
Converter
{
public:
DefaultI
nput
Converter
()
{}
DefaultI
O
Converter
()
{}
// NOTE out is GPU memory.
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
)
override
{
PADDLE_ENFORCE
(
out
!=
nullptr
);
PADDLE_ENFORCE
_LE
(
in
.
memory_size
(),
max_size
);
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
const
auto
&
place
=
in
.
place
();
size_t
size
=
in
.
memory_size
();
PADDLE_ENFORCE_LE
(
size
,
max_size
);
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
in
.
memory_size
(),
cudaMemcpyHostToDevice
,
*
stream_
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
size
,
cudaMemcpyHostToDevice
,
*
stream_
));
}
else
if
(
is_gpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
in
.
memory_size
(),
cudaMemcpyHostToHost
,
*
stream_
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
size
,
cudaMemcpyHostToHost
,
*
stream_
));
}
else
{
PADDLE_THROW
(
"Unknown device for converter"
);
}
cudaStreamSynchronize
(
*
stream_
);
}
// NOTE in is GPU memory.
virtual
void
operator
()(
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
)
override
{
PADDLE_ENFORCE
(
in
!=
nullptr
);
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
const
auto
&
place
=
out
->
place
();
size_t
size
=
out
->
memory_size
();
PADDLE_ENFORCE_LE
(
size
,
max_size
);
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
->
data
<
float
>
(),
in
,
size
,
cudaMemcpyDeviceToHost
,
*
stream_
));
}
else
if
(
is_gpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
->
data
<
float
>
(),
in
,
size
,
cudaMemcpyHostToHost
,
*
stream_
));
}
else
{
PADDLE_THROW
(
"Unknown device for converter"
);
}
...
...
@@ -50,7 +66,7 @@ class DefaultInputConverter : public EngineInputConverter {
}
};
REGISTER_TENSORRT_I
NPUT_CONVERTER
(
default
,
DefaultInput
Converter
);
REGISTER_TENSORRT_I
O_CONVERTER
(
default
,
DefaultIO
Converter
);
}
// namespace tensorrt
}
// namespace inference
...
...
paddle/fluid/inference/tensorrt/convert/io_converter.h
浏览文件 @
89dcb0bd
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/utils/singleton.h"
...
...
@@ -25,43 +26,57 @@ namespace tensorrt {
using
framework
::
LoDTensor
;
/*
* Convert Input from Fluid to an Engine.
* TensorRT's ITensor follows row major, NCHW. Fluid is also row major, so in
* most cases just need to copy the data.
* Convert Input from Fluid to TensorRT Engine.
* Convert Output from TensorRT Engine to Fluid.
*
* Note that TensorRT's ITensor follows row major, NCHW. Fluid is also row
* major,
* so in the default case just need to copy the data.
*/
class
EngineI
nput
Converter
{
class
EngineI
O
Converter
{
public:
EngineI
nput
Converter
()
{}
EngineI
O
Converter
()
{}
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
)
{}
virtual
void
operator
()(
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
)
{}
void
SetStream
(
cudaStream_t
*
stream
)
{
stream_
=
stream
;
}
static
void
Run
(
const
std
::
string
&
in_op_type
,
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
static
void
ConvertInput
(
const
std
::
string
&
op_type
,
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
PADDLE_ENFORCE
(
stream
!=
nullptr
);
auto
*
converter
=
Registry
<
EngineI
nput
Converter
>::
Lookup
(
in_
op_type
,
"default"
/* default_type */
);
auto
*
converter
=
Registry
<
EngineI
O
Converter
>::
Lookup
(
op_type
,
"default"
/* default_type */
);
PADDLE_ENFORCE_NOT_NULL
(
converter
);
converter
->
SetStream
(
stream
);
(
*
converter
)(
in
,
out
,
max_size
);
}
virtual
~
EngineInputConverter
()
{}
static
void
ConvertOutput
(
const
std
::
string
&
op_type
,
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
PADDLE_ENFORCE
(
stream
!=
nullptr
);
auto
*
converter
=
Registry
<
EngineIOConverter
>::
Lookup
(
op_type
,
"default"
/* default_type */
);
PADDLE_ENFORCE_NOT_NULL
(
converter
);
converter
->
SetStream
(
stream
);
(
*
converter
)(
in
,
out
,
max_size
);
}
virtual
~
EngineIOConverter
()
{}
protected:
cudaStream_t
*
stream_
{
nullptr
};
};
#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \
struct trt_io_##op_type__##_converter { \
trt_io_##op_type__##_converter() { \
Registry<EngineIOConverter>::Register<Converter__>(#op_type__); \
} \
}; \
trt_io_##op_type__##_converter trt_io_##op_type__##_converter__;
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
#define REGISTER_TENSORRT_INPUT_CONVERTER(in_op_type__, Converter__) \
struct trt_input_##in_op_type__##_converter { \
trt_input_##in_op_type__##_converter() { \
::paddle::inference::Registry<EngineInputConverter>::Register< \
Converter__>(#in_op_type__); \
} \
}; \
trt_input_##in_op_type__##_converter trt_input_##in_op_type__##_converter__;
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
89dcb0bd
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
...
...
@@ -26,7 +27,7 @@ namespace paddle {
namespace
inference
{
namespace
tensorrt
{
void
Compare
(
float
input
,
float
expect
)
{
void
Compare
(
const
std
::
string
op_type
,
float
input
,
float
expect
)
{
framework
::
Scope
scope
;
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
...
...
@@ -35,6 +36,7 @@ void Compare(float input, float expect) {
auto
x_var
=
scope
.
Var
(
"X"
);
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
({
1
,
1
});
x_tensor
->
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
init
.
push_back
(
input
);
framework
::
TensorFromVector
(
init
,
ctx
,
x_tensor
);
...
...
@@ -45,14 +47,15 @@ void Compare(float input, float expect) {
out_tensor
->
mutable_data
<
float
>
(
place
);
framework
::
OpDesc
op_desc
;
op_desc
.
SetType
(
"relu"
);
op_desc
.
SetType
(
op_type
);
op_desc
.
SetInput
(
"X"
,
{
"X"
});
op_desc
.
SetOutput
(
"Out"
,
{
"Out"
});
auto
relu_
op
=
framework
::
OpRegistry
::
CreateOp
(
op_desc
);
auto
op
=
framework
::
OpRegistry
::
CreateOp
(
op_desc
);
// run fluid op
relu_op
->
Run
(
scope
,
place
);
op
->
Run
(
scope
,
place
);
// get fluid output
std
::
vector
<
float
>
out1
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out1
);
...
...
@@ -63,21 +66,27 @@ void Compare(float input, float expect) {
engine
->
InitNetwork
();
engine
->
DeclareInput
(
"X"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
// convert op
OpConverter
op_converter
;
op_converter
.
ConvertOp
(
op_desc
,
engine
);
engine
->
DeclareOutput
(
"Out"
);
engine
->
FreezeNetwork
();
engine
->
SetInputFromCPU
(
"X"
,
&
input
,
1
*
sizeof
(
float
));
// run tensorrt op
// convert LoDTensor to ITensor
size_t
size
=
x_tensor
->
memory_size
();
EngineIOConverter
::
ConvertInput
(
op_type
,
*
x_tensor
,
engine
->
buffer
(
"X"
),
size
,
&
stream
);
// run tensorrt Outp
engine
->
Execute
(
1
);
float
out2
;
engine
->
GetOutputInCPU
(
"Out"
,
&
out2
,
1
*
sizeof
(
float
));
ASSERT_EQ
(
out1
[
0
],
out2
);
// convert ITensor to LoDTensor
EngineIOConverter
::
ConvertOutput
(
op_type
,
engine
->
buffer
(
"Out"
),
out_tensor
,
size
,
&
stream
);
// get tensorrt output
std
::
vector
<
float
>
out2
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out2
);
// compare
ASSERT_EQ
(
out1
[
0
],
out2
[
0
]);
ASSERT_EQ
(
out1
[
0
],
expect
);
delete
engine
;
...
...
@@ -85,8 +94,8 @@ void Compare(float input, float expect) {
}
TEST
(
OpConverter
,
ConvertRelu
)
{
Compare
(
1
,
1
);
// relu(1) = 1
Compare
(
-
5
,
0
);
// relu(-5) = 0
Compare
(
"relu"
,
1
,
1
);
// relu(1) = 1
Compare
(
"relu"
,
-
5
,
0
);
// relu(-5) = 0
}
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
浏览文件 @
89dcb0bd
...
...
@@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
class
EngineInputConverterTester
:
public
::
testing
::
Test
{
public:
void
SetUp
()
override
{
tensor
.
Resize
({
10
,
10
});
}
void
IOConverterTester
(
const
platform
::
DeviceContext
&
ctx
)
{
cudaStream_t
stream
;
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream
));
framework
::
LoDTensor
tensor
;
};
// init fluid in_tensor
framework
::
LoDTensor
in_tensor
;
in_tensor
.
Resize
({
10
,
10
});
auto
place
=
ctx
.
GetPlace
();
in_tensor
.
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
for
(
int64_t
i
=
0
;
i
<
10
*
10
;
++
i
)
{
init
.
push_back
(
i
);
}
framework
::
TensorFromVector
(
init
,
ctx
,
&
in_tensor
);
TEST_F
(
EngineInputConverterTester
,
DefaultCPU
)
{
// init tensorrt buffer
void
*
buffer
;
tensor
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()
);
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
tensor
.
memory_size
()
),
0
);
size_t
size
=
in_tensor
.
memory_size
(
);
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
size
),
0
);
cudaStream_t
stream
;
EngineInputConverter
::
Run
(
"test"
,
tensor
,
buffer
,
tensor
.
memory_size
(),
&
stream
);
// convert fluid in_tensor to tensorrt buffer
EngineIOConverter
::
ConvertInput
(
"test"
,
in_tensor
,
buffer
,
size
,
&
stream
);
// convert tensorrt buffer to fluid out_tensor
framework
::
LoDTensor
out_tensor
;
out_tensor
.
Resize
({
10
,
10
});
out_tensor
.
mutable_data
<
float
>
(
place
);
EngineIOConverter
::
ConvertOutput
(
"test"
,
buffer
,
&
out_tensor
,
size
,
&
stream
);
// compare in_tensor and out_tensor
std
::
vector
<
float
>
result
;
framework
::
TensorToVector
(
out_tensor
,
ctx
,
&
result
);
EXPECT_EQ
(
init
.
size
(),
result
.
size
());
for
(
size_t
i
=
0
;
i
<
init
.
size
();
i
++
)
{
EXPECT_EQ
(
init
[
i
],
result
[
i
]);
}
cudaStreamDestroy
(
stream
);
}
TEST_F
(
EngineInputConverterTester
,
DefaultGPU
)
{
void
*
buffer
;
tensor
.
mutable_data
<
float
>
(
platform
::
CUDAPlace
());
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
tensor
.
memory_size
()),
0
);
TEST
(
EngineIOConverterTester
,
DefaultCPU
)
{
platform
::
CPUPlace
place
;
platform
::
CPUDeviceContext
ctx
(
place
);
IOConverterTester
(
ctx
);
}
cudaStream_t
stream
;
EngineInputConverter
::
Run
(
"test"
,
tensor
,
buffer
,
tensor
.
memory_size
(),
&
stream
);
TEST
(
EngineIOConverterTester
,
DefaultGPU
)
{
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
IOConverterTester
(
ctx
);
}
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
89dcb0bd
...
...
@@ -138,7 +138,6 @@ void*& TensorRTEngine::buffer(const std::string& name) {
void
TensorRTEngine
::
SetInputFromCPU
(
const
std
::
string
&
name
,
void
*
data
,
size_t
size
)
{
void
*
buf
=
buffer
(
name
);
cudaMemcpyAsync
(
buf
,
data
,
size
,
cudaMemcpyHostToDevice
,
*
stream_
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
buf
,
data
,
size
,
cudaMemcpyHostToDevice
,
*
stream_
));
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录