Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
6cbe597a
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6cbe597a
编写于
5月 15, 2018
作者:
T
Tao Luo
提交者:
GitHub
5月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #10495 from luotao1/refine_relu_test
refine EngineIOConverter, and use io_convert in test_trt_activation_op
上级
dfdcb7ea
1992f709
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
137 additions
and
71 deletions
+137
-71
paddle/fluid/inference/analysis/dot.h
paddle/fluid/inference/analysis/dot.h
+1
-0
paddle/fluid/inference/engine.h
paddle/fluid/inference/engine.h
+3
-2
paddle/fluid/inference/tensorrt/CMakeLists.txt
paddle/fluid/inference/tensorrt/CMakeLists.txt
+0
-1
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+2
-2
paddle/fluid/inference/tensorrt/convert/io_converter.cc
paddle/fluid/inference/tensorrt/convert/io_converter.cc
+30
-13
paddle/fluid/inference/tensorrt/convert/io_converter.h
paddle/fluid/inference/tensorrt/convert/io_converter.h
+34
-19
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+24
-14
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
+43
-20
未找到文件。
paddle/fluid/inference/analysis/dot.h
浏览文件 @
6cbe597a
...
...
@@ -21,6 +21,7 @@
#include <glog/logging.h>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
...
...
paddle/fluid/inference/engine.h
浏览文件 @
6cbe597a
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/framework.pb.h"
namespace
paddle
{
...
...
@@ -58,8 +59,8 @@ class EngineBase {
struct
Buffer
{
void
*
buffer
{
nullptr
};
// buffer should be allocated only once.
int
max_size
;
// buffer allocated space.
int
size
;
// data size.
size_t
max_size
;
// buffer allocated space.
size_t
size
;
// data size.
DeviceType
device
{
DeviceType
::
UNK
};
// tells which device this buffer is on.
};
...
...
paddle/fluid/inference/tensorrt/CMakeLists.txt
浏览文件 @
6cbe597a
nv_library
(
tensorrt_engine SRCS engine.cc DEPS framework_proto
)
nv_test
(
test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader
)
nv_test
(
test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine
)
add_subdirectory
(
convert
)
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
6cbe597a
nv_test
(
test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc
op_converter.h
DEPS
${
FLUID_CORE_MODULES
}
)
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
nv_test
(
test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS
${
FLUID_CORE_MODULES
}
)
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
io_converter.cc
DEPS
${
FLUID_CORE_MODULES
}
activation_op tensorrt_engine
)
nv_test
(
test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor
)
paddle/fluid/inference/tensorrt/convert/io_converter.cc
浏览文件 @
6cbe597a
...
...
@@ -23,26 +23,42 @@ namespace tensorrt {
using
platform
::
is_gpu_place
;
using
platform
::
is_cpu_place
;
class
DefaultI
nputConverter
:
public
EngineInput
Converter
{
class
DefaultI
OConverter
:
public
EngineIO
Converter
{
public:
DefaultI
nput
Converter
()
{}
DefaultI
O
Converter
()
{}
// NOTE out is GPU memory.
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
)
override
{
PADDLE_ENFORCE
(
out
!=
nullptr
);
PADDLE_ENFORCE
_LE
(
in
.
memory_size
(),
max_size
);
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
const
auto
&
place
=
in
.
place
();
size_t
size
=
in
.
memory_size
();
PADDLE_ENFORCE_LE
(
size
,
max_size
);
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
in
.
memory_size
(),
cudaMemcpyHostToDevice
,
*
stream_
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
size
,
cudaMemcpyHostToDevice
,
*
stream_
));
}
else
if
(
is_gpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
in
.
memory_size
(),
cudaMemcpyHostToHost
,
*
stream_
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
size
,
cudaMemcpyDeviceToDevice
,
*
stream_
));
}
else
{
PADDLE_THROW
(
"Unknown device for converter"
);
}
cudaStreamSynchronize
(
*
stream_
);
}
// NOTE in is GPU memory.
virtual
void
operator
()(
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
)
override
{
PADDLE_ENFORCE
(
in
!=
nullptr
);
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
const
auto
&
place
=
out
->
place
();
size_t
size
=
out
->
memory_size
();
PADDLE_ENFORCE_LE
(
size
,
max_size
);
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
->
data
<
float
>
(),
in
,
size
,
cudaMemcpyDeviceToHost
,
*
stream_
));
}
else
if
(
is_gpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
->
data
<
float
>
(),
in
,
size
,
cudaMemcpyDeviceToDevice
,
*
stream_
));
}
else
{
PADDLE_THROW
(
"Unknown device for converter"
);
}
...
...
@@ -50,7 +66,8 @@ class DefaultInputConverter : public EngineInputConverter {
}
};
REGISTER_TENSORRT_INPUT_CONVERTER
(
default
,
DefaultInputConverter
);
// fluid LoDTensor <-> tensorrt ITensor
REGISTER_TENSORRT_IO_CONVERTER
(
default
,
DefaultIOConverter
);
}
// namespace tensorrt
}
// namespace inference
...
...
paddle/fluid/inference/tensorrt/convert/io_converter.h
浏览文件 @
6cbe597a
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/utils/singleton.h"
...
...
@@ -25,43 +26,57 @@ namespace tensorrt {
using
framework
::
LoDTensor
;
/*
* Convert Input from Fluid to an Engine.
* TensorRT's ITensor follows row major, NCHW. Fluid is also row major, so in
* most cases just need to copy the data.
* Convert Input from Fluid to TensorRT Engine.
* Convert Output from TensorRT Engine to Fluid.
*
* Note that TensorRT's ITensor follows row major, NCHW. Fluid is also row
* major,
* so in the default case just need to copy the data.
*/
class
EngineI
nput
Converter
{
class
EngineI
O
Converter
{
public:
EngineI
nput
Converter
()
{}
EngineI
O
Converter
()
{}
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
)
{}
virtual
void
operator
()(
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
)
{}
// Stores the caller-supplied CUDA stream pointer in stream_.
// Only the pointer is kept (no copy of the stream handle is made), so the
// pointed-to cudaStream_t presumably has to stay alive for as long as this
// converter is used -- confirm against callers.
void
SetStream
(
cudaStream_t
*
stream
)
{
stream_
=
stream
;
}
static
void
Run
(
const
std
::
string
&
in_op_type
,
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
static
void
ConvertInput
(
const
std
::
string
&
op_type
,
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
PADDLE_ENFORCE
(
stream
!=
nullptr
);
auto
*
converter
=
Registry
<
EngineI
nput
Converter
>::
Lookup
(
in_
op_type
,
"default"
/* default_type */
);
auto
*
converter
=
Registry
<
EngineI
O
Converter
>::
Lookup
(
op_type
,
"default"
/* default_type */
);
PADDLE_ENFORCE_NOT_NULL
(
converter
);
converter
->
SetStream
(
stream
);
(
*
converter
)(
in
,
out
,
max_size
);
}
virtual
~
EngineInputConverter
()
{}
// Converts an engine output buffer (`in`) into the Fluid LoDTensor `out`.
//
// op_type:  key used to look up the registered EngineIOConverter.
// in:       source buffer handed to the converter's call operator.
// out:      destination tensor handed to the converter's call operator.
// max_size: size bound forwarded unchanged to the converter.
// stream:   CUDA stream pointer; must be non-null (enforced below).
//
// NOTE(review): "default" is passed as Lookup's second argument (labelled
// default_type); presumably it is the fallback key when op_type has no
// converter of its own -- Registry::Lookup is not visible here, confirm.
static
void
ConvertOutput
(
const
std
::
string
&
op_type
,
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
// Reject a null stream before it is installed on the converter.
PADDLE_ENFORCE
(
stream
!=
nullptr
);
auto
*
converter
=
Registry
<
EngineIOConverter
>::
Lookup
(
op_type
,
"default"
/* default_type */
);
// Fail loudly if the lookup produced no converter.
PADDLE_ENFORCE_NOT_NULL
(
converter
);
// The stream is set on the looked-up converter object right before use.
converter
->
SetStream
(
stream
);
// Dispatches to the (const void*, LoDTensor*, size_t) call operator.
(
*
converter
)(
in
,
out
,
max_size
);
}
virtual
~
EngineIOConverter
()
{}
protected:
cudaStream_t
*
stream_
{
nullptr
};
};
// Registers Converter__ in Registry<EngineIOConverter> under the stringified
// name of op_type__.
//
// The expansion defines a helper struct trt_io_<op_type>_converter whose
// constructor performs the Register call, followed by one instance of that
// struct (trt_io_<op_type>_converter__) at the point of use, so the
// registration happens when that instance is constructed.
//
// NOTE(review): Registry and EngineIOConverter are referenced unqualified, so
// the macro presumably has to be expanded where both names are visible --
// confirm against the call sites.
#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \
struct trt_io_##op_type__##_converter { \
trt_io_##op_type__##_converter() { \
Registry<EngineIOConverter>::Register<Converter__>(#op_type__); \
} \
}; \
trt_io_##op_type__##_converter trt_io_##op_type__##_converter__;
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
#define REGISTER_TENSORRT_INPUT_CONVERTER(in_op_type__, Converter__) \
struct trt_input_##in_op_type__##_converter { \
trt_input_##in_op_type__##_converter() { \
::paddle::inference::Registry<EngineInputConverter>::Register< \
Converter__>(#in_op_type__); \
} \
}; \
trt_input_##in_op_type__##_converter trt_input_##in_op_type__##_converter__;
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
6cbe597a
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
...
...
@@ -26,7 +27,7 @@ namespace paddle {
namespace
inference
{
namespace
tensorrt
{
void
Compare
(
float
input
,
float
expect
)
{
void
Compare
(
const
std
::
string
op_type
,
float
input
,
float
expect
)
{
framework
::
Scope
scope
;
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
...
...
@@ -35,6 +36,7 @@ void Compare(float input, float expect) {
auto
x_var
=
scope
.
Var
(
"X"
);
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
({
1
,
1
});
x_tensor
->
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
init
.
push_back
(
input
);
framework
::
TensorFromVector
(
init
,
ctx
,
x_tensor
);
...
...
@@ -45,14 +47,15 @@ void Compare(float input, float expect) {
out_tensor
->
mutable_data
<
float
>
(
place
);
framework
::
OpDesc
op_desc
;
op_desc
.
SetType
(
"relu"
);
op_desc
.
SetType
(
op_type
);
op_desc
.
SetInput
(
"X"
,
{
"X"
});
op_desc
.
SetOutput
(
"Out"
,
{
"Out"
});
auto
relu_
op
=
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
.
Proto
());
auto
op
=
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
.
Proto
());
// run fluid op
relu_op
->
Run
(
scope
,
place
);
op
->
Run
(
scope
,
place
);
// get fluid output
std
::
vector
<
float
>
out1
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out1
);
...
...
@@ -63,21 +66,28 @@ void Compare(float input, float expect) {
engine
->
InitNetwork
();
engine
->
DeclareInput
(
"X"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
// convert op
OpConverter
op_converter
;
op_converter
.
ConvertOp
(
*
op_desc
.
Proto
(),
engine
);
engine
->
DeclareOutput
(
"Out"
);
engine
->
FreezeNetwork
();
engine
->
SetInputFromCPU
(
"X"
,
&
input
,
1
*
sizeof
(
float
));
// run tensorrt op
// convert LoDTensor to ITensor
size_t
size
=
x_tensor
->
memory_size
();
EngineIOConverter
::
ConvertInput
(
op_type
,
*
x_tensor
,
engine
->
buffer
(
"X"
).
buffer
,
size
,
&
stream
);
// run tensorrt op
engine
->
Execute
(
1
);
float
out2
;
engine
->
GetOutputInCPU
(
"Out"
,
&
out2
,
1
*
sizeof
(
float
));
ASSERT_EQ
(
out1
[
0
],
out2
);
// convert ITensor to LoDTensor
EngineIOConverter
::
ConvertOutput
(
op_type
,
engine
->
buffer
(
"Out"
).
buffer
,
out_tensor
,
size
,
&
stream
);
// get tensorrt output
std
::
vector
<
float
>
out2
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out2
);
// compare
ASSERT_EQ
(
out1
[
0
],
out2
[
0
]);
ASSERT_EQ
(
out1
[
0
],
expect
);
delete
engine
;
...
...
@@ -85,8 +95,8 @@ void Compare(float input, float expect) {
}
TEST
(
OpConverter
,
ConvertRelu
)
{
Compare
(
1
,
1
);
// relu(1) = 1
Compare
(
-
5
,
0
);
// relu(-5) = 0
Compare
(
"relu"
,
1
,
1
);
// relu(1) = 1
Compare
(
"relu"
,
-
5
,
0
);
// relu(-5) = 0
}
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
浏览文件 @
6cbe597a
...
...
@@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
class
EngineInputConverterTester
:
public
::
testing
::
Test
{
public:
void
SetUp
()
override
{
tensor
.
Resize
({
10
,
10
});
}
void
IOConverterTester
(
const
platform
::
DeviceContext
&
ctx
)
{
cudaStream_t
stream
;
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream
));
framework
::
LoDTensor
tensor
;
};
// init fluid in_tensor
framework
::
LoDTensor
in_tensor
;
in_tensor
.
Resize
({
10
,
10
});
auto
place
=
ctx
.
GetPlace
();
in_tensor
.
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
for
(
int64_t
i
=
0
;
i
<
10
*
10
;
++
i
)
{
init
.
push_back
(
i
);
}
framework
::
TensorFromVector
(
init
,
ctx
,
&
in_tensor
);
TEST_F
(
EngineInputConverterTester
,
DefaultCPU
)
{
// init tensorrt buffer
void
*
buffer
;
tensor
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()
);
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
tensor
.
memory_size
()
),
0
);
size_t
size
=
in_tensor
.
memory_size
(
);
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
size
),
0
);
cudaStream_t
stream
;
EngineInputConverter
::
Run
(
"test"
,
tensor
,
buffer
,
tensor
.
memory_size
(),
&
stream
);
// convert fluid in_tensor to tensorrt buffer
EngineIOConverter
::
ConvertInput
(
"test"
,
in_tensor
,
buffer
,
size
,
&
stream
);
// convert tensorrt buffer to fluid out_tensor
framework
::
LoDTensor
out_tensor
;
out_tensor
.
Resize
({
10
,
10
});
out_tensor
.
mutable_data
<
float
>
(
place
);
EngineIOConverter
::
ConvertOutput
(
"test"
,
buffer
,
&
out_tensor
,
size
,
&
stream
);
// compare in_tensor and out_tensor
std
::
vector
<
float
>
result
;
framework
::
TensorToVector
(
out_tensor
,
ctx
,
&
result
);
EXPECT_EQ
(
init
.
size
(),
result
.
size
());
for
(
size_t
i
=
0
;
i
<
init
.
size
();
i
++
)
{
EXPECT_EQ
(
init
[
i
],
result
[
i
]);
}
cudaStreamDestroy
(
stream
);
}
TEST_F
(
EngineInputConverterTester
,
DefaultGPU
)
{
void
*
buffer
;
tensor
.
mutable_data
<
float
>
(
platform
::
CUDAPlace
());
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
tensor
.
memory_size
()),
0
);
// Runs the IOConverterTester helper with a CPU place and CPU device context.
TEST
(
EngineIOConverterTester
,
DefaultCPU
)
{
platform
::
CPUPlace
place
;
platform
::
CPUDeviceContext
ctx
(
place
);
IOConverterTester
(
ctx
);
}
cudaStream_t
stream
;
EngineInputConverter
::
Run
(
"test"
,
tensor
,
buffer
,
tensor
.
memory_size
(),
&
stream
);
// Runs the IOConverterTester helper with a CUDA place and CUDA device context.
TEST
(
EngineIOConverterTester
,
DefaultGPU
)
{
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
IOConverterTester
(
ctx
);
}
}
// namespace tensorrt
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录