Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
6cbe597a
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6cbe597a
编写于
5月 15, 2018
作者:
T
Tao Luo
提交者:
GitHub
5月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #10495 from luotao1/refine_relu_test
refine EngineIOConverter, and use io_convert in test_trt_activation_op
上级
dfdcb7ea
1992f709
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
137 additions
and
71 deletions
+137
-71
paddle/fluid/inference/analysis/dot.h
paddle/fluid/inference/analysis/dot.h
+1
-0
paddle/fluid/inference/engine.h
paddle/fluid/inference/engine.h
+3
-2
paddle/fluid/inference/tensorrt/CMakeLists.txt
paddle/fluid/inference/tensorrt/CMakeLists.txt
+0
-1
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+2
-2
paddle/fluid/inference/tensorrt/convert/io_converter.cc
paddle/fluid/inference/tensorrt/convert/io_converter.cc
+30
-13
paddle/fluid/inference/tensorrt/convert/io_converter.h
paddle/fluid/inference/tensorrt/convert/io_converter.h
+34
-19
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+24
-14
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
+43
-20
未找到文件。
paddle/fluid/inference/analysis/dot.h
浏览文件 @
6cbe597a
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
#include <glog/logging.h>
#include <glog/logging.h>
#include <sstream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_map>
#include <vector>
#include <vector>
...
...
paddle/fluid/inference/engine.h
浏览文件 @
6cbe597a
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <string>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/framework.pb.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -58,8 +59,8 @@ class EngineBase {
...
@@ -58,8 +59,8 @@ class EngineBase {
struct
Buffer
{
struct
Buffer
{
void
*
buffer
{
nullptr
};
// buffer should be allocated only once.
void
*
buffer
{
nullptr
};
// buffer should be allocated only once.
int
max_size
;
// buffer allocated space.
size_t
max_size
;
// buffer allocated space.
int
size
;
// data size.
size_t
size
;
// data size.
DeviceType
device
{
DeviceType
::
UNK
};
// tells which device this buffer is on.
DeviceType
device
{
DeviceType
::
UNK
};
// tells which device this buffer is on.
};
};
...
...
paddle/fluid/inference/tensorrt/CMakeLists.txt
浏览文件 @
6cbe597a
nv_library
(
tensorrt_engine SRCS engine.cc DEPS framework_proto
)
nv_library
(
tensorrt_engine SRCS engine.cc DEPS framework_proto
)
nv_test
(
test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader
)
nv_test
(
test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader
)
nv_test
(
test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine
)
nv_test
(
test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine
)
add_subdirectory
(
convert
)
add_subdirectory
(
convert
)
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
6cbe597a
nv_test
(
test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc
op_converter.h
DEPS
${
FLUID_CORE_MODULES
}
)
nv_test
(
test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS
${
FLUID_CORE_MODULES
}
)
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
io_converter.cc
DEPS
${
FLUID_CORE_MODULES
}
activation_op tensorrt_engine
)
DEPS
${
FLUID_CORE_MODULES
}
activation_op tensorrt_engine
)
nv_test
(
test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor
)
nv_test
(
test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor
)
paddle/fluid/inference/tensorrt/convert/io_converter.cc
浏览文件 @
6cbe597a
...
@@ -23,26 +23,42 @@ namespace tensorrt {
...
@@ -23,26 +23,42 @@ namespace tensorrt {
using
platform
::
is_gpu_place
;
using
platform
::
is_gpu_place
;
using
platform
::
is_cpu_place
;
using
platform
::
is_cpu_place
;
class
DefaultI
nputConverter
:
public
EngineInput
Converter
{
class
DefaultI
OConverter
:
public
EngineIO
Converter
{
public:
public:
DefaultI
nput
Converter
()
{}
DefaultI
O
Converter
()
{}
// NOTE out is GPU memory.
// NOTE out is GPU memory.
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
)
override
{
size_t
max_size
)
override
{
PADDLE_ENFORCE
(
out
!=
nullptr
);
PADDLE_ENFORCE
(
out
!=
nullptr
);
PADDLE_ENFORCE
_LE
(
in
.
memory_size
(),
max_size
);
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
const
auto
&
place
=
in
.
place
();
const
auto
&
place
=
in
.
place
();
size_t
size
=
in
.
memory_size
();
PADDLE_ENFORCE_LE
(
size
,
max_size
);
if
(
is_cpu_place
(
place
))
{
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
size
,
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyHostToDevice
,
*
stream_
));
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
in
.
memory_size
(),
cudaMemcpyHostToDevice
,
*
stream_
));
}
else
if
(
is_gpu_place
(
place
))
{
}
else
if
(
is_gpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
size
,
cudaMemcpyAsync
(
out
,
in
.
data
<
float
>
(),
in
.
memory_size
(),
cudaMemcpyDeviceToDevice
,
*
stream_
));
cudaMemcpyHostToHost
,
*
stream_
));
}
else
{
PADDLE_THROW
(
"Unknown device for converter"
);
}
cudaStreamSynchronize
(
*
stream_
);
}
// NOTE in is GPU memory.
virtual
void
operator
()(
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
)
override
{
PADDLE_ENFORCE
(
in
!=
nullptr
);
PADDLE_ENFORCE
(
stream_
!=
nullptr
);
const
auto
&
place
=
out
->
place
();
size_t
size
=
out
->
memory_size
();
PADDLE_ENFORCE_LE
(
size
,
max_size
);
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
->
data
<
float
>
(),
in
,
size
,
cudaMemcpyDeviceToHost
,
*
stream_
));
}
else
if
(
is_gpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
out
->
data
<
float
>
(),
in
,
size
,
cudaMemcpyDeviceToDevice
,
*
stream_
));
}
else
{
}
else
{
PADDLE_THROW
(
"Unknown device for converter"
);
PADDLE_THROW
(
"Unknown device for converter"
);
}
}
...
@@ -50,7 +66,8 @@ class DefaultInputConverter : public EngineInputConverter {
...
@@ -50,7 +66,8 @@ class DefaultInputConverter : public EngineInputConverter {
}
}
};
};
REGISTER_TENSORRT_INPUT_CONVERTER
(
default
,
DefaultInputConverter
);
// fluid LodTensor <-> tensorrt ITensor
REGISTER_TENSORRT_IO_CONVERTER
(
default
,
DefaultIOConverter
);
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/tensorrt/convert/io_converter.h
浏览文件 @
6cbe597a
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_map>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
...
@@ -25,43 +26,57 @@ namespace tensorrt {
...
@@ -25,43 +26,57 @@ namespace tensorrt {
using
framework
::
LoDTensor
;
using
framework
::
LoDTensor
;
/*
/*
* Convert Input from Fluid to an Engine.
* Convert Input from Fluid to TensorRT Engine.
* TensorRT's ITensor follows row major, NCHW. Fluid is also row major, so in
* Convert Output from TensorRT Engine to Fluid.
* most cases just need to copy the data.
*
* Note that TensorRT's ITensor follows row major, NCHW. Fluid is also row
* major,
* so in the default case just need to copy the data.
*/
*/
class
EngineI
nput
Converter
{
class
EngineI
O
Converter
{
public:
public:
EngineI
nput
Converter
()
{}
EngineI
O
Converter
()
{}
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
)
{}
virtual
void
operator
()(
const
LoDTensor
&
in
,
void
*
out
,
size_t
max_size
)
{}
virtual
void
operator
()(
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
)
{}
void
SetStream
(
cudaStream_t
*
stream
)
{
stream_
=
stream
;
}
void
SetStream
(
cudaStream_t
*
stream
)
{
stream_
=
stream
;
}
static
void
Run
(
const
std
::
string
&
in_op_type
,
const
LoDTensor
&
in
,
void
*
out
,
static
void
ConvertInput
(
const
std
::
string
&
op_type
,
const
LoDTensor
&
in
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
void
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
PADDLE_ENFORCE
(
stream
!=
nullptr
);
PADDLE_ENFORCE
(
stream
!=
nullptr
);
auto
*
converter
=
Registry
<
EngineI
nput
Converter
>::
Lookup
(
auto
*
converter
=
Registry
<
EngineI
O
Converter
>::
Lookup
(
in_
op_type
,
"default"
/* default_type */
);
op_type
,
"default"
/* default_type */
);
PADDLE_ENFORCE_NOT_NULL
(
converter
);
PADDLE_ENFORCE_NOT_NULL
(
converter
);
converter
->
SetStream
(
stream
);
converter
->
SetStream
(
stream
);
(
*
converter
)(
in
,
out
,
max_size
);
(
*
converter
)(
in
,
out
,
max_size
);
}
}
virtual
~
EngineInputConverter
()
{}
static
void
ConvertOutput
(
const
std
::
string
&
op_type
,
const
void
*
in
,
LoDTensor
*
out
,
size_t
max_size
,
cudaStream_t
*
stream
)
{
PADDLE_ENFORCE
(
stream
!=
nullptr
);
auto
*
converter
=
Registry
<
EngineIOConverter
>::
Lookup
(
op_type
,
"default"
/* default_type */
);
PADDLE_ENFORCE_NOT_NULL
(
converter
);
converter
->
SetStream
(
stream
);
(
*
converter
)(
in
,
out
,
max_size
);
}
virtual
~
EngineIOConverter
()
{}
protected:
protected:
cudaStream_t
*
stream_
{
nullptr
};
cudaStream_t
*
stream_
{
nullptr
};
};
};
#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \
struct trt_io_##op_type__##_converter { \
trt_io_##op_type__##_converter() { \
Registry<EngineIOConverter>::Register<Converter__>(#op_type__); \
} \
}; \
trt_io_##op_type__##_converter trt_io_##op_type__##_converter__;
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
#define REGISTER_TENSORRT_INPUT_CONVERTER(in_op_type__, Converter__) \
struct trt_input_##in_op_type__##_converter { \
trt_input_##in_op_type__##_converter() { \
::paddle::inference::Registry<EngineInputConverter>::Register< \
Converter__>(#in_op_type__); \
} \
}; \
trt_input_##in_op_type__##_converter trt_input_##in_op_type__##_converter__;
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
6cbe597a
...
@@ -16,6 +16,7 @@ limitations under the License. */
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
...
@@ -26,7 +27,7 @@ namespace paddle {
...
@@ -26,7 +27,7 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
tensorrt
{
namespace
tensorrt
{
void
Compare
(
float
input
,
float
expect
)
{
void
Compare
(
const
std
::
string
op_type
,
float
input
,
float
expect
)
{
framework
::
Scope
scope
;
framework
::
Scope
scope
;
platform
::
CUDAPlace
place
;
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
platform
::
CUDADeviceContext
ctx
(
place
);
...
@@ -35,6 +36,7 @@ void Compare(float input, float expect) {
...
@@ -35,6 +36,7 @@ void Compare(float input, float expect) {
auto
x_var
=
scope
.
Var
(
"X"
);
auto
x_var
=
scope
.
Var
(
"X"
);
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
({
1
,
1
});
x_tensor
->
Resize
({
1
,
1
});
x_tensor
->
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
std
::
vector
<
float
>
init
;
init
.
push_back
(
input
);
init
.
push_back
(
input
);
framework
::
TensorFromVector
(
init
,
ctx
,
x_tensor
);
framework
::
TensorFromVector
(
init
,
ctx
,
x_tensor
);
...
@@ -45,14 +47,15 @@ void Compare(float input, float expect) {
...
@@ -45,14 +47,15 @@ void Compare(float input, float expect) {
out_tensor
->
mutable_data
<
float
>
(
place
);
out_tensor
->
mutable_data
<
float
>
(
place
);
framework
::
OpDesc
op_desc
;
framework
::
OpDesc
op_desc
;
op_desc
.
SetType
(
"relu"
);
op_desc
.
SetType
(
op_type
);
op_desc
.
SetInput
(
"X"
,
{
"X"
});
op_desc
.
SetInput
(
"X"
,
{
"X"
});
op_desc
.
SetOutput
(
"Out"
,
{
"Out"
});
op_desc
.
SetOutput
(
"Out"
,
{
"Out"
});
auto
relu_
op
=
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
.
Proto
());
auto
op
=
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
.
Proto
());
// run fluid op
// run fluid op
relu_op
->
Run
(
scope
,
place
);
op
->
Run
(
scope
,
place
);
// get fluid output
std
::
vector
<
float
>
out1
;
std
::
vector
<
float
>
out1
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out1
);
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out1
);
...
@@ -63,21 +66,28 @@ void Compare(float input, float expect) {
...
@@ -63,21 +66,28 @@ void Compare(float input, float expect) {
engine
->
InitNetwork
();
engine
->
InitNetwork
();
engine
->
DeclareInput
(
"X"
,
nvinfer1
::
DataType
::
kFLOAT
,
engine
->
DeclareInput
(
"X"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
// convert op
OpConverter
op_converter
;
OpConverter
op_converter
;
op_converter
.
ConvertOp
(
*
op_desc
.
Proto
(),
engine
);
op_converter
.
ConvertOp
(
*
op_desc
.
Proto
(),
engine
);
engine
->
DeclareOutput
(
"Out"
);
engine
->
DeclareOutput
(
"Out"
);
engine
->
FreezeNetwork
();
engine
->
FreezeNetwork
();
engine
->
SetInputFromCPU
(
"X"
,
&
input
,
1
*
sizeof
(
float
));
// run tensorrt op
// convert LoDTensor to ITensor
size_t
size
=
x_tensor
->
memory_size
();
EngineIOConverter
::
ConvertInput
(
op_type
,
*
x_tensor
,
engine
->
buffer
(
"X"
).
buffer
,
size
,
&
stream
);
// run tensorrt op
engine
->
Execute
(
1
);
engine
->
Execute
(
1
);
// convert ITensor to LoDTensor
float
out2
;
EngineIOConverter
::
ConvertOutput
(
op_type
,
engine
->
buffer
(
"Out"
).
buffer
,
engine
->
GetOutputInCPU
(
"Out"
,
&
out2
,
1
*
sizeof
(
float
));
out_tensor
,
size
,
&
stream
);
// get tensorrt output
ASSERT_EQ
(
out1
[
0
],
out2
);
std
::
vector
<
float
>
out2
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out2
);
// compare
ASSERT_EQ
(
out1
[
0
],
out2
[
0
]);
ASSERT_EQ
(
out1
[
0
],
expect
);
ASSERT_EQ
(
out1
[
0
],
expect
);
delete
engine
;
delete
engine
;
...
@@ -85,8 +95,8 @@ void Compare(float input, float expect) {
...
@@ -85,8 +95,8 @@ void Compare(float input, float expect) {
}
}
TEST
(
OpConverter
,
ConvertRelu
)
{
TEST
(
OpConverter
,
ConvertRelu
)
{
Compare
(
1
,
1
);
// relu(1) = 1
Compare
(
"relu"
,
1
,
1
);
// relu(1) = 1
Compare
(
-
5
,
0
);
// relu(-5) = 0
Compare
(
"relu"
,
-
5
,
0
);
// relu(-5) = 0
}
}
}
// namespace tensorrt
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
浏览文件 @
6cbe597a
...
@@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
tensorrt
{
namespace
tensorrt
{
class
EngineInputConverterTester
:
public
::
testing
::
Test
{
void
IOConverterTester
(
const
platform
::
DeviceContext
&
ctx
)
{
public:
cudaStream_t
stream
;
void
SetUp
()
override
{
tensor
.
Resize
({
10
,
10
});
}
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream
));
framework
::
LoDTensor
tensor
;
// init fluid in_tensor
};
framework
::
LoDTensor
in_tensor
;
in_tensor
.
Resize
({
10
,
10
});
auto
place
=
ctx
.
GetPlace
();
in_tensor
.
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
for
(
int64_t
i
=
0
;
i
<
10
*
10
;
++
i
)
{
init
.
push_back
(
i
);
}
framework
::
TensorFromVector
(
init
,
ctx
,
&
in_tensor
);
TEST_F
(
EngineInputConverterTester
,
DefaultCPU
)
{
// init tensorrt buffer
void
*
buffer
;
void
*
buffer
;
tensor
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()
);
size_t
size
=
in_tensor
.
memory_size
(
);
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
tensor
.
memory_size
()
),
0
);
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
size
),
0
);
cudaStream_t
stream
;
// convert fluid in_tensor to tensorrt buffer
EngineInputConverter
::
Run
(
"test"
,
tensor
,
buffer
,
tensor
.
memory_size
(),
EngineIOConverter
::
ConvertInput
(
"test"
,
in_tensor
,
buffer
,
size
,
&
stream
);
&
stream
);
// convert tensorrt buffer to fluid out_tensor
framework
::
LoDTensor
out_tensor
;
out_tensor
.
Resize
({
10
,
10
});
out_tensor
.
mutable_data
<
float
>
(
place
);
EngineIOConverter
::
ConvertOutput
(
"test"
,
buffer
,
&
out_tensor
,
size
,
&
stream
);
// compare in_tensor and out_tensor
std
::
vector
<
float
>
result
;
framework
::
TensorToVector
(
out_tensor
,
ctx
,
&
result
);
EXPECT_EQ
(
init
.
size
(),
result
.
size
());
for
(
size_t
i
=
0
;
i
<
init
.
size
();
i
++
)
{
EXPECT_EQ
(
init
[
i
],
result
[
i
]);
}
cudaStreamDestroy
(
stream
);
}
}
TEST_F
(
EngineInputConverterTester
,
DefaultGPU
)
{
TEST
(
EngineIOConverterTester
,
DefaultCPU
)
{
void
*
buffer
;
platform
::
CPUPlace
place
;
tensor
.
mutable_data
<
float
>
(
platform
::
CUDAPlace
());
platform
::
CPUDeviceContext
ctx
(
place
);
ASSERT_EQ
(
cudaMalloc
(
&
buffer
,
tensor
.
memory_size
()),
0
);
IOConverterTester
(
ctx
);
}
cudaStream_t
stream
;
TEST
(
EngineIOConverterTester
,
DefaultGPU
)
{
EngineInputConverter
::
Run
(
"test"
,
tensor
,
buffer
,
tensor
.
memory_size
(),
platform
::
CUDAPlace
place
;
&
stream
);
platform
::
CUDADeviceContext
ctx
(
place
);
IOConverterTester
(
ctx
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录