Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c6fb1635
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c6fb1635
编写于
7月 24, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix-mixed-tensor
上级
18d539e8
24bea401
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
178 addition
and
123 deletion
+178
-123
paddle/fluid/API.spec
paddle/fluid/API.spec
+0
-11
paddle/fluid/inference/tensorrt/convert/fc_op.cc
paddle/fluid/inference/tensorrt/convert/fc_op.cc
+6
-8
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
+7
-6
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/ut_helper.h
paddle/fluid/inference/tensorrt/convert/ut_helper.h
+5
-5
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+39
-23
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+4
-0
paddle/fluid/inference/tensorrt/test_engine.cc
paddle/fluid/inference/tensorrt/test_engine.cc
+37
-3
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+4
-3
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+3
-1
paddle/fluid/operators/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt_engine_op_test.cc
+16
-16
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+3
-11
python/paddle/fluid/tests/test_if_else_op.py
python/paddle/fluid/tests/test_if_else_op.py
+8
-5
python/paddle/fluid/tests/unittests/test_conditional_block.py
...on/paddle/fluid/tests/unittests/test_conditional_block.py
+3
-2
python/paddle/fluid/tests/unittests/test_const_value.py
python/paddle/fluid/tests/unittests/test_const_value.py
+1
-1
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
+11
-7
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
+2
-1
python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
...paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
+12
-7
python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
...n/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
+2
-1
python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
...on/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
+7
-4
python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py
...uid/tests/unittests/test_split_and_merge_lod_tensor_op.py
+6
-6
未找到文件。
paddle/fluid/API.spec
浏览文件 @
c6fb1635
...
@@ -200,31 +200,23 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
...
@@ -200,31 +200,23 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.split_lod_tensor ArgSpec(args=['input', 'mask', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.merge_lod_tensor ArgSpec(args=['in_true', 'in_false', 'x', 'mask', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.lod_rank_table ArgSpec(args=['x', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.max_sequence_len ArgSpec(args=['rank_table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.lod_tensor_to_array ArgSpec(args=['x', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_to_lod_tensor ArgSpec(args=['x', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True))
paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True))
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shrink_memory ArgSpec(args=['x', 'i', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.IfElse.parent_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.DynamicRNN.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.layers.DynamicRNN.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
...
@@ -233,9 +225,6 @@ paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs',
...
@@ -233,9 +225,6 @@ paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs',
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.ConditionalBlock.__init__ ArgSpec(args=['self', 'inputs', 'is_scalar_condition', 'name'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.layers.ConditionalBlock.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.ConditionalBlock.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.StaticRNN.complete_op ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.complete_op ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1))
paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1))
...
...
paddle/fluid/inference/tensorrt/convert/fc_op.cc
浏览文件 @
c6fb1635
...
@@ -32,11 +32,11 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
...
@@ -32,11 +32,11 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
for
(
int
h
=
0
;
h
<
shape
.
h
();
++
h
)
{
for
(
int
h
=
0
;
h
<
shape
.
h
();
++
h
)
{
for
(
int
w
=
0
;
w
<
shape
.
w
();
++
w
)
{
for
(
int
w
=
0
;
w
<
shape
.
w
();
++
w
)
{
odata
[
h
*
ostrides
.
h
()
+
w
*
ostrides
.
w
()]
=
odata
[
h
*
ostrides
.
h
()
+
w
*
ostrides
.
w
()]
=
idata
[
h
*
ostrides
.
h
()
+
w
*
o
strides
.
w
()];
idata
[
h
*
istrides
.
h
()
+
w
*
i
strides
.
w
()];
}
}
}
}
}
}
// indata c * k
// Reorder the data layout from CK to KC.
// Reorder the data layout from CK to KC.
void
ReorderCKtoKC
(
TensorRTEngine
::
Weight
&
iweights
,
void
ReorderCKtoKC
(
TensorRTEngine
::
Weight
&
iweights
,
TensorRTEngine
::
Weight
*
oweights
)
{
TensorRTEngine
::
Weight
*
oweights
)
{
...
@@ -79,9 +79,8 @@ class FcOpConverter : public OpConverter {
...
@@ -79,9 +79,8 @@ class FcOpConverter : public OpConverter {
framework
::
LoDTensor
tmp
;
framework
::
LoDTensor
tmp
;
tmp
.
Resize
(
Y_t
->
dims
());
tmp
.
Resize
(
Y_t
->
dims
());
memcpy
(
tmp
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
Y_t
->
data
<
float
>
(),
memcpy
(
tmp
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
weight_data
,
Y_t
->
dims
()[
0
]
*
Y_t
->
dims
()[
1
]);
Y_t
->
dims
()[
0
]
*
Y_t
->
dims
()[
1
]
*
sizeof
(
float
));
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kFLOAT
,
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kFLOAT
,
static_cast
<
void
*>
(
weight_data
),
static_cast
<
void
*>
(
weight_data
),
Y_t
->
memory_size
()
/
sizeof
(
float
)};
Y_t
->
memory_size
()
/
sizeof
(
float
)};
...
@@ -93,7 +92,7 @@ class FcOpConverter : public OpConverter {
...
@@ -93,7 +92,7 @@ class FcOpConverter : public OpConverter {
// The data layout of TRT FC layer's weight is different from fluid's FC,
// The data layout of TRT FC layer's weight is different from fluid's FC,
// need to reorder the elements.
// need to reorder the elements.
ReorderCKtoKC
(
tmp_weight
,
&
weight
);
ReorderCKtoKC
(
weight
,
&
tmp_
weight
);
// Currently, the framework can only handle one fluid op -> one TRT layer,
// Currently, the framework can only handle one fluid op -> one TRT layer,
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
...
@@ -103,7 +102,7 @@ class FcOpConverter : public OpConverter {
...
@@ -103,7 +102,7 @@ class FcOpConverter : public OpConverter {
auto
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
auto
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
*
const_cast
<
nvinfer1
::
ITensor
*>
(
X
),
*
const_cast
<
nvinfer1
::
ITensor
*>
(
X
),
n_output
,
weight
.
get
(),
bias
.
get
());
n_output
,
tmp_
weight
.
get
(),
bias
.
get
());
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
...
@@ -118,4 +117,3 @@ class FcOpConverter : public OpConverter {
...
@@ -118,4 +117,3 @@ class FcOpConverter : public OpConverter {
}
// namespace paddle
}
// namespace paddle
REGISTER_TRT_OP_CONVERTER
(
fc
,
FcOpConverter
);
REGISTER_TRT_OP_CONVERTER
(
fc
,
FcOpConverter
);
USE_OP
(
mul
);
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
c6fb1635
...
@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
...
@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
浏览文件 @
c6fb1635
...
@@ -23,11 +23,11 @@ namespace tensorrt {
...
@@ -23,11 +23,11 @@ namespace tensorrt {
TEST
(
fc_op
,
test
)
{
TEST
(
fc_op
,
test
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul-Y"
});
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul-Y"
});
framework
::
Scope
scope
;
framework
::
Scope
scope
;
TRTConvertValidation
validator
(
2
0
,
parameters
,
scope
,
1000
);
TRTConvertValidation
validator
(
1
0
,
parameters
,
scope
,
1000
);
validator
.
DeclInputVar
(
"mul-X"
,
nvinfer1
::
Dims4
(
1
,
10
,
1
,
1
));
validator
.
Decl
InputVar
(
"mul-X"
,
nvinfer1
::
Dims4
(
8
,
3
,
1
,
1
));
validator
.
Decl
ParamVar
(
"mul-Y"
,
nvinfer1
::
Dims2
(
10
,
2
));
validator
.
DeclParamVar
(
"mul-Y"
,
nvinfer1
::
Dims2
(
3
,
2
));
// validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8
, 2));
validator
.
DeclOutputVar
(
"mul-Out"
,
nvinfer1
::
Dims2
(
8
,
2
));
validator
.
DeclOutputVar
(
"mul-Out"
,
nvinfer1
::
Dims2
(
1
,
2
));
// Prepare Op description
// Prepare Op description
framework
::
OpDesc
desc
;
framework
::
OpDesc
desc
;
...
@@ -38,9 +38,10 @@ TEST(fc_op, test) {
...
@@ -38,9 +38,10 @@ TEST(fc_op, test) {
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_OP
(
mul
);
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
浏览文件 @
c6fb1635
...
@@ -39,7 +39,7 @@ TEST(MulOpConverter, main) {
...
@@ -39,7 +39,7 @@ TEST(MulOpConverter, main) {
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/ut_helper.h
浏览文件 @
c6fb1635
...
@@ -39,7 +39,7 @@ namespace tensorrt {
...
@@ -39,7 +39,7 @@ namespace tensorrt {
float
random
(
float
low
,
float
high
)
{
float
random
(
float
low
,
float
high
)
{
static
std
::
random_device
rd
;
static
std
::
random_device
rd
;
static
std
::
mt19937
mt
(
rd
());
static
std
::
mt19937
mt
(
rd
());
std
::
uniform_real_distribution
<
double
>
dist
(
1.0
,
10.0
);
std
::
uniform_real_distribution
<
double
>
dist
(
low
,
high
);
return
dist
(
mt
);
return
dist
(
mt
);
}
}
...
@@ -49,6 +49,7 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
...
@@ -49,6 +49,7 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
size_t
num_elements
=
analysis
::
AccuDims
(
dims
,
dims
.
size
());
size_t
num_elements
=
analysis
::
AccuDims
(
dims
,
dims
.
size
());
PADDLE_ENFORCE_GT
(
num_elements
,
0
);
PADDLE_ENFORCE_GT
(
num_elements
,
0
);
auto
*
data
=
tensor
->
mutable_data
<
float
>
(
place
);
auto
*
data
=
tensor
->
mutable_data
<
float
>
(
place
);
for
(
size_t
i
=
0
;
i
<
num_elements
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
num_elements
;
i
++
)
{
*
(
data
+
i
)
=
random
(
0.
,
1.
);
*
(
data
+
i
)
=
random
(
0.
,
1.
);
}
}
...
@@ -68,7 +69,7 @@ class TRTConvertValidation {
...
@@ -68,7 +69,7 @@ class TRTConvertValidation {
int
workspace_size
=
1
<<
10
)
int
workspace_size
=
1
<<
10
)
:
parameters_
(
parameters
),
scope_
(
scope
)
{
:
parameters_
(
parameters
),
scope_
(
scope
)
{
// create engine.
// create engine.
engine_
.
reset
(
new
TensorRTEngine
(
10
,
1
<<
10
,
&
stream_
));
engine_
.
reset
(
new
TensorRTEngine
(
batch_size
,
workspace_size
,
&
stream_
));
engine_
->
InitNetwork
();
engine_
->
InitNetwork
();
PADDLE_ENFORCE_EQ
(
cudaStreamCreate
(
&
stream_
),
0
);
PADDLE_ENFORCE_EQ
(
cudaStreamCreate
(
&
stream_
),
0
);
...
@@ -138,12 +139,11 @@ class TRTConvertValidation {
...
@@ -138,12 +139,11 @@ class TRTConvertValidation {
cudaStreamSynchronize
(
*
engine_
->
stream
());
cudaStreamSynchronize
(
*
engine_
->
stream
());
ASSERT_FALSE
(
op_desc_
->
OutputArgumentNames
().
empty
());
ASSERT_FALSE
(
op_desc_
->
OutputArgumentNames
().
empty
());
const
size_t
output_space_size
=
200
;
const
size_t
output_space_size
=
200
0
;
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
std
::
vector
<
float
>
fluid_out
;
std
::
vector
<
float
>
fluid_out
;
std
::
vector
<
float
>
trt_out
(
output_space_size
);
std
::
vector
<
float
>
trt_out
(
output_space_size
);
engine_
->
GetOutputInCPU
(
output
,
&
trt_out
[
0
],
engine_
->
GetOutputInCPU
(
output
,
&
trt_out
[
0
],
output_space_size
);
output_space_size
*
sizeof
(
float
));
cudaStreamSynchronize
(
*
engine_
->
stream
());
cudaStreamSynchronize
(
*
engine_
->
stream
());
auto
*
var
=
scope_
.
FindVar
(
output
);
auto
*
var
=
scope_
.
FindVar
(
output
);
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
c6fb1635
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use
you may not use
this file except in compliance with the License.
this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
...
@@ -26,6 +26,8 @@ namespace paddle {
...
@@ -26,6 +26,8 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
tensorrt
{
namespace
tensorrt
{
int
TensorRTEngine
::
runtime_batch_
=
1
;
void
TensorRTEngine
::
Build
(
const
DescType
&
paddle_model
)
{
void
TensorRTEngine
::
Build
(
const
DescType
&
paddle_model
)
{
PADDLE_ENFORCE
(
false
,
"not implemented"
);
PADDLE_ENFORCE
(
false
,
"not implemented"
);
}
}
...
@@ -42,6 +44,7 @@ void TensorRTEngine::Execute(int batch_size) {
...
@@ -42,6 +44,7 @@ void TensorRTEngine::Execute(int batch_size) {
PADDLE_ENFORCE_NOT_NULL
(
stream_
);
PADDLE_ENFORCE_NOT_NULL
(
stream_
);
infer_context_
->
enqueue
(
batch_size
,
buffers
.
data
(),
*
stream_
,
nullptr
);
infer_context_
->
enqueue
(
batch_size
,
buffers
.
data
(),
*
stream_
,
nullptr
);
cudaStreamSynchronize
(
*
stream_
);
cudaStreamSynchronize
(
*
stream_
);
SetRuntimeBatch
(
batch_size
);
}
}
TensorRTEngine
::~
TensorRTEngine
()
{
TensorRTEngine
::~
TensorRTEngine
()
{
...
@@ -80,17 +83,17 @@ void TensorRTEngine::FreezeNetwork() {
...
@@ -80,17 +83,17 @@ void TensorRTEngine::FreezeNetwork() {
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
item
.
second
=
kDataTypeSize
[
static_cast
<
int
>
(
item
.
second
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
max_batch_
;
PADDLE_ENFORCE_GT
(
item
.
second
,
0
);
PADDLE_ENFORCE_GT
(
item
.
second
,
0
);
}
}
auto
&
buf
=
buffer
(
item
.
first
);
auto
&
buf
=
buffer
(
item
.
first
);
buf
.
max_size
=
item
.
second
*
max_batch_
;
buf
.
max_size
=
item
.
second
*
max_batch_
;
CHECK
(
buf
.
buffer
==
nullptr
);
// buffer should be allocated only once.
CHECK
(
buf
.
buffer
==
nullptr
);
// buffer should be allocated only once.
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
buf
.
max_size
));
PADDLE_ENFORCE_LE
(
buf
.
max_size
,
1
<<
30
);
// 10G
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
item
.
second
*
max_batch_
));
// buf.size will changed in the runtime.
buf
.
size
=
0
;
buf
.
size
=
0
;
PADDLE_ENFORCE_LE
(
buf
.
max_size
,
1
<<
30
);
// 10G
buf
.
device
=
DeviceType
::
GPU
;
buf
.
device
=
DeviceType
::
GPU
;
}
}
}
}
...
@@ -105,7 +108,7 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
...
@@ -105,7 +108,7 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
max_batch_
;
PADDLE_ENFORCE
(
input
->
isNetworkInput
());
PADDLE_ENFORCE
(
input
->
isNetworkInput
());
TensorRTEngine
::
SetITensor
(
name
,
input
);
TensorRTEngine
::
SetITensor
(
name
,
input
);
return
input
;
return
input
;
...
@@ -149,35 +152,42 @@ void *TensorRTEngine::GetOutputInGPU(const std::string &name) {
...
@@ -149,35 +152,42 @@ void *TensorRTEngine::GetOutputInGPU(const std::string &name) {
void
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
,
void
*
dst
,
void
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
size_t
max_size
)
{
// determine data size
// determine data size
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
nvinfer1
::
Dims
dims
=
output
->
getDimensions
();
auto
dim_size
=
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
size_t
dst_size
=
dim_size
*
runtime_batch_
*
kDataTypeSize
[
static_cast
<
int
>
(
output
->
getType
())];
auto
it
=
buffer_sizes_
.
find
(
name
);
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
PADDLE_ENFORCE_LE
(
dst_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
dst_size
);
auto
&
buf
=
buffer
(
name
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
it
->
second
,
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
dst_size
,
cudaMemcpyDeviceToDevice
,
*
stream_
),
cudaMemcpyDeviceToDevice
,
*
stream_
),
0
);
0
);
}
}
void
TensorRTEngine
::
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
void
TensorRTEngine
::
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
size_t
max_size
)
{
VLOG
(
4
)
<<
"get output in cpu"
;
auto
&
buf
=
buffer
(
name
);
// Update needed buffer size.
auto
slot_offset
=
infer_engine_
->
getBindingIndex
(
name
.
c_str
());
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
buf
.
size
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
PADDLE_ENFORCE_LE
(
buf
.
size
,
buf
.
max_size
);
// determine data size
// determine data size
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
nvinfer1
::
Dims
dims
=
output
->
getDimensions
();
auto
dim_size
=
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
size_t
dst_size
=
dim_size
*
runtime_batch_
*
kDataTypeSize
[
static_cast
<
int
>
(
output
->
getType
())];
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_LE
(
dst_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
dst_size
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
// DEBUG
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
dst_size
,
memset
(
dst
,
0
,
buf
.
size
);
cudaMemcpyDeviceToHost
,
*
stream_
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpy
(
dst
,
buf
.
buffer
,
buf
.
size
,
cudaMemcpyDeviceToHost
));
}
}
Buffer
&
TensorRTEngine
::
buffer
(
const
std
::
string
&
name
)
{
Buffer
&
TensorRTEngine
::
buffer
(
const
std
::
string
&
name
)
{
...
@@ -225,6 +235,12 @@ nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
...
@@ -225,6 +235,12 @@ nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
return
itensor_map_
[
name
];
return
itensor_map_
[
name
];
}
}
void
TensorRTEngine
::
SetRuntimeBatch
(
size_t
batch_size
)
{
runtime_batch_
=
batch_size
;
}
int
TensorRTEngine
::
GetRuntimeBatch
()
{
return
runtime_batch_
;
}
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
c6fb1635
...
@@ -117,10 +117,14 @@ class TensorRTEngine : public EngineBase {
...
@@ -117,10 +117,14 @@ class TensorRTEngine : public EngineBase {
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
INetworkDefinition
*
network
()
{
return
infer_network_
.
get
();
}
nvinfer1
::
INetworkDefinition
*
network
()
{
return
infer_network_
.
get
();
}
void
SetRuntimeBatch
(
size_t
batch_size
);
int
GetRuntimeBatch
();
private:
private:
// the max batch size
// the max batch size
int
max_batch_
;
int
max_batch_
;
// the runtime batch size
static
int
runtime_batch_
;
// the max memory size the engine uses
// the max memory size the engine uses
int
max_workspace_
;
int
max_workspace_
;
...
...
paddle/fluid/inference/tensorrt/test_engine.cc
浏览文件 @
c6fb1635
...
@@ -28,7 +28,7 @@ class TensorRTEngineTest : public ::testing::Test {
...
@@ -28,7 +28,7 @@ class TensorRTEngineTest : public ::testing::Test {
protected:
protected:
void
SetUp
()
override
{
void
SetUp
()
override
{
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream_
));
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream_
));
engine_
=
new
TensorRTEngine
(
1
,
1
<<
10
,
&
stream_
);
engine_
=
new
TensorRTEngine
(
1
0
,
1
<<
10
,
&
stream_
);
engine_
->
InitNetwork
();
engine_
->
InitNetwork
();
}
}
...
@@ -71,7 +71,7 @@ TEST_F(TensorRTEngineTest, add_layer) {
...
@@ -71,7 +71,7 @@ TEST_F(TensorRTEngineTest, add_layer) {
LOG
(
INFO
)
<<
"to get output"
;
LOG
(
INFO
)
<<
"to get output"
;
float
y_cpu
;
float
y_cpu
;
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
sizeof
(
float
));
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
1
*
sizeof
(
float
));
LOG
(
INFO
)
<<
"to checkout output"
;
LOG
(
INFO
)
<<
"to checkout output"
;
ASSERT_EQ
(
y_cpu
,
x_v
*
2
+
3
);
ASSERT_EQ
(
y_cpu
,
x_v
*
2
+
3
);
...
@@ -103,15 +103,49 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
...
@@ -103,15 +103,49 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
LOG
(
INFO
)
<<
"to get output"
;
LOG
(
INFO
)
<<
"to get output"
;
float
y_cpu
[
2
]
=
{
-
1.
,
-
1.
};
float
y_cpu
[
2
]
=
{
-
1.
,
-
1.
};
auto
dims
=
engine_
->
GetITensor
(
"y"
)
->
getDimensions
();
auto
dims
=
engine_
->
GetITensor
(
"y"
)
->
getDimensions
();
ASSERT_EQ
(
dims
.
nbDims
,
3
);
ASSERT_EQ
(
dims
.
nbDims
,
3
);
ASSERT_EQ
(
dims
.
d
[
0
],
2
);
ASSERT_EQ
(
dims
.
d
[
0
],
2
);
ASSERT_EQ
(
dims
.
d
[
1
],
1
);
ASSERT_EQ
(
dims
.
d
[
1
],
1
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
sizeof
(
float
)
*
2
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
2
*
sizeof
(
float
)
);
ASSERT_EQ
(
y_cpu
[
0
],
4.5
);
ASSERT_EQ
(
y_cpu
[
0
],
4.5
);
ASSERT_EQ
(
y_cpu
[
1
],
14.5
);
ASSERT_EQ
(
y_cpu
[
1
],
14.5
);
}
}
TEST_F
(
TensorRTEngineTest
,
test_conv2d_temp
)
{
// Weight in CPU memory.
float
raw_weight
[
9
]
=
{
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
};
float
raw_bias
[
1
]
=
{
0
};
TensorRTEngine
::
Weight
weight
(
nvinfer1
::
DataType
::
kFLOAT
,
raw_weight
,
9
);
TensorRTEngine
::
Weight
bias
(
nvinfer1
::
DataType
::
kFLOAT
,
raw_bias
,
1
);
auto
*
x
=
engine_
->
DeclareInput
(
"x"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
Dims3
{
1
,
3
,
3
});
auto
*
conv_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Convolution
,
*
x
,
1
,
nvinfer1
::
DimsHW
{
3
,
3
},
weight
.
get
(),
bias
.
get
());
PADDLE_ENFORCE
(
conv_layer
!=
nullptr
);
conv_layer
->
setStride
(
nvinfer1
::
DimsHW
{
1
,
1
});
conv_layer
->
setPadding
(
nvinfer1
::
DimsHW
{
1
,
1
});
engine_
->
DeclareOutput
(
conv_layer
,
0
,
"y"
);
engine_
->
FreezeNetwork
();
ASSERT_EQ
(
engine_
->
engine
()
->
getNbBindings
(),
2
);
float
x_v
[
18
]
=
{
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
};
engine_
->
SetInputFromCPU
(
"x"
,
reinterpret_cast
<
void
*>
(
&
x_v
),
18
*
sizeof
(
float
));
engine_
->
Execute
(
2
);
LOG
(
INFO
)
<<
"to get output"
;
float
*
y_cpu
=
new
float
[
18
];
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
18
*
sizeof
(
float
));
ASSERT_EQ
(
y_cpu
[
0
],
4.0
);
ASSERT_EQ
(
y_cpu
[
1
],
6.0
);
}
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
c6fb1635
...
@@ -55,13 +55,14 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
...
@@ -55,13 +55,14 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
"TensorRT' tensor input requires at least 2 dimensions"
);
"TensorRT' tensor input requires at least 2 dimensions"
);
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
"TensorRT' tensor input requires at most 4 dimensions"
);
"TensorRT' tensor input requires at most 4 dimensions"
);
switch
(
shape
.
size
())
{
switch
(
shape
.
size
())
{
case
2
:
case
2
:
return
nvinfer1
::
Dims2
(
shape
[
0
]
,
shape
[
1
]);
return
nvinfer1
::
Dims2
(
1
,
shape
[
1
]);
case
3
:
case
3
:
return
nvinfer1
::
Dims3
(
shape
[
0
]
,
shape
[
1
],
shape
[
2
]);
return
nvinfer1
::
Dims3
(
1
,
shape
[
1
],
shape
[
2
]);
case
4
:
case
4
:
return
nvinfer1
::
Dims4
(
shape
[
0
]
,
shape
[
1
],
shape
[
2
],
shape
[
3
]);
return
nvinfer1
::
Dims4
(
1
,
shape
[
1
],
shape
[
2
],
shape
[
3
]);
default:
default:
return
nvinfer1
::
Dims
();
return
nvinfer1
::
Dims
();
}
}
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
c6fb1635
...
@@ -93,13 +93,15 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
...
@@ -93,13 +93,15 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
// TODO(Superjomn) find some way to determine which device to output the
// TODO(Superjomn) find some way to determine which device to output the
// tensor.
// tensor.
// if (platform::is_cpu_place(fluid_t->place())) {
// if (platform::is_cpu_place(fluid_t->place())) {
// TODO(Superjomn) change this float to dtype size.
// TODO(Superjomn) change this float to dtype size.
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
FLAGS_tensorrt_engine_batch_size
;
engine
->
GetOutputInCPU
(
y
,
engine
->
GetOutputInCPU
(
y
,
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
size
*
sizeof
(
float
));
size
*
sizeof
(
float
));
...
...
paddle/fluid/operators/tensorrt_engine_op_test.cc
浏览文件 @
c6fb1635
...
@@ -64,36 +64,37 @@ TEST(TensorRTEngineOp, manual) {
...
@@ -64,36 +64,37 @@ TEST(TensorRTEngineOp, manual) {
LOG
(
INFO
)
<<
"create block desc"
;
LOG
(
INFO
)
<<
"create block desc"
;
framework
::
BlockDesc
block_desc
(
&
program
,
block_
);
framework
::
BlockDesc
block_desc
(
&
program
,
block_
);
LOG
(
INFO
)
<<
"create
mul
op"
;
LOG
(
INFO
)
<<
"create
fc
op"
;
auto
*
mul
=
block_desc
.
AppendOp
();
auto
*
fc0
=
block_desc
.
AppendOp
();
mul
->
SetType
(
"mul
"
);
fc0
->
SetType
(
"fc
"
);
mul
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
// 2 x 4
fc0
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
// 4 x 1 x 1
mul
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y"
}));
// 4 x 6
fc0
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y"
}));
// 4 x 6
mul
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
// 2 x 6
fc0
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
// 6 x 1 x 1
LOG
(
INFO
)
<<
"create fc op"
;
LOG
(
INFO
)
<<
"create fc op"
;
auto
*
fc
=
block_desc
.
AppendOp
();
auto
*
fc
1
=
block_desc
.
AppendOp
();
fc
->
SetType
(
"mul
"
);
fc
1
->
SetType
(
"fc
"
);
fc
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
fc
1
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
fc
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y0"
}));
// 6 x 8
fc
1
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y0"
}));
// 6 x 8
fc
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
// 2 x 8
fc
1
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
// 8 x 1 x 1
// Set inputs' variable shape in BlockDesc
// Set inputs' variable shape in BlockDesc
AddTensorToBlockDesc
(
block_
,
"x"
,
std
::
vector
<
int64_t
>
({
2
,
4
}));
// the batch size is 2, so the dims of 'x' is {2, 4, 1, 1}
AddTensorToBlockDesc
(
block_
,
"x"
,
std
::
vector
<
int64_t
>
({
2
,
4
,
1
,
1
}));
AddTensorToBlockDesc
(
block_
,
"y"
,
std
::
vector
<
int64_t
>
({
4
,
6
}));
AddTensorToBlockDesc
(
block_
,
"y"
,
std
::
vector
<
int64_t
>
({
4
,
6
}));
AddTensorToBlockDesc
(
block_
,
"y0"
,
std
::
vector
<
int64_t
>
({
6
,
8
}));
AddTensorToBlockDesc
(
block_
,
"y0"
,
std
::
vector
<
int64_t
>
({
6
,
8
}));
AddTensorToBlockDesc
(
block_
,
"z"
,
std
::
vector
<
int64_t
>
({
2
,
6
}));
AddTensorToBlockDesc
(
block_
,
"z"
,
std
::
vector
<
int64_t
>
({
2
,
6
}));
// It is wired, need to copy manually.
// It is wired, need to copy manually.
*
block_
->
add_ops
()
=
*
mul
->
Proto
();
*
block_
->
add_ops
()
=
*
fc0
->
Proto
();
*
block_
->
add_ops
()
=
*
fc
->
Proto
();
*
block_
->
add_ops
()
=
*
fc
1
->
Proto
();
ASSERT_EQ
(
block_
->
ops_size
(),
2
);
ASSERT_EQ
(
block_
->
ops_size
(),
2
);
LOG
(
INFO
)
<<
"create tensorrt desc"
;
LOG
(
INFO
)
<<
"create tensorrt desc"
;
framework
::
OpDesc
engine_op_desc
(
nullptr
);
framework
::
OpDesc
engine_op_desc
(
nullptr
);
engine_op_desc
.
SetType
(
"tensorrt_engine"
);
engine_op_desc
.
SetType
(
"tensorrt_engine"
);
engine_op_desc
.
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
({
"x"
,
"y"
,
"y0"
}));
engine_op_desc
.
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
engine_op_desc
.
SetOutput
(
"Ys"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
engine_op_desc
.
SetOutput
(
"Ys"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"subgraph"
,
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"subgraph"
,
block_
->
SerializeAsString
());
block_
->
SerializeAsString
());
...
@@ -207,5 +208,4 @@ TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }
...
@@ -207,5 +208,4 @@ TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
USE_TRT_CONVERTER
(
mul
)
USE_TRT_CONVERTER
(
fc
)
USE_TRT_CONVERTER
(
fc
)
python/paddle/fluid/layers/control_flow.py
浏览文件 @
c6fb1635
...
@@ -23,25 +23,17 @@ from ops import logical_and, logical_not, logical_or
...
@@ -23,25 +23,17 @@ from ops import logical_and, logical_not, logical_or
import
numpy
import
numpy
__all__
=
[
__all__
=
[
'split_lod_tensor'
,
'merge_lod_tensor'
,
'While'
,
'While'
,
'Switch'
,
'Switch'
,
'lod_rank_table'
,
'max_sequence_len'
,
'lod_tensor_to_array'
,
'array_to_lod_tensor'
,
'increment'
,
'increment'
,
'array_write'
,
'array_write'
,
'create_array'
,
'create_array'
,
'less_than'
,
'less_than'
,
'equal'
,
'equal'
,
'array_read'
,
'array_read'
,
'shrink_memory'
,
'array_length'
,
'array_length'
,
'IfElse'
,
'IfElse'
,
'DynamicRNN'
,
'DynamicRNN'
,
'ConditionalBlock'
,
'StaticRNN'
,
'StaticRNN'
,
'reorder_lod_tensor_by_rank'
,
'reorder_lod_tensor_by_rank'
,
'ParallelDo'
,
'ParallelDo'
,
...
@@ -1457,7 +1449,7 @@ class IfElse(object):
...
@@ -1457,7 +1449,7 @@ class IfElse(object):
if
self
.
status
==
IfElse
.
OUT_IF_ELSE_BLOCKS
:
if
self
.
status
==
IfElse
.
OUT_IF_ELSE_BLOCKS
:
raise
ValueError
(
"input must in true/false blocks"
)
raise
ValueError
(
"input must in true/false blocks"
)
if
id
(
x
)
not
in
self
.
input_table
:
if
id
(
x
)
not
in
self
.
input_table
:
parent_block
=
self
.
parent_block
()
parent_block
=
self
.
_
parent_block
()
out_true
=
parent_block
.
create_var
(
out_true
=
parent_block
.
create_var
(
name
=
unique_name
.
generate
(
'ifelse_input'
+
self
.
helper
.
name
),
name
=
unique_name
.
generate
(
'ifelse_input'
+
self
.
helper
.
name
),
dtype
=
x
.
dtype
)
dtype
=
x
.
dtype
)
...
@@ -1483,7 +1475,7 @@ class IfElse(object):
...
@@ -1483,7 +1475,7 @@ class IfElse(object):
else
:
else
:
return
out_false
return
out_false
def
parent_block
(
self
):
def
_
parent_block
(
self
):
current_block
=
self
.
helper
.
main_program
.
current_block
()
current_block
=
self
.
helper
.
main_program
.
current_block
()
return
self
.
helper
.
main_program
.
block
(
current_block
.
parent_idx
)
return
self
.
helper
.
main_program
.
block
(
current_block
.
parent_idx
)
...
@@ -1499,7 +1491,7 @@ class IfElse(object):
...
@@ -1499,7 +1491,7 @@ class IfElse(object):
out_table
=
self
.
output_table
[
1
if
self
.
status
==
out_table
=
self
.
output_table
[
1
if
self
.
status
==
self
.
IN_IF_ELSE_TRUE_BLOCKS
else
0
]
self
.
IN_IF_ELSE_TRUE_BLOCKS
else
0
]
parent_block
=
self
.
parent_block
()
parent_block
=
self
.
_
parent_block
()
for
each_out
in
outs
:
for
each_out
in
outs
:
if
not
isinstance
(
each_out
,
Variable
):
if
not
isinstance
(
each_out
,
Variable
):
raise
TypeError
(
"Each output should be a variable"
)
raise
TypeError
(
"Each output should be a variable"
)
...
...
python/paddle/fluid/tests/test_if_else_op.py
浏览文件 @
c6fb1635
...
@@ -19,6 +19,10 @@ from paddle.fluid.executor import Executor
...
@@ -19,6 +19,10 @@ from paddle.fluid.executor import Executor
from
paddle.fluid.optimizer
import
MomentumOptimizer
from
paddle.fluid.optimizer
import
MomentumOptimizer
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.control_flow
import
split_lod_tensor
from
paddle.fluid.layers.control_flow
import
merge_lod_tensor
from
paddle.fluid.layers.control_flow
import
ConditionalBlock
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
...
@@ -34,11 +38,10 @@ class TestMNISTIfElseOp(unittest.TestCase):
...
@@ -34,11 +38,10 @@ class TestMNISTIfElseOp(unittest.TestCase):
limit
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
5
)
limit
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
5
)
cond
=
layers
.
less_than
(
x
=
label
,
y
=
limit
)
cond
=
layers
.
less_than
(
x
=
label
,
y
=
limit
)
true_image
,
false_image
=
layers
.
split_lod_tensor
(
true_image
,
false_image
=
split_lod_tensor
(
input
=
image
,
mask
=
cond
)
input
=
image
,
mask
=
cond
)
true_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
true_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
true_cond
=
layers
.
ConditionalBlock
([
cond
])
true_cond
=
ConditionalBlock
([
cond
])
with
true_cond
.
block
():
with
true_cond
.
block
():
hidden
=
layers
.
fc
(
input
=
true_image
,
size
=
100
,
act
=
'tanh'
)
hidden
=
layers
.
fc
(
input
=
true_image
,
size
=
100
,
act
=
'tanh'
)
...
@@ -46,14 +49,14 @@ class TestMNISTIfElseOp(unittest.TestCase):
...
@@ -46,14 +49,14 @@ class TestMNISTIfElseOp(unittest.TestCase):
layers
.
assign
(
input
=
prob
,
output
=
true_out
)
layers
.
assign
(
input
=
prob
,
output
=
true_out
)
false_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
false_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
false_cond
=
layers
.
ConditionalBlock
([
cond
])
false_cond
=
ConditionalBlock
([
cond
])
with
false_cond
.
block
():
with
false_cond
.
block
():
hidden
=
layers
.
fc
(
input
=
false_image
,
size
=
200
,
act
=
'tanh'
)
hidden
=
layers
.
fc
(
input
=
false_image
,
size
=
200
,
act
=
'tanh'
)
prob
=
layers
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
'softmax'
)
prob
=
layers
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
'softmax'
)
layers
.
assign
(
input
=
prob
,
output
=
false_out
)
layers
.
assign
(
input
=
prob
,
output
=
false_out
)
prob
=
layers
.
merge_lod_tensor
(
prob
=
merge_lod_tensor
(
in_true
=
true_out
,
in_false
=
false_out
,
mask
=
cond
,
x
=
image
)
in_true
=
true_out
,
in_false
=
false_out
,
mask
=
cond
,
x
=
image
)
loss
=
layers
.
cross_entropy
(
input
=
prob
,
label
=
label
)
loss
=
layers
.
cross_entropy
(
input
=
prob
,
label
=
label
)
avg_loss
=
layers
.
mean
(
loss
)
avg_loss
=
layers
.
mean
(
loss
)
...
...
python/paddle/fluid/tests/unittests/test_conditional_block.py
浏览文件 @
c6fb1635
...
@@ -18,14 +18,15 @@ import paddle.fluid.core as core
...
@@ -18,14 +18,15 @@ import paddle.fluid.core as core
from
paddle.fluid.framework
import
default_startup_program
,
default_main_program
from
paddle.fluid.framework
import
default_startup_program
,
default_main_program
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
ConditionalBlock
import
numpy
import
numpy
class
ConditionalBlock
(
unittest
.
TestCase
):
class
ConditionalBlock
Test
(
unittest
.
TestCase
):
def
test_forward
(
self
):
def
test_forward
(
self
):
data
=
layers
.
data
(
name
=
'X'
,
shape
=
[
1
],
dtype
=
'float32'
)
data
=
layers
.
data
(
name
=
'X'
,
shape
=
[
1
],
dtype
=
'float32'
)
data
.
stop_gradient
=
False
data
.
stop_gradient
=
False
cond
=
layers
.
ConditionalBlock
(
inputs
=
[
data
])
cond
=
ConditionalBlock
(
inputs
=
[
data
])
out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
with
cond
.
block
():
with
cond
.
block
():
hidden
=
layers
.
fc
(
input
=
data
,
size
=
10
)
hidden
=
layers
.
fc
(
input
=
data
,
size
=
10
)
...
...
python/paddle/fluid/tests/unittests/test_const_value.py
浏览文件 @
c6fb1635
...
@@ -16,7 +16,7 @@ import unittest
...
@@ -16,7 +16,7 @@ import unittest
import
paddle.fluid.framework
as
framework
import
paddle.fluid.framework
as
framework
class
Con
ditionalBlock
(
unittest
.
TestCase
):
class
Con
stantTest
(
unittest
.
TestCase
):
def
test_const_value
(
self
):
def
test_const_value
(
self
):
self
.
assertEqual
(
framework
.
GRAD_VAR_SUFFIX
,
"@GRAD"
)
self
.
assertEqual
(
framework
.
GRAD_VAR_SUFFIX
,
"@GRAD"
)
self
.
assertEqual
(
framework
.
TEMP_VAR_NAME
,
"@TEMP@"
)
self
.
assertEqual
(
framework
.
TEMP_VAR_NAME
,
"@TEMP@"
)
...
...
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
浏览文件 @
c6fb1635
...
@@ -17,6 +17,12 @@ import paddle
...
@@ -17,6 +17,12 @@ import paddle
import
unittest
import
unittest
import
numpy
import
numpy
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.layers.control_flow
import
max_sequence_len
from
paddle.fluid.layers.control_flow
import
lod_tensor_to_array
from
paddle.fluid.layers.control_flow
import
array_to_lod_tensor
from
paddle.fluid.layers.control_flow
import
shrink_memory
class
TestDynRNN
(
unittest
.
TestCase
):
class
TestDynRNN
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -38,12 +44,11 @@ class TestDynRNN(unittest.TestCase):
...
@@ -38,12 +44,11 @@ class TestDynRNN(unittest.TestCase):
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'float32'
)
rank_table
=
fluid
.
layers
.
lod_rank_table
(
x
=
sent_emb
)
rank_table
=
lod_rank_table
(
x
=
sent_emb
)
sent_emb_array
=
fluid
.
layers
.
lod_tensor_to_array
(
sent_emb_array
=
lod_tensor_to_array
(
x
=
sent_emb
,
table
=
rank_table
)
x
=
sent_emb
,
table
=
rank_table
)
seq_len
=
fluid
.
layers
.
max_sequence_len
(
rank_table
=
rank_table
)
seq_len
=
max_sequence_len
(
rank_table
=
rank_table
)
i
=
fluid
.
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
0
)
i
=
fluid
.
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
0
)
i
.
stop_gradient
=
False
i
.
stop_gradient
=
False
...
@@ -66,7 +71,7 @@ class TestDynRNN(unittest.TestCase):
...
@@ -66,7 +71,7 @@ class TestDynRNN(unittest.TestCase):
mem
=
fluid
.
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
mem
=
fluid
.
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
ipt
=
fluid
.
layers
.
array_read
(
array
=
sent_emb_array
,
i
=
i
)
ipt
=
fluid
.
layers
.
array_read
(
array
=
sent_emb_array
,
i
=
i
)
mem
=
fluid
.
layers
.
shrink_memory
(
x
=
mem
,
i
=
i
,
table
=
rank_table
)
mem
=
shrink_memory
(
x
=
mem
,
i
=
i
,
table
=
rank_table
)
hidden
=
fluid
.
layers
.
fc
(
input
=
[
mem
,
ipt
],
size
=
100
,
act
=
'tanh'
)
hidden
=
fluid
.
layers
.
fc
(
input
=
[
mem
,
ipt
],
size
=
100
,
act
=
'tanh'
)
...
@@ -75,8 +80,7 @@ class TestDynRNN(unittest.TestCase):
...
@@ -75,8 +80,7 @@ class TestDynRNN(unittest.TestCase):
fluid
.
layers
.
array_write
(
x
=
hidden
,
i
=
i
,
array
=
mem_array
)
fluid
.
layers
.
array_write
(
x
=
hidden
,
i
=
i
,
array
=
mem_array
)
fluid
.
layers
.
less_than
(
x
=
i
,
y
=
seq_len
,
cond
=
cond
)
fluid
.
layers
.
less_than
(
x
=
i
,
y
=
seq_len
,
cond
=
cond
)
all_timesteps
=
fluid
.
layers
.
array_to_lod_tensor
(
all_timesteps
=
array_to_lod_tensor
(
x
=
out
,
table
=
rank_table
)
x
=
out
,
table
=
rank_table
)
last
=
fluid
.
layers
.
sequence_last_step
(
input
=
all_timesteps
)
last
=
fluid
.
layers
.
sequence_last_step
(
input
=
all_timesteps
)
logits
=
fluid
.
layers
.
fc
(
input
=
last
,
size
=
1
,
act
=
None
)
logits
=
fluid
.
layers
.
fc
(
input
=
last
,
size
=
1
,
act
=
None
)
loss
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
loss
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
...
...
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
浏览文件 @
c6fb1635
...
@@ -12,7 +12,8 @@
...
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
paddle.fluid.layers
import
lod_rank_table
,
data
from
paddle.fluid.layers
import
data
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
numpy
import
numpy
...
...
python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
浏览文件 @
c6fb1635
...
@@ -20,6 +20,11 @@ from paddle.fluid.framework import Program, program_guard
...
@@ -20,6 +20,11 @@ from paddle.fluid.framework import Program, program_guard
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.layers.control_flow
import
max_sequence_len
from
paddle.fluid.layers.control_flow
import
lod_tensor_to_array
from
paddle.fluid.layers.control_flow
import
array_to_lod_tensor
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
def
place
(
self
):
def
place
(
self
):
...
@@ -137,13 +142,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
...
@@ -137,13 +142,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
with
program_guard
(
program
):
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
10
])
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
10
])
x
.
persistable
=
True
x
.
persistable
=
True
table
=
l
ayers
.
l
od_rank_table
(
x
,
level
=
level
)
table
=
lod_rank_table
(
x
,
level
=
level
)
max_len
=
layers
.
max_sequence_len
(
table
)
max_len
=
max_sequence_len
(
table
)
max_len
.
persistable
=
True
max_len
.
persistable
=
True
array
=
l
ayers
.
l
od_tensor_to_array
(
x
,
table
)
array
=
lod_tensor_to_array
(
x
,
table
)
array
.
persistable
=
True
array
.
persistable
=
True
result
=
layers
.
array_to_lod_tensor
(
array
,
table
)
result
=
array_to_lod_tensor
(
array
,
table
)
result
.
persistable
=
True
result
.
persistable
=
True
exe
=
Executor
(
place
)
exe
=
Executor
(
place
)
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
...
@@ -181,9 +186,9 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
...
@@ -181,9 +186,9 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
with
program_guard
(
program
):
with
program_guard
(
program
):
x
=
layers
.
data
(
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
1
],
dtype
=
'float32'
,
stop_gradient
=
False
)
name
=
'x'
,
shape
=
[
1
],
dtype
=
'float32'
,
stop_gradient
=
False
)
table
=
l
ayers
.
l
od_rank_table
(
x
,
level
=
0
)
table
=
lod_rank_table
(
x
,
level
=
0
)
array
=
l
ayers
.
l
od_tensor_to_array
(
x
,
table
)
array
=
lod_tensor_to_array
(
x
,
table
)
result
=
layers
.
array_to_lod_tensor
(
array
,
table
)
result
=
array_to_lod_tensor
(
array
,
table
)
mean
=
layers
.
mean
(
result
)
mean
=
layers
.
mean
(
result
)
...
...
python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
浏览文件 @
c6fb1635
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
import
unittest
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
from
paddle.fluid.layers.control_flow
import
lod_rank_table
import
numpy
import
numpy
...
@@ -34,7 +35,7 @@ class TestReorderLoDTensor(unittest.TestCase):
...
@@ -34,7 +35,7 @@ class TestReorderLoDTensor(unittest.TestCase):
dat
.
stop_gradient
=
False
dat
.
stop_gradient
=
False
rank_dat
=
fluid
.
layers
.
data
(
rank_dat
=
fluid
.
layers
.
data
(
name
=
cls
.
data_desc
[
1
][
0
],
shape
=
cls
.
data_desc
[
1
][
1
])
name
=
cls
.
data_desc
[
1
][
0
],
shape
=
cls
.
data_desc
[
1
][
1
])
table
=
fluid
.
layers
.
lod_rank_table
(
rank_dat
)
table
=
lod_rank_table
(
rank_dat
)
new_dat
=
fluid
.
layers
.
reorder_lod_tensor_by_rank
(
new_dat
=
fluid
.
layers
.
reorder_lod_tensor_by_rank
(
x
=
dat
,
rank_table
=
table
)
x
=
dat
,
rank_table
=
table
)
loss
=
fluid
.
layers
.
reduce_sum
(
new_dat
)
loss
=
fluid
.
layers
.
reduce_sum
(
new_dat
)
...
...
python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
浏览文件 @
c6fb1635
...
@@ -21,6 +21,9 @@ from paddle.fluid.framework import default_main_program, switch_main_program
...
@@ -21,6 +21,9 @@ from paddle.fluid.framework import default_main_program, switch_main_program
from
paddle.fluid.framework
import
Program
from
paddle.fluid.framework
import
Program
import
numpy
as
np
import
numpy
as
np
from
paddle.fluid.layers.control_flow
import
shrink_memory
from
paddle.fluid.layers.control_flow
import
lod_rank_table
class
TestShrinkRNNMemoryBase
(
unittest
.
TestCase
):
class
TestShrinkRNNMemoryBase
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -30,15 +33,15 @@ class TestShrinkRNNMemoryBase(unittest.TestCase):
...
@@ -30,15 +33,15 @@ class TestShrinkRNNMemoryBase(unittest.TestCase):
x
.
stop_gradient
=
False
x
.
stop_gradient
=
False
rank_table_tensor
=
layers
.
data
(
rank_table_tensor
=
layers
.
data
(
'rank_table_tensor'
,
shape
=
[
1
],
dtype
=
'float32'
,
lod_level
=
1
)
'rank_table_tensor'
,
shape
=
[
1
],
dtype
=
'float32'
,
lod_level
=
1
)
table
=
l
ayers
.
l
od_rank_table
(
x
=
rank_table_tensor
)
table
=
lod_rank_table
(
x
=
rank_table_tensor
)
i
=
layers
.
zeros
(
dtype
=
'int64'
,
shape
=
[
1
])
i
=
layers
.
zeros
(
dtype
=
'int64'
,
shape
=
[
1
])
self
.
mem1
=
layers
.
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
self
.
mem1
=
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
i
.
stop_gradient
=
True
self
.
mem2
=
layers
.
shrink_memory
(
x
=
self
.
mem1
,
i
=
i
,
table
=
table
)
self
.
mem2
=
shrink_memory
(
x
=
self
.
mem1
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
i
.
stop_gradient
=
True
self
.
mem3
=
layers
.
shrink_memory
(
x
=
self
.
mem2
,
i
=
i
,
table
=
table
)
self
.
mem3
=
shrink_memory
(
x
=
self
.
mem2
,
i
=
i
,
table
=
table
)
mem3_mean
=
layers
.
mean
(
self
.
mem3
)
mem3_mean
=
layers
.
mean
(
self
.
mem3
)
append_backward
(
loss
=
mem3_mean
)
append_backward
(
loss
=
mem3_mean
)
self
.
x_grad
=
self
.
main_program
.
global_block
().
var
(
'x@GRAD'
)
self
.
x_grad
=
self
.
main_program
.
global_block
().
var
(
'x@GRAD'
)
...
...
python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py
浏览文件 @
c6fb1635
...
@@ -19,6 +19,8 @@ import paddle.fluid.layers as layers
...
@@ -19,6 +19,8 @@ import paddle.fluid.layers as layers
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
split_lod_tensor
from
paddle.fluid.layers.control_flow
import
merge_lod_tensor
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
...
@@ -96,12 +98,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
...
@@ -96,12 +98,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
])
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
])
y
.
persistable
=
True
y
.
persistable
=
True
out_true
,
out_false
=
layers
.
split_lod_tensor
(
out_true
,
out_false
=
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
input
=
x
,
mask
=
y
,
level
=
level
)
out_true
.
persistable
=
True
out_true
.
persistable
=
True
out_false
.
persistable
=
True
out_false
.
persistable
=
True
out
=
layers
.
merge_lod_tensor
(
out
=
merge_lod_tensor
(
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
out
.
persistable
=
True
out
.
persistable
=
True
...
@@ -142,9 +143,8 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
...
@@ -142,9 +143,8 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
level
=
0
level
=
0
out_true
,
out_false
=
layers
.
split_lod_tensor
(
out_true
,
out_false
=
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
input
=
x
,
mask
=
y
,
level
=
level
)
out
=
merge_lod_tensor
(
out
=
layers
.
merge_lod_tensor
(
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
mean
=
layers
.
mean
(
out
)
mean
=
layers
.
mean
(
out
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录