Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
c6fb1635
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c6fb1635
编写于
7月 24, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix-mixed-tensor
上级
18d539e8
24bea401
变更
22
隐藏空白更改
内联
并排
Showing
22 changed files
with
178 additions
and
123 deletions
+178
-123
paddle/fluid/API.spec
paddle/fluid/API.spec
+0
-11
paddle/fluid/inference/tensorrt/convert/fc_op.cc
paddle/fluid/inference/tensorrt/convert/fc_op.cc
+6
-8
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
+7
-6
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/ut_helper.h
paddle/fluid/inference/tensorrt/convert/ut_helper.h
+5
-5
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+39
-23
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+4
-0
paddle/fluid/inference/tensorrt/test_engine.cc
paddle/fluid/inference/tensorrt/test_engine.cc
+37
-3
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+4
-3
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+3
-1
paddle/fluid/operators/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt_engine_op_test.cc
+16
-16
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+3
-11
python/paddle/fluid/tests/test_if_else_op.py
python/paddle/fluid/tests/test_if_else_op.py
+8
-5
python/paddle/fluid/tests/unittests/test_conditional_block.py
...on/paddle/fluid/tests/unittests/test_conditional_block.py
+3
-2
python/paddle/fluid/tests/unittests/test_const_value.py
python/paddle/fluid/tests/unittests/test_const_value.py
+1
-1
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
+11
-7
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
+2
-1
python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
...paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
+12
-7
python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
...n/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
+2
-1
python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
...on/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
+7
-4
python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py
...uid/tests/unittests/test_split_and_merge_lod_tensor_op.py
+6
-6
未找到文件。
paddle/fluid/API.spec
浏览文件 @
c6fb1635
...
...
@@ -200,31 +200,23 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.split_lod_tensor ArgSpec(args=['input', 'mask', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.merge_lod_tensor ArgSpec(args=['in_true', 'in_false', 'x', 'mask', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.lod_rank_table ArgSpec(args=['x', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.max_sequence_len ArgSpec(args=['rank_table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.lod_tensor_to_array ArgSpec(args=['x', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_to_lod_tensor ArgSpec(args=['x', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True))
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shrink_memory ArgSpec(args=['x', 'i', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.IfElse.parent_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.DynamicRNN.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
...
...
@@ -233,9 +225,6 @@ paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs',
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.ConditionalBlock.__init__ ArgSpec(args=['self', 'inputs', 'is_scalar_condition', 'name'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.layers.ConditionalBlock.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.ConditionalBlock.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.StaticRNN.complete_op ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1))
...
...
paddle/fluid/inference/tensorrt/convert/fc_op.cc
浏览文件 @
c6fb1635
...
...
@@ -32,11 +32,11 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
for
(
int
h
=
0
;
h
<
shape
.
h
();
++
h
)
{
for
(
int
w
=
0
;
w
<
shape
.
w
();
++
w
)
{
odata
[
h
*
ostrides
.
h
()
+
w
*
ostrides
.
w
()]
=
idata
[
h
*
ostrides
.
h
()
+
w
*
o
strides
.
w
()];
idata
[
h
*
istrides
.
h
()
+
w
*
i
strides
.
w
()];
}
}
}
// indata c * k
// Reorder the data layout from CK to KC.
void
ReorderCKtoKC
(
TensorRTEngine
::
Weight
&
iweights
,
TensorRTEngine
::
Weight
*
oweights
)
{
...
...
@@ -79,9 +79,8 @@ class FcOpConverter : public OpConverter {
framework
::
LoDTensor
tmp
;
tmp
.
Resize
(
Y_t
->
dims
());
memcpy
(
tmp
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
Y_t
->
data
<
float
>
(),
Y_t
->
dims
()[
0
]
*
Y_t
->
dims
()[
1
]);
memcpy
(
tmp
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
weight_data
,
Y_t
->
dims
()[
0
]
*
Y_t
->
dims
()[
1
]
*
sizeof
(
float
));
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kFLOAT
,
static_cast
<
void
*>
(
weight_data
),
Y_t
->
memory_size
()
/
sizeof
(
float
)};
...
...
@@ -93,7 +92,7 @@ class FcOpConverter : public OpConverter {
// The data layout of TRT FC layer's weight is different from fluid's FC,
// need to reorder the elements.
ReorderCKtoKC
(
tmp_weight
,
&
weight
);
ReorderCKtoKC
(
weight
,
&
tmp_
weight
);
// Currently, the framework can only handle one fluid op -> one TRT layer,
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
...
...
@@ -103,7 +102,7 @@ class FcOpConverter : public OpConverter {
auto
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
*
const_cast
<
nvinfer1
::
ITensor
*>
(
X
),
n_output
,
weight
.
get
(),
bias
.
get
());
n_output
,
tmp_
weight
.
get
(),
bias
.
get
());
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
...
...
@@ -118,4 +117,3 @@ class FcOpConverter : public OpConverter {
}
// namespace paddle
REGISTER_TRT_OP_CONVERTER
(
fc
,
FcOpConverter
);
USE_OP
(
mul
);
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
c6fb1635
...
...
@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
浏览文件 @
c6fb1635
...
...
@@ -23,11 +23,11 @@ namespace tensorrt {
TEST
(
fc_op
,
test
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul-Y"
});
framework
::
Scope
scope
;
TRTConvertValidation
validator
(
2
0
,
parameters
,
scope
,
1000
);
validator
.
Decl
InputVar
(
"mul-X"
,
nvinfer1
::
Dims4
(
8
,
3
,
1
,
1
));
validator
.
DeclParamVar
(
"mul-Y"
,
nvinfer1
::
Dims2
(
3
,
2
));
validator
.
DeclOutputVar
(
"mul-Out"
,
nvinfer1
::
Dims2
(
8
,
2
));
TRTConvertValidation
validator
(
1
0
,
parameters
,
scope
,
1000
);
validator
.
DeclInputVar
(
"mul-X"
,
nvinfer1
::
Dims4
(
1
,
10
,
1
,
1
));
validator
.
Decl
ParamVar
(
"mul-Y"
,
nvinfer1
::
Dims2
(
10
,
2
));
// validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8
, 2));
validator
.
DeclOutputVar
(
"mul-Out"
,
nvinfer1
::
Dims2
(
1
,
2
));
// Prepare Op description
framework
::
OpDesc
desc
;
...
...
@@ -38,9 +38,10 @@ TEST(fc_op, test) {
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
USE_OP
(
mul
);
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
浏览文件 @
c6fb1635
...
...
@@ -39,7 +39,7 @@ TEST(MulOpConverter, main) {
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/ut_helper.h
浏览文件 @
c6fb1635
...
...
@@ -39,7 +39,7 @@ namespace tensorrt {
float
random
(
float
low
,
float
high
)
{
static
std
::
random_device
rd
;
static
std
::
mt19937
mt
(
rd
());
std
::
uniform_real_distribution
<
double
>
dist
(
1.0
,
10.0
);
std
::
uniform_real_distribution
<
double
>
dist
(
low
,
high
);
return
dist
(
mt
);
}
...
...
@@ -49,6 +49,7 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
size_t
num_elements
=
analysis
::
AccuDims
(
dims
,
dims
.
size
());
PADDLE_ENFORCE_GT
(
num_elements
,
0
);
auto
*
data
=
tensor
->
mutable_data
<
float
>
(
place
);
for
(
size_t
i
=
0
;
i
<
num_elements
;
i
++
)
{
*
(
data
+
i
)
=
random
(
0.
,
1.
);
}
...
...
@@ -68,7 +69,7 @@ class TRTConvertValidation {
int
workspace_size
=
1
<<
10
)
:
parameters_
(
parameters
),
scope_
(
scope
)
{
// create engine.
engine_
.
reset
(
new
TensorRTEngine
(
10
,
1
<<
10
,
&
stream_
));
engine_
.
reset
(
new
TensorRTEngine
(
batch_size
,
workspace_size
,
&
stream_
));
engine_
->
InitNetwork
();
PADDLE_ENFORCE_EQ
(
cudaStreamCreate
(
&
stream_
),
0
);
...
...
@@ -138,12 +139,11 @@ class TRTConvertValidation {
cudaStreamSynchronize
(
*
engine_
->
stream
());
ASSERT_FALSE
(
op_desc_
->
OutputArgumentNames
().
empty
());
const
size_t
output_space_size
=
200
;
const
size_t
output_space_size
=
200
0
;
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
std
::
vector
<
float
>
fluid_out
;
std
::
vector
<
float
>
trt_out
(
output_space_size
);
engine_
->
GetOutputInCPU
(
output
,
&
trt_out
[
0
],
output_space_size
*
sizeof
(
float
));
engine_
->
GetOutputInCPU
(
output
,
&
trt_out
[
0
],
output_space_size
);
cudaStreamSynchronize
(
*
engine_
->
stream
());
auto
*
var
=
scope_
.
FindVar
(
output
);
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
c6fb1635
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use
this file except in compliance with the License.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use
this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
...
...
@@ -26,6 +26,8 @@ namespace paddle {
namespace
inference
{
namespace
tensorrt
{
int
TensorRTEngine
::
runtime_batch_
=
1
;
void
TensorRTEngine
::
Build
(
const
DescType
&
paddle_model
)
{
PADDLE_ENFORCE
(
false
,
"not implemented"
);
}
...
...
@@ -42,6 +44,7 @@ void TensorRTEngine::Execute(int batch_size) {
PADDLE_ENFORCE_NOT_NULL
(
stream_
);
infer_context_
->
enqueue
(
batch_size
,
buffers
.
data
(),
*
stream_
,
nullptr
);
cudaStreamSynchronize
(
*
stream_
);
SetRuntimeBatch
(
batch_size
);
}
TensorRTEngine
::~
TensorRTEngine
()
{
...
...
@@ -80,17 +83,17 @@ void TensorRTEngine::FreezeNetwork() {
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
item
.
second
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
max_batch_
;
PADDLE_ENFORCE_GT
(
item
.
second
,
0
);
}
auto
&
buf
=
buffer
(
item
.
first
);
buf
.
max_size
=
item
.
second
*
max_batch_
;
CHECK
(
buf
.
buffer
==
nullptr
);
// buffer should be allocated only once.
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
buf
.
max_size
));
PADDLE_ENFORCE_LE
(
buf
.
max_size
,
1
<<
30
);
// 1 GiB (1 << 30 bytes)
// buf.size will be changed at runtime.
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
item
.
second
*
max_batch_
));
buf
.
size
=
0
;
PADDLE_ENFORCE_LE
(
buf
.
max_size
,
1
<<
30
);
// 1 GiB (1 << 30 bytes)
buf
.
device
=
DeviceType
::
GPU
;
}
}
...
...
@@ -105,7 +108,7 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
max_batch_
;
PADDLE_ENFORCE
(
input
->
isNetworkInput
());
TensorRTEngine
::
SetITensor
(
name
,
input
);
return
input
;
...
...
@@ -149,35 +152,42 @@ void *TensorRTEngine::GetOutputInGPU(const std::string &name) {
void
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
// determine data size
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
nvinfer1
::
Dims
dims
=
output
->
getDimensions
();
auto
dim_size
=
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
size_t
dst_size
=
dim_size
*
runtime_batch_
*
kDataTypeSize
[
static_cast
<
int
>
(
output
->
getType
())];
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
PADDLE_ENFORCE_LE
(
dst_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
dst_size
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
it
->
second
,
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
dst_size
,
cudaMemcpyDeviceToDevice
,
*
stream_
),
0
);
}
void
TensorRTEngine
::
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
VLOG
(
4
)
<<
"get output in cpu"
;
auto
&
buf
=
buffer
(
name
);
// Update needed buffer size.
auto
slot_offset
=
infer_engine_
->
getBindingIndex
(
name
.
c_str
());
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
buf
.
size
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
PADDLE_ENFORCE_LE
(
buf
.
size
,
buf
.
max_size
);
// determine data size
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
nvinfer1
::
Dims
dims
=
output
->
getDimensions
();
auto
dim_size
=
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
size_t
dst_size
=
dim_size
*
runtime_batch_
*
kDataTypeSize
[
static_cast
<
int
>
(
output
->
getType
())];
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_LE
(
dst_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
dst_size
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
// DEBUG
memset
(
dst
,
0
,
buf
.
size
);
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpy
(
dst
,
buf
.
buffer
,
buf
.
size
,
cudaMemcpyDeviceToHost
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
dst_size
,
cudaMemcpyDeviceToHost
,
*
stream_
));
}
Buffer
&
TensorRTEngine
::
buffer
(
const
std
::
string
&
name
)
{
...
...
@@ -225,6 +235,12 @@ nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
return
itensor_map_
[
name
];
}
void
TensorRTEngine
::
SetRuntimeBatch
(
size_t
batch_size
)
{
runtime_batch_
=
batch_size
;
}
int
TensorRTEngine
::
GetRuntimeBatch
()
{
return
runtime_batch_
;
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
c6fb1635
...
...
@@ -117,10 +117,14 @@ class TensorRTEngine : public EngineBase {
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
INetworkDefinition
*
network
()
{
return
infer_network_
.
get
();
}
void
SetRuntimeBatch
(
size_t
batch_size
);
int
GetRuntimeBatch
();
private:
// the max batch size
int
max_batch_
;
// the runtime batch size
static
int
runtime_batch_
;
// the max memory size the engine uses
int
max_workspace_
;
...
...
paddle/fluid/inference/tensorrt/test_engine.cc
浏览文件 @
c6fb1635
...
...
@@ -28,7 +28,7 @@ class TensorRTEngineTest : public ::testing::Test {
protected:
void
SetUp
()
override
{
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream_
));
engine_
=
new
TensorRTEngine
(
1
,
1
<<
10
,
&
stream_
);
engine_
=
new
TensorRTEngine
(
1
0
,
1
<<
10
,
&
stream_
);
engine_
->
InitNetwork
();
}
...
...
@@ -71,7 +71,7 @@ TEST_F(TensorRTEngineTest, add_layer) {
LOG
(
INFO
)
<<
"to get output"
;
float
y_cpu
;
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
sizeof
(
float
));
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
1
*
sizeof
(
float
));
LOG
(
INFO
)
<<
"to checkout output"
;
ASSERT_EQ
(
y_cpu
,
x_v
*
2
+
3
);
...
...
@@ -103,15 +103,49 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
LOG
(
INFO
)
<<
"to get output"
;
float
y_cpu
[
2
]
=
{
-
1.
,
-
1.
};
auto
dims
=
engine_
->
GetITensor
(
"y"
)
->
getDimensions
();
ASSERT_EQ
(
dims
.
nbDims
,
3
);
ASSERT_EQ
(
dims
.
d
[
0
],
2
);
ASSERT_EQ
(
dims
.
d
[
1
],
1
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
sizeof
(
float
)
*
2
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
2
*
sizeof
(
float
)
);
ASSERT_EQ
(
y_cpu
[
0
],
4.5
);
ASSERT_EQ
(
y_cpu
[
1
],
14.5
);
}
TEST_F
(
TensorRTEngineTest
,
test_conv2d_temp
)
{
// Weight in CPU memory.
float
raw_weight
[
9
]
=
{
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
};
float
raw_bias
[
1
]
=
{
0
};
TensorRTEngine
::
Weight
weight
(
nvinfer1
::
DataType
::
kFLOAT
,
raw_weight
,
9
);
TensorRTEngine
::
Weight
bias
(
nvinfer1
::
DataType
::
kFLOAT
,
raw_bias
,
1
);
auto
*
x
=
engine_
->
DeclareInput
(
"x"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
Dims3
{
1
,
3
,
3
});
auto
*
conv_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Convolution
,
*
x
,
1
,
nvinfer1
::
DimsHW
{
3
,
3
},
weight
.
get
(),
bias
.
get
());
PADDLE_ENFORCE
(
conv_layer
!=
nullptr
);
conv_layer
->
setStride
(
nvinfer1
::
DimsHW
{
1
,
1
});
conv_layer
->
setPadding
(
nvinfer1
::
DimsHW
{
1
,
1
});
engine_
->
DeclareOutput
(
conv_layer
,
0
,
"y"
);
engine_
->
FreezeNetwork
();
ASSERT_EQ
(
engine_
->
engine
()
->
getNbBindings
(),
2
);
float
x_v
[
18
]
=
{
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
};
engine_
->
SetInputFromCPU
(
"x"
,
reinterpret_cast
<
void
*>
(
&
x_v
),
18
*
sizeof
(
float
));
engine_
->
Execute
(
2
);
LOG
(
INFO
)
<<
"to get output"
;
float
*
y_cpu
=
new
float
[
18
];
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
18
*
sizeof
(
float
));
ASSERT_EQ
(
y_cpu
[
0
],
4.0
);
ASSERT_EQ
(
y_cpu
[
1
],
6.0
);
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
c6fb1635
...
...
@@ -55,13 +55,14 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
"TensorRT' tensor input requires at least 2 dimensions"
);
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
"TensorRT' tensor input requires at most 4 dimensions"
);
switch
(
shape
.
size
())
{
case
2
:
return
nvinfer1
::
Dims2
(
shape
[
0
]
,
shape
[
1
]);
return
nvinfer1
::
Dims2
(
1
,
shape
[
1
]);
case
3
:
return
nvinfer1
::
Dims3
(
shape
[
0
]
,
shape
[
1
],
shape
[
2
]);
return
nvinfer1
::
Dims3
(
1
,
shape
[
1
],
shape
[
2
]);
case
4
:
return
nvinfer1
::
Dims4
(
shape
[
0
]
,
shape
[
1
],
shape
[
2
],
shape
[
3
]);
return
nvinfer1
::
Dims4
(
1
,
shape
[
1
],
shape
[
2
],
shape
[
3
]);
default:
return
nvinfer1
::
Dims
();
}
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
c6fb1635
...
...
@@ -93,13 +93,15 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
// TODO(Superjomn) find some way to determine which device to output the
// tensor.
// if (platform::is_cpu_place(fluid_t->place())) {
// TODO(Superjomn) change this float to dtype size.
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
FLAGS_tensorrt_engine_batch_size
;
engine
->
GetOutputInCPU
(
y
,
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
size
*
sizeof
(
float
));
...
...
paddle/fluid/operators/tensorrt_engine_op_test.cc
浏览文件 @
c6fb1635
...
...
@@ -64,36 +64,37 @@ TEST(TensorRTEngineOp, manual) {
LOG
(
INFO
)
<<
"create block desc"
;
framework
::
BlockDesc
block_desc
(
&
program
,
block_
);
LOG
(
INFO
)
<<
"create
mul
op"
;
auto
*
mul
=
block_desc
.
AppendOp
();
mul
->
SetType
(
"mul
"
);
mul
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
// 2 x 4
mul
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y"
}));
// 4 x 6
mul
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
// 2 x 6
LOG
(
INFO
)
<<
"create
fc
op"
;
auto
*
fc0
=
block_desc
.
AppendOp
();
fc0
->
SetType
(
"fc
"
);
fc0
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
// 4 x 1 x 1
fc0
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y"
}));
// 4 x 6
fc0
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
// 6 x 1 x 1
LOG
(
INFO
)
<<
"create fc op"
;
auto
*
fc
=
block_desc
.
AppendOp
();
fc
->
SetType
(
"mul
"
);
fc
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
fc
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y0"
}));
// 6 x 8
fc
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
// 2 x 8
auto
*
fc
1
=
block_desc
.
AppendOp
();
fc
1
->
SetType
(
"fc
"
);
fc
1
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
fc
1
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y0"
}));
// 6 x 8
fc
1
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
// 8 x 1 x 1
// Set inputs' variable shape in BlockDesc
AddTensorToBlockDesc
(
block_
,
"x"
,
std
::
vector
<
int64_t
>
({
2
,
4
}));
// the batch size is 2, so the dims of 'x' is {2, 4, 1, 1}
AddTensorToBlockDesc
(
block_
,
"x"
,
std
::
vector
<
int64_t
>
({
2
,
4
,
1
,
1
}));
AddTensorToBlockDesc
(
block_
,
"y"
,
std
::
vector
<
int64_t
>
({
4
,
6
}));
AddTensorToBlockDesc
(
block_
,
"y0"
,
std
::
vector
<
int64_t
>
({
6
,
8
}));
AddTensorToBlockDesc
(
block_
,
"z"
,
std
::
vector
<
int64_t
>
({
2
,
6
}));
// It is weird, but the op descs need to be copied manually.
*
block_
->
add_ops
()
=
*
mul
->
Proto
();
*
block_
->
add_ops
()
=
*
fc
->
Proto
();
*
block_
->
add_ops
()
=
*
fc0
->
Proto
();
*
block_
->
add_ops
()
=
*
fc
1
->
Proto
();
ASSERT_EQ
(
block_
->
ops_size
(),
2
);
LOG
(
INFO
)
<<
"create tensorrt desc"
;
framework
::
OpDesc
engine_op_desc
(
nullptr
);
engine_op_desc
.
SetType
(
"tensorrt_engine"
);
engine_op_desc
.
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
({
"x"
,
"y"
,
"y0"
}));
engine_op_desc
.
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
engine_op_desc
.
SetOutput
(
"Ys"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"subgraph"
,
block_
->
SerializeAsString
());
...
...
@@ -207,5 +208,4 @@ TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }
}
// namespace operators
}
// namespace paddle
USE_TRT_CONVERTER
(
mul
)
USE_TRT_CONVERTER
(
fc
)
python/paddle/fluid/layers/control_flow.py
浏览文件 @
c6fb1635
...
...
@@ -23,25 +23,17 @@ from ops import logical_and, logical_not, logical_or
import
numpy
__all__
=
[
'split_lod_tensor'
,
'merge_lod_tensor'
,
'While'
,
'Switch'
,
'lod_rank_table'
,
'max_sequence_len'
,
'lod_tensor_to_array'
,
'array_to_lod_tensor'
,
'increment'
,
'array_write'
,
'create_array'
,
'less_than'
,
'equal'
,
'array_read'
,
'shrink_memory'
,
'array_length'
,
'IfElse'
,
'DynamicRNN'
,
'ConditionalBlock'
,
'StaticRNN'
,
'reorder_lod_tensor_by_rank'
,
'ParallelDo'
,
...
...
@@ -1457,7 +1449,7 @@ class IfElse(object):
if
self
.
status
==
IfElse
.
OUT_IF_ELSE_BLOCKS
:
raise
ValueError
(
"input must in true/false blocks"
)
if
id
(
x
)
not
in
self
.
input_table
:
parent_block
=
self
.
parent_block
()
parent_block
=
self
.
_
parent_block
()
out_true
=
parent_block
.
create_var
(
name
=
unique_name
.
generate
(
'ifelse_input'
+
self
.
helper
.
name
),
dtype
=
x
.
dtype
)
...
...
@@ -1483,7 +1475,7 @@ class IfElse(object):
else
:
return
out_false
def
parent_block
(
self
):
def
_
parent_block
(
self
):
current_block
=
self
.
helper
.
main_program
.
current_block
()
return
self
.
helper
.
main_program
.
block
(
current_block
.
parent_idx
)
...
...
@@ -1499,7 +1491,7 @@ class IfElse(object):
out_table
=
self
.
output_table
[
1
if
self
.
status
==
self
.
IN_IF_ELSE_TRUE_BLOCKS
else
0
]
parent_block
=
self
.
parent_block
()
parent_block
=
self
.
_
parent_block
()
for
each_out
in
outs
:
if
not
isinstance
(
each_out
,
Variable
):
raise
TypeError
(
"Each output should be a variable"
)
...
...
python/paddle/fluid/tests/test_if_else_op.py
浏览文件 @
c6fb1635
...
...
@@ -19,6 +19,10 @@ from paddle.fluid.executor import Executor
from
paddle.fluid.optimizer
import
MomentumOptimizer
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.control_flow
import
split_lod_tensor
from
paddle.fluid.layers.control_flow
import
merge_lod_tensor
from
paddle.fluid.layers.control_flow
import
ConditionalBlock
import
unittest
import
numpy
as
np
...
...
@@ -34,11 +38,10 @@ class TestMNISTIfElseOp(unittest.TestCase):
limit
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
5
)
cond
=
layers
.
less_than
(
x
=
label
,
y
=
limit
)
true_image
,
false_image
=
layers
.
split_lod_tensor
(
input
=
image
,
mask
=
cond
)
true_image
,
false_image
=
split_lod_tensor
(
input
=
image
,
mask
=
cond
)
true_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
true_cond
=
layers
.
ConditionalBlock
([
cond
])
true_cond
=
ConditionalBlock
([
cond
])
with
true_cond
.
block
():
hidden
=
layers
.
fc
(
input
=
true_image
,
size
=
100
,
act
=
'tanh'
)
...
...
@@ -46,14 +49,14 @@ class TestMNISTIfElseOp(unittest.TestCase):
layers
.
assign
(
input
=
prob
,
output
=
true_out
)
false_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
false_cond
=
layers
.
ConditionalBlock
([
cond
])
false_cond
=
ConditionalBlock
([
cond
])
with
false_cond
.
block
():
hidden
=
layers
.
fc
(
input
=
false_image
,
size
=
200
,
act
=
'tanh'
)
prob
=
layers
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
'softmax'
)
layers
.
assign
(
input
=
prob
,
output
=
false_out
)
prob
=
layers
.
merge_lod_tensor
(
prob
=
merge_lod_tensor
(
in_true
=
true_out
,
in_false
=
false_out
,
mask
=
cond
,
x
=
image
)
loss
=
layers
.
cross_entropy
(
input
=
prob
,
label
=
label
)
avg_loss
=
layers
.
mean
(
loss
)
...
...
python/paddle/fluid/tests/unittests/test_conditional_block.py
浏览文件 @
c6fb1635
...
...
@@ -18,14 +18,15 @@ import paddle.fluid.core as core
from
paddle.fluid.framework
import
default_startup_program
,
default_main_program
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
ConditionalBlock
import
numpy
class
ConditionalBlock
(
unittest
.
TestCase
):
class
ConditionalBlock
Test
(
unittest
.
TestCase
):
def
test_forward
(
self
):
data
=
layers
.
data
(
name
=
'X'
,
shape
=
[
1
],
dtype
=
'float32'
)
data
.
stop_gradient
=
False
cond
=
layers
.
ConditionalBlock
(
inputs
=
[
data
])
cond
=
ConditionalBlock
(
inputs
=
[
data
])
out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
with
cond
.
block
():
hidden
=
layers
.
fc
(
input
=
data
,
size
=
10
)
...
...
python/paddle/fluid/tests/unittests/test_const_value.py
浏览文件 @
c6fb1635
...
...
@@ -16,7 +16,7 @@ import unittest
import
paddle.fluid.framework
as
framework
class
Con
ditionalBlock
(
unittest
.
TestCase
):
class
Con
stantTest
(
unittest
.
TestCase
):
def
test_const_value
(
self
):
self
.
assertEqual
(
framework
.
GRAD_VAR_SUFFIX
,
"@GRAD"
)
self
.
assertEqual
(
framework
.
TEMP_VAR_NAME
,
"@TEMP@"
)
...
...
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
浏览文件 @
c6fb1635
...
...
@@ -17,6 +17,12 @@ import paddle
import
unittest
import
numpy
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.layers.control_flow
import
max_sequence_len
from
paddle.fluid.layers.control_flow
import
lod_tensor_to_array
from
paddle.fluid.layers.control_flow
import
array_to_lod_tensor
from
paddle.fluid.layers.control_flow
import
shrink_memory
class
TestDynRNN
(
unittest
.
TestCase
):
def
setUp
(
self
):
...
...
@@ -38,12 +44,11 @@ class TestDynRNN(unittest.TestCase):
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'float32'
)
rank_table
=
fluid
.
layers
.
lod_rank_table
(
x
=
sent_emb
)
rank_table
=
lod_rank_table
(
x
=
sent_emb
)
sent_emb_array
=
fluid
.
layers
.
lod_tensor_to_array
(
x
=
sent_emb
,
table
=
rank_table
)
sent_emb_array
=
lod_tensor_to_array
(
x
=
sent_emb
,
table
=
rank_table
)
seq_len
=
fluid
.
layers
.
max_sequence_len
(
rank_table
=
rank_table
)
seq_len
=
max_sequence_len
(
rank_table
=
rank_table
)
i
=
fluid
.
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
0
)
i
.
stop_gradient
=
False
...
...
@@ -66,7 +71,7 @@ class TestDynRNN(unittest.TestCase):
mem
=
fluid
.
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
ipt
=
fluid
.
layers
.
array_read
(
array
=
sent_emb_array
,
i
=
i
)
mem
=
fluid
.
layers
.
shrink_memory
(
x
=
mem
,
i
=
i
,
table
=
rank_table
)
mem
=
shrink_memory
(
x
=
mem
,
i
=
i
,
table
=
rank_table
)
hidden
=
fluid
.
layers
.
fc
(
input
=
[
mem
,
ipt
],
size
=
100
,
act
=
'tanh'
)
...
...
@@ -75,8 +80,7 @@ class TestDynRNN(unittest.TestCase):
fluid
.
layers
.
array_write
(
x
=
hidden
,
i
=
i
,
array
=
mem_array
)
fluid
.
layers
.
less_than
(
x
=
i
,
y
=
seq_len
,
cond
=
cond
)
all_timesteps
=
fluid
.
layers
.
array_to_lod_tensor
(
x
=
out
,
table
=
rank_table
)
all_timesteps
=
array_to_lod_tensor
(
x
=
out
,
table
=
rank_table
)
last
=
fluid
.
layers
.
sequence_last_step
(
input
=
all_timesteps
)
logits
=
fluid
.
layers
.
fc
(
input
=
last
,
size
=
1
,
act
=
None
)
loss
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
...
...
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
浏览文件 @
c6fb1635
...
...
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.fluid.layers
import
lod_rank_table
,
data
from
paddle.fluid.layers
import
data
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.executor
import
Executor
import
paddle.fluid.core
as
core
import
numpy
...
...
python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
浏览文件 @
c6fb1635
...
...
@@ -20,6 +20,11 @@ from paddle.fluid.framework import Program, program_guard
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.layers.control_flow
import
max_sequence_len
from
paddle.fluid.layers.control_flow
import
lod_tensor_to_array
from
paddle.fluid.layers.control_flow
import
array_to_lod_tensor
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
def
place
(
self
):
...
...
@@ -137,13 +142,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
10
])
x
.
persistable
=
True
table
=
l
ayers
.
l
od_rank_table
(
x
,
level
=
level
)
max_len
=
layers
.
max_sequence_len
(
table
)
table
=
lod_rank_table
(
x
,
level
=
level
)
max_len
=
max_sequence_len
(
table
)
max_len
.
persistable
=
True
array
=
l
ayers
.
l
od_tensor_to_array
(
x
,
table
)
array
=
lod_tensor_to_array
(
x
,
table
)
array
.
persistable
=
True
result
=
layers
.
array_to_lod_tensor
(
array
,
table
)
result
=
array_to_lod_tensor
(
array
,
table
)
result
.
persistable
=
True
exe
=
Executor
(
place
)
scope
=
core
.
Scope
()
...
...
@@ -181,9 +186,9 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
1
],
dtype
=
'float32'
,
stop_gradient
=
False
)
table
=
l
ayers
.
l
od_rank_table
(
x
,
level
=
0
)
array
=
l
ayers
.
l
od_tensor_to_array
(
x
,
table
)
result
=
layers
.
array_to_lod_tensor
(
array
,
table
)
table
=
lod_rank_table
(
x
,
level
=
0
)
array
=
lod_tensor_to_array
(
x
,
table
)
result
=
array_to_lod_tensor
(
array
,
table
)
mean
=
layers
.
mean
(
result
)
...
...
python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
浏览文件 @
c6fb1635
...
...
@@ -15,6 +15,7 @@
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.layers.control_flow
import
lod_rank_table
import
numpy
...
...
@@ -34,7 +35,7 @@ class TestReorderLoDTensor(unittest.TestCase):
dat
.
stop_gradient
=
False
rank_dat
=
fluid
.
layers
.
data
(
name
=
cls
.
data_desc
[
1
][
0
],
shape
=
cls
.
data_desc
[
1
][
1
])
table
=
fluid
.
layers
.
lod_rank_table
(
rank_dat
)
table
=
lod_rank_table
(
rank_dat
)
new_dat
=
fluid
.
layers
.
reorder_lod_tensor_by_rank
(
x
=
dat
,
rank_table
=
table
)
loss
=
fluid
.
layers
.
reduce_sum
(
new_dat
)
...
...
python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
浏览文件 @
c6fb1635
...
...
@@ -21,6 +21,9 @@ from paddle.fluid.framework import default_main_program, switch_main_program
from
paddle.fluid.framework
import
Program
import
numpy
as
np
from
paddle.fluid.layers.control_flow
import
shrink_memory
from
paddle.fluid.layers.control_flow
import
lod_rank_table
class
TestShrinkRNNMemoryBase
(
unittest
.
TestCase
):
def
setUp
(
self
):
...
...
@@ -30,15 +33,15 @@ class TestShrinkRNNMemoryBase(unittest.TestCase):
x
.
stop_gradient
=
False
rank_table_tensor
=
layers
.
data
(
'rank_table_tensor'
,
shape
=
[
1
],
dtype
=
'float32'
,
lod_level
=
1
)
table
=
l
ayers
.
l
od_rank_table
(
x
=
rank_table_tensor
)
table
=
lod_rank_table
(
x
=
rank_table_tensor
)
i
=
layers
.
zeros
(
dtype
=
'int64'
,
shape
=
[
1
])
self
.
mem1
=
layers
.
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
self
.
mem1
=
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
self
.
mem2
=
layers
.
shrink_memory
(
x
=
self
.
mem1
,
i
=
i
,
table
=
table
)
self
.
mem2
=
shrink_memory
(
x
=
self
.
mem1
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
self
.
mem3
=
layers
.
shrink_memory
(
x
=
self
.
mem2
,
i
=
i
,
table
=
table
)
self
.
mem3
=
shrink_memory
(
x
=
self
.
mem2
,
i
=
i
,
table
=
table
)
mem3_mean
=
layers
.
mean
(
self
.
mem3
)
append_backward
(
loss
=
mem3_mean
)
self
.
x_grad
=
self
.
main_program
.
global_block
().
var
(
'x@GRAD'
)
...
...
python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py
浏览文件 @
c6fb1635
...
...
@@ -19,6 +19,8 @@ import paddle.fluid.layers as layers
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
split_lod_tensor
from
paddle.fluid.layers.control_flow
import
merge_lod_tensor
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
...
...
@@ -96,12 +98,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
])
y
.
persistable
=
True
out_true
,
out_false
=
layers
.
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
out_true
,
out_false
=
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
out_true
.
persistable
=
True
out_false
.
persistable
=
True
out
=
layers
.
merge_lod_tensor
(
out
=
merge_lod_tensor
(
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
out
.
persistable
=
True
...
...
@@ -142,9 +143,8 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
level
=
0
out_true
,
out_false
=
layers
.
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
out
=
layers
.
merge_lod_tensor
(
out_true
,
out_false
=
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
out
=
merge_lod_tensor
(
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
mean
=
layers
.
mean
(
out
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录