Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
bfd42683
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bfd42683
编写于
6月 07, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into refine
上级
0693b414
635099c1
变更
46
隐藏空白更改
内联
并排
Showing
46 changed file
with
1451 addition
and
556 deletion
+1451
-556
benchmark/fluid/Dockerfile
benchmark/fluid/Dockerfile
+1
-1
benchmark/fluid/README.md
benchmark/fluid/README.md
+10
-0
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+87
-28
benchmark/fluid/models/machine_translation.py
benchmark/fluid/models/machine_translation.py
+3
-1
benchmark/fluid/models/mnist.py
benchmark/fluid/models/mnist.py
+20
-4
benchmark/fluid/models/resnet.py
benchmark/fluid/models/resnet.py
+44
-15
benchmark/fluid/models/stacked_dynamic_lstm.py
benchmark/fluid/models/stacked_dynamic_lstm.py
+4
-1
benchmark/fluid/models/vgg.py
benchmark/fluid/models/vgg.py
+20
-4
benchmark/fluid/recordio_converter.py
benchmark/fluid/recordio_converter.py
+164
-0
cmake/configure.cmake
cmake/configure.cmake
+3
-0
paddle/fluid/framework/data_layout.h
paddle/fluid/framework/data_layout.h
+6
-1
paddle/fluid/framework/data_layout_transform.cc
paddle/fluid/framework/data_layout_transform.cc
+83
-0
paddle/fluid/framework/data_layout_transform.h
paddle/fluid/framework/data_layout_transform.h
+45
-0
paddle/fluid/framework/data_transform.cc
paddle/fluid/framework/data_transform.cc
+26
-3
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+2
-1
paddle/fluid/framework/details/fuse_vars_op_handle.cc
paddle/fluid/framework/details/fuse_vars_op_handle.cc
+51
-0
paddle/fluid/framework/details/fuse_vars_op_handle.h
paddle/fluid/framework/details/fuse_vars_op_handle.h
+63
-0
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+19
-10
paddle/fluid/framework/details/op_handle_base.cc
paddle/fluid/framework/details/op_handle_base.cc
+10
-0
paddle/fluid/framework/details/op_handle_base.h
paddle/fluid/framework/details/op_handle_base.h
+2
-0
paddle/fluid/framework/details/ssa_graph_printer.h
paddle/fluid/framework/details/ssa_graph_printer.h
+1
-1
paddle/fluid/framework/op_kernel_type.h
paddle/fluid/framework/op_kernel_type.h
+8
-1
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+9
-2
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+25
-7
paddle/fluid/framework/tensor.h
paddle/fluid/framework/tensor.h
+26
-2
paddle/fluid/framework/tensor_test.cc
paddle/fluid/framework/tensor_test.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+2
-5
paddle/fluid/inference/tensorrt/convert/activation_op.cc
paddle/fluid/inference/tensorrt/convert/activation_op.cc
+8
-2
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+28
-87
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+3
-1
paddle/fluid/operators/batch_norm_op.cc
paddle/fluid/operators/batch_norm_op.cc
+8
-5
paddle/fluid/operators/conv_op.cc
paddle/fluid/operators/conv_op.cc
+11
-6
paddle/fluid/operators/fc_op.cc
paddle/fluid/operators/fc_op.cc
+2
-2
paddle/fluid/operators/lrn_op.cc
paddle/fluid/operators/lrn_op.cc
+4
-3
paddle/fluid/operators/pool_mkldnn_op.cc
paddle/fluid/operators/pool_mkldnn_op.cc
+7
-4
paddle/fluid/operators/pool_op.cc
paddle/fluid/operators/pool_op.cc
+8
-4
paddle/fluid/operators/softmax_op.cc
paddle/fluid/operators/softmax_op.cc
+6
-3
paddle/fluid/platform/mkldnn_helper.h
paddle/fluid/platform/mkldnn_helper.h
+13
-0
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+2
-0
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+16
-9
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+1
-1
python/paddle/fluid/tests/unittests/benchmark.py
python/paddle/fluid/tests/unittests/benchmark.py
+113
-0
python/paddle/fluid/tests/unittests/benchmark_sum_op.py
python/paddle/fluid/tests/unittests/benchmark_sum_op.py
+82
-0
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+124
-240
python/paddle/fluid/tests/unittests/test_lstm_op.py
python/paddle/fluid/tests/unittests/test_lstm_op.py
+98
-101
python/paddle/fluid/tests/unittests/testsuite.py
python/paddle/fluid/tests/unittests/testsuite.py
+182
-0
未找到文件。
benchmark/fluid/Dockerfile
浏览文件 @
bfd42683
...
...
@@ -19,4 +19,4 @@ ADD *.whl /
RUN
pip
install
/
*
.whl
&&
rm
-f
/
*
.whl
&&
chmod
+x /usr/bin/paddle_k8s
ENV
LD_LIBRARY_PATH=/usr/local/lib
ADD
fluid_benchmark.py
dataset
.py models/ /workspace/
ADD
fluid_benchmark.py
recordio_converter
.py models/ /workspace/
benchmark/fluid/README.md
浏览文件 @
bfd42683
...
...
@@ -44,6 +44,16 @@ Currently supported `--model` argument include:
PADDLE_PSERVER_PORT
=
7164
PADDLE_TRAINER_IPS
=
192.168.0.2,192.168.0.3
PADDLE_CURRENT_IP
=
127.0.0.1
PADDLE_TRAINER_ID
=
0 python fluid_benchmark.py
--model
mnist
--device
GPU
--update_method
nccl2
```
## Prepare the RecordIO file to Achieve Better Performance
Run the following command will generate RecordIO files like "mnist.recordio" under the path
and batch_size you choose, you can use batch_size=1 so that later reader can change the batch_size
at any time using
`fluid.batch`
.
```
bash
python
-c
'from recordio_converter import *; prepare_mnist("data", 1)'
```
## Run Distributed Benchmark on Kubernetes Cluster
You may need to build a Docker image before submitting a cluster job onto Kubernetes, or you will
...
...
benchmark/fluid/fluid_benchmark.py
浏览文件 @
bfd42683
...
...
@@ -38,10 +38,12 @@ def parse_args():
default
=
'resnet'
,
help
=
'The model to run benchmark with.'
)
parser
.
add_argument
(
'--batch_size'
,
type
=
int
,
default
=
32
,
help
=
'The minibatch size.'
)
'--batch_size'
,
type
=
int
,
default
=
32
,
help
=
'The batch size on each gpu.'
)
parser
.
add_argument
(
'--learning_rate'
,
type
=
float
,
default
=
0.001
,
help
=
'The learning rate.'
)
# TODO(wuyi): add "--use_fake_data" option back.
parser
.
add_argument
(
'--skip_batch_num'
,
type
=
int
,
...
...
@@ -49,7 +51,10 @@ def parse_args():
help
=
'The first num of minibatch num to skip, for better performance test'
)
parser
.
add_argument
(
'--iterations'
,
type
=
int
,
default
=
80
,
help
=
'The number of minibatches.'
)
'--iterations'
,
type
=
int
,
default
=
80
,
help
=
'The number of minibatches, set to -1 to run all batches.'
)
parser
.
add_argument
(
'--pass_num'
,
type
=
int
,
default
=
100
,
help
=
'The number of passes.'
)
parser
.
add_argument
(
...
...
@@ -69,6 +74,7 @@ def parse_args():
type
=
int
,
default
=
1
,
help
=
'If gpus > 1, will use ParallelExecutor to run, else use Executor.'
)
# this option is available only for vgg and resnet.
parser
.
add_argument
(
'--cpus'
,
type
=
int
,
...
...
@@ -78,7 +84,7 @@ def parse_args():
'--data_set'
,
type
=
str
,
default
=
'flowers'
,
choices
=
[
'cifar10'
,
'flowers'
],
choices
=
[
'cifar10'
,
'flowers'
,
'imagenet'
],
help
=
'Optional dataset for benchmark.'
)
parser
.
add_argument
(
'--infer_only'
,
action
=
'store_true'
,
help
=
'If set, run forward only.'
)
...
...
@@ -108,6 +114,16 @@ def parse_args():
default
=
'local'
,
choices
=
[
'local'
,
'pserver'
,
'nccl2'
],
help
=
'Choose parameter update method, can be local, pserver, nccl2.'
)
parser
.
add_argument
(
'--use_reader_op'
,
action
=
'store_true'
,
help
=
'Whether to use reader op, and must specify the data path if set this to true.'
)
parser
.
add_argument
(
'--data_path'
,
type
=
str
,
default
=
""
,
help
=
'Directory that contains all the training recordio files.'
)
args
=
parser
.
parse_args
()
return
args
...
...
@@ -210,26 +226,50 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
place
=
core
.
CPUPlace
()
if
args
.
device
==
'CPU'
else
core
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup_prog
)
feed_var_list
=
[
var
for
var
in
train_prog
.
global_block
().
vars
.
itervalues
()
if
var
.
is_data
]
feeder
=
fluid
.
DataFeeder
(
feed_var_list
,
place
)
if
not
args
.
use_reader_op
:
feed_var_list
=
[
var
for
var
in
train_prog
.
global_block
().
vars
.
itervalues
()
if
var
.
is_data
]
feeder
=
fluid
.
DataFeeder
(
feed_var_list
,
place
)
iters
,
num_samples
,
start_time
=
0
,
0
,
time
.
time
()
for
pass_id
in
range
(
args
.
pass_num
):
train_losses
=
[]
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
not
args
.
use_reader_op
:
reader_generator
=
train_reader
()
batch_id
=
0
data
=
None
while
True
:
if
not
args
.
use_reader_op
:
data
=
next
(
reader_generator
,
None
)
if
data
==
None
:
break
if
iters
==
args
.
iterations
:
break
if
iters
==
args
.
skip_batch_num
:
start_time
=
time
.
time
()
num_samples
=
0
if
iters
==
args
.
iterations
:
break
loss
=
exe
.
run
(
train_prog
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_loss
])
if
args
.
use_reader_op
:
try
:
loss
=
exe
.
run
(
train_prog
,
fetch_list
=
[
avg_loss
])
except
fluid
.
core
.
EnforceNotMet
as
ex
:
break
else
:
loss
=
exe
.
run
(
train_prog
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_loss
])
iters
+=
1
num_samples
+=
len
(
data
)
batch_id
+=
1
# FIXME(wuyi): For use_reader_op, if the current
# pass is not the last, the last batch of this pass
# is also equal to args.batch_size.
if
args
.
use_reader_op
:
num_samples
+=
args
.
batch_size
*
args
.
gpus
else
:
num_samples
+=
len
(
data
)
train_losses
.
append
(
loss
)
print
(
"Pass: %d, Iter: %d, Loss: %f
\n
"
%
(
pass_id
,
iters
,
np
.
mean
(
train_losses
)))
...
...
@@ -250,10 +290,14 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
def
train_parallel
(
avg_loss
,
infer_prog
,
optimizer
,
train_reader
,
test_reader
,
batch_acc
,
args
,
train_prog
,
startup_prog
,
nccl_id_var
,
num_trainers
,
trainer_id
):
feed_var_list
=
[
var
for
var
in
train_prog
.
global_block
().
vars
.
itervalues
()
if
var
.
is_data
]
place
=
core
.
CPUPlace
()
if
args
.
device
==
'CPU'
else
core
.
CUDAPlace
(
0
)
if
not
args
.
use_reader_op
:
feed_var_list
=
[
var
for
var
in
train_prog
.
global_block
().
vars
.
itervalues
()
if
var
.
is_data
]
feeder
=
fluid
.
DataFeeder
(
feed_var_list
,
place
)
# generate fake:
if
args
.
use_fake_data
:
for
var
in
feed_var_list
:
...
...
@@ -270,7 +314,6 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
"value"
:
1.0
,
"dtype"
:
var
.
dtype
})
place
=
core
.
CPUPlace
()
if
args
.
device
==
'CPU'
else
core
.
CUDAPlace
(
0
)
if
nccl_id_var
and
trainer_id
==
0
:
#FIXME(wuyi): wait other trainer to start listening
time
.
sleep
(
30
)
...
...
@@ -287,12 +330,21 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
num_trainers
=
num_trainers
,
trainer_id
=
trainer_id
)
feeder
=
fluid
.
DataFeeder
(
feed_var_list
,
place
)
for
pass_id
in
range
(
args
.
pass_num
):
num_samples
=
0
iters
=
0
start_time
=
time
.
time
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
not
args
.
use_reader_op
:
reader_generator
=
train_reader
()
batch_id
=
0
data
=
None
while
True
:
if
not
args
.
use_reader_op
:
data
=
next
(
reader_generator
,
None
)
if
data
==
None
:
break
if
iters
==
args
.
iterations
:
break
if
args
.
profile
and
pass_id
==
0
and
batch_id
==
5
:
profiler
.
start_profiler
(
"All"
)
elif
args
.
profile
and
pass_id
==
0
and
batch_id
==
10
:
...
...
@@ -301,19 +353,26 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
if
iters
==
args
.
skip_batch_num
:
start_time
=
time
.
time
()
num_samples
=
0
if
iters
==
args
.
iterations
:
break
if
args
.
use_fake_data
:
loss
,
=
exe
.
run
([
avg_loss
.
name
])
if
args
.
use_fake_data
or
args
.
use_reader_op
:
try
:
loss
,
=
exe
.
run
([
avg_loss
.
name
])
except
fluid
.
core
.
EnforceNotMet
as
ex
:
break
else
:
loss
,
=
exe
.
run
([
avg_loss
.
name
],
feed
=
feeder
.
feed
(
data
))
if
args
.
update_method
==
"pserver"
:
exe
.
bcast_params
()
num_samples
+=
len
(
data
)
if
args
.
use_reader_op
:
num_samples
+=
args
.
batch_size
*
args
.
gpus
else
:
num_samples
+=
len
(
data
)
iters
+=
1
if
batch_id
%
1
==
0
:
print
(
"Pass %d, batch %d, loss %s"
%
(
pass_id
,
batch_id
,
np
.
array
(
loss
)))
batch_id
+=
1
if
args
.
use_reader_op
:
num_samples
=
num_samples
*
args
.
gpus
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
if
not
args
.
no_test
and
batch_acc
:
test_acc
=
test
(
startup_exe
,
infer_prog
,
test_reader
,
feeder
,
...
...
benchmark/fluid/models/machine_translation.py
浏览文件 @
bfd42683
...
...
@@ -197,6 +197,8 @@ def lodtensor_to_ndarray(lod_tensor):
def
get_model
(
args
):
if
args
.
use_reader_op
:
raise
Exception
(
"machine_translation do not support reader op for now."
)
embedding_dim
=
512
encoder_size
=
512
decoder_size
=
512
...
...
@@ -221,7 +223,7 @@ def get_model(args):
train_batch_generator
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
wmt14
.
train
(
dict_size
),
buf_size
=
1000
),
batch_size
=
args
.
batch_size
)
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_batch_generator
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
...
...
benchmark/fluid/models/mnist.py
浏览文件 @
bfd42683
...
...
@@ -20,6 +20,7 @@ import numpy as np
import
argparse
import
time
import
cProfile
import
os
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -65,9 +66,24 @@ def cnn_model(data):
def
get_model
(
args
):
# Input data
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
dtype
=
DTYPE
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
if
args
.
use_reader_op
:
filelist
=
[
os
.
path
.
join
(
args
.
data_path
,
f
)
for
f
in
os
.
listdir
(
args
.
data_path
)
]
data_file
=
fluid
.
layers
.
open_files
(
filenames
=
filelist
,
shapes
=
[[
-
1
,
1
,
28
,
28
],
(
-
1
,
1
)],
lod_levels
=
[
0
,
0
],
dtypes
=
[
"float32"
,
"int64"
],
thread_num
=
args
.
gpus
,
pass_num
=
args
.
pass_num
)
data_file
=
fluid
.
layers
.
double_buffer
(
fluid
.
layers
.
batch
(
data_file
,
batch_size
=
args
.
batch_size
))
images
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
else
:
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
dtype
=
DTYPE
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
if
args
.
device
==
'CPU'
and
args
.
cpus
>
1
:
places
=
fluid
.
layers
.
get_places
(
args
.
cpus
)
...
...
@@ -103,7 +119,7 @@ def get_model(args):
# Reader
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
train
(),
batch_size
=
args
.
batch_size
)
paddle
.
dataset
.
mnist
.
train
(),
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
args
.
batch_size
)
return
avg_cost
,
inference_program
,
opt
,
train_reader
,
test_reader
,
batch_acc
benchmark/fluid/models/resnet.py
浏览文件 @
bfd42683
...
...
@@ -19,6 +19,7 @@ from __future__ import print_function
import
functools
import
numpy
as
np
import
time
import
os
import
cProfile
,
pstats
,
StringIO
...
...
@@ -26,6 +27,7 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.profiler
as
profiler
from
recordio_converter
import
imagenet_train
,
imagenet_test
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
...
...
@@ -122,16 +124,48 @@ def get_model(args):
else
:
dshape
=
[
32
,
32
,
3
]
model
=
resnet_cifar10
else
:
train_reader
=
paddle
.
dataset
.
cifar
.
train10
()
test_reader
=
paddle
.
dataset
.
cifar
.
test10
()
elif
args
.
data_set
==
"flowers"
:
class_dim
=
102
if
args
.
data_format
==
'NCHW'
:
dshape
=
[
3
,
224
,
224
]
else
:
dshape
=
[
224
,
224
,
3
]
model
=
resnet_imagenet
input
=
fluid
.
layers
.
data
(
name
=
'data'
,
shape
=
dshape
,
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
train_reader
=
paddle
.
dataset
.
flowers
.
train
()
test_reader
=
paddle
.
dataset
.
flowers
.
test
()
elif
args
.
data_set
==
"imagenet"
:
class_dim
=
1000
if
args
.
data_format
==
'NCHW'
:
dshape
=
[
3
,
224
,
224
]
else
:
dshape
=
[
224
,
224
,
3
]
model
=
resnet_imagenet
if
not
args
.
data_path
:
raise
Exception
(
"Must specify --data_path when training with imagenet"
)
train_reader
=
imagenet_train
(
args
.
data_path
)
test_reader
=
imagenet_test
(
args
.
data_path
)
if
args
.
use_reader_op
:
filelist
=
[
os
.
path
.
join
(
args
.
data_path
,
f
)
for
f
in
os
.
listdir
(
args
.
data_path
)
]
data_file
=
fluid
.
layers
.
open_files
(
filenames
=
filelist
,
shapes
=
[[
-
1
]
+
dshape
,
(
-
1
,
1
)],
lod_levels
=
[
0
,
0
],
dtypes
=
[
"float32"
,
"int64"
],
thread_num
=
args
.
gpus
,
pass_num
=
args
.
pass_num
)
data_file
=
fluid
.
layers
.
double_buffer
(
fluid
.
layers
.
batch
(
data_file
,
batch_size
=
args
.
batch_size
))
input
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
else
:
input
=
fluid
.
layers
.
data
(
name
=
'data'
,
shape
=
dshape
,
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
if
args
.
device
==
'CPU'
and
args
.
cpus
>
1
:
places
=
fluid
.
layers
.
get_places
(
args
.
cpus
)
...
...
@@ -162,15 +196,10 @@ def get_model(args):
optimizer
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.01
,
momentum
=
0.9
)
train_reader
=
paddle
.
batch
(
batched_
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
cifar
.
train10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
train
(),
buf_size
=
5120
),
batch_size
=
args
.
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
cifar
.
test10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
test
(),
batch_size
=
args
.
batch_size
)
return
avg_cost
,
inference_program
,
optimizer
,
train_reader
,
test_reader
,
batch_acc
train_reader
,
buf_size
=
5120
),
batch_size
=
args
.
batch_size
*
args
.
gpus
)
batched_test_reader
=
paddle
.
batch
(
train_reader
,
batch_size
=
args
.
batch_size
)
return
avg_cost
,
inference_program
,
optimizer
,
batched_train_reader
,
batched_test_reader
,
batch_acc
benchmark/fluid/models/stacked_dynamic_lstm.py
浏览文件 @
bfd42683
...
...
@@ -44,6 +44,9 @@ def crop_sentence(reader, crop_size):
def
get_model
(
args
):
if
args
.
use_reader_op
:
raise
Exception
(
"stacked_dynamic_lstm do not support reader op for now."
)
lstm_size
=
512
emb_dim
=
512
crop_size
=
1500
...
...
@@ -114,7 +117,7 @@ def get_model(args):
train_reader
=
batch
(
paddle
.
reader
.
shuffle
(
crop_sentence
(
imdb
.
train
(
word_dict
),
crop_size
),
buf_size
=
25000
),
batch_size
=
args
.
batch_size
)
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_reader
=
batch
(
paddle
.
reader
.
shuffle
(
crop_sentence
(
imdb
.
test
(
word_dict
),
crop_size
),
buf_size
=
25000
),
...
...
benchmark/fluid/models/vgg.py
浏览文件 @
bfd42683
...
...
@@ -22,6 +22,7 @@ import paddle.fluid as fluid
import
paddle.fluid.core
as
core
import
argparse
import
functools
import
os
def
vgg16_bn_drop
(
input
):
...
...
@@ -65,9 +66,24 @@ def get_model(args):
else
:
data_shape
=
[
224
,
224
,
3
]
# Input data
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
data_shape
,
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
if
args
.
use_reader_op
:
filelist
=
[
os
.
path
.
join
(
args
.
data_path
,
f
)
for
f
in
os
.
listdir
(
args
.
data_path
)
]
data_file
=
fluid
.
layers
.
open_files
(
filenames
=
filelist
,
shapes
=
[[
-
1
]
+
data_shape
,
(
-
1
,
1
)],
lod_levels
=
[
0
,
0
],
dtypes
=
[
"float32"
,
"int64"
],
thread_num
=
args
.
gpus
,
pass_num
=
args
.
pass_num
)
data_file
=
fluid
.
layers
.
double_buffer
(
fluid
.
layers
.
batch
(
data_file
,
batch_size
=
args
.
batch_size
))
images
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
else
:
images
=
fluid
.
layers
.
data
(
name
=
'data'
,
shape
=
dshape
,
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
# Train program
net
=
vgg16_bn_drop
(
images
)
...
...
@@ -95,7 +111,7 @@ def get_model(args):
paddle
.
dataset
.
cifar
.
train10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
train
(),
buf_size
=
5120
),
batch_size
=
args
.
batch_size
)
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
cifar
.
test10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
test
(),
...
...
benchmark/fluid/recordio_converter.py
0 → 100644
浏览文件 @
bfd42683
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
random
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.dataset
import
mnist
,
cifar
,
flowers
,
image
def
convert_2_recordio
(
py_reader
,
outfilepath
,
batch_size
,
shape_data
,
shape_label
):
num_batches
=
0
with
fluid
.
program_guard
(
fluid
.
Program
(),
fluid
.
Program
()):
reader
=
paddle
.
batch
(
py_reader
(),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
# order is image and label
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
shape_data
),
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
shape_label
,
dtype
=
'int64'
),
],
place
=
fluid
.
CPUPlace
())
num_batches
=
fluid
.
recordio_writer
.
convert_reader_to_recordio_file
(
outfilepath
,
reader
,
feeder
)
return
num_batches
def
prepare_mnist
(
outpath
,
batch_size
):
outfilepath
=
os
.
path
.
join
(
outpath
,
"mnist.recordio"
)
convert_2_recordio
(
mnist
.
train
,
outfilepath
,
batch_size
,
[
784
],
[
1
])
def
prepare_cifar10
(
outpath
,
batch_size
):
outfilepath
=
os
.
path
.
join
(
outpath
,
"cifar.recordio"
)
convert_2_recordio
(
cifar
.
train10
,
outfilepath
,
batch_size
,
[
3
,
32
,
32
],
[
1
])
def
prepare_flowers
(
outpath
,
batch_size
):
outfilepath
=
os
.
path
.
join
(
outpath
,
"flowers.recordio"
)
convert_2_recordio
(
flowers
.
train
,
outfilepath
,
batch_size
,
[
3
,
224
,
224
],
[
1
])
def
default_mapper
(
sample
):
img
,
label
=
sample
img
=
image
.
simple_transform
(
img
,
256
,
224
,
True
,
mean
=
[
103.94
,
116.78
,
123.68
])
return
img
.
flatten
().
astype
(
'float32'
),
label
def
imagenet_train
(
data_dir
):
contents
=
os
.
listdir
(
data_dir
)
if
set
(
contents
)
!=
set
(
[
"train"
,
"train.txt"
,
"val"
,
"val_set"
,
"val.txt"
,
"unzip.sh"
]):
raise
Exception
(
"Imagenet data contents error!"
)
img2label
=
dict
()
imgfilelist
=
[]
with
open
(
os
.
path
.
join
(
data_dir
,
"train.txt"
))
as
fn
:
while
1
:
l
=
fn
.
readline
()
if
not
l
:
break
img
,
lbl
=
l
[:
-
1
].
split
(
" "
)
img2label
[
img
]
=
int
(
lbl
)
imgfilelist
.
append
(
img
)
# shuffle all, this is slow
random
.
shuffle
(
imgfilelist
)
def
train_reader
():
for
idx
,
imgfile
in
enumerate
(
imgfilelist
):
data
=
image
.
load_image
(
os
.
path
.
join
(
data_dir
,
"train"
,
imgfile
.
lower
()))
label
=
[
img2label
[
imgfile
],
]
yield
[
data
,
label
]
return
paddle
.
reader
.
map_readers
(
default_mapper
,
train_reader
)
def
imagenet_test
(
data_dir
):
contents
=
os
.
listdir
(
data_dir
)
if
set
(
contents
)
!=
set
(
[
"train"
,
"train.txt"
,
"val"
,
"val_set"
,
"val.txt"
,
"unzip.sh"
]):
raise
Exception
(
"Imagenet data contents error!"
)
img2label
=
dict
()
imgfilelist
=
[]
with
open
(
os
.
path
.
join
(
data_dir
,
"val.txt"
))
as
fn
:
while
1
:
l
=
fn
.
readline
()
if
not
l
:
break
img
,
lbl
=
l
[:
-
1
].
split
(
" "
)
img2label
[
img
]
=
int
(
lbl
)
imgfilelist
.
append
(
img
)
def
test_reader
():
for
idx
,
imgfile
in
enumerate
(
imgfilelist
):
base_path
=
os
.
path
.
join
(
data_dir
,
"val"
,
imgfile
.
split
(
"."
)[
0
])
image_path
=
"."
.
join
([
base_path
,
"jpeg"
])
data
=
image
.
load_image
(
image_path
)
label
=
[
img2label
[
imgfile
],
]
yield
[
data
,
label
]
return
paddle
.
reader
.
map_readers
(
default_mapper
,
test_reader
)
# FIXME(wuyi): delete this when https://github.com/PaddlePaddle/Paddle/pull/11066 is merged
def
convert_reader_to_recordio_files
(
filename
,
batch_per_file
,
reader_creator
,
feeder
,
compressor
=
core
.
RecordIOWriter
.
Compressor
.
Snappy
,
max_num_records
=
1000
,
feed_order
=
None
):
if
feed_order
is
None
:
feed_order
=
feeder
.
feed_names
f_name
,
f_ext
=
os
.
path
.
splitext
(
filename
)
assert
(
f_ext
==
".recordio"
)
lines
=
[]
f_idx
=
0
counter
=
0
for
idx
,
batch
in
enumerate
(
reader_creator
()):
lines
.
append
(
batch
)
if
idx
>=
batch_per_file
and
idx
%
batch_per_file
==
0
:
filename
=
"%s-%05d%s"
%
(
f_name
,
f_idx
,
f_ext
)
with
fluid
.
recordio_writer
.
create_recordio_writer
(
filename
,
compressor
,
max_num_records
)
as
writer
:
for
l
in
lines
:
res
=
feeder
.
feed
(
l
)
for
each
in
feed_order
:
writer
.
append_tensor
(
res
[
each
])
writer
.
complete_append_tensor
()
counter
+=
1
lines
=
[]
f_idx
+=
1
print
(
"written file: "
,
filename
)
return
counter
def
prepare_imagenet
(
inpath
,
outpath
,
batch_size
):
r
=
paddle
.
batch
(
imagenet_train
(
inpath
),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
fluid
.
layers
.
data
(
name
=
"image"
,
shape
=
[
3
,
224
,
224
]),
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
'int64'
)
],
place
=
fluid
.
CPUPlace
())
outpath
=
os
.
path
.
join
(
outpath
,
"imagenet.recordio"
)
convert_reader_to_recordio_files
(
outpath
,
10000
,
r
,
feeder
)
cmake/configure.cmake
浏览文件 @
bfd42683
...
...
@@ -92,6 +92,9 @@ if(WITH_GPU)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
message
(
FATAL_ERROR
"TensorRT needs CUDNN >= 7.0 to compile"
)
endif
()
if
(
${
TENSORRT_MAJOR_VERSION
}
VERSION_LESS 4
)
message
(
FATAL_ERROR
"Paddle needs TensorRT >= 4.0 to compile"
)
endif
()
include_directories
(
${
TENSORRT_INCLUDE_DIR
}
)
endif
()
elseif
(
WITH_AMD_GPU
)
...
...
paddle/fluid/framework/data_layout.h
浏览文件 @
bfd42683
...
...
@@ -27,6 +27,7 @@ enum class DataLayout {
kNHWC
=
0
,
kNCHW
=
1
,
kAnyLayout
=
2
,
kMKLDNN
=
3
,
// all layouts supported by MKLDNN internally
};
inline
DataLayout
StringToDataLayout
(
const
std
::
string
&
str
)
{
...
...
@@ -41,6 +42,8 @@ inline DataLayout StringToDataLayout(const std::string& str) {
return
DataLayout
::
kNCHW
;
}
else
if
(
s
==
"ANYLAYOUT"
)
{
return
DataLayout
::
kAnyLayout
;
}
else
if
(
s
==
"MKLDNNLAYOUT"
)
{
return
DataLayout
::
kMKLDNN
;
}
else
{
PADDLE_THROW
(
"Unknown storage order string: %s"
,
s
);
}
...
...
@@ -54,8 +57,10 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
return
"NCHW"
;
case
DataLayout
::
kAnyLayout
:
return
"ANY_LAYOUT"
;
case
DataLayout
::
kMKLDNN
:
return
"MKLDNNLAYOUT"
;
default:
PADDLE_THROW
(
"unknown DataLayou %d"
,
data_layout
);
PADDLE_THROW
(
"unknown DataLayou
t
%d"
,
data_layout
);
}
}
...
...
paddle/fluid/framework/data_layout_transform.cc
浏览文件 @
bfd42683
...
...
@@ -16,6 +16,9 @@
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace
paddle
{
namespace
framework
{
...
...
@@ -88,5 +91,85 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
out
->
set_layout
(
expected_kernel_type
.
data_layout_
);
}
#ifdef PADDLE_WITH_MKLDNN
using
mkldnn
::
memory
;
using
mkldnn
::
primitive
;
using
mkldnn
::
reorder
;
void
*
GetDataFromTensor
(
const
Tensor
&
tensor
,
mkldnn
::
memory
::
data_type
type
)
{
switch
(
type
)
{
case
mkldnn
::
memory
::
data_type
::
f32
:
return
platform
::
to_void_cast
(
tensor
.
data
<
float
>
());
case
mkldnn
::
memory
::
data_type
::
s8
:
return
platform
::
to_void_cast
(
tensor
.
data
<
char
>
());
case
mkldnn
::
memory
::
data_type
::
u8
:
return
platform
::
to_void_cast
(
tensor
.
data
<
unsigned
char
>
());
case
mkldnn
::
memory
::
data_type
::
s16
:
return
platform
::
to_void_cast
(
tensor
.
data
<
int16_t
>
());
case
mkldnn
::
memory
::
data_type
::
s32
:
return
platform
::
to_void_cast
(
tensor
.
data
<
int32_t
>
());
default:
PADDLE_THROW
(
"wrong mkldnn type provided"
);
}
}
#endif
void
TransDataLayoutFromMKLDNN
(
const
OpKernelType
&
kernel_type_for_var
,
const
OpKernelType
&
expected_kernel_type
,
const
Tensor
&
in
,
Tensor
*
out
)
{
auto
in_layout
=
kernel_type_for_var
.
data_layout_
;
auto
out_layout
=
expected_kernel_type
.
data_layout_
;
PADDLE_ENFORCE
(
in_layout
==
DataLayout
::
kMKLDNN
&&
out_layout
!=
DataLayout
::
kMKLDNN
,
"TransDataLayoutFromMKLDNN only supports transform from MKLDNN to "
"non-MKLDNN"
);
#ifdef PADDLE_WITH_MKLDNN
PADDLE_ENFORCE
(
in
.
format
()
!=
memory
::
format
::
format_undef
&&
in
.
format
()
!=
memory
::
format
::
any
,
"Input tensor should have specified memory format"
);
// Set default as NCHW in case not specified
out_layout
=
out_layout
==
DataLayout
::
kAnyLayout
?
DataLayout
::
kNCHW
:
out_layout
;
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
dynamic_cast
<
platform
::
MKLDNNDeviceContext
*>
(
pool
.
Get
(
expected_kernel_type
.
place_
));
auto
&
cpu_engine
=
dev_ctx
->
GetEngine
();
std
::
vector
<
int
>
in_tz
=
paddle
::
framework
::
vectorize2int
(
in
.
dims
());
std
::
vector
<
int
>
out_tz
=
in_tz
;
memory
::
data_type
in_type
=
ToMKLDNNDataType
(
in
.
type
());
PADDLE_ENFORCE
(
in_type
!=
memory
::
data_type
::
data_undef
,
"Input tensor type is not supported: "
,
in
.
type
().
name
());
memory
::
data_type
out_type
=
in_type
;
memory
::
format
in_format
=
in_tz
.
size
()
==
2
?
memory
::
format
::
nc
:
in
.
format
();
memory
::
format
out_format
=
out_tz
.
size
()
==
2
?
memory
::
format
::
nc
:
ToMKLDNNFormat
(
out_layout
);
void
*
in_data
=
GetDataFromTensor
(
in
,
in_type
);
// output tensor has the same dims as input. Reorder don't change dims
out
->
Resize
(
in
.
dims
());
auto
out_data
=
out
->
mutable_data
(
expected_kernel_type
.
place_
,
in
.
type
());
auto
in_memory
=
memory
({{{
in_tz
},
in_type
,
in_format
},
cpu_engine
},
in_data
);
auto
out_memory
=
memory
({{{
out_tz
},
out_type
,
out_format
},
cpu_engine
},
out_data
);
platform
::
Reorder
(
in_memory
,
out_memory
);
out
->
set_layout
(
out_layout
);
// reset format since the out tensor will be feed to non-MKLDNN OPkernel
out
->
set_format
(
memory
::
format
::
format_undef
);
#endif
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/data_layout_transform.h
浏览文件 @
bfd42683
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <map>
#include <vector>
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/tensor.h"
...
...
@@ -22,6 +23,50 @@
namespace
paddle
{
namespace
framework
{
#ifdef PADDLE_WITH_MKLDNN
using
MKLDNNFormat
=
mkldnn
::
memory
::
format
;
using
MKLDNNDataType
=
mkldnn
::
memory
::
data_type
;
inline
MKLDNNFormat
ToMKLDNNFormat
(
const
DataLayout
&
layout
)
{
switch
(
layout
)
{
case
DataLayout
::
kNHWC
:
return
MKLDNNFormat
::
nhwc
;
case
DataLayout
::
kNCHW
:
return
MKLDNNFormat
::
nchw
;
default:
PADDLE_THROW
(
"Fail to convert layout %s to MKLDNN format"
,
DataLayoutToString
(
layout
));
}
}
inline
DataLayout
ToPaddleLayout
(
const
MKLDNNFormat
&
format
)
{
switch
(
format
)
{
case
MKLDNNFormat
::
nhwc
:
return
DataLayout
::
kNHWC
;
case
MKLDNNFormat
::
nchw
:
return
DataLayout
::
kNCHW
;
default:
PADDLE_THROW
(
"Fail to convert MKLDNN format to paddle layout"
);
}
}
inline
MKLDNNDataType
ToMKLDNNDataType
(
const
std
::
type_index
type
)
{
static
const
std
::
map
<
std
::
type_index
,
MKLDNNDataType
>
dict
{
{
std
::
type_index
(
typeid
(
float
)),
MKLDNNDataType
::
f32
},
// NOLINT
{
std
::
type_index
(
typeid
(
char
)),
MKLDNNDataType
::
s8
},
// NOLINT
{
std
::
type_index
(
typeid
(
unsigned
char
)),
MKLDNNDataType
::
u8
},
{
std
::
type_index
(
typeid
(
int16_t
)),
MKLDNNDataType
::
s16
},
{
std
::
type_index
(
typeid
(
int32_t
)),
MKLDNNDataType
::
s32
}};
auto
iter
=
dict
.
find
(
type
);
if
(
iter
!=
dict
.
end
())
return
iter
->
second
;
return
MKLDNNDataType
::
data_undef
;
}
#endif
void
TransDataLayoutFromMKLDNN
(
const
OpKernelType
&
kernel_type_for_var
,
const
OpKernelType
&
expected_kernel_type
,
const
Tensor
&
in
,
Tensor
*
out
);
std
::
vector
<
int
>
GetAxis
(
const
DataLayout
&
from
,
const
DataLayout
&
to
);
void
TransDataLayout
(
const
OpKernelType
&
kernel_type_for_var
,
...
...
paddle/fluid/framework/data_transform.cc
浏览文件 @
bfd42683
...
...
@@ -33,11 +33,34 @@ void DataTransform(const OpKernelType& expected_kernel_type,
Tensor
in
;
in
.
ShareDataWith
(
input_tensor
);
Tensor
out
;
DataLayout
lin
=
kernel_type_for_var
.
data_layout_
;
DataLayout
lout
=
expected_kernel_type
.
data_layout_
;
// do layout transform
if
(
NeedTransformLayout
(
expected_kernel_type
.
data_layout_
,
kernel_type_for_var
.
data_layout_
))
{
TransDataLayout
(
kernel_type_for_var
,
expected_kernel_type
,
in
,
&
out
);
if
(
NeedTransformLayout
(
lout
,
lin
))
{
if
(
lin
==
DataLayout
::
kMKLDNN
||
lout
==
DataLayout
::
kMKLDNN
)
{
PADDLE_ENFORCE
(
!
(
lin
==
DataLayout
::
kMKLDNN
&&
lout
==
DataLayout
::
kMKLDNN
),
"No layout transform needed between two MKLDNN OPKernels"
);
if
(
lin
!=
DataLayout
::
kMKLDNN
&&
lout
==
DataLayout
::
kMKLDNN
)
{
#ifdef PADDLE_WITH_MKLDNN
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occur
out
.
ShareDataWith
(
input_tensor
);
out
.
set_layout
(
DataLayout
::
kMKLDNN
);
out
.
set_format
(
ToMKLDNNFormat
(
lin
));
#endif
}
else
{
// Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
// Do transform via MKLDNN lib
TransDataLayoutFromMKLDNN
(
kernel_type_for_var
,
expected_kernel_type
,
in
,
&
out
);
}
}
else
{
// Case3 - transfrom between Non-MKLDNN OPKernels
TransDataLayout
(
kernel_type_for_var
,
expected_kernel_type
,
in
,
&
out
);
}
transformed
=
true
;
PassTensorData
(
&
out
,
&
in
);
}
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
bfd42683
...
...
@@ -13,7 +13,7 @@ cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_ro
if
(
WITH_GPU
)
nv_library
(
nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda
)
dynload_cuda
variable_visitor
)
set
(
multi_devices_graph_builder_deps nccl_all_reduce_op_handle
)
nv_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim dynload_cuda
)
nv_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda
)
...
...
@@ -25,6 +25,7 @@ else()
endif
()
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
scale_loss_grad_op_handle rpc_op_handle
${
multi_devices_graph_builder_deps
}
reduce_op_handle broadcast_op_handle
)
...
...
paddle/fluid/framework/details/fuse_vars_op_handle.cc
0 → 100644
浏览文件 @
bfd42683
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fuse_vars_op_handle.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
void
FuseVarsOpHandle
::
RunImpl
()
{
WaitInputVarGenerated
(
place_
);
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Inputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
()
-
1
,
inputs_numel_
.
size
(),
""
);
auto
scope
=
local_scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
out_var_handle
=
out_var_handles
[
0
];
auto
out_var
=
scope
->
Var
(
out_var_handle
->
name_
);
auto
out_tensor
=
out_var
->
GetMutable
<
LoDTensor
>
();
out_tensor
->
Resize
({
total_numel_
}).
mutable_data
(
this
->
place_
,
type_
);
int64_t
s
=
0
;
for
(
size_t
i
=
1
;
i
<
out_var_handles
.
size
();
++
i
)
{
auto
out_name
=
out_var_handles
[
i
]
->
name_
;
auto
out_t
=
scope
->
Var
(
out_name
)
->
GetMutable
<
LoDTensor
>
();
auto
numel
=
this
->
inputs_numel_
.
at
(
out_name
);
out_t
->
ShareDataWith
(
out_tensor
->
Slice
(
s
,
s
+
numel
));
s
+=
numel
;
}
this
->
RunAndRecordEvent
([
this
]
{});
}
std
::
string
FuseVarsOpHandle
::
Name
()
const
{
return
"fuse vars"
;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/fuse_vars_op_handle.h
0 → 100644
浏览文件 @
bfd42683
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
struct
FuseVarsOpHandle
:
public
OpHandleBase
{
public:
FuseVarsOpHandle
(
Scope
*
local_scope
,
const
platform
::
Place
&
place
,
const
std
::
unordered_map
<
std
::
string
,
int64_t
>
&
inputs_numel
,
const
std
::
type_index
&
var_type
)
:
local_scope_
(
local_scope
),
place_
(
place
),
inputs_numel_
(
inputs_numel
),
type_
(
var_type
)
{
total_numel_
=
0
;
for
(
auto
in_numel
:
inputs_numel
)
{
PADDLE_ENFORCE_GT
(
in_numel
.
second
,
0
);
total_numel_
+=
in_numel
.
second
;
}
}
std
::
string
Name
()
const
override
;
bool
IsMultiDeviceTransfer
()
override
{
return
false
;
};
protected:
void
RunImpl
()
override
;
private:
Scope
*
local_scope_
;
const
platform
::
Place
place_
;
const
std
::
unordered_map
<
std
::
string
,
int64_t
>
inputs_numel_
;
const
std
::
type_index
type_
;
int64_t
total_numel_
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
浏览文件 @
bfd42683
...
...
@@ -11,10 +11,12 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h"
#include <algorithm>
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -30,27 +32,34 @@ NCCLAllReduceOpHandle::NCCLAllReduceOpHandle(
}
void
NCCLAllReduceOpHandle
::
RunImpl
()
{
if
(
inputs_
.
s
ize
()
==
1
)
{
if
(
NoDummyInputS
ize
()
==
1
)
{
return
;
// No need to all reduce when GPU count = 1;
}
else
{
// Wait input done
WaitInputVarGenerated
();
auto
&
var_name
=
static_cast
<
VarHandle
*>
(
this
->
inputs_
[
0
])
->
name_
;
int
dtype
=
-
1
;
size_t
numel
=
0
;
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Inputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
places_
.
size
(),
"The NoDummyInputSize should be equal to the number of places."
);
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
out_var_handles
.
size
(),
"The NoDummyInputSize and NoDummyOutputSize should be equal."
);
std
::
vector
<
const
LoDTensor
*>
lod_tensors
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
*
s
=
local_scopes_
[
i
];
auto
&
local_scope
=
*
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
lod_tensor
=
local_scope
.
FindVar
(
var_name
)
->
Get
<
LoDTensor
>
();
auto
&
lod_tensor
=
local_scope
.
FindVar
(
in_var_handles
[
i
]
->
name_
)
->
Get
<
LoDTensor
>
();
lod_tensors
.
emplace_back
(
&
lod_tensor
);
PADDLE_ENFORCE_EQ
(
in_var_handles
[
i
]
->
name_
,
out_var_handles
[
i
]
->
name_
,
"The name of input and output should be equal."
);
}
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
int
dtype
=
-
1
;
size_t
numel
=
0
;
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
&
p
=
places_
[
i
];
...
...
@@ -96,7 +105,7 @@ void NCCLAllReduceOpHandle::RunImpl() {
auto
&
scope
=
*
local_scopes_
[
i
]
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
p
=
places_
[
i
];
auto
*
var
=
scope
.
FindVar
(
var_name
);
auto
*
var
=
scope
.
FindVar
(
in_var_handles
[
i
]
->
name_
);
auto
*
dev_ctx
=
dev_ctxes_
[
p
];
RunAndRecordEvent
(
p
,
[
&
trg
,
var
,
dev_ctx
,
p
]
{
...
...
paddle/fluid/framework/details/op_handle_base.cc
浏览文件 @
bfd42683
...
...
@@ -104,6 +104,16 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) {
}
}
size_t
OpHandleBase
::
NoDummyInputSize
()
const
{
size_t
cnt
=
0
;
for
(
auto
*
in
:
inputs_
)
{
if
(
dynamic_cast
<
DummyVarHandle
*>
(
in
)
==
nullptr
)
{
++
cnt
;
}
}
return
cnt
;
}
bool
OpHandleBase
::
NeedWait
(
VarHandleBase
*
in_var
)
{
return
in_var
&&
in_var
->
generated_op_
;
}
...
...
paddle/fluid/framework/details/op_handle_base.h
浏览文件 @
bfd42683
...
...
@@ -80,6 +80,8 @@ class OpHandleBase {
const
std
::
vector
<
VarHandleBase
*>
&
Outputs
()
const
{
return
outputs_
;
}
size_t
NoDummyInputSize
()
const
;
protected:
void
RunAndRecordEvent
(
const
std
::
function
<
void
()
>
&
callback
);
...
...
paddle/fluid/framework/details/ssa_graph_printer.h
浏览文件 @
bfd42683
...
...
@@ -20,7 +20,7 @@
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
SSAGraph
;
struct
SSAGraph
;
class
SSAGraphPrinter
{
public:
virtual
~
SSAGraphPrinter
()
{}
...
...
paddle/fluid/framework/op_kernel_type.h
浏览文件 @
bfd42683
...
...
@@ -87,7 +87,14 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
}
inline
bool
NeedTransformLayout
(
const
DataLayout
&
l
,
const
DataLayout
&
r
)
{
return
l
!=
DataLayout
::
kAnyLayout
&&
r
!=
DataLayout
::
kAnyLayout
&&
l
!=
r
;
bool
ret
=
(
l
!=
DataLayout
::
kAnyLayout
&&
r
!=
DataLayout
::
kAnyLayout
&&
l
!=
r
);
#ifdef PADDLE_WITH_MKLDNN
// Layout transform needed for either non-MKLDNN to MKLDNN or vice versa
ret
|=
(
l
!=
DataLayout
::
kMKLDNN
&&
r
==
DataLayout
::
kMKLDNN
);
ret
|=
(
l
==
DataLayout
::
kMKLDNN
&&
r
!=
DataLayout
::
kMKLDNN
);
#endif
return
ret
;
}
inline
bool
TransFromNeeded
(
const
OpKernelType
&
l
,
const
OpKernelType
&
r
)
{
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
bfd42683
...
...
@@ -83,8 +83,14 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
void
operator
()(
const
char
*
op_type
,
const
char
*
library_type
)
const
{
using
T
=
typename
KERNEL_TYPE
::
ELEMENT_TYPE
;
std
::
string
library
(
library_type
);
std
::
string
data_layout
=
"ANYLAYOUT"
;
if
(
library
==
"MKLDNN"
)
{
data_layout
=
"MKLDNNLAYOUT"
;
}
OpKernelType
key
(
ToDataType
(
std
::
type_index
(
typeid
(
T
))),
PlaceType
(),
DataLayout
::
kAnyLayout
,
StringToLibraryType
(
library_type
));
StringToDataLayout
(
data_layout
),
StringToLibraryType
(
library_type
));
OperatorWithKernel
::
AllOpKernels
()[
op_type
][
key
].
reset
(
new
KERNEL_TYPE
);
constexpr
auto
size
=
std
::
tuple_size
<
std
::
tuple
<
KernelTypes
...
>>::
value
;
...
...
@@ -99,7 +105,8 @@ struct OpKernelRegistrarFunctor<PlaceType, true, I, KernelType...> {
void
operator
()(
const
char
*
op_type
,
const
char
*
library_type
)
const
{}
};
// User can register many kernel in one place. The data type could be different.
// User can register many kernel in one place. The data type could be
// different.
template
<
typename
PlaceType
,
typename
...
KernelType
>
class
OpKernelRegistrar
:
public
Registrar
{
public:
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
bfd42683
...
...
@@ -444,10 +444,25 @@ class RuntimeInferShapeContext : public InferShapeContext {
auto
*
out_tensor
=
out_var
->
GetMutable
<
LoDTensor
>
();
out_tensor
->
set_lod
(
in_tensor
.
lod
());
// TODO(dzhwinter) : reuse ShareLoD in most operators.
// Need to call ShareLayout explicitly in sequence related ops.
// Shall we have a better method to shared info between in/out Tensor?
out_tensor
->
set_layout
(
in_tensor
.
layout
());
// TODO(dzhwinter) : reuse ShareLoD in most operators.
// Need to call ShareLayout explicitly in sequence related ops.
// Shall we have a better method to shared info between in/out Tensor?
#ifdef PADDLE_WITH_MKLDNN
// Fix me: ugly workaround below
// Correct solution:
// set_layout() should NOT be called here (i.e. ShareLoD). Instead,
// layout of output tensor should be set "manually" in Compute()
// of each OPKernel. The reason layout should NOT be shared between
// input and output "automatically" (now by InferShape()->ShareLoD())
// is that layout transform may occur after InferShape().
// Workaround:
// Skip set_layout() when input layout is kMKLDNN
// This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN
// OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called
// in Compute()
if
(
in_tensor
.
layout
()
!=
DataLayout
::
kMKLDNN
)
#endif
out_tensor
->
set_layout
(
in_tensor
.
layout
());
}
void
ShareLayout
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
...
...
@@ -646,8 +661,10 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
}
if
(
t
!=
nullptr
)
{
int
tmp
=
static_cast
<
int
>
(
ToDataType
(
t
->
type
()));
PADDLE_ENFORCE
(
tmp
==
data_type
||
data_type
==
-
1
,
"DataType of Paddle Op %s must be the same."
,
Type
());
PADDLE_ENFORCE
(
tmp
==
data_type
||
data_type
==
-
1
,
"DataType of Paddle Op %s must be the same. Get %d != %d"
,
Type
(),
data_type
,
tmp
);
data_type
=
tmp
;
}
}
...
...
@@ -665,7 +682,8 @@ OpKernelType OperatorWithKernel::GetExpectedKernelType(
OpKernelType
OperatorWithKernel
::
GetKernelTypeForVar
(
const
std
::
string
&
var_name
,
const
Tensor
&
tensor
,
const
OpKernelType
&
expected_kernel_type
)
const
{
return
OpKernelType
(
expected_kernel_type
.
data_type_
,
tensor
.
place
());
return
OpKernelType
(
expected_kernel_type
.
data_type_
,
tensor
.
place
(),
tensor
.
layout
());
}
}
// namespace framework
...
...
paddle/fluid/framework/tensor.h
浏览文件 @
bfd42683
...
...
@@ -34,6 +34,28 @@ namespace framework {
class
LoDTensor
;
class
Tensor
{
#ifdef PADDLE_WITH_MKLDNN
public:
inline
mkldnn
::
memory
::
format
format
()
const
{
return
format_
;
}
inline
void
set_format
(
const
mkldnn
::
memory
::
format
format
)
{
format_
=
format
;
}
protected:
/**
* @brief the detail format of memory block which have layout as kMKLDNN
*
* @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C,
* nChw16c, etc. For a MKLDNN memory block, layout will be set as
* DataLayout::kMKLDNN meanwhile detail memory format will be kept in
* this field.
*/
mkldnn
::
memory
::
format
format_
=
mkldnn
::
memory
::
format
::
format_undef
;
#endif
public:
template
<
typename
T
,
size_t
D
,
int
MajorType
,
typename
IndexType
>
friend
struct
EigenTensor
;
...
...
@@ -195,8 +217,10 @@ class Tensor {
* N,C,H,W for respectively the batch size, the number of
* feature maps, the height.
*/
DataLayout
layout_
=
DataLayout
::
kNHWC
;
// Fix me: here just change the default layout to kNCHW
// it doesn't fix the real issue, i.e. feeder should set up tensor layout
// according to actual input data
DataLayout
layout_
=
DataLayout
::
kNCHW
;
/**
* @brief A PlaceHolder may be shared by more than one tensor.
...
...
paddle/fluid/framework/tensor_test.cc
浏览文件 @
bfd42683
...
...
@@ -209,7 +209,7 @@ TEST(Tensor, ReshapeToMatrix) {
TEST
(
Tensor
,
Layout
)
{
framework
::
Tensor
src
;
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kN
HWC
);
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kN
CHW
);
src
.
set_layout
(
framework
::
DataLayout
::
kAnyLayout
);
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kAnyLayout
);
}
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
bfd42683
# Add TRT tests
# This test is not stable
# See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828
#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
# DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
# SERIAL)
nv_library
(
tensorrt_converter
SRCS mul_op.cc conv2d_op.cc fc_op.cc
DEPS tensorrt_engine mul_op
)
...
...
@@ -16,3 +11,5 @@ nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine mul_op SERIAL
)
nv_test
(
test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine mul_op SERIAL
)
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine activation_op SERIAL
)
paddle/fluid/inference/tensorrt/convert/activation_op.cc
浏览文件 @
bfd42683
...
...
@@ -22,7 +22,8 @@ namespace tensorrt {
class
ReluOpConverter
:
public
OpConverter
{
public:
ReluOpConverter
()
{}
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
)
override
{
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
{
// Here the two nullptr looks strange, that's because the
// framework::OpDesc's constructor is strange.
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -33,7 +34,12 @@ class ReluOpConverter : public OpConverter {
nvinfer1
::
IActivationLayer
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Activation
,
*
const_cast
<
nvinfer1
::
ITensor
*>
(
input_tensor
),
nvinfer1
::
ActivationType
::
kRELU
);
engine_
->
SetITensor
(
op_desc
.
Output
(
"Out"
)[
0
],
layer
->
getOutput
(
0
));
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
if
(
test_mode
)
{
// the test framework can not determine which is the
// output, so place the declaration inside.
engine_
->
DeclareOutput
(
output_name
);
}
}
};
...
...
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
bfd42683
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
USE_OP
(
relu
);
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
void
Compare
(
const
std
::
string
op_type
,
float
input
,
float
expect
)
{
TEST
(
ReluOpConverter
,
main
)
{
framework
::
Scope
scope
;
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
// init fluid op and variable
auto
x_var
=
scope
.
Var
(
"X"
);
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
({
1
,
1
});
x_tensor
->
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
init
.
push_back
(
input
);
framework
::
TensorFromVector
(
init
,
ctx
,
x_tensor
);
auto
out_var
=
scope
.
Var
(
"Out"
);
auto
out_tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
out_tensor
->
Resize
({
1
,
1
});
out_tensor
->
mutable_data
<
float
>
(
place
);
framework
::
OpDesc
op_desc
;
op_desc
.
SetType
(
op_type
);
op_desc
.
SetInput
(
"X"
,
{
"X"
});
op_desc
.
SetOutput
(
"Out"
,
{
"Out"
});
auto
op
=
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
.
Proto
());
// run fluid op
op
->
Run
(
scope
,
place
);
// get fluid output
std
::
vector
<
float
>
out1
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out1
);
// init tensorrt op
cudaStream_t
stream
;
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream
));
TensorRTEngine
*
engine
=
new
TensorRTEngine
(
1
,
1
<<
10
,
&
stream
);
engine
->
InitNetwork
();
engine
->
DeclareInput
(
"X"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
// convert op
OpConverter
op_converter
;
op_converter
.
ConvertOp
(
*
op_desc
.
Proto
(),
engine
);
engine
->
DeclareOutput
(
"Out"
);
engine
->
FreezeNetwork
();
// convert LoDTensor to ITensor
size_t
size
=
x_tensor
->
memory_size
();
EngineIOConverter
::
ConvertInput
(
op_type
,
*
x_tensor
,
engine
->
buffer
(
"X"
).
buffer
,
size
,
&
stream
);
// run tensorrt Outp
engine
->
Execute
(
1
);
// convert ITensor to LoDTensor
EngineIOConverter
::
ConvertOutput
(
op_type
,
engine
->
buffer
(
"Out"
).
buffer
,
out_tensor
,
size
,
&
stream
);
// get tensorrt output
std
::
vector
<
float
>
out2
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out2
);
// compare
ASSERT_EQ
(
out1
[
0
],
out2
[
0
]);
ASSERT_EQ
(
out1
[
0
],
expect
);
delete
engine
;
cudaStreamDestroy
(
stream
);
}
TEST
(
OpConverter
,
ConvertRelu
)
{
Compare
(
"relu"
,
1
,
1
);
// relu(1) = 1
Compare
(
"relu"
,
-
5
,
0
);
// relu(-5) = 0
std
::
unordered_set
<
std
::
string
>
parameters
;
TRTConvertValidation
validator
(
10
,
parameters
,
scope
,
1000
);
validator
.
DeclInputVar
(
"relu-X"
,
nvinfer1
::
Dims2
(
10
,
6
));
validator
.
DeclOutputVar
(
"relu-Out"
,
nvinfer1
::
Dims2
(
10
,
6
));
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"relu"
);
desc
.
SetInput
(
"X"
,
{
"relu-X"
});
desc
.
SetOutput
(
"Out"
,
{
"relu-Out"
});
LOG
(
INFO
)
<<
"set OP"
;
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
10
);
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
USE_OP
(
activation
);
USE_OP
(
relu
);
paddle/fluid/operators/activation_op.cc
浏览文件 @
bfd42683
...
...
@@ -58,14 +58,16 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
const
framework
::
OperatorWithKernel
&
oper
,
const
std
::
string
&
name
)
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
auto
it
=
oper
.
Attrs
().
find
(
"use_mkldnn"
);
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
it
!=
oper
.
Attrs
().
end
()
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
name
)
->
type
()),
ctx
.
GetPlace
(),
layout
,
library
);
...
...
paddle/fluid/operators/batch_norm_op.cc
浏览文件 @
bfd42683
...
...
@@ -111,14 +111,16 @@ class BatchNormOp : public framework::OperatorWithKernel {
"Variance input should be of float type"
);
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout
,
library_
);
}
...
...
@@ -367,17 +369,18 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
}
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout
,
library_
);
layout
_
,
library_
);
}
};
...
...
paddle/fluid/operators/conv_op.cc
浏览文件 @
bfd42683
...
...
@@ -75,6 +75,11 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
ConvOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -84,6 +89,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
...
...
@@ -99,9 +105,6 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
"float16 can only be used when CUDNN is used"
);
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout
,
library
);
}
...
...
@@ -309,6 +312,10 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
ConvOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -318,12 +325,10 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
...
...
paddle/fluid/operators/fc_op.cc
浏览文件 @
bfd42683
...
...
@@ -43,7 +43,7 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
FCOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kMKLDNN
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
AnyLayout
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
MKLDNN
};
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
...
...
@@ -65,7 +65,7 @@ void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
FCOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kMKLDNN
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
AnyLayout
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
MKLDNN
};
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
...
...
paddle/fluid/operators/lrn_op.cc
浏览文件 @
bfd42683
...
...
@@ -124,16 +124,17 @@ namespace {
framework
::
OpKernelType
GetExpectedLRNKernel
(
const
framework
::
ExecutionContext
&
ctx
)
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
...
...
paddle/fluid/operators/pool_mkldnn_op.cc
浏览文件 @
bfd42683
...
...
@@ -24,10 +24,13 @@ using mkldnn::pooling_backward;
// Generate keys for storing/retriving primitives for this operator
// TODO(jczaja): Make hashing function more optimial
static
std
::
string
gethash
(
memory
::
dims
&
input_dims
,
std
::
string
&
pooling_type
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
string
suffix
)
{
auto
dims2str
=
[](
memory
::
dims
&
operand_dims
)
{
static
std
::
string
gethash
(
const
memory
::
dims
&
input_dims
,
const
std
::
string
&
pooling_type
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
string
&
suffix
)
{
auto
dims2str
=
[](
const
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
...
...
paddle/fluid/operators/pool_op.cc
浏览文件 @
bfd42683
...
...
@@ -83,6 +83,9 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
framework
::
OpKernelType
PoolOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -92,11 +95,10 @@ framework::OpKernelType PoolOp::GetExpectedKernelType(
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
...
...
@@ -112,6 +114,9 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
framework
::
OpKernelType
PoolOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -121,6 +126,7 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
...
...
@@ -129,8 +135,6 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
PADDLE_ENFORCE_EQ
(
library_
,
framework
::
LibraryType
::
kCUDNN
,
"float16 can only be used when CUDNN is used"
);
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout_
,
library_
);
}
...
...
paddle/fluid/operators/softmax_op.cc
浏览文件 @
bfd42683
...
...
@@ -49,6 +49,9 @@ class SoftmaxOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
// choose cudnn kernel if the runtime supported.
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -58,6 +61,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
...
...
@@ -68,9 +72,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
"float16 can only be used on GPU place"
);
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
framework
::
StringToDataLayout
(
data_format
),
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout_
,
library_
);
}
};
...
...
@@ -142,6 +144,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
// choose cudnn kernel if the runtime supported.
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
paddle/fluid/platform/mkldnn_helper.h
浏览文件 @
bfd42683
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <mkldnn.h>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
platform
{
...
...
@@ -86,5 +87,17 @@ inline mkldnn::memory::data_type MKLDNNGetDataType<float>() {
return
mkldnn
::
memory
::
f32
;
}
inline
void
Reorder
(
const
mkldnn
::
memory
&
src
,
const
mkldnn
::
memory
&
dst
)
{
auto
reorder_prim
=
mkldnn
::
reorder
(
src
,
dst
);
std
::
vector
<
mkldnn
::
primitive
>
pipeline
;
pipeline
.
push_back
(
reorder_prim
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
}
inline
mkldnn
::
memory
::
format
GetMKLDNNFormat
(
const
mkldnn
::
memory
memory
)
{
return
static_cast
<
mkldnn
::
memory
::
format
>
(
memory
.
get_primitive_desc
().
desc
().
data
.
format
);
}
}
// namespace platform
}
// namespace paddle
python/paddle/fluid/executor.py
浏览文件 @
bfd42683
...
...
@@ -170,6 +170,8 @@ def get_program_cache_key(feed, fetch_list):
return
var
.
desc
.
name
()
elif
isinstance
(
var
,
str
):
return
var
elif
isinstance
(
var
,
basestring
):
return
str
(
var
)
else
:
raise
TypeError
(
str
(
var
)
+
" should be Variable or str"
)
...
...
python/paddle/fluid/framework.py
浏览文件 @
bfd42683
...
...
@@ -72,6 +72,8 @@ def convert_np_dtype_to_dtype_(np_dtype):
return
core
.
VarDesc
.
VarType
.
INT64
elif
dtype
==
np
.
bool
:
return
core
.
VarDesc
.
VarType
.
BOOL
elif
dtype
==
np
.
uint16
:
return
core
.
VarDesc
.
VarType
.
INT16
elif
dtype
==
np
.
uint8
:
return
core
.
VarDesc
.
VarType
.
UINT8
else
:
...
...
@@ -368,6 +370,13 @@ class Operator(object):
Block. Users can use the build in instructions to describe their neural
network.
"""
OP_WITHOUT_KERNEL_SET
=
{
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'ncclInit'
,
'channel_create'
,
'channel_close'
,
'channel_send'
,
'channel_recv'
,
'select'
}
def
__init__
(
self
,
block
,
...
...
@@ -504,17 +513,13 @@ class Operator(object):
else
:
self
.
desc
.
set_attr
(
attr_name
,
self
.
attrs
[
attr_name
])
self
.
desc
.
check_attrs
()
no_kernel_op_set
=
{
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'ncclInit'
,
'channel_create'
,
'channel_close'
,
'channel_send'
,
'channel_recv'
,
'select'
,
'gen_nccl_id'
}
if
type
not
in
no_kernel_op_set
:
if
self
.
has_kernel
(
type
):
self
.
desc
.
infer_var_type
(
self
.
block
.
desc
)
self
.
desc
.
infer_shape
(
self
.
block
.
desc
)
def
has_kernel
(
self
,
op_type
):
return
op_type
not
in
self
.
OP_WITHOUT_KERNEL_SET
def
to_string
(
self
,
throw_on_error
):
"""
To debug string.
...
...
@@ -742,7 +747,9 @@ class Block(object):
def
var
(
self
,
name
):
if
not
isinstance
(
name
,
basestring
):
raise
TypeError
()
raise
TypeError
(
"var require string as parameter, but get %s instead."
%
(
type
(
name
)))
v
=
self
.
vars
.
get
(
name
,
None
)
if
v
is
None
:
raise
ValueError
(
"var %s not in this block"
%
name
)
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
bfd42683
...
...
@@ -434,7 +434,7 @@ def open_files(filenames,
shapes
,
lod_levels
,
dtypes
,
thread_num
,
thread_num
=
1
,
buffer_size
=
None
,
pass_num
=
1
,
for_parallel
=
True
):
...
...
python/paddle/fluid/tests/unittests/benchmark.py
0 → 100644
浏览文件 @
bfd42683
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
import
unittest
import
time
import
itertools
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
from
op_test
import
OpTest
class
BenchmarkSuite
(
OpTest
):
def
timeit_function
(
self
,
callback
,
iters
,
*
args
,
**
kwargs
):
assert
iters
!=
0
,
"Iters should >= 1"
start
=
time
.
time
()
for
i
in
range
(
iters
):
callback
(
*
args
,
**
kwargs
)
elapse
=
time
.
time
()
-
start
return
elapse
/
iters
def
_assert_cpu_gpu_same
(
self
,
cpu_outs
,
gpu_outs
,
fetch_list
,
atol
):
for
item_cpu_out
,
item_gpu_out
,
variable
in
zip
(
cpu_outs
,
gpu_outs
,
fetch_list
):
# the cpu version is baseline, expect gpu version keep same with cpu version.
expect
=
item_cpu_out
expect_t
=
np
.
array
(
item_cpu_out
)
actual
=
item_gpu_out
actual_t
=
np
.
array
(
item_gpu_out
)
var_name
=
variable
if
isinstance
(
variable
,
basestring
)
else
variable
.
name
self
.
assertTrue
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
),
"Output ("
+
var_name
+
") has diff"
+
str
(
actual_t
)
+
"
\n
"
+
str
(
expect_t
))
self
.
assertListEqual
(
actual
.
lod
(),
expect
.
lod
(),
"Output ("
+
var_name
+
") has different lod"
)
def
_get_input_names
(
self
):
inputs
=
[]
for
name
,
value
in
self
.
inputs
.
iteritems
():
if
isinstance
(
value
,
list
):
inputs
.
extend
([
sub_name
for
sub_name
,
_
in
value
])
inputs
.
append
(
name
)
return
inputs
def
_get_output_names
(
self
):
outputs
=
[]
for
var_name
,
var
in
self
.
outputs
.
iteritems
():
if
isinstance
(
var
,
list
):
for
sub_var_name
,
sub_var
in
var
:
outputs
.
append
(
sub_var_name
)
else
:
outputs
.
append
(
var_name
)
if
len
(
outputs
)
==
0
:
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
outputs
.
append
(
str
(
out_name
))
return
outputs
def
check_output_stability
(
self
,
atol
=
1e-8
):
places
=
self
.
_get_places
()
if
len
(
places
)
<
2
:
return
cpu_outs
,
fetch_list
=
self
.
_calc_output
(
places
[
0
])
gpu_outs
,
_
=
self
.
_calc_output
(
places
[
1
])
self
.
_assert_cpu_gpu_same
(
cpu_outs
,
gpu_outs
,
fetch_list
,
atol
)
def
timeit_output_with_place
(
self
,
place
,
iters
):
return
self
.
timeit_function
(
self
.
calc_output
,
iters
,
place
)
def
timeit_output
(
self
,
iters
=
100
):
places
=
self
.
_get_places
()
elapses
=
[]
for
place
in
places
:
elapses
.
append
(
self
.
timeit_output_with_place
(
place
,
iters
))
for
place
,
elapse
in
zip
(
places
,
elapses
):
print
(
"One pass of ({2}_op) at {0} cost {1}"
.
format
(
str
(
place
),
elapse
,
self
.
op_type
))
def
timeit_grad_with_place
(
self
,
place
,
iters
=
100
):
inputs_to_check
=
self
.
_get_input_names
()
output_names
=
self
.
_get_output_names
()
return
self
.
timeit_function
(
self
.
_get_gradient
,
iters
,
inputs_to_check
,
place
,
output_names
,
no_grad_set
=
None
)
def
timeit_grad
(
self
,
iters
=
100
):
places
=
self
.
_get_places
()
elapses
=
[]
for
place
in
places
:
elapses
.
append
(
self
.
timeit_grad_with_place
(
place
,
iters
))
for
place
,
elapse
in
zip
(
places
,
elapses
):
print
(
"One pass of ({2}_grad_op) at {0} cost {1}"
.
format
(
str
(
place
),
elapse
,
self
.
op_type
))
python/paddle/fluid/tests/unittests/benchmark_sum_op.py
0 → 100644
浏览文件 @
bfd42683
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
from
benchmark
import
BenchmarkSuite
from
op_test
import
OpTest
# This is a demo op test case for operator benchmarking and high resolution number stability alignment.
class
TestSumOp
(
BenchmarkSuite
):
def
setUp
(
self
):
self
.
op_type
=
"sum"
self
.
customize_testcase
()
self
.
customize_fetch_list
()
def
customize_fetch_list
(
self
):
"""
customize fetch list, configure the wanted variables.
>>> self.fetch_list = ["Out"]
"""
self
.
fetch_list
=
[
"Out"
]
# pass
def
customize_testcase
(
self
):
# a test case
x0
=
np
.
random
.
random
((
300
,
400
)).
astype
(
'float32'
)
x1
=
np
.
random
.
random
((
300
,
400
)).
astype
(
'float32'
)
x2
=
np
.
random
.
random
((
300
,
400
)).
astype
(
'float32'
)
# NOTE: if the output is empty, then it will autofilled by benchmarkSuite.
# only the output dtype is used, the shape, lod and data is computed from input.
self
.
inputs
=
{
"X"
:
[(
"x0"
,
x0
),
(
"x1"
,
x1
),
(
"x2"
,
x2
)]}
self
.
outputs
=
{
"Out"
:
x0
+
x1
+
x2
}
def
test_check_output
(
self
):
"""
compare the output with customized output. In this case,
you should set the correct output by hands.
>>> self.outputs = {"Out": x0 + x1 + x2}
"""
self
.
check_output
(
atol
=
1e-8
)
def
test_output_stability
(
self
):
# compare the cpu gpu output in high resolution.
self
.
check_output_stability
()
def
test_timeit_output
(
self
):
"""
perf the op, time cost will be averged in iters.
output example
>>> One pass of (sum_op) at CPUPlace cost 0.000461330413818
>>> One pass of (sum_op) at CUDAPlace(0) cost 0.000556070804596
"""
self
.
timeit_output
(
iters
=
100
)
def
test_timeit_grad
(
self
):
"""
perf the op gradient, time cost will be averged in iters.
output example
>>> One pass of (sum_grad_op) at CPUPlace cost 0.00279935121536
>>> One pass of (sum_grad_op) at CUDAPlace(0) cost 0.00500632047653
"""
self
.
timeit_grad
(
iters
=
100
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
bfd42683
...
...
@@ -15,13 +15,17 @@
import
unittest
import
numpy
as
np
import
random
import
time
import
itertools
import
paddle.fluid.core
as
core
import
collections
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.op
import
Operator
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.framework
import
Program
,
OpProtoHolder
from
paddle.fluid.framework
import
Program
,
OpProtoHolder
,
Variable
from
testsuite
import
create_op
,
set_input
,
append_input_output
,
append_loss_ops
def
randomize_probability
(
batch_size
,
class_num
,
dtype
=
'float32'
):
...
...
@@ -33,73 +37,6 @@ def randomize_probability(batch_size, class_num, dtype='float32'):
return
prob
def
create_op
(
scope
,
op_type
,
inputs
,
outputs
,
attrs
):
kwargs
=
dict
()
op_maker
=
core
.
op_proto_and_checker_maker
op_role_attr_name
=
op_maker
.
kOpRoleAttrName
()
if
op_role_attr_name
not
in
attrs
:
attrs
[
op_role_attr_name
]
=
int
(
op_maker
.
OpRole
.
Forward
)
def
__create_var__
(
name
,
var_name
):
scope
.
var
(
var_name
).
get_tensor
()
kwargs
[
name
].
append
(
var_name
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op_type
):
if
in_name
in
inputs
:
kwargs
[
in_name
]
=
[]
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
in_name
,
sub_in_name
)
else
:
__create_var__
(
in_name
,
in_name
)
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
op_type
):
if
out_name
in
outputs
:
kwargs
[
out_name
]
=
[]
if
out_dup
:
sub_out
=
outputs
[
out_name
]
for
item
in
sub_out
:
sub_out_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
out_name
,
sub_out_name
)
else
:
__create_var__
(
out_name
,
out_name
)
for
attr_name
in
Operator
.
get_op_attr_names
(
op_type
):
if
attr_name
in
attrs
:
kwargs
[
attr_name
]
=
attrs
[
attr_name
]
return
Operator
(
op_type
,
**
kwargs
)
def
set_input
(
scope
,
op
,
inputs
,
place
):
def
__set_input__
(
var_name
,
var
):
if
isinstance
(
var
,
tuple
)
or
isinstance
(
var
,
np
.
ndarray
):
tensor
=
scope
.
find_var
(
var_name
).
get_tensor
()
if
isinstance
(
var
,
tuple
):
tensor
.
set_lod
(
var
[
1
])
var
=
var
[
0
]
tensor
.
set_dims
(
var
.
shape
)
tensor
.
set
(
var
,
place
)
elif
isinstance
(
var
,
float
):
scope
.
find_var
(
var_name
).
set_float
(
var
)
elif
isinstance
(
var
,
int
):
scope
.
find_var
(
var_name
).
set_int
(
var
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op
.
type
()):
if
in_name
in
inputs
:
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
sub_in_val
=
item
[
0
],
item
[
1
]
__set_input__
(
sub_in_name
,
sub_in_val
)
else
:
__set_input__
(
in_name
,
inputs
[
in_name
])
def
get_numeric_gradient
(
place
,
scope
,
op
,
...
...
@@ -173,54 +110,15 @@ def get_numeric_gradient(place,
return
gradient_flat
.
reshape
(
tensor_to_check
.
get_dims
())
def
append_input_output
(
block
,
op_proto
,
np_list
,
is_input
):
'''Insert VarDesc and generate Python variable instance'''
proto_list
=
op_proto
.
inputs
if
is_input
else
op_proto
.
outputs
def
create_var
(
block
,
name
,
np_list
,
var_proto
):
if
name
not
in
np_list
:
assert
var_proto
.
intermediate
,
"{} not found"
.
format
(
name
)
shape
=
None
lod_level
=
None
else
:
np_value
=
np_list
[
name
]
if
isinstance
(
np_value
,
tuple
):
shape
=
list
(
np_value
[
0
].
shape
)
lod_level
=
len
(
np_value
[
1
])
else
:
shape
=
list
(
np_value
.
shape
)
lod_level
=
0
return
block
.
create_var
(
dtype
=
"float32"
,
shape
=
shape
,
lod_level
=
lod_level
,
name
=
name
)
var_dict
=
{}
for
var_proto
in
proto_list
:
var_name
=
str
(
var_proto
.
name
)
if
is_input
:
if
(
var_name
not
in
np_list
)
and
var_proto
.
dispensable
:
continue
assert
(
var_name
in
np_list
)
or
(
var_proto
.
dispensable
),
\
"Missing {} as input"
.
format
(
var_name
)
if
var_proto
.
duplicable
:
assert
isinstance
(
np_list
[
var_name
],
list
),
\
"Duplicable {} should be set as list"
.
format
(
var_name
)
var_list
=
[]
for
(
name
,
np_value
)
in
np_list
[
var_name
]:
var_list
.
append
(
create_var
(
block
,
name
,
{
name
:
np_value
},
var_proto
))
var_dict
[
var_name
]
=
var_list
else
:
var_dict
[
var_name
]
=
create_var
(
block
,
var_name
,
np_list
,
var_proto
)
return
var_dict
class
OpTest
(
unittest
.
TestCase
):
@
classmethod
def
setUpClass
(
cls
):
'''Fix random seeds to remove randomness from tests'''
cls
.
_np_rand_state
=
np
.
random
.
get_state
()
cls
.
_py_rand_state
=
random
.
getstate
()
cls
.
call_once
=
False
cls
.
dtype
=
"float32"
cls
.
outputs
=
{}
np
.
random
.
seed
(
123
)
random
.
seed
(
124
)
...
...
@@ -231,6 +129,31 @@ class OpTest(unittest.TestCase):
np
.
random
.
set_state
(
cls
.
_np_rand_state
)
random
.
setstate
(
cls
.
_py_rand_state
)
def
try_call_once
(
self
,
data_type
):
if
not
self
.
call_once
:
self
.
call_once
=
True
self
.
dtype
=
data_type
def
infer_dtype_from_inputs_outputs
(
self
,
inputs
,
outputs
):
def
infer_dtype
(
numpy_dict
):
assert
isinstance
(
numpy_dict
,
dict
),
"self.inputs, self.outputs must be numpy_dict"
for
var_name
,
var_value
in
numpy_dict
.
iteritems
():
if
isinstance
(
var_value
,
(
np
.
ndarray
,
np
.
generic
)):
self
.
try_call_once
(
var_value
.
dtype
)
elif
isinstance
(
var_value
,
(
list
,
tuple
)):
# the case of self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
if
len
(
var_value
)
>
1
and
isinstance
(
var_value
[
1
],
(
np
.
ndarray
,
np
.
generic
)):
instance
=
var_value
[
1
]
self
.
try_call_once
(
instance
[
1
].
dtype
)
else
:
self
.
try_call_once
(
"float32"
)
infer_dtype
(
inputs
)
infer_dtype
(
outputs
)
def
feed_var
(
self
,
input_vars
,
place
):
feed_map
=
{}
for
var_name
in
input_vars
:
...
...
@@ -254,18 +177,14 @@ class OpTest(unittest.TestCase):
return
feed_map
def
calc_output
(
self
,
place
):
outs
,
_
=
self
.
_calc_output
(
place
)
return
outs
def
_calc_output
(
self
,
place
):
def
_append_ops
(
self
,
block
):
op_proto
=
OpProtoHolder
.
instance
().
get_op_proto
(
self
.
op_type
)
program
=
Program
(
)
block
=
program
.
global_block
()
inputs
=
append_input_output
(
block
,
op_proto
,
self
.
inputs
,
True
)
outputs
=
append_input_output
(
block
,
op_proto
,
self
.
outputs
,
Fals
e
)
"infer datatype from inputs and outputs for this test case"
self
.
infer_dtype_from_inputs_outputs
(
self
.
inputs
,
self
.
outputs
)
inputs
=
append_input_output
(
block
,
op_proto
,
self
.
inputs
,
True
,
self
.
dtype
)
outputs
=
append_input_output
(
block
,
op_proto
,
self
.
outputs
,
False
,
self
.
dtyp
e
)
op
=
block
.
append_op
(
type
=
self
.
op_type
,
inputs
=
inputs
,
...
...
@@ -275,22 +194,68 @@ class OpTest(unittest.TestCase):
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
fetch_list
=
[]
for
var_name
,
var
in
outputs
.
iteritems
():
if
var_name
in
self
.
outputs
:
def
_get_io_vars
(
self
,
block
,
numpy_inputs
):
inputs
=
{}
for
name
,
value
in
numpy_inputs
.
iteritems
():
if
isinstance
(
value
,
list
):
var_list
=
[
block
.
var
(
sub_name
)
for
sub_name
,
sub_value
in
value
]
inputs
[
name
]
=
var_list
else
:
inputs
[
name
]
=
block
.
var
(
name
)
return
inputs
def
_get_inputs
(
self
,
block
):
return
self
.
_get_io_vars
(
block
,
self
.
inputs
)
def
_get_outputs
(
self
,
block
):
return
self
.
_get_io_vars
(
block
,
self
.
outputs
)
def
calc_output
(
self
,
place
):
outs
,
_
=
self
.
_calc_output
(
place
)
return
outs
def
_calc_output
(
self
,
place
,
parallel
=
False
):
program
=
Program
()
block
=
program
.
global_block
()
self
.
_append_ops
(
block
)
inputs
=
self
.
_get_inputs
(
block
)
outputs
=
self
.
_get_outputs
(
block
)
feed_map
=
self
.
feed_var
(
inputs
,
place
)
if
parallel
:
use_cuda
=
False
if
isinstance
(
place
,
fluid
.
CUDAPlace
(
0
)):
use_cuda
=
True
executor
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
loss_name
=
loss
.
name
,
main_program
=
program
)
else
:
executor
=
Executor
(
place
)
fetch_list
=
getattr
(
self
,
"fetch_list"
,
[])
# if the fetch_list is customized by user, we use it directly.
# if not, fill the fetch_list by the user configured outputs in test.
if
len
(
fetch_list
)
==
0
:
for
var_name
,
var
in
outputs
.
iteritems
():
if
isinstance
(
var
,
list
):
for
v
in
var
:
fetch_list
.
append
(
v
)
else
:
fetch_list
.
append
(
var
)
feed_map
=
self
.
feed_var
(
inputs
,
place
)
exe
=
Executor
(
place
)
outs
=
exe
.
run
(
program
,
feed
=
feed_map
,
fetch_list
=
fetch_list
,
return_numpy
=
False
)
# if the fetch_list still empty, fill the fetch_list by the operator output.
if
len
(
fetch_list
)
==
0
:
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
fetch_list
.
append
(
str
(
out_name
))
# fetch_list = map(block.var, fetch_list)
if
not
isinstance
(
fetch_list
[
0
],
Variable
):
fetch_list
=
map
(
block
.
var
,
fetch_list
)
outs
=
executor
.
run
(
program
,
feed
=
feed_map
,
fetch_list
=
fetch_list
,
return_numpy
=
False
)
return
outs
,
fetch_list
def
check_output_with_place
(
self
,
place
,
atol
):
...
...
@@ -346,17 +311,19 @@ class OpTest(unittest.TestCase):
"Output ("
+
out_name
+
") has different lod at "
+
str
(
place
))
def
check_output
(
self
,
atol
=
1e-5
):
places
=
[
core
.
CPUPlace
()]
def
_get_places
(
self
):
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
):
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
def
check_output
(
self
,
atol
=
1e-5
):
places
=
self
.
_get_places
()
for
place
in
places
:
self
.
check_output_with_place
(
place
,
atol
)
def
check_output_customized
(
self
,
checker
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
):
places
.
append
(
core
.
CUDAPlace
(
0
))
places
=
self
.
_get_places
()
for
place
in
places
:
outs
=
self
.
calc_output
(
place
)
outs
=
[
np
.
array
(
out
)
for
out
in
outs
]
...
...
@@ -389,9 +356,7 @@ class OpTest(unittest.TestCase):
in_place
=
False
,
max_relative_error
=
0.005
,
user_defined_grads
=
None
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
):
places
.
append
(
core
.
CUDAPlace
(
0
))
places
=
self
.
_get_places
()
for
place
in
places
:
self
.
check_grad_with_place
(
place
,
inputs_to_check
,
output_names
,
no_grad_set
,
numeric_grad_delta
,
...
...
@@ -438,35 +403,6 @@ class OpTest(unittest.TestCase):
max_relative_error
,
"Gradient Check On %s"
%
str
(
place
))
@
staticmethod
def
_create_var_descs_
(
block
,
var_dict
):
# FIXME: Try unify with `append_input_output`
for
param_name
in
var_dict
:
var
=
var_dict
[
param_name
]
if
not
isinstance
(
var
,
list
)
and
not
isinstance
(
var
,
tuple
):
var
=
[(
param_name
,
var
,
None
)]
if
not
isinstance
(
var
[
0
],
list
)
and
not
isinstance
(
var
[
0
],
tuple
):
var
=
[(
param_name
,
var
[
0
],
var
[
1
])]
for
i
,
item
in
enumerate
(
var
):
if
not
isinstance
(
item
[
0
],
basestring
):
item
=
[[
param_name
]
+
list
(
item
)]
if
len
(
item
)
==
2
:
if
isinstance
(
item
[
1
],
tuple
):
var
[
i
]
=
[
item
[
0
],
item
[
1
][
0
],
item
[
1
][
1
]]
else
:
# only set var name and value, set lod to None
var
[
i
]
=
list
(
item
)
+
[
None
]
var_descs
=
[(
block
.
create_var
(
name
=
name
,
shape
=
each
.
shape
,
dtype
=
each
.
dtype
),
each
,
lod
)
for
name
,
each
,
lod
in
var
]
yield
param_name
,
var_descs
@
staticmethod
def
_merge_list
(
iterable
):
return
reduce
(
lambda
a
,
b
:
list
(
a
)
+
list
(
b
),
iterable
,
[])
@
staticmethod
def
_numpy_to_lod_tensor
(
np_value
,
lod
,
place
):
tensor
=
core
.
LoDTensor
()
...
...
@@ -497,83 +433,31 @@ class OpTest(unittest.TestCase):
input
.
dtype
=
np
.
uint16
return
input
def
_get_gradient
(
self
,
input_to_check
,
place
,
output_names
,
no_grad_set
):
def
_get_gradient
(
self
,
input_to_check
,
place
,
output_names
,
no_grad_set
,
parallel
=
False
):
prog
=
Program
()
block
=
prog
.
global_block
()
inputs_with_np
=
{
key
:
value
for
(
key
,
value
)
in
OpTest
.
_create_var_descs_
(
block
,
getattr
(
self
,
'inputs'
,
{}))
}
outputs_with_np
=
{
key
:
val
for
(
key
,
val
)
in
OpTest
.
_create_var_descs_
(
block
,
getattr
(
self
,
'outputs'
,
{}))
}
inputs
=
{
k
:
[
item
[
0
]
for
item
in
inputs_with_np
[
k
]]
for
k
in
inputs_with_np
}
outputs
=
{
k
:
[
item
[
0
]
for
item
in
outputs_with_np
[
k
]]
for
k
in
outputs_with_np
}
op
=
block
.
append_op
(
type
=
self
.
op_type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
getattr
(
self
,
'attrs'
,
{}))
# infer variable type and infer shape in compile-time
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
mean_inputs
=
map
(
block
.
var
,
output_names
)
if
len
(
mean_inputs
)
==
1
:
loss
=
block
.
create_var
(
dtype
=
mean_inputs
[
0
].
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
mean_inputs
},
outputs
=
{
"Out"
:
loss
},
type
=
'mean'
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
else
:
avg_sum
=
[]
for
cur_loss
in
mean_inputs
:
cur_avg_loss
=
block
.
create_var
(
dtype
=
cur_loss
.
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
[
cur_loss
]},
outputs
=
{
"Out"
:
[
cur_avg_loss
]},
type
=
"mean"
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
avg_sum
.
append
(
cur_avg_loss
)
loss_sum
=
block
.
create_var
(
dtype
=
avg_sum
[
0
].
dtype
,
shape
=
[
1
])
op_sum
=
block
.
append_op
(
inputs
=
{
"X"
:
avg_sum
},
outputs
=
{
"Out"
:
loss_sum
},
type
=
'sum'
)
op_sum
.
desc
.
infer_var_type
(
block
.
desc
)
op_sum
.
desc
.
infer_shape
(
block
.
desc
)
loss
=
block
.
create_var
(
dtype
=
loss_sum
.
dtype
,
shape
=
[
1
])
op_loss
=
block
.
append_op
(
inputs
=
{
"X"
:
loss_sum
},
outputs
=
{
"Out"
:
loss
},
type
=
'scale'
,
attrs
=
{
'scale'
:
1.0
/
float
(
len
(
avg_sum
))})
op_loss
.
desc
.
infer_var_type
(
block
.
desc
)
op_loss
.
desc
.
infer_shape
(
block
.
desc
)
self
.
_append_ops
(
block
)
loss
=
append_loss_ops
(
block
,
output_names
)
param_grad_list
=
append_backward
(
loss
=
loss
,
parameter_list
=
input_to_check
,
no_grad_set
=
no_grad_set
)
feed_dict
=
{
item
[
0
].
name
:
OpTest
.
_numpy_to_lod_tensor
(
item
[
1
],
item
[
2
],
place
)
for
p_name
in
inputs_with_np
for
item
in
inputs_with_np
[
p_name
]
}
inputs
=
self
.
_get_inputs
(
block
)
feed_dict
=
self
.
feed_var
(
inputs
,
place
)
fetch_list
=
[
g
for
p
,
g
in
param_grad_list
]
executor
=
Executor
(
place
)
if
parallel
:
use_cuda
=
False
if
isinstance
(
place
,
fluid
.
CUDAPlace
(
0
)):
use_cuda
=
True
executor
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
loss_name
=
loss
.
name
,
main_program
=
program
)
else
:
executor
=
Executor
(
place
)
return
map
(
np
.
array
,
executor
.
run
(
prog
,
feed_dict
,
fetch_list
,
return_numpy
=
False
))
python/paddle/fluid/tests/unittests/test_lstm_op.py
浏览文件 @
bfd42683
...
...
@@ -194,107 +194,104 @@ class TestLstmOp(OpTest):
[
'Input'
,
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
)
class
TestLstmOpHasInitial
(
TestLstmOp
):
def
set_argument
(
self
):
self
.
lod
=
[[
0
,
2
,
5
,
7
]]
self
.
D
=
16
self
.
act_gate
=
'sigmoid'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
has_initial_state
=
True
self
.
is_reverse
=
True
self
.
use_peepholes
=
True
def
test_check_grad
(
self
):
# TODO(qingqing) remove folowing lines after the check_grad is refined.
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
,
'C0'
],
[
'Hidden'
],
max_relative_error
=
5e-4
)
def
test_check_grad_ingore_bias
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Bias'
))
def
test_check_grad_ingore_weight
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Weight'
))
def
test_check_grad_ingore_input
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Input'
))
def
test_check_grad_ingore_h0
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'C0'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'H0'
))
def
test_check_grad_ingore_c0
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'C0'
))
class
TestLstmOpRerverse
(
TestLstmOp
):
def
set_argument
(
self
):
self
.
lod
=
[[
0
,
2
,
5
,
7
]]
self
.
D
=
16
self
.
act_gate
=
'sigmoid'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
has_initial_state
=
False
self
.
is_reverse
=
True
self
.
use_peepholes
=
True
class
TestLstmOpNotUsePeepholes
(
TestLstmOp
):
def
set_argument
(
self
):
self
.
lod
=
[[
0
,
2
,
5
,
7
]]
self
.
D
=
16
self
.
act_gate
=
'sigmoid'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
has_initial_state
=
False
self
.
is_reverse
=
True
self
.
use_peepholes
=
False
# class TestLstmOpHasInitial(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.D = 16
# self.act_gate = 'sigmoid'
# self.act_cell = 'tanh'
# self.act_cand = 'tanh'
# self.has_initial_state = True
# self.is_reverse = True
# self.use_peepholes = True
# def test_check_grad(self):
# # TODO(qingqing) remove folowing lines after the check_grad is refined.
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight', 'Bias', 'H0', 'C0'], ['Hidden'],
# max_relative_error=5e-4)
# def test_check_grad_ingore_bias(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('Bias'))
# def test_check_grad_ingore_weight(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Bias'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('Weight'))
# def test_check_grad_ingore_input(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Weight', 'Bias'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('Input'))
# def test_check_grad_ingore_h0(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight', 'Bias', 'C0'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('H0'))
# def test_check_grad_ingore_c0(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight', 'Bias', 'H0'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('C0'))
# class TestLstmOpRerverse(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.D = 16
# self.act_gate = 'sigmoid'
# self.act_cell = 'tanh'
# self.act_cand = 'tanh'
# self.has_initial_state = False
# self.is_reverse = True
# self.use_peepholes = True
# class TestLstmOpNotUsePeepholes(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.D = 16
# self.act_gate = 'sigmoid'
# self.act_cell = 'tanh'
# self.act_cand = 'tanh'
# self.has_initial_state = False
# self.is_reverse = True
# self.use_peepholes = False
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/testsuite.py
0 → 100644
浏览文件 @
bfd42683
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
def
as_lodtensor
(
np_array
,
lod
,
place
):
tensor
=
core
.
LoDTensor
()
tensor
.
set
(
np_value
,
place
)
if
lod
is
not
None
:
tensor
.
set_lod
(
lod
)
return
tensor
def
create_op
(
scope
,
op_type
,
inputs
,
outputs
,
attrs
):
kwargs
=
dict
()
op_maker
=
core
.
op_proto_and_checker_maker
op_role_attr_name
=
op_maker
.
kOpRoleAttrName
()
if
op_role_attr_name
not
in
attrs
:
attrs
[
op_role_attr_name
]
=
int
(
op_maker
.
OpRole
.
Forward
)
def
__create_var__
(
name
,
var_name
):
scope
.
var
(
var_name
).
get_tensor
()
kwargs
[
name
].
append
(
var_name
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op_type
):
if
in_name
in
inputs
:
kwargs
[
in_name
]
=
[]
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
in_name
,
sub_in_name
)
else
:
__create_var__
(
in_name
,
in_name
)
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
op_type
):
if
out_name
in
outputs
:
kwargs
[
out_name
]
=
[]
if
out_dup
:
sub_out
=
outputs
[
out_name
]
for
item
in
sub_out
:
sub_out_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
out_name
,
sub_out_name
)
else
:
__create_var__
(
out_name
,
out_name
)
for
attr_name
in
Operator
.
get_op_attr_names
(
op_type
):
if
attr_name
in
attrs
:
kwargs
[
attr_name
]
=
attrs
[
attr_name
]
return
Operator
(
op_type
,
**
kwargs
)
def
set_input
(
scope
,
op
,
inputs
,
place
):
def
__set_input__
(
var_name
,
var
):
if
isinstance
(
var
,
tuple
)
or
isinstance
(
var
,
np
.
ndarray
):
tensor
=
scope
.
find_var
(
var_name
).
get_tensor
()
if
isinstance
(
var
,
tuple
):
tensor
.
set_lod
(
var
[
1
])
var
=
var
[
0
]
tensor
.
set_dims
(
var
.
shape
)
tensor
.
set
(
var
,
place
)
elif
isinstance
(
var
,
float
):
scope
.
find_var
(
var_name
).
set_float
(
var
)
elif
isinstance
(
var
,
int
):
scope
.
find_var
(
var_name
).
set_int
(
var
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op
.
type
()):
if
in_name
in
inputs
:
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
sub_in_val
=
item
[
0
],
item
[
1
]
__set_input__
(
sub_in_name
,
sub_in_val
)
else
:
__set_input__
(
in_name
,
inputs
[
in_name
])
def
append_input_output
(
block
,
op_proto
,
np_list
,
is_input
,
dtype
):
'''Insert VarDesc and generate Python variable instance'''
proto_list
=
op_proto
.
inputs
if
is_input
else
op_proto
.
outputs
def
create_var
(
block
,
name
,
np_list
,
var_proto
):
dtype
=
None
shape
=
None
lod_level
=
None
if
name
not
in
np_list
:
assert
var_proto
.
intermediate
,
"{} not found"
.
format
(
name
)
else
:
np_value
=
np_list
[
name
]
if
isinstance
(
np_value
,
tuple
):
dtype
=
np_value
[
0
].
dtype
# output shape, lod should be infered from input.
if
is_input
:
shape
=
list
(
np_value
[
0
].
shape
)
lod_level
=
len
(
np_value
[
1
])
else
:
dtype
=
np_value
.
dtype
if
is_input
:
shape
=
list
(
np_value
.
shape
)
lod_level
=
0
return
block
.
create_var
(
dtype
=
dtype
,
shape
=
shape
,
lod_level
=
lod_level
,
name
=
name
)
var_dict
=
{}
for
var_proto
in
proto_list
:
var_name
=
str
(
var_proto
.
name
)
if
is_input
:
if
(
var_name
not
in
np_list
)
and
var_proto
.
dispensable
:
continue
assert
(
var_name
in
np_list
)
or
(
var_proto
.
dispensable
),
\
"Missing {} as input"
.
format
(
var_name
)
if
var_proto
.
duplicable
:
assert
isinstance
(
np_list
[
var_name
],
list
),
\
"Duplicable {} should be set as list"
.
format
(
var_name
)
var_list
=
[]
for
(
name
,
np_value
)
in
np_list
[
var_name
]:
var_list
.
append
(
create_var
(
block
,
name
,
{
name
:
np_value
},
var_proto
))
var_dict
[
var_name
]
=
var_list
else
:
var_dict
[
var_name
]
=
create_var
(
block
,
var_name
,
np_list
,
var_proto
)
return
var_dict
def
append_loss_ops
(
block
,
output_names
):
mean_inputs
=
map
(
block
.
var
,
output_names
)
# for item in mean_inputs:
# print(item)
# print("Item", item.dtype)
if
len
(
mean_inputs
)
==
1
:
loss
=
block
.
create_var
(
dtype
=
mean_inputs
[
0
].
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
mean_inputs
},
outputs
=
{
"Out"
:
loss
},
type
=
'mean'
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
else
:
avg_sum
=
[]
for
cur_loss
in
mean_inputs
:
cur_avg_loss
=
block
.
create_var
(
dtype
=
cur_loss
.
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
[
cur_loss
]},
outputs
=
{
"Out"
:
[
cur_avg_loss
]},
type
=
"mean"
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
avg_sum
.
append
(
cur_avg_loss
)
loss_sum
=
block
.
create_var
(
dtype
=
avg_sum
[
0
].
dtype
,
shape
=
[
1
])
op_sum
=
block
.
append_op
(
inputs
=
{
"X"
:
avg_sum
},
outputs
=
{
"Out"
:
loss_sum
},
type
=
'sum'
)
op_sum
.
desc
.
infer_var_type
(
block
.
desc
)
op_sum
.
desc
.
infer_shape
(
block
.
desc
)
loss
=
block
.
create_var
(
dtype
=
loss_sum
.
dtype
,
shape
=
[
1
])
op_loss
=
block
.
append_op
(
inputs
=
{
"X"
:
loss_sum
},
outputs
=
{
"Out"
:
loss
},
type
=
'scale'
,
attrs
=
{
'scale'
:
1.0
/
float
(
len
(
avg_sum
))})
op_loss
.
desc
.
infer_var_type
(
block
.
desc
)
op_loss
.
desc
.
infer_shape
(
block
.
desc
)
return
loss
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录