Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
24649a78
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
24649a78
编写于
6月 07, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into update_crop_op
上级
9c61409a
d48172f2
变更
38
显示空白变更内容
内联
并排
Showing
38 changed file
with
1764 addition
and
829 deletion
+1764
-829
benchmark/fluid/Dockerfile
benchmark/fluid/Dockerfile
+1
-1
benchmark/fluid/README.md
benchmark/fluid/README.md
+10
-0
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+87
-28
benchmark/fluid/models/machine_translation.py
benchmark/fluid/models/machine_translation.py
+3
-1
benchmark/fluid/models/mnist.py
benchmark/fluid/models/mnist.py
+20
-4
benchmark/fluid/models/resnet.py
benchmark/fluid/models/resnet.py
+44
-15
benchmark/fluid/models/stacked_dynamic_lstm.py
benchmark/fluid/models/stacked_dynamic_lstm.py
+4
-1
benchmark/fluid/models/vgg.py
benchmark/fluid/models/vgg.py
+20
-4
benchmark/fluid/recordio_converter.py
benchmark/fluid/recordio_converter.py
+164
-0
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+6
-6
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+4
-2
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+0
-2
paddle/fluid/operators/reduce_max_op.cc
paddle/fluid/operators/reduce_max_op.cc
+34
-0
paddle/fluid/operators/reduce_max_op.cu
paddle/fluid/operators/reduce_max_op.cu
+34
-0
paddle/fluid/operators/reduce_mean_op.cc
paddle/fluid/operators/reduce_mean_op.cc
+35
-0
paddle/fluid/operators/reduce_mean_op.cu
paddle/fluid/operators/reduce_mean_op.cu
+34
-0
paddle/fluid/operators/reduce_mean_op.h
paddle/fluid/operators/reduce_mean_op.h
+39
-0
paddle/fluid/operators/reduce_min_max_op.h
paddle/fluid/operators/reduce_min_max_op.h
+50
-0
paddle/fluid/operators/reduce_min_op.cc
paddle/fluid/operators/reduce_min_op.cc
+34
-0
paddle/fluid/operators/reduce_min_op.cu
paddle/fluid/operators/reduce_min_op.cu
+34
-0
paddle/fluid/operators/reduce_op.cc
paddle/fluid/operators/reduce_op.cc
+0
-186
paddle/fluid/operators/reduce_op.cu
paddle/fluid/operators/reduce_op.cu
+0
-41
paddle/fluid/operators/reduce_op.h
paddle/fluid/operators/reduce_op.h
+165
-187
paddle/fluid/operators/reduce_op_function.h
paddle/fluid/operators/reduce_op_function.h
+109
-0
paddle/fluid/operators/reduce_prod_op.cc
paddle/fluid/operators/reduce_prod_op.cc
+35
-0
paddle/fluid/operators/reduce_prod_op.cu
paddle/fluid/operators/reduce_prod_op.cu
+34
-0
paddle/fluid/operators/reduce_prod_op.h
paddle/fluid/operators/reduce_prod_op.h
+39
-0
paddle/fluid/operators/reduce_sum_op.cc
paddle/fluid/operators/reduce_sum_op.cc
+34
-0
paddle/fluid/operators/reduce_sum_op.cu
paddle/fluid/operators/reduce_sum_op.cu
+34
-0
paddle/fluid/operators/reduce_sum_op.h
paddle/fluid/operators/reduce_sum_op.h
+39
-0
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+2
-0
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+16
-9
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+1
-1
python/paddle/fluid/tests/unittests/benchmark.py
python/paddle/fluid/tests/unittests/benchmark.py
+113
-0
python/paddle/fluid/tests/unittests/benchmark_sum_op.py
python/paddle/fluid/tests/unittests/benchmark_sum_op.py
+82
-0
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+124
-240
python/paddle/fluid/tests/unittests/test_lstm_op.py
python/paddle/fluid/tests/unittests/test_lstm_op.py
+98
-101
python/paddle/fluid/tests/unittests/testsuite.py
python/paddle/fluid/tests/unittests/testsuite.py
+182
-0
未找到文件。
benchmark/fluid/Dockerfile
浏览文件 @
24649a78
...
...
@@ -19,4 +19,4 @@ ADD *.whl /
RUN
pip
install
/
*
.whl
&&
rm
-f
/
*
.whl
&&
chmod
+x /usr/bin/paddle_k8s
ENV
LD_LIBRARY_PATH=/usr/local/lib
ADD
fluid_benchmark.py
dataset
.py models/ /workspace/
ADD
fluid_benchmark.py
recordio_converter
.py models/ /workspace/
benchmark/fluid/README.md
浏览文件 @
24649a78
...
...
@@ -44,6 +44,16 @@ Currently supported `--model` argument include:
PADDLE_PSERVER_PORT
=
7164
PADDLE_TRAINER_IPS
=
192.168.0.2,192.168.0.3
PADDLE_CURRENT_IP
=
127.0.0.1
PADDLE_TRAINER_ID
=
0 python fluid_benchmark.py
--model
mnist
--device
GPU
--update_method
nccl2
```
## Prepare the RecordIO file to Achieve Better Performance
Run the following command will generate RecordIO files like "mnist.recordio" under the path
and batch_size you choose, you can use batch_size=1 so that later reader can change the batch_size
at any time using
`fluid.batch`
.
```
bash
python
-c
'from recordio_converter import *; prepare_mnist("data", 1)'
```
## Run Distributed Benchmark on Kubernetes Cluster
You may need to build a Docker image before submitting a cluster job onto Kubernetes, or you will
...
...
benchmark/fluid/fluid_benchmark.py
浏览文件 @
24649a78
...
...
@@ -38,10 +38,12 @@ def parse_args():
default
=
'resnet'
,
help
=
'The model to run benchmark with.'
)
parser
.
add_argument
(
'--batch_size'
,
type
=
int
,
default
=
32
,
help
=
'The minibatch size.'
)
'--batch_size'
,
type
=
int
,
default
=
32
,
help
=
'The batch size on each gpu.'
)
parser
.
add_argument
(
'--learning_rate'
,
type
=
float
,
default
=
0.001
,
help
=
'The learning rate.'
)
# TODO(wuyi): add "--use_fake_data" option back.
parser
.
add_argument
(
'--skip_batch_num'
,
type
=
int
,
...
...
@@ -49,7 +51,10 @@ def parse_args():
help
=
'The first num of minibatch num to skip, for better performance test'
)
parser
.
add_argument
(
'--iterations'
,
type
=
int
,
default
=
80
,
help
=
'The number of minibatches.'
)
'--iterations'
,
type
=
int
,
default
=
80
,
help
=
'The number of minibatches, set to -1 to run all batches.'
)
parser
.
add_argument
(
'--pass_num'
,
type
=
int
,
default
=
100
,
help
=
'The number of passes.'
)
parser
.
add_argument
(
...
...
@@ -69,6 +74,7 @@ def parse_args():
type
=
int
,
default
=
1
,
help
=
'If gpus > 1, will use ParallelExecutor to run, else use Executor.'
)
# this option is available only for vgg and resnet.
parser
.
add_argument
(
'--cpus'
,
type
=
int
,
...
...
@@ -78,7 +84,7 @@ def parse_args():
'--data_set'
,
type
=
str
,
default
=
'flowers'
,
choices
=
[
'cifar10'
,
'flowers'
],
choices
=
[
'cifar10'
,
'flowers'
,
'imagenet'
],
help
=
'Optional dataset for benchmark.'
)
parser
.
add_argument
(
'--infer_only'
,
action
=
'store_true'
,
help
=
'If set, run forward only.'
)
...
...
@@ -108,6 +114,16 @@ def parse_args():
default
=
'local'
,
choices
=
[
'local'
,
'pserver'
,
'nccl2'
],
help
=
'Choose parameter update method, can be local, pserver, nccl2.'
)
parser
.
add_argument
(
'--use_reader_op'
,
action
=
'store_true'
,
help
=
'Whether to use reader op, and must specify the data path if set this to true.'
)
parser
.
add_argument
(
'--data_path'
,
type
=
str
,
default
=
""
,
help
=
'Directory that contains all the training recordio files.'
)
args
=
parser
.
parse_args
()
return
args
...
...
@@ -210,6 +226,8 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
place
=
core
.
CPUPlace
()
if
args
.
device
==
'CPU'
else
core
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup_prog
)
if
not
args
.
use_reader_op
:
feed_var_list
=
[
var
for
var
in
train_prog
.
global_block
().
vars
.
itervalues
()
if
var
.
is_data
...
...
@@ -219,16 +237,38 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
iters
,
num_samples
,
start_time
=
0
,
0
,
time
.
time
()
for
pass_id
in
range
(
args
.
pass_num
):
train_losses
=
[]
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
not
args
.
use_reader_op
:
reader_generator
=
train_reader
()
batch_id
=
0
data
=
None
while
True
:
if
not
args
.
use_reader_op
:
data
=
next
(
reader_generator
,
None
)
if
data
==
None
:
break
if
iters
==
args
.
iterations
:
break
if
iters
==
args
.
skip_batch_num
:
start_time
=
time
.
time
()
num_samples
=
0
if
iters
==
args
.
iterations
:
if
args
.
use_reader_op
:
try
:
loss
=
exe
.
run
(
train_prog
,
fetch_list
=
[
avg_loss
])
except
fluid
.
core
.
EnforceNotMet
as
ex
:
break
else
:
loss
=
exe
.
run
(
train_prog
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_loss
])
iters
+=
1
batch_id
+=
1
# FIXME(wuyi): For use_reader_op, if the current
# pass is not the last, the last batch of this pass
# is also equal to args.batch_size.
if
args
.
use_reader_op
:
num_samples
+=
args
.
batch_size
*
args
.
gpus
else
:
num_samples
+=
len
(
data
)
train_losses
.
append
(
loss
)
print
(
"Pass: %d, Iter: %d, Loss: %f
\n
"
%
...
...
@@ -250,10 +290,14 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
def
train_parallel
(
avg_loss
,
infer_prog
,
optimizer
,
train_reader
,
test_reader
,
batch_acc
,
args
,
train_prog
,
startup_prog
,
nccl_id_var
,
num_trainers
,
trainer_id
):
place
=
core
.
CPUPlace
()
if
args
.
device
==
'CPU'
else
core
.
CUDAPlace
(
0
)
if
not
args
.
use_reader_op
:
feed_var_list
=
[
var
for
var
in
train_prog
.
global_block
().
vars
.
itervalues
()
if
var
.
is_data
]
feeder
=
fluid
.
DataFeeder
(
feed_var_list
,
place
)
# generate fake:
if
args
.
use_fake_data
:
for
var
in
feed_var_list
:
...
...
@@ -270,7 +314,6 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
"value"
:
1.0
,
"dtype"
:
var
.
dtype
})
place
=
core
.
CPUPlace
()
if
args
.
device
==
'CPU'
else
core
.
CUDAPlace
(
0
)
if
nccl_id_var
and
trainer_id
==
0
:
#FIXME(wuyi): wait other trainer to start listening
time
.
sleep
(
30
)
...
...
@@ -287,12 +330,21 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
num_trainers
=
num_trainers
,
trainer_id
=
trainer_id
)
feeder
=
fluid
.
DataFeeder
(
feed_var_list
,
place
)
for
pass_id
in
range
(
args
.
pass_num
):
num_samples
=
0
iters
=
0
start_time
=
time
.
time
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
not
args
.
use_reader_op
:
reader_generator
=
train_reader
()
batch_id
=
0
data
=
None
while
True
:
if
not
args
.
use_reader_op
:
data
=
next
(
reader_generator
,
None
)
if
data
==
None
:
break
if
iters
==
args
.
iterations
:
break
if
args
.
profile
and
pass_id
==
0
and
batch_id
==
5
:
profiler
.
start_profiler
(
"All"
)
elif
args
.
profile
and
pass_id
==
0
and
batch_id
==
10
:
...
...
@@ -301,19 +353,26 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
if
iters
==
args
.
skip_batch_num
:
start_time
=
time
.
time
()
num_samples
=
0
if
iters
==
args
.
iterations
:
break
if
args
.
use_fake_data
:
if
args
.
use_fake_data
or
args
.
use_reader_op
:
try
:
loss
,
=
exe
.
run
([
avg_loss
.
name
])
except
fluid
.
core
.
EnforceNotMet
as
ex
:
break
else
:
loss
,
=
exe
.
run
([
avg_loss
.
name
],
feed
=
feeder
.
feed
(
data
))
if
args
.
update_method
==
"pserver"
:
exe
.
bcast_params
()
if
args
.
use_reader_op
:
num_samples
+=
args
.
batch_size
*
args
.
gpus
else
:
num_samples
+=
len
(
data
)
iters
+=
1
if
batch_id
%
1
==
0
:
print
(
"Pass %d, batch %d, loss %s"
%
(
pass_id
,
batch_id
,
np
.
array
(
loss
)))
batch_id
+=
1
if
args
.
use_reader_op
:
num_samples
=
num_samples
*
args
.
gpus
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
if
not
args
.
no_test
and
batch_acc
:
test_acc
=
test
(
startup_exe
,
infer_prog
,
test_reader
,
feeder
,
...
...
benchmark/fluid/models/machine_translation.py
浏览文件 @
24649a78
...
...
@@ -197,6 +197,8 @@ def lodtensor_to_ndarray(lod_tensor):
def
get_model
(
args
):
if
args
.
use_reader_op
:
raise
Exception
(
"machine_translation do not support reader op for now."
)
embedding_dim
=
512
encoder_size
=
512
decoder_size
=
512
...
...
@@ -221,7 +223,7 @@ def get_model(args):
train_batch_generator
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
wmt14
.
train
(
dict_size
),
buf_size
=
1000
),
batch_size
=
args
.
batch_size
)
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_batch_generator
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
...
...
benchmark/fluid/models/mnist.py
浏览文件 @
24649a78
...
...
@@ -20,6 +20,7 @@ import numpy as np
import
argparse
import
time
import
cProfile
import
os
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -65,7 +66,22 @@ def cnn_model(data):
def
get_model
(
args
):
# Input data
if
args
.
use_reader_op
:
filelist
=
[
os
.
path
.
join
(
args
.
data_path
,
f
)
for
f
in
os
.
listdir
(
args
.
data_path
)
]
data_file
=
fluid
.
layers
.
open_files
(
filenames
=
filelist
,
shapes
=
[[
-
1
,
1
,
28
,
28
],
(
-
1
,
1
)],
lod_levels
=
[
0
,
0
],
dtypes
=
[
"float32"
,
"int64"
],
thread_num
=
args
.
gpus
,
pass_num
=
args
.
pass_num
)
data_file
=
fluid
.
layers
.
double_buffer
(
fluid
.
layers
.
batch
(
data_file
,
batch_size
=
args
.
batch_size
))
images
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
else
:
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
dtype
=
DTYPE
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
...
...
@@ -103,7 +119,7 @@ def get_model(args):
# Reader
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
train
(),
batch_size
=
args
.
batch_size
)
paddle
.
dataset
.
mnist
.
train
(),
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
args
.
batch_size
)
return
avg_cost
,
inference_program
,
opt
,
train_reader
,
test_reader
,
batch_acc
benchmark/fluid/models/resnet.py
浏览文件 @
24649a78
...
...
@@ -19,6 +19,7 @@ from __future__ import print_function
import
functools
import
numpy
as
np
import
time
import
os
import
cProfile
,
pstats
,
StringIO
...
...
@@ -26,6 +27,7 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.profiler
as
profiler
from
recordio_converter
import
imagenet_train
,
imagenet_test
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
...
...
@@ -122,14 +124,46 @@ def get_model(args):
else
:
dshape
=
[
32
,
32
,
3
]
model
=
resnet_cifar10
else
:
train_reader
=
paddle
.
dataset
.
cifar
.
train10
()
test_reader
=
paddle
.
dataset
.
cifar
.
test10
()
elif
args
.
data_set
==
"flowers"
:
class_dim
=
102
if
args
.
data_format
==
'NCHW'
:
dshape
=
[
3
,
224
,
224
]
else
:
dshape
=
[
224
,
224
,
3
]
model
=
resnet_imagenet
train_reader
=
paddle
.
dataset
.
flowers
.
train
()
test_reader
=
paddle
.
dataset
.
flowers
.
test
()
elif
args
.
data_set
==
"imagenet"
:
class_dim
=
1000
if
args
.
data_format
==
'NCHW'
:
dshape
=
[
3
,
224
,
224
]
else
:
dshape
=
[
224
,
224
,
3
]
model
=
resnet_imagenet
if
not
args
.
data_path
:
raise
Exception
(
"Must specify --data_path when training with imagenet"
)
train_reader
=
imagenet_train
(
args
.
data_path
)
test_reader
=
imagenet_test
(
args
.
data_path
)
if
args
.
use_reader_op
:
filelist
=
[
os
.
path
.
join
(
args
.
data_path
,
f
)
for
f
in
os
.
listdir
(
args
.
data_path
)
]
data_file
=
fluid
.
layers
.
open_files
(
filenames
=
filelist
,
shapes
=
[[
-
1
]
+
dshape
,
(
-
1
,
1
)],
lod_levels
=
[
0
,
0
],
dtypes
=
[
"float32"
,
"int64"
],
thread_num
=
args
.
gpus
,
pass_num
=
args
.
pass_num
)
data_file
=
fluid
.
layers
.
double_buffer
(
fluid
.
layers
.
batch
(
data_file
,
batch_size
=
args
.
batch_size
))
input
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
else
:
input
=
fluid
.
layers
.
data
(
name
=
'data'
,
shape
=
dshape
,
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
...
...
@@ -162,15 +196,10 @@ def get_model(args):
optimizer
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.01
,
momentum
=
0.9
)
train_reader
=
paddle
.
batch
(
batched_
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
cifar
.
train10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
train
(),
buf_size
=
5120
),
batch_size
=
args
.
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
cifar
.
test10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
test
(),
batch_size
=
args
.
batch_size
)
return
avg_cost
,
inference_program
,
optimizer
,
train_reader
,
test_reader
,
batch_acc
train_reader
,
buf_size
=
5120
),
batch_size
=
args
.
batch_size
*
args
.
gpus
)
batched_test_reader
=
paddle
.
batch
(
train_reader
,
batch_size
=
args
.
batch_size
)
return
avg_cost
,
inference_program
,
optimizer
,
batched_train_reader
,
batched_test_reader
,
batch_acc
benchmark/fluid/models/stacked_dynamic_lstm.py
浏览文件 @
24649a78
...
...
@@ -44,6 +44,9 @@ def crop_sentence(reader, crop_size):
def
get_model
(
args
):
if
args
.
use_reader_op
:
raise
Exception
(
"stacked_dynamic_lstm do not support reader op for now."
)
lstm_size
=
512
emb_dim
=
512
crop_size
=
1500
...
...
@@ -114,7 +117,7 @@ def get_model(args):
train_reader
=
batch
(
paddle
.
reader
.
shuffle
(
crop_sentence
(
imdb
.
train
(
word_dict
),
crop_size
),
buf_size
=
25000
),
batch_size
=
args
.
batch_size
)
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_reader
=
batch
(
paddle
.
reader
.
shuffle
(
crop_sentence
(
imdb
.
test
(
word_dict
),
crop_size
),
buf_size
=
25000
),
...
...
benchmark/fluid/models/vgg.py
浏览文件 @
24649a78
...
...
@@ -22,6 +22,7 @@ import paddle.fluid as fluid
import
paddle.fluid.core
as
core
import
argparse
import
functools
import
os
def
vgg16_bn_drop
(
input
):
...
...
@@ -65,8 +66,23 @@ def get_model(args):
else
:
data_shape
=
[
224
,
224
,
3
]
# Input data
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
data_shape
,
dtype
=
'float32'
)
if
args
.
use_reader_op
:
filelist
=
[
os
.
path
.
join
(
args
.
data_path
,
f
)
for
f
in
os
.
listdir
(
args
.
data_path
)
]
data_file
=
fluid
.
layers
.
open_files
(
filenames
=
filelist
,
shapes
=
[[
-
1
]
+
data_shape
,
(
-
1
,
1
)],
lod_levels
=
[
0
,
0
],
dtypes
=
[
"float32"
,
"int64"
],
thread_num
=
args
.
gpus
,
pass_num
=
args
.
pass_num
)
data_file
=
fluid
.
layers
.
double_buffer
(
fluid
.
layers
.
batch
(
data_file
,
batch_size
=
args
.
batch_size
))
images
,
label
=
fluid
.
layers
.
read_file
(
data_file
)
else
:
images
=
fluid
.
layers
.
data
(
name
=
'data'
,
shape
=
dshape
,
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
# Train program
...
...
@@ -95,7 +111,7 @@ def get_model(args):
paddle
.
dataset
.
cifar
.
train10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
train
(),
buf_size
=
5120
),
batch_size
=
args
.
batch_size
)
batch_size
=
args
.
batch_size
*
args
.
gpus
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
cifar
.
test10
()
if
args
.
data_set
==
'cifar10'
else
paddle
.
dataset
.
flowers
.
test
(),
...
...
benchmark/fluid/recordio_converter.py
0 → 100644
浏览文件 @
24649a78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
random
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.dataset
import
mnist
,
cifar
,
flowers
,
image
def
convert_2_recordio
(
py_reader
,
outfilepath
,
batch_size
,
shape_data
,
shape_label
):
num_batches
=
0
with
fluid
.
program_guard
(
fluid
.
Program
(),
fluid
.
Program
()):
reader
=
paddle
.
batch
(
py_reader
(),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
# order is image and label
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
shape_data
),
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
shape_label
,
dtype
=
'int64'
),
],
place
=
fluid
.
CPUPlace
())
num_batches
=
fluid
.
recordio_writer
.
convert_reader_to_recordio_file
(
outfilepath
,
reader
,
feeder
)
return
num_batches
def
prepare_mnist
(
outpath
,
batch_size
):
outfilepath
=
os
.
path
.
join
(
outpath
,
"mnist.recordio"
)
convert_2_recordio
(
mnist
.
train
,
outfilepath
,
batch_size
,
[
784
],
[
1
])
def
prepare_cifar10
(
outpath
,
batch_size
):
outfilepath
=
os
.
path
.
join
(
outpath
,
"cifar.recordio"
)
convert_2_recordio
(
cifar
.
train10
,
outfilepath
,
batch_size
,
[
3
,
32
,
32
],
[
1
])
def
prepare_flowers
(
outpath
,
batch_size
):
outfilepath
=
os
.
path
.
join
(
outpath
,
"flowers.recordio"
)
convert_2_recordio
(
flowers
.
train
,
outfilepath
,
batch_size
,
[
3
,
224
,
224
],
[
1
])
def
default_mapper
(
sample
):
img
,
label
=
sample
img
=
image
.
simple_transform
(
img
,
256
,
224
,
True
,
mean
=
[
103.94
,
116.78
,
123.68
])
return
img
.
flatten
().
astype
(
'float32'
),
label
def
imagenet_train
(
data_dir
):
contents
=
os
.
listdir
(
data_dir
)
if
set
(
contents
)
!=
set
(
[
"train"
,
"train.txt"
,
"val"
,
"val_set"
,
"val.txt"
,
"unzip.sh"
]):
raise
Exception
(
"Imagenet data contents error!"
)
img2label
=
dict
()
imgfilelist
=
[]
with
open
(
os
.
path
.
join
(
data_dir
,
"train.txt"
))
as
fn
:
while
1
:
l
=
fn
.
readline
()
if
not
l
:
break
img
,
lbl
=
l
[:
-
1
].
split
(
" "
)
img2label
[
img
]
=
int
(
lbl
)
imgfilelist
.
append
(
img
)
# shuffle all, this is slow
random
.
shuffle
(
imgfilelist
)
def
train_reader
():
for
idx
,
imgfile
in
enumerate
(
imgfilelist
):
data
=
image
.
load_image
(
os
.
path
.
join
(
data_dir
,
"train"
,
imgfile
.
lower
()))
label
=
[
img2label
[
imgfile
],
]
yield
[
data
,
label
]
return
paddle
.
reader
.
map_readers
(
default_mapper
,
train_reader
)
def
imagenet_test
(
data_dir
):
contents
=
os
.
listdir
(
data_dir
)
if
set
(
contents
)
!=
set
(
[
"train"
,
"train.txt"
,
"val"
,
"val_set"
,
"val.txt"
,
"unzip.sh"
]):
raise
Exception
(
"Imagenet data contents error!"
)
img2label
=
dict
()
imgfilelist
=
[]
with
open
(
os
.
path
.
join
(
data_dir
,
"val.txt"
))
as
fn
:
while
1
:
l
=
fn
.
readline
()
if
not
l
:
break
img
,
lbl
=
l
[:
-
1
].
split
(
" "
)
img2label
[
img
]
=
int
(
lbl
)
imgfilelist
.
append
(
img
)
def
test_reader
():
for
idx
,
imgfile
in
enumerate
(
imgfilelist
):
base_path
=
os
.
path
.
join
(
data_dir
,
"val"
,
imgfile
.
split
(
"."
)[
0
])
image_path
=
"."
.
join
([
base_path
,
"jpeg"
])
data
=
image
.
load_image
(
image_path
)
label
=
[
img2label
[
imgfile
],
]
yield
[
data
,
label
]
return
paddle
.
reader
.
map_readers
(
default_mapper
,
test_reader
)
# FIXME(wuyi): delete this when https://github.com/PaddlePaddle/Paddle/pull/11066 is merged
def
convert_reader_to_recordio_files
(
filename
,
batch_per_file
,
reader_creator
,
feeder
,
compressor
=
core
.
RecordIOWriter
.
Compressor
.
Snappy
,
max_num_records
=
1000
,
feed_order
=
None
):
if
feed_order
is
None
:
feed_order
=
feeder
.
feed_names
f_name
,
f_ext
=
os
.
path
.
splitext
(
filename
)
assert
(
f_ext
==
".recordio"
)
lines
=
[]
f_idx
=
0
counter
=
0
for
idx
,
batch
in
enumerate
(
reader_creator
()):
lines
.
append
(
batch
)
if
idx
>=
batch_per_file
and
idx
%
batch_per_file
==
0
:
filename
=
"%s-%05d%s"
%
(
f_name
,
f_idx
,
f_ext
)
with
fluid
.
recordio_writer
.
create_recordio_writer
(
filename
,
compressor
,
max_num_records
)
as
writer
:
for
l
in
lines
:
res
=
feeder
.
feed
(
l
)
for
each
in
feed_order
:
writer
.
append_tensor
(
res
[
each
])
writer
.
complete_append_tensor
()
counter
+=
1
lines
=
[]
f_idx
+=
1
print
(
"written file: "
,
filename
)
return
counter
def
prepare_imagenet
(
inpath
,
outpath
,
batch_size
):
r
=
paddle
.
batch
(
imagenet_train
(
inpath
),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
fluid
.
layers
.
data
(
name
=
"image"
,
shape
=
[
3
,
224
,
224
]),
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
'int64'
)
],
place
=
fluid
.
CPUPlace
())
outpath
=
os
.
path
.
join
(
outpath
,
"imagenet.recordio"
)
convert_reader_to_recordio_files
(
outpath
,
10000
,
r
,
feeder
)
paddle/fluid/framework/op_registry.h
浏览文件 @
24649a78
...
...
@@ -156,15 +156,15 @@ class OpKernelRegistrar : public Registrar {
/**
* Macro to register OperatorKernel.
*/
#define REGISTER_OP_KERNEL(op_type,
LIBRARY_TYPE
, place_class, ...) \
#define REGISTER_OP_KERNEL(op_type,
library_type
, place_class, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op_kernel_##op_type##_##
LIBRARY_TYPE
##__, \
__reg_op_kernel_##op_type##_##
library_type
##__, \
"REGISTER_OP_KERNEL must be called in global namespace"); \
static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__> \
__op_kernel_registrar_##op_type##_##
LIBRARY_TYPE
##__(#op_type, \
#
LIBRARY_TYPE
); \
int TouchOpKernelRegistrar_##op_type##_##
LIBRARY_TYPE
() { \
__op_kernel_registrar_##op_type##_##
LIBRARY_TYPE
##__.Touch(); \
__op_kernel_registrar_##op_type##_##
library_type
##__(#op_type, \
#
library_type
); \
int TouchOpKernelRegistrar_##op_type##_##
library_type
() { \
__op_kernel_registrar_##op_type##_##
library_type
##__.Touch(); \
return 0; \
}
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
24649a78
...
...
@@ -693,8 +693,10 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
}
if
(
t
!=
nullptr
)
{
int
tmp
=
static_cast
<
int
>
(
ToDataType
(
t
->
type
()));
PADDLE_ENFORCE
(
tmp
==
data_type
||
data_type
==
-
1
,
"DataType of Paddle Op %s must be the same."
,
Type
());
PADDLE_ENFORCE
(
tmp
==
data_type
||
data_type
==
-
1
,
"DataType of Paddle Op %s must be the same. Get %d != %d"
,
Type
(),
data_type
,
tmp
);
data_type
=
tmp
;
}
}
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
24649a78
...
...
@@ -166,8 +166,6 @@ function(op_library TARGET)
# NOTE(*): activation use macro to regist the kernels, set use_op manually.
if
(
${
TARGET
}
STREQUAL
"activation"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(relu);
\n
"
)
elseif
(
${
TARGET
}
STREQUAL
"reduce"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(reduce_sum);
\n
"
)
elseif
(
${
TARGET
}
STREQUAL
"fake_dequantize"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(fake_dequantize_max_abs);
\n
"
)
else
()
...
...
paddle/fluid/operators/reduce_max_op.cc
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_min_max_op.h"
REGISTER_REDUCE_OP
(
reduce_max
);
REGISTER_OP_CPU_KERNEL
(
reduce_max
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
MaxFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
MaxFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
MaxFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
MaxFunctor
>
);
REGISTER_OP_CPU_KERNEL
(
reduce_max_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
MaxOrMinGradFunctor
>
);
paddle/fluid/operators/reduce_max_op.cu
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_min_max_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_max
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MaxFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MaxFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MaxFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MaxFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_max_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MaxOrMinGradFunctor
>
);
paddle/fluid/operators/reduce_mean_op.cc
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_mean_op.h"
REGISTER_REDUCE_OP
(
reduce_mean
);
REGISTER_OP_CPU_KERNEL
(
reduce_mean
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
MeanFunctor
>
);
REGISTER_OP_CPU_KERNEL
(
reduce_mean_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
MeanGradFunctor
>
);
paddle/fluid/operators/reduce_mean_op.cu
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_mean_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_mean
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MeanFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_mean_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MeanGradFunctor
>
);
paddle/fluid/operators/reduce_mean_op.h
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_op.h"
namespace
paddle
{
namespace
operators
{
struct
MeanFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
mean
(
dim
);
}
};
struct
MeanGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
)
/
dx
->
constant
(
size
);
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/reduce_min_max_op.h
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_op.h"
namespace
paddle
{
namespace
operators
{
struct
MaxFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
maximum
(
dim
);
}
};
struct
MinFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
minimum
(
dim
);
}
};
struct
MaxOrMinGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
auto
equals
=
(
*
x
)
==
y
->
broadcast
(
dim
);
auto
ones
=
dx
->
constant
(
1
);
auto
zeros
=
dx
->
constant
(
0
);
// If there are multiple minimum or maximum elements, the subgradient of
// each is the set [0, 1], and we pass gradient to all of them here.
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
)
*
equals
.
select
(
ones
,
zeros
);
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/reduce_min_op.cc
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_min_max_op.h"
REGISTER_REDUCE_OP
(
reduce_min
);
REGISTER_OP_CPU_KERNEL
(
reduce_min
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
MinFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
MinFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
MinFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
MinFunctor
>
);
REGISTER_OP_CPU_KERNEL
(
reduce_min_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
MaxOrMinGradFunctor
>
);
paddle/fluid/operators/reduce_min_op.cu
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_min_max_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_min
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MinFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MinFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MinFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MinFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_min_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MaxOrMinGradFunctor
>
);
paddle/fluid/operators/reduce_op.cc
已删除
100644 → 0
浏览文件 @
9c61409a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/reduce_op.h"
#include <algorithm>
#include <string>
#include <vector>
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
class
ReduceOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of ReduceOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of ReduceOp should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
x_rank
=
x_dims
.
size
();
PADDLE_ENFORCE_LE
(
x_rank
,
6
,
"Tensors with rank at most 6 are supported."
);
auto
dims
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dim"
);
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
if
(
dims
[
i
]
<
0
)
dims
[
i
]
=
x_rank
+
dims
[
i
];
PADDLE_ENFORCE_LT
(
dims
[
i
],
x_rank
,
"The dim should be in the range [-rank(input), rank(input))."
);
}
sort
(
dims
.
begin
(),
dims
.
end
());
bool
reduce_all
=
ctx
->
Attrs
().
Get
<
bool
>
(
"reduce_all"
);
bool
keep_dim
=
ctx
->
Attrs
().
Get
<
bool
>
(
"keep_dim"
);
if
(
reduce_all
)
{
if
(
keep_dim
)
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
std
::
vector
<
int64_t
>
(
x_rank
,
1
)));
else
ctx
->
SetOutputDim
(
"Out"
,
{
1
});
}
else
{
auto
dims_vector
=
vectorize
(
x_dims
);
if
(
keep_dim
)
{
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
dims_vector
[
dims
[
i
]]
=
1
;
}
}
else
{
const
int
kDelFlag
=
-
2
;
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
dims_vector
[
dims
[
i
]]
=
kDelFlag
;
}
dims_vector
.
erase
(
remove
(
dims_vector
.
begin
(),
dims_vector
.
end
(),
kDelFlag
),
dims_vector
.
end
());
}
auto
out_dims
=
framework
::
make_ddim
(
dims_vector
);
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
if
(
dims
[
0
]
!=
0
)
{
// Only pass LoD when not reducing on the first dim.
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
}
}
};
class
ReduceGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
x_rank
=
x_dims
.
size
();
PADDLE_ENFORCE_LE
(
x_rank
,
6
,
"Tensors with rank at most 6 are supported."
);
auto
dims
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dim"
);
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
if
(
dims
[
i
]
<
0
)
dims
[
i
]
=
x_rank
+
dims
[
i
];
PADDLE_ENFORCE_LT
(
dims
[
i
],
x_rank
,
"The dim should be in the range [-rank(input), rank(input))."
);
}
sort
(
dims
.
begin
(),
dims
.
end
());
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
SetOutputDim
(
x_grad_name
,
x_dims
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
x_grad_name
);
}
}
};
class
ReduceOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
final
{
AddInput
(
"X"
,
"(Tensor) The input tensor. Tensors with rank at most 6 are "
"supported."
);
AddOutput
(
"Out"
,
"(Tensor) The result tensor."
);
AddAttr
<
std
::
vector
<
int
>>
(
"dim"
,
"(list<int>, default {0}) The dimensions to reduce. "
"Must be in the range [-rank(input), rank(input)). "
"If `dim[i] < 0`, the dims[i] to reduce is `rank + dims[i]`. "
"Note that reducing on the first dim will make the LoD info lost."
)
.
SetDefault
({
0
});
AddAttr
<
bool
>
(
"keep_dim"
,
"(bool, default false) "
"If true, retain the reduced dimension with length 1."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"reduce_all"
,
"(bool, default false) "
"If true, output a scalar reduced along all dimensions."
)
.
SetDefault
(
false
);
AddComment
(
string
::
Sprintf
(
R"DOC(
%s Operator.
This operator computes the %s of input tensor along the given dimension.
The result tensor has 1 fewer dimension than the input unless keep_dim is true.
If reduce_all is true, just reduce along all dimensions and output a scalar.
)DOC"
,
GetOpType
(),
GetName
()));
}
protected:
virtual
std
::
string
GetName
()
const
=
0
;
virtual
std
::
string
GetOpType
()
const
=
0
;
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
#define REGISTER_REDUCE_OP(op_name) \
class __##op_name##Maker__ : public ops::ReduceOpMaker { \
protected: \
virtual std::string GetName() const { return #op_name; } \
virtual std::string GetOpType() const { return "Reduce " #op_name; } \
}; \
REGISTER_OPERATOR(reduce_##op_name, ops::ReduceOp, __##op_name##Maker__, \
paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(reduce_##op_name##_grad, ops::ReduceGradOp)
REGISTER_REDUCE_OP
(
sum
);
REGISTER_REDUCE_OP
(
mean
);
REGISTER_REDUCE_OP
(
max
);
REGISTER_REDUCE_OP
(
min
);
REGISTER_REDUCE_OP
(
prod
);
#define REGISTER_REDUCE_CPU_KERNEL(reduce_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL(reduce_type, \
ops::ReduceKernel<paddle::platform::CPUDeviceContext, \
float, ops::functor>, \
ops::ReduceKernel<paddle::platform::CPUDeviceContext, \
double, ops::functor>, \
ops::ReduceKernel<paddle::platform::CPUDeviceContext, \
int, ops::functor>, \
ops::ReduceKernel<paddle::platform::CPUDeviceContext, \
int64_t, ops::functor>); \
REGISTER_OP_CPU_KERNEL( \
reduce_type##_grad, \
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, float, \
ops::grad_functor>, \
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, double, \
ops::grad_functor>, \
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int, \
ops::grad_functor>, \
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int64_t, \
ops::grad_functor>);
FOR_EACH_KERNEL_FUNCTOR
(
REGISTER_REDUCE_CPU_KERNEL
);
paddle/fluid/operators/reduce_op.cu
已删除
100644 → 0
浏览文件 @
9c61409a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/reduce_op.h"
namespace
ops
=
paddle
::
operators
;
#define REGISTER_REDUCE_GPU_KERNEL(reduce_type, functor, grad_functor) \
REGISTER_OP_CUDA_KERNEL( \
reduce_type, ops::ReduceKernel<paddle::platform::CUDADeviceContext, \
float, ops::functor>, \
ops::ReduceKernel<paddle::platform::CUDADeviceContext, double, \
ops::functor>, \
ops::ReduceKernel<paddle::platform::CUDADeviceContext, int, \
ops::functor>, \
ops::ReduceKernel<paddle::platform::CUDADeviceContext, int64_t, \
ops::functor>); \
REGISTER_OP_CUDA_KERNEL( \
reduce_type##_grad, \
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float, \
ops::grad_functor>, \
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double, \
ops::grad_functor>, \
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int, \
ops::grad_functor>, \
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t, \
ops::grad_functor>);
FOR_EACH_KERNEL_FUNCTOR
(
REGISTER_REDUCE_GPU_KERNEL
);
paddle/fluid/operators/reduce_op.h
浏览文件 @
24649a78
...
...
@@ -14,105 +14,20 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/reduce_op_function.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
DDim
=
framework
::
DDim
;
template
<
typename
T
,
size_t
D
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenTensor
=
framework
::
EigenTensor
<
T
,
D
,
MajorType
,
IndexType
>
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenScalar
=
framework
::
EigenScalar
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenVector
=
framework
::
EigenVector
<
T
,
MajorType
,
IndexType
>
;
struct
SumFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
sum
(
dim
);
}
};
struct
SumGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
);
}
};
struct
MeanFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
mean
(
dim
);
}
};
struct
MeanGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
)
/
dx
->
constant
(
size
);
}
};
struct
MaxFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
maximum
(
dim
);
}
};
struct
MinFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
minimum
(
dim
);
}
};
struct
MaxOrMinGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
auto
equals
=
(
*
x
)
==
y
->
broadcast
(
dim
);
auto
ones
=
dx
->
constant
(
1
);
auto
zeros
=
dx
->
constant
(
0
);
// If there are multiple minimum or maximum elements, the subgradient of
// each is the set [0, 1], and we pass gradient to all of them here.
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
)
*
equals
.
select
(
ones
,
zeros
);
}
};
struct
ProdFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
prod
(
dim
);
}
};
struct
ProdGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
)
*
y
->
broadcast
(
dim
)
*
x
->
inverse
();
}
};
#define HANDLE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceCompute<NDIM, RDIM>(context); \
ReduceFunctor<DeviceContext, T, NDIM, RDIM, Functor>( \
context.template device_context<DeviceContext>(), *input, output, \
dims, keep_dim); \
}
template
<
typename
DeviceContext
,
typename
T
,
typename
Functor
>
...
...
@@ -120,11 +35,15 @@ class ReduceKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
bool
reduce_all
=
context
.
Attr
<
bool
>
(
"reduce_all"
);
if
(
reduce_all
)
{
// Flatten and reduce 1-D tensor
auto
*
input
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
output
=
context
.
Output
<
Tensor
>
(
"Out"
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
dims
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
);
bool
keep_dim
=
context
.
Attr
<
bool
>
(
"keep_dim"
);
if
(
reduce_all
)
{
// Flatten and reduce 1-D tensor
auto
x
=
EigenVector
<
T
>::
Flatten
(
*
input
);
auto
out
=
EigenScalar
<
T
>::
From
(
*
output
);
auto
&
place
=
...
...
@@ -133,8 +52,8 @@ class ReduceKernel : public framework::OpKernel<T> {
Functor
functor
;
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
else
{
int
ndim
=
context
.
Input
<
Tensor
>
(
"X"
)
->
dims
().
size
();
int
rdim
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
)
.
size
();
int
ndim
=
input
->
dims
().
size
();
int
rdim
=
dims
.
size
();
// comments for accelerating compiling temporarily.
// HANDLE_DIM(6, 5);
// HANDLE_DIM(6, 4);
...
...
@@ -154,48 +73,6 @@ class ReduceKernel : public framework::OpKernel<T> {
HANDLE_DIM
(
1
,
1
);
}
}
private:
template
<
size_t
D
,
size_t
R_D
>
void
ReduceCompute
(
const
framework
::
ExecutionContext
&
context
)
const
{
auto
*
input
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
output
=
context
.
Output
<
Tensor
>
(
"Out"
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
x
=
EigenTensor
<
T
,
D
>::
From
(
*
input
);
auto
x_rank
=
static_cast
<
int
>
(
x
.
dimensions
().
size
());
auto
dims
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
);
auto
reduce_dim
=
Eigen
::
array
<
int
,
R_D
>
();
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
if
(
dims
[
i
]
<
0
)
dims
[
i
]
=
x_rank
+
dims
[
i
];
reduce_dim
[
i
]
=
dims
[
i
];
}
// construct the squeezed output tensor
bool
keep_dim
=
context
.
Attr
<
bool
>
(
"keep_dim"
);
DDim
out_dims
=
output
->
dims
();
if
(
keep_dim
&&
x_rank
>
1
)
{
const
int
kDelFlag
=
-
2
;
auto
dims_vector
=
vectorize
(
out_dims
);
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
dims_vector
[
dims
[
i
]]
=
kDelFlag
;
}
dims_vector
.
erase
(
remove
(
dims_vector
.
begin
(),
dims_vector
.
end
(),
kDelFlag
),
dims_vector
.
end
());
out_dims
=
framework
::
make_ddim
(
dims_vector
);
}
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
Functor
functor
;
if
(
D
==
1
)
{
auto
out
=
EigenScalar
<
T
>::
From
(
*
output
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
else
{
auto
out
=
EigenTensor
<
T
,
(
D
-
R_D
)
>::
From
(
*
output
,
out_dims
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
}
};
template
<
typename
DeviceContext
,
typename
T
,
typename
Functor
>
...
...
@@ -203,12 +80,15 @@ class ReduceGradKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
bool
reduce_all
=
context
.
Attr
<
bool
>
(
"reduce_all"
);
if
(
reduce_all
)
{
auto
dims
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
);
auto
*
input0
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
input1
=
context
.
Input
<
Tensor
>
(
"Out"
);
auto
*
input2
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
output
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
if
(
reduce_all
)
{
auto
x
=
EigenVector
<
T
>::
Flatten
(
*
input0
);
auto
x_reduce
=
EigenVector
<
T
>::
From
(
*
input1
);
auto
x_reduce_grad
=
EigenVector
<
T
>::
From
(
*
input2
);
...
...
@@ -221,74 +101,172 @@ class ReduceGradKernel : public framework::OpKernel<T> {
functor
(
place
,
&
x
,
&
x_reduce
,
&
x_grad
,
&
x_reduce_grad
,
broadcast_dim
,
broadcast_dim
[
0
]);
}
else
{
int
rank
=
context
.
Input
<
Tensor
>
(
"X"
)
->
dims
().
size
();
int
rank
=
input0
->
dims
().
size
();
switch
(
rank
)
{
case
1
:
ReduceGradCompute
<
1
>
(
context
);
ReduceGradFunctor
<
DeviceContext
,
T
,
1
,
Functor
>
(
context
.
template
device_context
<
DeviceContext
>(),
*
input0
,
*
input1
,
*
input2
,
output
,
dims
);
break
;
case
2
:
ReduceGradCompute
<
2
>
(
context
);
ReduceGradFunctor
<
DeviceContext
,
T
,
2
,
Functor
>
(
context
.
template
device_context
<
DeviceContext
>(),
*
input0
,
*
input1
,
*
input2
,
output
,
dims
);
break
;
case
3
:
ReduceGradCompute
<
3
>
(
context
);
ReduceGradFunctor
<
DeviceContext
,
T
,
3
,
Functor
>
(
context
.
template
device_context
<
DeviceContext
>(),
*
input0
,
*
input1
,
*
input2
,
output
,
dims
);
break
;
case
4
:
ReduceGradCompute
<
4
>
(
context
);
ReduceGradFunctor
<
DeviceContext
,
T
,
4
,
Functor
>
(
context
.
template
device_context
<
DeviceContext
>(),
*
input0
,
*
input1
,
*
input2
,
output
,
dims
);
break
;
case
5
:
ReduceGradCompute
<
5
>
(
context
);
ReduceGradFunctor
<
DeviceContext
,
T
,
5
,
Functor
>
(
context
.
template
device_context
<
DeviceContext
>(),
*
input0
,
*
input1
,
*
input2
,
output
,
dims
);
break
;
case
6
:
ReduceGradCompute
<
6
>
(
context
);
ReduceGradFunctor
<
DeviceContext
,
T
,
6
,
Functor
>
(
context
.
template
device_context
<
DeviceContext
>(),
*
input0
,
*
input1
,
*
input2
,
output
,
dims
);
break
;
}
}
}
};
private:
template
<
size_t
D
>
void
ReduceGradCompute
(
const
framework
::
ExecutionContext
&
context
)
const
{
auto
*
input0
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
input1
=
context
.
Input
<
Tensor
>
(
"Out"
);
auto
*
input2
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
output
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
class
ReduceOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
x
=
EigenTensor
<
T
,
D
>::
From
(
*
input0
);
auto
x_grad
=
EigenTensor
<
T
,
D
>::
From
(
*
output
);
auto
x_rank
=
static_cast
<
int
>
(
x
.
dimensions
().
size
());
auto
dims
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
);
auto
x_dims
=
input0
->
dims
();
auto
reduced_dims_v
=
vectorize
(
x_dims
);
Eigen
::
array
<
int
,
D
>
broadcast_dim
;
for
(
size_t
i
=
0
;
i
<
D
;
++
i
)
broadcast_dim
[
i
]
=
1
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of ReduceOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of ReduceOp should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
x_rank
=
x_dims
.
size
();
PADDLE_ENFORCE_LE
(
x_rank
,
6
,
"Tensors with rank at most 6 are supported."
);
auto
dims
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dim"
);
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
if
(
dims
[
i
]
<
0
)
dims
[
i
]
=
x_rank
+
dims
[
i
];
PADDLE_ENFORCE_LT
(
dims
[
i
],
x_rank
,
"The dim should be in the range [-rank(input), rank(input))."
);
}
sort
(
dims
.
begin
(),
dims
.
end
());
bool
reduce_all
=
ctx
->
Attrs
().
Get
<
bool
>
(
"reduce_all"
);
bool
keep_dim
=
ctx
->
Attrs
().
Get
<
bool
>
(
"keep_dim"
);
if
(
reduce_all
)
{
if
(
keep_dim
)
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
std
::
vector
<
int64_t
>
(
x_rank
,
1
)));
else
ctx
->
SetOutputDim
(
"Out"
,
{
1
});
}
else
{
auto
dims_vector
=
vectorize
(
x_dims
);
if
(
keep_dim
)
{
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
dims_vector
[
dims
[
i
]]
=
1
;
}
}
else
{
const
int
kDelFlag
=
-
2
;
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
dims_vector
[
dims
[
i
]]
=
kDelFlag
;
}
dims_vector
.
erase
(
remove
(
dims_vector
.
begin
(),
dims_vector
.
end
(),
kDelFlag
),
dims_vector
.
end
());
}
auto
out_dims
=
framework
::
make_ddim
(
dims_vector
);
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
if
(
dims
[
0
]
!=
0
)
{
// Only pass LoD when not reducing on the first dim.
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
}
}
};
int
broad_cats_times
=
1
;
class
ReduceGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
x_rank
=
x_dims
.
size
();
PADDLE_ENFORCE_LE
(
x_rank
,
6
,
"Tensors with rank at most 6 are supported."
);
auto
dims
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dim"
);
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
++
i
)
{
if
(
dims
[
i
]
<
0
)
dims
[
i
]
=
x_rank
+
dims
[
i
];
reduced_dims_v
[
dims
[
i
]]
=
1
;
broadcast_dim
[
dims
[
i
]]
=
x_dims
[
dims
[
i
]];
broad_cats_times
*=
x_dims
[
dims
[
i
]];
PADDLE_ENFORCE_LT
(
dims
[
i
],
x_rank
,
"The dim should be in the range [-rank(input), rank(input))."
);
}
sort
(
dims
.
begin
(),
dims
.
end
());
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
SetOutputDim
(
x_grad_name
,
x_dims
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
x_grad_name
);
}
}
auto
reduced_dims
=
framework
::
make_ddim
(
reduced_dims_v
);
auto
x_reduce
=
EigenTensor
<
T
,
D
>::
From
(
*
input1
,
reduced_dims
);
auto
x_reduce_grad
=
EigenTensor
<
T
,
D
>::
From
(
*
input2
,
reduced_dims
);
};
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
class
ReduceOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
final
{
AddInput
(
"X"
,
"(Tensor) The input tensor. Tensors with rank at most 6 are "
"supported."
);
AddOutput
(
"Out"
,
"(Tensor) The result tensor."
);
AddAttr
<
std
::
vector
<
int
>>
(
"dim"
,
"(list<int>, default {0}) The dimensions to reduce. "
"Must be in the range [-rank(input), rank(input)). "
"If `dim[i] < 0`, the dims[i] to reduce is `rank + dims[i]`. "
"Note that reducing on the first dim will make the LoD info lost."
)
.
SetDefault
({
0
});
AddAttr
<
bool
>
(
"keep_dim"
,
"(bool, default false) "
"If true, retain the reduced dimension with length 1."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"reduce_all"
,
"(bool, default false) "
"If true, output a scalar reduced along all dimensions."
)
.
SetDefault
(
false
);
AddComment
(
string
::
Sprintf
(
R"DOC(
%s Operator.
Functor
functor
;
functor
(
place
,
&
x
,
&
x_reduce
,
&
x_grad
,
&
x_reduce_grad
,
broadcast_dim
,
broad_cats_times
);
This operator computes the %s of input tensor along the given dimension.
The result tensor has 1 fewer dimension than the input unless keep_dim is true.
If reduce_all is true, just reduce along all dimensions and output a scalar.
)DOC"
,
GetOpType
(),
GetName
()));
}
protected:
virtual
std
::
string
GetName
()
const
=
0
;
virtual
std
::
string
GetOpType
()
const
=
0
;
};
}
// namespace operators
}
// namespace paddle
#define FOR_EACH_KERNEL_FUNCTOR(__macro) \
__macro(reduce_sum, SumFunctor, SumGradFunctor); \
__macro(reduce_mean, MeanFunctor, MeanGradFunctor); \
__macro(reduce_max, MaxFunctor, MaxOrMinGradFunctor); \
__macro(reduce_min, MinFunctor, MaxOrMinGradFunctor); \
__macro(reduce_prod, ProdFunctor, ProdGradFunctor);
namespace
ops
=
paddle
::
operators
;
#define REGISTER_REDUCE_OP(op_name) \
class __##op_name##Maker__ : public ops::ReduceOpMaker { \
protected: \
virtual std::string GetName() const { return #op_name; } \
virtual std::string GetOpType() const { return "Reduce " #op_name; } \
}; \
REGISTER_OPERATOR(op_name, ops::ReduceOp, __##op_name##Maker__, \
paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(op_name##_grad, ops::ReduceGradOp)
paddle/fluid/operators/reduce_op_function.h
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
DDim
=
framework
::
DDim
;
template
<
typename
T
,
size_t
D
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenTensor
=
framework
::
EigenTensor
<
T
,
D
,
MajorType
,
IndexType
>
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenScalar
=
framework
::
EigenScalar
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenVector
=
framework
::
EigenVector
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
DeviceContext
,
typename
T
,
size_t
D
,
size_t
R_D
,
typename
Functor
>
void
ReduceFunctor
(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
output
,
const
std
::
vector
<
int
>&
dims
,
bool
keep_dim
)
{
auto
x
=
EigenTensor
<
T
,
D
>::
From
(
input
);
auto
x_rank
=
static_cast
<
int
>
(
x
.
dimensions
().
size
());
auto
reduce_dim
=
Eigen
::
array
<
int
,
R_D
>
();
std
::
vector
<
int
>
dims_ref
=
dims
;
for
(
size_t
i
=
0
;
i
<
dims_ref
.
size
();
++
i
)
{
if
(
dims_ref
[
i
]
<
0
)
dims_ref
[
i
]
=
x_rank
+
dims_ref
[
i
];
reduce_dim
[
i
]
=
dims_ref
[
i
];
}
// construct the squeezed output tensor
DDim
out_dims
=
output
->
dims
();
if
(
keep_dim
&&
x_rank
>
1
)
{
const
int
kDelFlag
=
-
2
;
auto
dims_vector
=
framework
::
vectorize
(
out_dims
);
for
(
size_t
i
=
0
;
i
<
dims_ref
.
size
();
++
i
)
{
dims_vector
[
dims_ref
[
i
]]
=
kDelFlag
;
}
dims_vector
.
erase
(
remove
(
dims_vector
.
begin
(),
dims_vector
.
end
(),
kDelFlag
),
dims_vector
.
end
());
out_dims
=
framework
::
make_ddim
(
dims_vector
);
}
auto
&
place
=
*
context
.
eigen_device
();
Functor
functor
;
if
(
D
==
1
)
{
auto
out
=
EigenScalar
<
T
>::
From
(
*
output
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
else
{
auto
out
=
EigenTensor
<
T
,
(
D
-
R_D
)
>::
From
(
*
output
,
out_dims
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
}
template
<
typename
DeviceContext
,
typename
T
,
size_t
D
,
typename
Functor
>
void
ReduceGradFunctor
(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input0
,
const
framework
::
Tensor
&
input1
,
const
framework
::
Tensor
&
input2
,
framework
::
Tensor
*
output
,
const
std
::
vector
<
int
>&
dims
)
{
auto
x
=
EigenTensor
<
T
,
D
>::
From
(
input0
);
auto
x_grad
=
EigenTensor
<
T
,
D
>::
From
(
*
output
);
auto
x_rank
=
static_cast
<
int
>
(
x
.
dimensions
().
size
());
auto
x_dims
=
input0
.
dims
();
auto
reduced_dims_v
=
framework
::
vectorize
(
x_dims
);
std
::
vector
<
int
>
dims_ref
=
dims
;
Eigen
::
array
<
int
,
D
>
broadcast_dim
;
for
(
size_t
i
=
0
;
i
<
D
;
++
i
)
broadcast_dim
[
i
]
=
1
;
int
broad_cats_times
=
1
;
for
(
size_t
i
=
0
;
i
<
dims_ref
.
size
();
++
i
)
{
if
(
dims_ref
[
i
]
<
0
)
{
dims_ref
[
i
]
=
x_rank
+
dims_ref
[
i
];
}
reduced_dims_v
[
dims_ref
[
i
]]
=
1
;
broadcast_dim
[
dims_ref
[
i
]]
=
x_dims
[
dims_ref
[
i
]];
broad_cats_times
*=
x_dims
[
dims_ref
[
i
]];
}
auto
reduced_dims
=
framework
::
make_ddim
(
reduced_dims_v
);
auto
x_reduce
=
EigenTensor
<
T
,
D
>::
From
(
input1
,
reduced_dims
);
auto
x_reduce_grad
=
EigenTensor
<
T
,
D
>::
From
(
input2
,
reduced_dims
);
auto
&
place
=
*
context
.
eigen_device
();
Functor
functor
;
functor
(
place
,
&
x
,
&
x_reduce
,
&
x_grad
,
&
x_reduce_grad
,
broadcast_dim
,
broad_cats_times
);
}
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/reduce_prod_op.cc
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_prod_op.h"
REGISTER_REDUCE_OP
(
reduce_prod
);
REGISTER_OP_CPU_KERNEL
(
reduce_prod
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
ProdFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
ProdFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
ProdFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
ProdFunctor
>
);
REGISTER_OP_CPU_KERNEL
(
reduce_prod_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
ProdGradFunctor
>
);
paddle/fluid/operators/reduce_prod_op.cu
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_prod_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_prod
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
ProdFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
ProdFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
ProdFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
ProdFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_prod_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
ProdGradFunctor
>
);
paddle/fluid/operators/reduce_prod_op.h
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_op.h"
namespace
paddle
{
namespace
operators
{
struct
ProdFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
prod
(
dim
);
}
};
struct
ProdGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
)
*
y
->
broadcast
(
dim
)
*
x
->
inverse
();
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/reduce_sum_op.cc
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_sum_op.h"
REGISTER_REDUCE_OP
(
reduce_sum
);
REGISTER_OP_CPU_KERNEL
(
reduce_sum
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
SumFunctor
>
);
REGISTER_OP_CPU_KERNEL
(
reduce_sum_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
SumGradFunctor
>
);
paddle/fluid/operators/reduce_sum_op.cu
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_sum_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_sum
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
SumFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_sum_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
SumGradFunctor
>
);
paddle/fluid/operators/reduce_sum_op.h
0 → 100644
浏览文件 @
24649a78
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_op.h"
namespace
paddle
{
namespace
operators
{
struct
SumFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
y
->
device
(
place
)
=
x
->
sum
(
dim
);
}
};
struct
SumGradFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
DX
*
dx
,
DY
*
dy
,
const
Dim
&
dim
,
int
size
)
{
dx
->
device
(
place
)
=
dy
->
broadcast
(
dim
);
}
};
}
// namespace operators
}
// namespace paddle
python/paddle/fluid/executor.py
浏览文件 @
24649a78
...
...
@@ -170,6 +170,8 @@ def get_program_cache_key(feed, fetch_list):
return
var
.
desc
.
name
()
elif
isinstance
(
var
,
str
):
return
var
elif
isinstance
(
var
,
basestring
):
return
str
(
var
)
else
:
raise
TypeError
(
str
(
var
)
+
" should be Variable or str"
)
...
...
python/paddle/fluid/framework.py
浏览文件 @
24649a78
...
...
@@ -72,6 +72,8 @@ def convert_np_dtype_to_dtype_(np_dtype):
return
core
.
VarDesc
.
VarType
.
INT64
elif
dtype
==
np
.
bool
:
return
core
.
VarDesc
.
VarType
.
BOOL
elif
dtype
==
np
.
uint16
:
return
core
.
VarDesc
.
VarType
.
INT16
elif
dtype
==
np
.
uint8
:
return
core
.
VarDesc
.
VarType
.
UINT8
else
:
...
...
@@ -368,6 +370,13 @@ class Operator(object):
Block. Users can use the build in instructions to describe their neural
network.
"""
OP_WITHOUT_KERNEL_SET
=
{
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'ncclInit'
,
'channel_create'
,
'channel_close'
,
'channel_send'
,
'channel_recv'
,
'select'
}
def
__init__
(
self
,
block
,
...
...
@@ -504,17 +513,13 @@ class Operator(object):
else
:
self
.
desc
.
set_attr
(
attr_name
,
self
.
attrs
[
attr_name
])
self
.
desc
.
check_attrs
()
no_kernel_op_set
=
{
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'ncclInit'
,
'channel_create'
,
'channel_close'
,
'channel_send'
,
'channel_recv'
,
'select'
,
'gen_nccl_id'
}
if
type
not
in
no_kernel_op_set
:
if
self
.
has_kernel
(
type
):
self
.
desc
.
infer_var_type
(
self
.
block
.
desc
)
self
.
desc
.
infer_shape
(
self
.
block
.
desc
)
def
has_kernel
(
self
,
op_type
):
return
op_type
not
in
self
.
OP_WITHOUT_KERNEL_SET
def
to_string
(
self
,
throw_on_error
):
"""
To debug string.
...
...
@@ -742,7 +747,9 @@ class Block(object):
def
var
(
self
,
name
):
if
not
isinstance
(
name
,
basestring
):
raise
TypeError
()
raise
TypeError
(
"var require string as parameter, but get %s instead."
%
(
type
(
name
)))
v
=
self
.
vars
.
get
(
name
,
None
)
if
v
is
None
:
raise
ValueError
(
"var %s not in this block"
%
name
)
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
24649a78
...
...
@@ -434,7 +434,7 @@ def open_files(filenames,
shapes
,
lod_levels
,
dtypes
,
thread_num
,
thread_num
=
1
,
buffer_size
=
None
,
pass_num
=
1
,
for_parallel
=
True
):
...
...
python/paddle/fluid/tests/unittests/benchmark.py
0 → 100644
浏览文件 @
24649a78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
import
unittest
import
time
import
itertools
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
from
op_test
import
OpTest
class
BenchmarkSuite
(
OpTest
):
def
timeit_function
(
self
,
callback
,
iters
,
*
args
,
**
kwargs
):
assert
iters
!=
0
,
"Iters should >= 1"
start
=
time
.
time
()
for
i
in
range
(
iters
):
callback
(
*
args
,
**
kwargs
)
elapse
=
time
.
time
()
-
start
return
elapse
/
iters
def
_assert_cpu_gpu_same
(
self
,
cpu_outs
,
gpu_outs
,
fetch_list
,
atol
):
for
item_cpu_out
,
item_gpu_out
,
variable
in
zip
(
cpu_outs
,
gpu_outs
,
fetch_list
):
# the cpu version is baseline, expect gpu version keep same with cpu version.
expect
=
item_cpu_out
expect_t
=
np
.
array
(
item_cpu_out
)
actual
=
item_gpu_out
actual_t
=
np
.
array
(
item_gpu_out
)
var_name
=
variable
if
isinstance
(
variable
,
basestring
)
else
variable
.
name
self
.
assertTrue
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
),
"Output ("
+
var_name
+
") has diff"
+
str
(
actual_t
)
+
"
\n
"
+
str
(
expect_t
))
self
.
assertListEqual
(
actual
.
lod
(),
expect
.
lod
(),
"Output ("
+
var_name
+
") has different lod"
)
def
_get_input_names
(
self
):
inputs
=
[]
for
name
,
value
in
self
.
inputs
.
iteritems
():
if
isinstance
(
value
,
list
):
inputs
.
extend
([
sub_name
for
sub_name
,
_
in
value
])
inputs
.
append
(
name
)
return
inputs
def
_get_output_names
(
self
):
outputs
=
[]
for
var_name
,
var
in
self
.
outputs
.
iteritems
():
if
isinstance
(
var
,
list
):
for
sub_var_name
,
sub_var
in
var
:
outputs
.
append
(
sub_var_name
)
else
:
outputs
.
append
(
var_name
)
if
len
(
outputs
)
==
0
:
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
outputs
.
append
(
str
(
out_name
))
return
outputs
def
check_output_stability
(
self
,
atol
=
1e-8
):
places
=
self
.
_get_places
()
if
len
(
places
)
<
2
:
return
cpu_outs
,
fetch_list
=
self
.
_calc_output
(
places
[
0
])
gpu_outs
,
_
=
self
.
_calc_output
(
places
[
1
])
self
.
_assert_cpu_gpu_same
(
cpu_outs
,
gpu_outs
,
fetch_list
,
atol
)
def
timeit_output_with_place
(
self
,
place
,
iters
):
return
self
.
timeit_function
(
self
.
calc_output
,
iters
,
place
)
def
timeit_output
(
self
,
iters
=
100
):
places
=
self
.
_get_places
()
elapses
=
[]
for
place
in
places
:
elapses
.
append
(
self
.
timeit_output_with_place
(
place
,
iters
))
for
place
,
elapse
in
zip
(
places
,
elapses
):
print
(
"One pass of ({2}_op) at {0} cost {1}"
.
format
(
str
(
place
),
elapse
,
self
.
op_type
))
def
timeit_grad_with_place
(
self
,
place
,
iters
=
100
):
inputs_to_check
=
self
.
_get_input_names
()
output_names
=
self
.
_get_output_names
()
return
self
.
timeit_function
(
self
.
_get_gradient
,
iters
,
inputs_to_check
,
place
,
output_names
,
no_grad_set
=
None
)
def
timeit_grad
(
self
,
iters
=
100
):
places
=
self
.
_get_places
()
elapses
=
[]
for
place
in
places
:
elapses
.
append
(
self
.
timeit_grad_with_place
(
place
,
iters
))
for
place
,
elapse
in
zip
(
places
,
elapses
):
print
(
"One pass of ({2}_grad_op) at {0} cost {1}"
.
format
(
str
(
place
),
elapse
,
self
.
op_type
))
python/paddle/fluid/tests/unittests/benchmark_sum_op.py
0 → 100644
浏览文件 @
24649a78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
from
benchmark
import
BenchmarkSuite
from
op_test
import
OpTest
# This is a demo op test case for operator benchmarking and high resolution number stability alignment.
class
TestSumOp
(
BenchmarkSuite
):
def
setUp
(
self
):
self
.
op_type
=
"sum"
self
.
customize_testcase
()
self
.
customize_fetch_list
()
def
customize_fetch_list
(
self
):
"""
customize fetch list, configure the wanted variables.
>>> self.fetch_list = ["Out"]
"""
self
.
fetch_list
=
[
"Out"
]
# pass
def
customize_testcase
(
self
):
# a test case
x0
=
np
.
random
.
random
((
300
,
400
)).
astype
(
'float32'
)
x1
=
np
.
random
.
random
((
300
,
400
)).
astype
(
'float32'
)
x2
=
np
.
random
.
random
((
300
,
400
)).
astype
(
'float32'
)
# NOTE: if the output is empty, then it will autofilled by benchmarkSuite.
# only the output dtype is used, the shape, lod and data is computed from input.
self
.
inputs
=
{
"X"
:
[(
"x0"
,
x0
),
(
"x1"
,
x1
),
(
"x2"
,
x2
)]}
self
.
outputs
=
{
"Out"
:
x0
+
x1
+
x2
}
def
test_check_output
(
self
):
"""
compare the output with customized output. In this case,
you should set the correct output by hands.
>>> self.outputs = {"Out": x0 + x1 + x2}
"""
self
.
check_output
(
atol
=
1e-8
)
def
test_output_stability
(
self
):
# compare the cpu gpu output in high resolution.
self
.
check_output_stability
()
def
test_timeit_output
(
self
):
"""
perf the op, time cost will be averged in iters.
output example
>>> One pass of (sum_op) at CPUPlace cost 0.000461330413818
>>> One pass of (sum_op) at CUDAPlace(0) cost 0.000556070804596
"""
self
.
timeit_output
(
iters
=
100
)
def
test_timeit_grad
(
self
):
"""
perf the op gradient, time cost will be averged in iters.
output example
>>> One pass of (sum_grad_op) at CPUPlace cost 0.00279935121536
>>> One pass of (sum_grad_op) at CUDAPlace(0) cost 0.00500632047653
"""
self
.
timeit_grad
(
iters
=
100
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
24649a78
...
...
@@ -15,13 +15,17 @@
import
unittest
import
numpy
as
np
import
random
import
time
import
itertools
import
paddle.fluid.core
as
core
import
collections
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.op
import
Operator
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.framework
import
Program
,
OpProtoHolder
from
paddle.fluid.framework
import
Program
,
OpProtoHolder
,
Variable
from
testsuite
import
create_op
,
set_input
,
append_input_output
,
append_loss_ops
def
randomize_probability
(
batch_size
,
class_num
,
dtype
=
'float32'
):
...
...
@@ -33,73 +37,6 @@ def randomize_probability(batch_size, class_num, dtype='float32'):
return
prob
def
create_op
(
scope
,
op_type
,
inputs
,
outputs
,
attrs
):
kwargs
=
dict
()
op_maker
=
core
.
op_proto_and_checker_maker
op_role_attr_name
=
op_maker
.
kOpRoleAttrName
()
if
op_role_attr_name
not
in
attrs
:
attrs
[
op_role_attr_name
]
=
int
(
op_maker
.
OpRole
.
Forward
)
def
__create_var__
(
name
,
var_name
):
scope
.
var
(
var_name
).
get_tensor
()
kwargs
[
name
].
append
(
var_name
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op_type
):
if
in_name
in
inputs
:
kwargs
[
in_name
]
=
[]
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
in_name
,
sub_in_name
)
else
:
__create_var__
(
in_name
,
in_name
)
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
op_type
):
if
out_name
in
outputs
:
kwargs
[
out_name
]
=
[]
if
out_dup
:
sub_out
=
outputs
[
out_name
]
for
item
in
sub_out
:
sub_out_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
out_name
,
sub_out_name
)
else
:
__create_var__
(
out_name
,
out_name
)
for
attr_name
in
Operator
.
get_op_attr_names
(
op_type
):
if
attr_name
in
attrs
:
kwargs
[
attr_name
]
=
attrs
[
attr_name
]
return
Operator
(
op_type
,
**
kwargs
)
def
set_input
(
scope
,
op
,
inputs
,
place
):
def
__set_input__
(
var_name
,
var
):
if
isinstance
(
var
,
tuple
)
or
isinstance
(
var
,
np
.
ndarray
):
tensor
=
scope
.
find_var
(
var_name
).
get_tensor
()
if
isinstance
(
var
,
tuple
):
tensor
.
set_lod
(
var
[
1
])
var
=
var
[
0
]
tensor
.
set_dims
(
var
.
shape
)
tensor
.
set
(
var
,
place
)
elif
isinstance
(
var
,
float
):
scope
.
find_var
(
var_name
).
set_float
(
var
)
elif
isinstance
(
var
,
int
):
scope
.
find_var
(
var_name
).
set_int
(
var
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op
.
type
()):
if
in_name
in
inputs
:
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
sub_in_val
=
item
[
0
],
item
[
1
]
__set_input__
(
sub_in_name
,
sub_in_val
)
else
:
__set_input__
(
in_name
,
inputs
[
in_name
])
def
get_numeric_gradient
(
place
,
scope
,
op
,
...
...
@@ -173,54 +110,15 @@ def get_numeric_gradient(place,
return
gradient_flat
.
reshape
(
tensor_to_check
.
get_dims
())
def
append_input_output
(
block
,
op_proto
,
np_list
,
is_input
):
'''Insert VarDesc and generate Python variable instance'''
proto_list
=
op_proto
.
inputs
if
is_input
else
op_proto
.
outputs
def
create_var
(
block
,
name
,
np_list
,
var_proto
):
if
name
not
in
np_list
:
assert
var_proto
.
intermediate
,
"{} not found"
.
format
(
name
)
shape
=
None
lod_level
=
None
else
:
np_value
=
np_list
[
name
]
if
isinstance
(
np_value
,
tuple
):
shape
=
list
(
np_value
[
0
].
shape
)
lod_level
=
len
(
np_value
[
1
])
else
:
shape
=
list
(
np_value
.
shape
)
lod_level
=
0
return
block
.
create_var
(
dtype
=
"float32"
,
shape
=
shape
,
lod_level
=
lod_level
,
name
=
name
)
var_dict
=
{}
for
var_proto
in
proto_list
:
var_name
=
str
(
var_proto
.
name
)
if
is_input
:
if
(
var_name
not
in
np_list
)
and
var_proto
.
dispensable
:
continue
assert
(
var_name
in
np_list
)
or
(
var_proto
.
dispensable
),
\
"Missing {} as input"
.
format
(
var_name
)
if
var_proto
.
duplicable
:
assert
isinstance
(
np_list
[
var_name
],
list
),
\
"Duplicable {} should be set as list"
.
format
(
var_name
)
var_list
=
[]
for
(
name
,
np_value
)
in
np_list
[
var_name
]:
var_list
.
append
(
create_var
(
block
,
name
,
{
name
:
np_value
},
var_proto
))
var_dict
[
var_name
]
=
var_list
else
:
var_dict
[
var_name
]
=
create_var
(
block
,
var_name
,
np_list
,
var_proto
)
return
var_dict
class
OpTest
(
unittest
.
TestCase
):
@
classmethod
def
setUpClass
(
cls
):
'''Fix random seeds to remove randomness from tests'''
cls
.
_np_rand_state
=
np
.
random
.
get_state
()
cls
.
_py_rand_state
=
random
.
getstate
()
cls
.
call_once
=
False
cls
.
dtype
=
"float32"
cls
.
outputs
=
{}
np
.
random
.
seed
(
123
)
random
.
seed
(
124
)
...
...
@@ -231,6 +129,31 @@ class OpTest(unittest.TestCase):
np
.
random
.
set_state
(
cls
.
_np_rand_state
)
random
.
setstate
(
cls
.
_py_rand_state
)
def
try_call_once
(
self
,
data_type
):
if
not
self
.
call_once
:
self
.
call_once
=
True
self
.
dtype
=
data_type
def
infer_dtype_from_inputs_outputs
(
self
,
inputs
,
outputs
):
def
infer_dtype
(
numpy_dict
):
assert
isinstance
(
numpy_dict
,
dict
),
"self.inputs, self.outputs must be numpy_dict"
for
var_name
,
var_value
in
numpy_dict
.
iteritems
():
if
isinstance
(
var_value
,
(
np
.
ndarray
,
np
.
generic
)):
self
.
try_call_once
(
var_value
.
dtype
)
elif
isinstance
(
var_value
,
(
list
,
tuple
)):
# the case of self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
if
len
(
var_value
)
>
1
and
isinstance
(
var_value
[
1
],
(
np
.
ndarray
,
np
.
generic
)):
instance
=
var_value
[
1
]
self
.
try_call_once
(
instance
[
1
].
dtype
)
else
:
self
.
try_call_once
(
"float32"
)
infer_dtype
(
inputs
)
infer_dtype
(
outputs
)
def
feed_var
(
self
,
input_vars
,
place
):
feed_map
=
{}
for
var_name
in
input_vars
:
...
...
@@ -254,18 +177,14 @@ class OpTest(unittest.TestCase):
return
feed_map
def
calc_output
(
self
,
place
):
outs
,
_
=
self
.
_calc_output
(
place
)
return
outs
def
_calc_output
(
self
,
place
):
def
_append_ops
(
self
,
block
):
op_proto
=
OpProtoHolder
.
instance
().
get_op_proto
(
self
.
op_type
)
program
=
Program
(
)
block
=
program
.
global_block
()
inputs
=
append_input_output
(
block
,
op_proto
,
self
.
inputs
,
True
)
outputs
=
append_input_output
(
block
,
op_proto
,
self
.
outputs
,
Fals
e
)
"infer datatype from inputs and outputs for this test case"
self
.
infer_dtype_from_inputs_outputs
(
self
.
inputs
,
self
.
outputs
)
inputs
=
append_input_output
(
block
,
op_proto
,
self
.
inputs
,
True
,
self
.
dtype
)
outputs
=
append_input_output
(
block
,
op_proto
,
self
.
outputs
,
False
,
self
.
dtyp
e
)
op
=
block
.
append_op
(
type
=
self
.
op_type
,
inputs
=
inputs
,
...
...
@@ -275,19 +194,65 @@ class OpTest(unittest.TestCase):
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
fetch_list
=
[]
def
_get_io_vars
(
self
,
block
,
numpy_inputs
):
inputs
=
{}
for
name
,
value
in
numpy_inputs
.
iteritems
():
if
isinstance
(
value
,
list
):
var_list
=
[
block
.
var
(
sub_name
)
for
sub_name
,
sub_value
in
value
]
inputs
[
name
]
=
var_list
else
:
inputs
[
name
]
=
block
.
var
(
name
)
return
inputs
def
_get_inputs
(
self
,
block
):
return
self
.
_get_io_vars
(
block
,
self
.
inputs
)
def
_get_outputs
(
self
,
block
):
return
self
.
_get_io_vars
(
block
,
self
.
outputs
)
def
calc_output
(
self
,
place
):
outs
,
_
=
self
.
_calc_output
(
place
)
return
outs
def
_calc_output
(
self
,
place
,
parallel
=
False
):
program
=
Program
()
block
=
program
.
global_block
()
self
.
_append_ops
(
block
)
inputs
=
self
.
_get_inputs
(
block
)
outputs
=
self
.
_get_outputs
(
block
)
feed_map
=
self
.
feed_var
(
inputs
,
place
)
if
parallel
:
use_cuda
=
False
if
isinstance
(
place
,
fluid
.
CUDAPlace
(
0
)):
use_cuda
=
True
executor
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
loss_name
=
loss
.
name
,
main_program
=
program
)
else
:
executor
=
Executor
(
place
)
fetch_list
=
getattr
(
self
,
"fetch_list"
,
[])
# if the fetch_list is customized by user, we use it directly.
# if not, fill the fetch_list by the user configured outputs in test.
if
len
(
fetch_list
)
==
0
:
for
var_name
,
var
in
outputs
.
iteritems
():
if
var_name
in
self
.
outputs
:
if
isinstance
(
var
,
list
):
for
v
in
var
:
fetch_list
.
append
(
v
)
else
:
fetch_list
.
append
(
var
)
feed_map
=
self
.
feed_var
(
inputs
,
place
)
exe
=
Executor
(
place
)
outs
=
exe
.
run
(
program
,
# if the fetch_list still empty, fill the fetch_list by the operator output.
if
len
(
fetch_list
)
==
0
:
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
fetch_list
.
append
(
str
(
out_name
))
# fetch_list = map(block.var, fetch_list)
if
not
isinstance
(
fetch_list
[
0
],
Variable
):
fetch_list
=
map
(
block
.
var
,
fetch_list
)
outs
=
executor
.
run
(
program
,
feed
=
feed_map
,
fetch_list
=
fetch_list
,
return_numpy
=
False
)
...
...
@@ -346,17 +311,19 @@ class OpTest(unittest.TestCase):
"Output ("
+
out_name
+
") has different lod at "
+
str
(
place
))
def
check_output
(
self
,
atol
=
1e-5
):
places
=
[
core
.
CPUPlace
()]
def
_get_places
(
self
):
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
):
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
def
check_output
(
self
,
atol
=
1e-5
):
places
=
self
.
_get_places
()
for
place
in
places
:
self
.
check_output_with_place
(
place
,
atol
)
def
check_output_customized
(
self
,
checker
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
):
places
.
append
(
core
.
CUDAPlace
(
0
))
places
=
self
.
_get_places
()
for
place
in
places
:
outs
=
self
.
calc_output
(
place
)
outs
=
[
np
.
array
(
out
)
for
out
in
outs
]
...
...
@@ -389,9 +356,7 @@ class OpTest(unittest.TestCase):
in_place
=
False
,
max_relative_error
=
0.005
,
user_defined_grads
=
None
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
):
places
.
append
(
core
.
CUDAPlace
(
0
))
places
=
self
.
_get_places
()
for
place
in
places
:
self
.
check_grad_with_place
(
place
,
inputs_to_check
,
output_names
,
no_grad_set
,
numeric_grad_delta
,
...
...
@@ -438,35 +403,6 @@ class OpTest(unittest.TestCase):
max_relative_error
,
"Gradient Check On %s"
%
str
(
place
))
@
staticmethod
def
_create_var_descs_
(
block
,
var_dict
):
# FIXME: Try unify with `append_input_output`
for
param_name
in
var_dict
:
var
=
var_dict
[
param_name
]
if
not
isinstance
(
var
,
list
)
and
not
isinstance
(
var
,
tuple
):
var
=
[(
param_name
,
var
,
None
)]
if
not
isinstance
(
var
[
0
],
list
)
and
not
isinstance
(
var
[
0
],
tuple
):
var
=
[(
param_name
,
var
[
0
],
var
[
1
])]
for
i
,
item
in
enumerate
(
var
):
if
not
isinstance
(
item
[
0
],
basestring
):
item
=
[[
param_name
]
+
list
(
item
)]
if
len
(
item
)
==
2
:
if
isinstance
(
item
[
1
],
tuple
):
var
[
i
]
=
[
item
[
0
],
item
[
1
][
0
],
item
[
1
][
1
]]
else
:
# only set var name and value, set lod to None
var
[
i
]
=
list
(
item
)
+
[
None
]
var_descs
=
[(
block
.
create_var
(
name
=
name
,
shape
=
each
.
shape
,
dtype
=
each
.
dtype
),
each
,
lod
)
for
name
,
each
,
lod
in
var
]
yield
param_name
,
var_descs
@
staticmethod
def
_merge_list
(
iterable
):
return
reduce
(
lambda
a
,
b
:
list
(
a
)
+
list
(
b
),
iterable
,
[])
@
staticmethod
def
_numpy_to_lod_tensor
(
np_value
,
lod
,
place
):
tensor
=
core
.
LoDTensor
()
...
...
@@ -497,82 +433,30 @@ class OpTest(unittest.TestCase):
input
.
dtype
=
np
.
uint16
return
input
def
_get_gradient
(
self
,
input_to_check
,
place
,
output_names
,
no_grad_set
):
def
_get_gradient
(
self
,
input_to_check
,
place
,
output_names
,
no_grad_set
,
parallel
=
False
):
prog
=
Program
()
block
=
prog
.
global_block
()
inputs_with_np
=
{
key
:
value
for
(
key
,
value
)
in
OpTest
.
_create_var_descs_
(
block
,
getattr
(
self
,
'inputs'
,
{}))
}
outputs_with_np
=
{
key
:
val
for
(
key
,
val
)
in
OpTest
.
_create_var_descs_
(
block
,
getattr
(
self
,
'outputs'
,
{}))
}
inputs
=
{
k
:
[
item
[
0
]
for
item
in
inputs_with_np
[
k
]]
for
k
in
inputs_with_np
}
outputs
=
{
k
:
[
item
[
0
]
for
item
in
outputs_with_np
[
k
]]
for
k
in
outputs_with_np
}
op
=
block
.
append_op
(
type
=
self
.
op_type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
getattr
(
self
,
'attrs'
,
{}))
# infer variable type and infer shape in compile-time
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
mean_inputs
=
map
(
block
.
var
,
output_names
)
if
len
(
mean_inputs
)
==
1
:
loss
=
block
.
create_var
(
dtype
=
mean_inputs
[
0
].
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
mean_inputs
},
outputs
=
{
"Out"
:
loss
},
type
=
'mean'
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
else
:
avg_sum
=
[]
for
cur_loss
in
mean_inputs
:
cur_avg_loss
=
block
.
create_var
(
dtype
=
cur_loss
.
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
[
cur_loss
]},
outputs
=
{
"Out"
:
[
cur_avg_loss
]},
type
=
"mean"
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
avg_sum
.
append
(
cur_avg_loss
)
loss_sum
=
block
.
create_var
(
dtype
=
avg_sum
[
0
].
dtype
,
shape
=
[
1
])
op_sum
=
block
.
append_op
(
inputs
=
{
"X"
:
avg_sum
},
outputs
=
{
"Out"
:
loss_sum
},
type
=
'sum'
)
op_sum
.
desc
.
infer_var_type
(
block
.
desc
)
op_sum
.
desc
.
infer_shape
(
block
.
desc
)
loss
=
block
.
create_var
(
dtype
=
loss_sum
.
dtype
,
shape
=
[
1
])
op_loss
=
block
.
append_op
(
inputs
=
{
"X"
:
loss_sum
},
outputs
=
{
"Out"
:
loss
},
type
=
'scale'
,
attrs
=
{
'scale'
:
1.0
/
float
(
len
(
avg_sum
))})
op_loss
.
desc
.
infer_var_type
(
block
.
desc
)
op_loss
.
desc
.
infer_shape
(
block
.
desc
)
self
.
_append_ops
(
block
)
loss
=
append_loss_ops
(
block
,
output_names
)
param_grad_list
=
append_backward
(
loss
=
loss
,
parameter_list
=
input_to_check
,
no_grad_set
=
no_grad_set
)
feed_dict
=
{
item
[
0
].
name
:
OpTest
.
_numpy_to_lod_tensor
(
item
[
1
],
item
[
2
],
place
)
for
p_name
in
inputs_with_np
for
item
in
inputs_with_np
[
p_name
]
}
inputs
=
self
.
_get_inputs
(
block
)
feed_dict
=
self
.
feed_var
(
inputs
,
place
)
fetch_list
=
[
g
for
p
,
g
in
param_grad_list
]
if
parallel
:
use_cuda
=
False
if
isinstance
(
place
,
fluid
.
CUDAPlace
(
0
)):
use_cuda
=
True
executor
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
loss_name
=
loss
.
name
,
main_program
=
program
)
else
:
executor
=
Executor
(
place
)
return
map
(
np
.
array
,
executor
.
run
(
prog
,
feed_dict
,
fetch_list
,
...
...
python/paddle/fluid/tests/unittests/test_lstm_op.py
浏览文件 @
24649a78
...
...
@@ -194,107 +194,104 @@ class TestLstmOp(OpTest):
[
'Input'
,
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
)
class
TestLstmOpHasInitial
(
TestLstmOp
):
def
set_argument
(
self
):
self
.
lod
=
[[
0
,
2
,
5
,
7
]]
self
.
D
=
16
self
.
act_gate
=
'sigmoid'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
has_initial_state
=
True
self
.
is_reverse
=
True
self
.
use_peepholes
=
True
def
test_check_grad
(
self
):
# TODO(qingqing) remove folowing lines after the check_grad is refined.
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
,
'C0'
],
[
'Hidden'
],
max_relative_error
=
5e-4
)
def
test_check_grad_ingore_bias
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Bias'
))
def
test_check_grad_ingore_weight
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Weight'
))
def
test_check_grad_ingore_input
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Input'
))
def
test_check_grad_ingore_h0
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'C0'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'H0'
))
def
test_check_grad_ingore_c0
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'C0'
))
class
TestLstmOpRerverse
(
TestLstmOp
):
def
set_argument
(
self
):
self
.
lod
=
[[
0
,
2
,
5
,
7
]]
self
.
D
=
16
self
.
act_gate
=
'sigmoid'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
has_initial_state
=
False
self
.
is_reverse
=
True
self
.
use_peepholes
=
True
class
TestLstmOpNotUsePeepholes
(
TestLstmOp
):
def
set_argument
(
self
):
self
.
lod
=
[[
0
,
2
,
5
,
7
]]
self
.
D
=
16
self
.
act_gate
=
'sigmoid'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
has_initial_state
=
False
self
.
is_reverse
=
True
self
.
use_peepholes
=
False
# class TestLstmOpHasInitial(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.D = 16
# self.act_gate = 'sigmoid'
# self.act_cell = 'tanh'
# self.act_cand = 'tanh'
# self.has_initial_state = True
# self.is_reverse = True
# self.use_peepholes = True
# def test_check_grad(self):
# # TODO(qingqing) remove folowing lines after the check_grad is refined.
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight', 'Bias', 'H0', 'C0'], ['Hidden'],
# max_relative_error=5e-4)
# def test_check_grad_ingore_bias(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('Bias'))
# def test_check_grad_ingore_weight(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Bias'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('Weight'))
# def test_check_grad_ingore_input(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Weight', 'Bias'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('Input'))
# def test_check_grad_ingore_h0(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight', 'Bias', 'C0'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('H0'))
# def test_check_grad_ingore_c0(self):
# N = len(self.lod[0]) - 1
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
# self.check_grad(
# ['Input', 'Weight', 'Bias', 'H0'], ['Hidden'],
# max_relative_error=5e-4,
# no_grad_set=set('C0'))
# class TestLstmOpRerverse(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.D = 16
# self.act_gate = 'sigmoid'
# self.act_cell = 'tanh'
# self.act_cand = 'tanh'
# self.has_initial_state = False
# self.is_reverse = True
# self.use_peepholes = True
# class TestLstmOpNotUsePeepholes(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.D = 16
# self.act_gate = 'sigmoid'
# self.act_cell = 'tanh'
# self.act_cand = 'tanh'
# self.has_initial_state = False
# self.is_reverse = True
# self.use_peepholes = False
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/testsuite.py
0 → 100644
浏览文件 @
24649a78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
def
as_lodtensor
(
np_array
,
lod
,
place
):
tensor
=
core
.
LoDTensor
()
tensor
.
set
(
np_value
,
place
)
if
lod
is
not
None
:
tensor
.
set_lod
(
lod
)
return
tensor
def
create_op
(
scope
,
op_type
,
inputs
,
outputs
,
attrs
):
kwargs
=
dict
()
op_maker
=
core
.
op_proto_and_checker_maker
op_role_attr_name
=
op_maker
.
kOpRoleAttrName
()
if
op_role_attr_name
not
in
attrs
:
attrs
[
op_role_attr_name
]
=
int
(
op_maker
.
OpRole
.
Forward
)
def
__create_var__
(
name
,
var_name
):
scope
.
var
(
var_name
).
get_tensor
()
kwargs
[
name
].
append
(
var_name
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op_type
):
if
in_name
in
inputs
:
kwargs
[
in_name
]
=
[]
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
in_name
,
sub_in_name
)
else
:
__create_var__
(
in_name
,
in_name
)
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
op_type
):
if
out_name
in
outputs
:
kwargs
[
out_name
]
=
[]
if
out_dup
:
sub_out
=
outputs
[
out_name
]
for
item
in
sub_out
:
sub_out_name
,
_
=
item
[
0
],
item
[
1
]
__create_var__
(
out_name
,
sub_out_name
)
else
:
__create_var__
(
out_name
,
out_name
)
for
attr_name
in
Operator
.
get_op_attr_names
(
op_type
):
if
attr_name
in
attrs
:
kwargs
[
attr_name
]
=
attrs
[
attr_name
]
return
Operator
(
op_type
,
**
kwargs
)
def
set_input
(
scope
,
op
,
inputs
,
place
):
def
__set_input__
(
var_name
,
var
):
if
isinstance
(
var
,
tuple
)
or
isinstance
(
var
,
np
.
ndarray
):
tensor
=
scope
.
find_var
(
var_name
).
get_tensor
()
if
isinstance
(
var
,
tuple
):
tensor
.
set_lod
(
var
[
1
])
var
=
var
[
0
]
tensor
.
set_dims
(
var
.
shape
)
tensor
.
set
(
var
,
place
)
elif
isinstance
(
var
,
float
):
scope
.
find_var
(
var_name
).
set_float
(
var
)
elif
isinstance
(
var
,
int
):
scope
.
find_var
(
var_name
).
set_int
(
var
)
for
in_name
,
in_dup
in
Operator
.
get_op_inputs
(
op
.
type
()):
if
in_name
in
inputs
:
if
in_dup
:
sub_in
=
inputs
[
in_name
]
for
item
in
sub_in
:
sub_in_name
,
sub_in_val
=
item
[
0
],
item
[
1
]
__set_input__
(
sub_in_name
,
sub_in_val
)
else
:
__set_input__
(
in_name
,
inputs
[
in_name
])
def
append_input_output
(
block
,
op_proto
,
np_list
,
is_input
,
dtype
):
'''Insert VarDesc and generate Python variable instance'''
proto_list
=
op_proto
.
inputs
if
is_input
else
op_proto
.
outputs
def
create_var
(
block
,
name
,
np_list
,
var_proto
):
dtype
=
None
shape
=
None
lod_level
=
None
if
name
not
in
np_list
:
assert
var_proto
.
intermediate
,
"{} not found"
.
format
(
name
)
else
:
np_value
=
np_list
[
name
]
if
isinstance
(
np_value
,
tuple
):
dtype
=
np_value
[
0
].
dtype
# output shape, lod should be infered from input.
if
is_input
:
shape
=
list
(
np_value
[
0
].
shape
)
lod_level
=
len
(
np_value
[
1
])
else
:
dtype
=
np_value
.
dtype
if
is_input
:
shape
=
list
(
np_value
.
shape
)
lod_level
=
0
return
block
.
create_var
(
dtype
=
dtype
,
shape
=
shape
,
lod_level
=
lod_level
,
name
=
name
)
var_dict
=
{}
for
var_proto
in
proto_list
:
var_name
=
str
(
var_proto
.
name
)
if
is_input
:
if
(
var_name
not
in
np_list
)
and
var_proto
.
dispensable
:
continue
assert
(
var_name
in
np_list
)
or
(
var_proto
.
dispensable
),
\
"Missing {} as input"
.
format
(
var_name
)
if
var_proto
.
duplicable
:
assert
isinstance
(
np_list
[
var_name
],
list
),
\
"Duplicable {} should be set as list"
.
format
(
var_name
)
var_list
=
[]
for
(
name
,
np_value
)
in
np_list
[
var_name
]:
var_list
.
append
(
create_var
(
block
,
name
,
{
name
:
np_value
},
var_proto
))
var_dict
[
var_name
]
=
var_list
else
:
var_dict
[
var_name
]
=
create_var
(
block
,
var_name
,
np_list
,
var_proto
)
return
var_dict
def
append_loss_ops
(
block
,
output_names
):
mean_inputs
=
map
(
block
.
var
,
output_names
)
# for item in mean_inputs:
# print(item)
# print("Item", item.dtype)
if
len
(
mean_inputs
)
==
1
:
loss
=
block
.
create_var
(
dtype
=
mean_inputs
[
0
].
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
mean_inputs
},
outputs
=
{
"Out"
:
loss
},
type
=
'mean'
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
else
:
avg_sum
=
[]
for
cur_loss
in
mean_inputs
:
cur_avg_loss
=
block
.
create_var
(
dtype
=
cur_loss
.
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
inputs
=
{
"X"
:
[
cur_loss
]},
outputs
=
{
"Out"
:
[
cur_avg_loss
]},
type
=
"mean"
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
avg_sum
.
append
(
cur_avg_loss
)
loss_sum
=
block
.
create_var
(
dtype
=
avg_sum
[
0
].
dtype
,
shape
=
[
1
])
op_sum
=
block
.
append_op
(
inputs
=
{
"X"
:
avg_sum
},
outputs
=
{
"Out"
:
loss_sum
},
type
=
'sum'
)
op_sum
.
desc
.
infer_var_type
(
block
.
desc
)
op_sum
.
desc
.
infer_shape
(
block
.
desc
)
loss
=
block
.
create_var
(
dtype
=
loss_sum
.
dtype
,
shape
=
[
1
])
op_loss
=
block
.
append_op
(
inputs
=
{
"X"
:
loss_sum
},
outputs
=
{
"Out"
:
loss
},
type
=
'scale'
,
attrs
=
{
'scale'
:
1.0
/
float
(
len
(
avg_sum
))})
op_loss
.
desc
.
infer_var_type
(
block
.
desc
)
op_loss
.
desc
.
infer_shape
(
block
.
desc
)
return
loss
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录