Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
cdd55dbc
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cdd55dbc
编写于
6月 13, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add-merge-splited-ids
上级
d6c8d267
14e83376
变更
21
隐藏空白更改
内联
并排
Showing
21 changed file
with
305 addition
and
269 deletion
+305
-269
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+4
-3
benchmark/fluid/models/resnet.py
benchmark/fluid/models/resnet.py
+6
-3
doc/fluid/api/detection.rst
doc/fluid/api/detection.rst
+0
-0
doc/fluid/api/gen_doc.sh
doc/fluid/api/gen_doc.sh
+1
-1
doc/fluid/api/io.rst
doc/fluid/api/io.rst
+0
-18
doc/fluid/api/layers.rst
doc/fluid/api/layers.rst
+51
-49
doc/fluid/api/optimizer.rst
doc/fluid/api/optimizer.rst
+0
-7
doc/fluid/api/profiler.rst
doc/fluid/api/profiler.rst
+0
-12
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-1
paddle/fluid/framework/reader.h
paddle/fluid/framework/reader.h
+5
-4
paddle/fluid/operators/batch_norm_mkldnn_op.cc
paddle/fluid/operators/batch_norm_mkldnn_op.cc
+180
-146
paddle/fluid/operators/batch_norm_op.cc
paddle/fluid/operators/batch_norm_op.cc
+13
-11
paddle/fluid/operators/reader/create_batch_reader_op.cc
paddle/fluid/operators/reader/create_batch_reader_op.cc
+1
-1
paddle/fluid/operators/reader/create_custom_reader_op.cc
paddle/fluid/operators/reader/create_custom_reader_op.cc
+2
-1
paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
.../fluid/operators/reader/create_double_buffer_reader_op.cc
+2
-1
paddle/fluid/operators/reader/create_multi_pass_reader_op.cc
paddle/fluid/operators/reader/create_multi_pass_reader_op.cc
+1
-1
paddle/fluid/operators/reader/create_shuffle_reader_op.cc
paddle/fluid/operators/reader/create_shuffle_reader_op.cc
+2
-1
paddle/fluid/operators/reader/create_threaded_reader_op.cc
paddle/fluid/operators/reader/create_threaded_reader_op.cc
+2
-1
paddle/fluid/platform/cpu_info.cc
paddle/fluid/platform/cpu_info.cc
+9
-1
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+1
-1
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+24
-6
未找到文件。
benchmark/fluid/fluid_benchmark.py
浏览文件 @
cdd55dbc
...
@@ -180,7 +180,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
...
@@ -180,7 +180,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
print
(
"Pass: %d, Loss: %f"
%
(
pass_id
,
np
.
mean
(
train_losses
))),
print
(
"Pass: %d, Loss: %f"
%
(
pass_id
,
np
.
mean
(
train_losses
))),
# evaluation
# evaluation
if
not
args
.
no_test
and
batch_acc
:
if
not
args
.
no_test
and
batch_acc
and
not
args
.
use_reader_op
:
pass_test_acc
=
test
(
exe
,
infer_prog
,
test_reader
,
feeder
,
pass_test_acc
=
test
(
exe
,
infer_prog
,
test_reader
,
feeder
,
batch_acc
)
batch_acc
)
print
(
", Test Accuracy: %f"
%
pass_test_acc
)
print
(
", Test Accuracy: %f"
%
pass_test_acc
)
...
@@ -277,11 +277,12 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
...
@@ -277,11 +277,12 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
batch_id
+=
1
batch_id
+=
1
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
if
not
args
.
no_test
and
batch_acc
:
if
not
args
.
no_test
and
batch_acc
and
not
args
.
use_reader_op
:
# we have not implement record io for test
# skip test when use args.use_reader_op
test_acc
=
test
(
startup_exe
,
infer_prog
,
test_reader
,
feeder
,
test_acc
=
test
(
startup_exe
,
infer_prog
,
test_reader
,
feeder
,
batch_acc
)
batch_acc
)
print
(
"Pass: %d, Test Accuracy: %f
\n
"
%
(
pass_id
,
test_acc
))
print
(
"Pass: %d, Test Accuracy: %f
\n
"
%
(
pass_id
,
test_acc
))
exit
(
0
)
def
print_arguments
(
args
):
def
print_arguments
(
args
):
...
...
benchmark/fluid/models/resnet.py
浏览文件 @
cdd55dbc
...
@@ -199,7 +199,10 @@ def get_model(args):
...
@@ -199,7 +199,10 @@ def get_model(args):
batched_train_reader
=
paddle
.
batch
(
batched_train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
reader
.
shuffle
(
train_reader
,
buf_size
=
5120
),
train_reader
,
buf_size
=
5120
),
batch_size
=
args
.
batch_size
*
args
.
gpus
)
batch_size
=
args
.
batch_size
*
args
.
gpus
,
batched_test_reader
=
paddle
.
batch
(
train_reader
,
batch_size
=
args
.
batch_size
)
drop_last
=
True
)
batched_test_reader
=
paddle
.
batch
(
train_reader
,
batch_size
=
args
.
batch_size
,
drop_last
=
True
)
return
avg_cost
,
inference_program
,
optimizer
,
batched_train_reader
,
batched_test_reader
,
batch_acc
return
avg_cost
,
inference_program
,
optimizer
,
batched_train_reader
,
\
batched_test_reader
,
batch_acc
doc/fluid/api/detection.rst
0 → 100644
浏览文件 @
cdd55dbc
doc/fluid/api/gen_doc.sh
浏览文件 @
cdd55dbc
#!/bin/bash
#!/bin/bash
python gen_doc.py layers
--submodules
control_flow device io nn ops tensor
>
layers.rst
python gen_doc.py layers
--submodules
control_flow device io nn ops tensor
detection
>
layers.rst
for
module
in
data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
for
module
in
data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
do
do
...
...
doc/fluid/api/io.rst
浏览文件 @
cdd55dbc
...
@@ -59,21 +59,3 @@ get_inference_program
...
@@ -59,21 +59,3 @@ get_inference_program
.. autofunction:: paddle.fluid.io.get_inference_program
.. autofunction:: paddle.fluid.io.get_inference_program
:noindex:
:noindex:
save_checkpoint
---------------
.. autofunction:: paddle.fluid.io.save_checkpoint
:noindex:
load_checkpoint
---------------
.. autofunction:: paddle.fluid.io.load_checkpoint
:noindex:
clean_checkpoint
----------------
.. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex:
doc/fluid/api/layers.rst
浏览文件 @
cdd55dbc
...
@@ -181,12 +181,6 @@ Print
...
@@ -181,12 +181,6 @@ Print
.. autofunction:: paddle.fluid.layers.Print
.. autofunction:: paddle.fluid.layers.Print
:noindex:
:noindex:
is_empty
--------
.. autofunction:: paddle.fluid.layers.is_empty
:noindex:
device
device
======
======
...
@@ -261,19 +255,6 @@ double_buffer
...
@@ -261,19 +255,6 @@ double_buffer
.. autofunction:: paddle.fluid.layers.double_buffer
.. autofunction:: paddle.fluid.layers.double_buffer
:noindex:
:noindex:
random_data_generator
---------------------
.. autofunction:: paddle.fluid.layers.random_data_generator
:noindex:
Preprocessor
------------
.. autoclass:: paddle.fluid.layers.Preprocessor
:members:
:noindex:
nn
nn
==
==
...
@@ -613,30 +594,6 @@ roi_pool
...
@@ -613,30 +594,6 @@ roi_pool
.. autofunction:: paddle.fluid.layers.roi_pool
.. autofunction:: paddle.fluid.layers.roi_pool
:noindex:
:noindex:
dice_loss
---------
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
resize_bilinear
---------------
.. autofunction:: paddle.fluid.layers.resize_bilinear
:noindex:
gather
------
.. autofunction:: paddle.fluid.layers.gather
:noindex:
random_crop
-----------
.. autofunction:: paddle.fluid.layers.random_crop
:noindex:
ops
ops
===
===
...
@@ -784,12 +741,6 @@ sum
...
@@ -784,12 +741,6 @@ sum
.. autofunction:: paddle.fluid.layers.sum
.. autofunction:: paddle.fluid.layers.sum
:noindex:
:noindex:
shape
-----
.. autofunction:: paddle.fluid.layers.shape
:noindex:
sigmoid
sigmoid
-------
-------
...
@@ -1039,3 +990,54 @@ zeros
...
@@ -1039,3 +990,54 @@ zeros
.. autofunction:: paddle.fluid.layers.zeros
.. autofunction:: paddle.fluid.layers.zeros
:noindex:
:noindex:
detection
=========
multi_box_head
--------------
.. autofunction:: paddle.fluid.layers.multi_box_head
:noindex:
bipartite_match
---------------
.. autofunction:: paddle.fluid.layers.bipartite_match
:noindex:
target_assign
-------------
.. autofunction:: paddle.fluid.layers.target_assign
:noindex:
detection_output
----------------
.. autofunction:: paddle.fluid.layers.detection_output
:noindex:
ssd_loss
--------
.. autofunction:: paddle.fluid.layers.ssd_loss
:noindex:
detection_map
-------------
.. autofunction:: paddle.fluid.layers.detection_map
:noindex:
iou_similarity
--------------
.. autofunction:: paddle.fluid.layers.iou_similarity
:noindex:
box_coder
---------
.. autofunction:: paddle.fluid.layers.box_coder
:noindex:
doc/fluid/api/optimizer.rst
浏览文件 @
cdd55dbc
...
@@ -89,13 +89,6 @@ DecayedAdagradOptimizer
...
@@ -89,13 +89,6 @@ DecayedAdagradOptimizer
:members:
:members:
:noindex:
:noindex:
RMSPropOptimizer
----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
Adadelta
Adadelta
--------
--------
...
...
doc/fluid/api/profiler.rst
浏览文件 @
cdd55dbc
...
@@ -23,15 +23,3 @@ profiler
...
@@ -23,15 +23,3 @@ profiler
.. autofunction:: paddle.fluid.profiler.profiler
.. autofunction:: paddle.fluid.profiler.profiler
:noindex:
:noindex:
start_profiler
--------------
.. autofunction:: paddle.fluid.profiler.start_profiler
:noindex:
stop_profiler
-------------
.. autofunction:: paddle.fluid.profiler.stop_profiler
:noindex:
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
cdd55dbc
...
@@ -84,7 +84,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
...
@@ -84,7 +84,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library
(
feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog
)
cc_library
(
feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog
)
if
(
WITH_DISTRIBUTE
)
if
(
WITH_DISTRIBUTE
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc
cares
grpc++_unsecure grpc_unsecure gpr
)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
else
()
else
()
...
...
paddle/fluid/framework/reader.h
浏览文件 @
cdd55dbc
...
@@ -35,14 +35,15 @@ class ReaderBase {
...
@@ -35,14 +35,15 @@ class ReaderBase {
class
DecoratedReader
:
public
ReaderBase
{
class
DecoratedReader
:
public
ReaderBase
{
public:
public:
explicit
DecoratedReader
(
ReaderBase
*
reader
)
:
ReaderBase
(),
reader_
(
reader
)
{
explicit
DecoratedReader
(
const
std
::
shared_ptr
<
ReaderBase
>&
reader
)
:
ReaderBase
(),
reader_
(
reader
)
{
PADDLE_ENFORCE_NOT_NULL
(
reader_
);
PADDLE_ENFORCE_NOT_NULL
(
reader_
);
}
}
void
ReInit
()
override
{
reader_
->
ReInit
();
}
void
ReInit
()
override
{
reader_
->
ReInit
();
}
protected:
protected:
ReaderBase
*
reader_
;
std
::
shared_ptr
<
ReaderBase
>
reader_
;
};
};
class
FileReader
:
public
ReaderBase
{
class
FileReader
:
public
ReaderBase
{
...
@@ -64,7 +65,7 @@ class ReaderHolder {
...
@@ -64,7 +65,7 @@ class ReaderHolder {
public:
public:
void
Reset
(
ReaderBase
*
reader
)
{
reader_
.
reset
(
reader
);
}
void
Reset
(
ReaderBase
*
reader
)
{
reader_
.
reset
(
reader
);
}
ReaderBase
*
Get
()
const
{
return
reader_
.
get
()
;
}
std
::
shared_ptr
<
ReaderBase
>
Get
()
const
{
return
reader_
;
}
void
ReadNext
(
std
::
vector
<
LoDTensor
>*
out
)
{
void
ReadNext
(
std
::
vector
<
LoDTensor
>*
out
)
{
PADDLE_ENFORCE_NOT_NULL
(
reader_
);
PADDLE_ENFORCE_NOT_NULL
(
reader_
);
...
@@ -76,7 +77,7 @@ class ReaderHolder {
...
@@ -76,7 +77,7 @@ class ReaderHolder {
}
}
private:
private:
std
::
unique
_ptr
<
ReaderBase
>
reader_
;
std
::
shared
_ptr
<
ReaderBase
>
reader_
;
};
};
}
// namespace framework
}
// namespace framework
...
...
paddle/fluid/operators/batch_norm_mkldnn_op.cc
浏览文件 @
cdd55dbc
...
@@ -19,10 +19,17 @@ limitations under the License. */
...
@@ -19,10 +19,17 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
batch_norm_bwd
=
mkldnn
::
batch_normalization_backward
;
using
batch_norm_fwd
=
mkldnn
::
batch_normalization_forward
;
using
framework
::
DataLayout
;
using
framework
::
Tensor
;
using
mkldnn
::
memory
;
using
mkldnn
::
primitive
;
using
mkldnn
::
reorder
;
using
mkldnn
::
stream
;
using
paddle
::
platform
::
MKLDNNDeviceContext
;
using
paddle
::
platform
::
MKLDNNDeviceContext
;
using
paddle
::
platform
::
MKLDNNMemDesc
;
using
paddle
::
platform
::
MKLDNNMemDesc
;
using
mkldnn
::
memory
;
using
platform
::
to_void_cast
;
template
<
typename
T
>
template
<
typename
T
>
using
EigenArrayMap
=
using
EigenArrayMap
=
...
@@ -64,21 +71,12 @@ void run_batch_norm_op(Args &&... args) {
...
@@ -64,21 +71,12 @@ void run_batch_norm_op(Args &&... args) {
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
}
}
template
<
typename
T
>
inline
void
*
cast_const_to_void
(
const
T
*
t
)
{
return
static_cast
<
void
*>
(
const_cast
<
T
*>
(
t
));
}
}
// namespace
}
// namespace
template
<
typename
T
>
template
<
typename
T
>
class
BatchNormMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
class
BatchNormMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
data_layout_str
=
ctx
.
Attr
<
std
::
string
>
(
"data_layout"
);
auto
data_layout
=
framework
::
StringToDataLayout
(
data_layout_str
);
PADDLE_ENFORCE
(
data_layout
==
framework
::
DataLayout
::
kNCHW
,
"MKLDNN batch normalization handles only NCHW data layout"
);
const
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
const
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
const
float
momentum
=
ctx
.
Attr
<
float
>
(
"momentum"
);
const
float
momentum
=
ctx
.
Attr
<
float
>
(
"momentum"
);
const
bool
is_test
=
ctx
.
Attr
<
bool
>
(
"is_test"
);
const
bool
is_test
=
ctx
.
Attr
<
bool
>
(
"is_test"
);
...
@@ -99,41 +97,53 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -99,41 +97,53 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const
auto
*
scale
=
ctx
.
Input
<
Tensor
>
(
"Scale"
);
const
auto
*
scale
=
ctx
.
Input
<
Tensor
>
(
"Scale"
);
const
auto
*
shift
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
const
auto
*
shift
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
PADDLE_ENFORCE
(
x
->
layout
()
==
DataLayout
::
kMKLDNN
&&
mean_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
x
->
format
()
!=
memory
::
format
::
format_undef
,
variance_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
"Wrong layout/format set for Input x tensor"
);
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
mean_data
=
mean
->
data
<
T
>
();
const
T
*
variance_data
=
variance
->
data
<
T
>
();
T
*
y_data
=
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
mean_out_data
=
mean_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
variance_out_data
=
variance_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
batch_mean_data
=
nullptr
;
T
*
batch_variance_data
=
nullptr
;
if
(
!
is_test
)
{
if
(
!
is_test
)
{
batch_mean
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
batch_mean
_data
=
batch_mean
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
batch_variance
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
batch_variance
_data
=
batch_variance
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
}
}
auto
propagation
=
is_test
==
true
?
mkldnn
::
prop_kind
::
forward_scoring
auto
propagation
=
is_test
==
true
?
mkldnn
::
prop_kind
::
forward_scoring
:
mkldnn
::
prop_kind
::
forward_training
;
:
mkldnn
::
prop_kind
::
forward_training
;
auto
dims
=
paddle
::
framework
::
vectorize2int
(
x
->
dims
());
auto
src_tz
=
paddle
::
framework
::
vectorize2int
(
x
->
dims
());
auto
scale_tz
=
paddle
::
framework
::
vectorize2int
(
scale
->
dims
());
auto
src_md
=
PADDLE_ENFORCE
(
scale_tz
.
size
()
==
1
,
"Dims of scale tensor is NOT 1"
);
MKLDNNMemDesc
(
dims
,
memory
::
data_type
::
f32
,
memory
::
format
::
nchw
);
const
unsigned
int
ic
=
scale_tz
[
0
];
auto
dst_md
=
MKLDNNMemDesc
(
dims
,
memory
::
data_type
::
f32
,
memory
::
format
::
nchw
);
auto
src_pd
=
mkldnn
::
memory
::
primitive_desc
{
src_md
,
mkldnn_engine
};
auto
dst_pd
=
mkldnn
::
memory
::
primitive_desc
{
dst_md
,
mkldnn_engine
};
auto
src
=
mkldnn
::
memory
{
src_pd
,
cast_const_to_void
(
x
->
data
<
T
>
())};
auto
dst
=
mkldnn
::
memory
{
dst_pd
,
y
->
data
<
T
>
()};
unsigned
flags
=
mkldnn
::
use_scale_shift
;
unsigned
flags
=
mkldnn
::
use_scale_shift
;
if
(
is_test
)
flags
|=
mkldnn
::
use_global_stats
;
if
(
is_test
)
flags
|=
mkldnn
::
use_global_stats
;
// create mkldnn memory from input x tensor
auto
src_memory
=
memory
({{{
src_tz
},
memory
::
data_type
::
f32
,
x
->
format
()},
mkldnn_engine
},
to_void_cast
(
x_data
));
// create primitive descriptor for batch norm forward
using
bn_fwd_types
=
bn_type_traits
<
mkldnn
::
batch_normalization_forward
>
;
using
bn_fwd_types
=
bn_type_traits
<
mkldnn
::
batch_normalization_forward
>
;
auto
batch_norm_fwd_desc
=
auto
batch_norm_fwd_desc
=
bn_fwd_types
::
op_desc
{
bn_fwd_types
::
op_desc
{
propagation
,
src_md
,
epsilon
,
flags
};
propagation
,
src_memory
.
get_primitive_desc
().
desc
(),
epsilon
,
flags
};
auto
batch_norm_fwd_pd
=
std
::
shared_ptr
<
batch_norm_fwd
::
primitive_desc
>
batch_norm_fwd_pd
=
bn_fwd_types
::
op_prim
{
batch_norm_fwd_desc
,
mkldnn_engine
};
std
::
shared_ptr
<
batch_norm_fwd
::
primitive_desc
>
(
new
batch_norm_fwd
::
primitive_desc
(
batch_norm_fwd_desc
,
mkldnn_engine
));
const
unsigned
int
ic
=
dims
[
1
];
// Save the pd to be used in backward pass
const
std
::
string
key
=
ctx
.
op
().
Output
(
"SavedMean"
);
const
std
::
string
key_batch_norm_fwd_pd
=
key
+
"@bn_fwd_pd"
;
dev_ctx
.
SetBlob
(
key_batch_norm_fwd_pd
,
batch_norm_fwd_pd
);
// MKLDNN requires a single piece of memory for scale and shift/bias data
// MKLDNN requires a single piece of memory for scale and shift/bias data
const
size_t
scaleshift_size
=
2
*
ic
;
const
size_t
scaleshift_size
=
2
*
ic
;
...
@@ -143,73 +153,58 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -143,73 +153,58 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
copy_to_weights
(
scale
->
data
<
T
>
(),
scale
->
data
<
T
>
()
+
ic
,
shift
->
data
<
T
>
(),
copy_to_weights
(
scale
->
data
<
T
>
(),
scale
->
data
<
T
>
()
+
ic
,
shift
->
data
<
T
>
(),
shift
->
data
<
T
>
()
+
ic
,
&
scaleshift_data
);
shift
->
data
<
T
>
()
+
ic
,
&
scaleshift_data
);
auto
scaleshift_memory
=
mkldnn
::
memory
{
// crate mkldnn memory for weights(scale/shift)
batch_norm_fwd_pd
.
weights_primitive_desc
(),
scaleshift_data
.
data
()};
auto
scaleshift_memory
=
memory
(
batch_norm_fwd_pd
->
weights_primitive_desc
(),
scaleshift_data
.
data
());
if
(
is_test
)
{
// create mkldnn memory for output y tensor
auto
mean_memory
=
mkldnn
::
memory
{
batch_norm_fwd_pd
.
mean_primitive_desc
(),
auto
dst_memory
=
memory
(
batch_norm_fwd_pd
->
dst_primitive_desc
(),
y_data
);
cast_const_to_void
(
mean
->
data
<
T
>
())};
if
(
is_test
)
{
// create mkldnn memory for stats (as input)
auto
mean_memory
=
memory
(
batch_norm_fwd_pd
->
mean_primitive_desc
(),
to_void_cast
(
mean_data
));
auto
variance_memory
=
auto
variance_memory
=
m
kldnn
::
memory
{
batch_norm_fwd_pd
.
variance_primitive_desc
(),
m
emory
(
batch_norm_fwd_pd
->
variance_primitive_desc
(),
cast_const_to_void
(
variance
->
data
<
T
>
())}
;
to_void_cast
(
variance_data
))
;
run_batch_norm_op
<
typename
bn_fwd_types
::
op_type
>
(
run_batch_norm_op
<
typename
bn_fwd_types
::
op_type
>
(
batch_norm_fwd_pd
,
src
,
(
const
mkldnn
::
primitive
::
at
&
)
mean_memory
,
*
batch_norm_fwd_pd
,
src_memory
,
(
const
mkldnn
::
primitive
::
at
&
)
mean_memory
,
(
const
mkldnn
::
primitive
::
at
&
)
variance_memory
,
scaleshift_memory
,
(
const
mkldnn
::
primitive
::
at
&
)
variance_memory
,
scaleshift_memory
,
dst
);
dst
_memory
);
}
else
{
}
else
{
// create mkldnn memory for stats (as output)
auto
mean_memory
=
auto
mean_memory
=
mkldnn
::
memory
{
batch_norm_fwd_pd
.
mean_primitive_desc
(),
memory
(
batch_norm_fwd_pd
->
mean_primitive_desc
(),
batch_mean_data
);
cast_const_to_void
(
batch_mean
->
data
<
T
>
())};
auto
variance_memory
=
memory
(
batch_norm_fwd_pd
->
variance_primitive_desc
(),
batch_variance_data
);
auto
variance_memory
=
mkldnn
::
memory
{
batch_norm_fwd_pd
.
variance_primitive_desc
(),
cast_const_to_void
(
batch_variance
->
data
<
T
>
())};
run_batch_norm_op
<
bn_fwd_types
::
op_type
>
(
batch_norm_fwd_pd
,
src
,
run_batch_norm_op
<
bn_fwd_types
::
op_type
>
(
*
batch_norm_fwd_pd
,
src_memory
,
scaleshift_memory
,
dst
,
scaleshift_memory
,
dst
_memory
,
mean_memory
,
variance_memory
);
mean_memory
,
variance_memory
);
}
}
if
(
!
is_test
)
{
if
(
!
is_test
)
{
const
unsigned
int
in
=
dims
[
0
];
// mkldnn only compute stats for current batch
const
unsigned
int
sample_size
=
x
->
numel
()
/
in
/
ic
;
// so we need compute momentum stats via Eigen lib
EigenVectorArrayMap
<
T
>
batch_mean_e
(
batch_mean_data
,
ic
);
// saved_xx is use just in this batch of data
EigenVectorArrayMap
<
T
>
batch_variance_e
(
batch_variance_data
,
ic
);
EigenVectorArrayMap
<
T
>
saved_mean_e
(
ConstEigenVectorArrayMap
<
T
>
mean_e
(
mean_data
,
ic
);
batch_mean
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
ic
);
ConstEigenVectorArrayMap
<
T
>
variance_e
{
variance_data
,
ic
};
EigenVectorArrayMap
<
T
>
saved_variance_e
(
batch_variance
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
ic
);
EigenVectorArrayMap
<
T
>
running_mean_e
(
mean_out_data
,
ic
);
saved_mean_e
.
setZero
();
EigenVectorArrayMap
<
T
>
running_variance_e
(
variance_out_data
,
ic
);
saved_variance_e
.
setZero
();
const
unsigned
int
x_arr_size
=
in
*
ic
;
ConstEigenArrayMap
<
T
>
x_arr
(
x
->
data
<
T
>
(),
sample_size
,
x_arr_size
);
for
(
unsigned
int
nc
=
0
;
nc
<
x_arr_size
;
++
nc
)
{
saved_mean_e
(
nc
%
ic
)
+=
x_arr
.
col
(
nc
).
sum
();
}
saved_mean_e
/=
in
*
sample_size
;
for
(
unsigned
int
nc
=
0
;
nc
<
x_arr_size
;
++
nc
)
{
saved_variance_e
(
nc
%
ic
)
+=
(
x_arr
.
col
(
nc
)
-
saved_mean_e
(
nc
%
ic
)).
matrix
().
squaredNorm
();
}
saved_variance_e
/=
in
*
sample_size
;
ConstEigenVectorArrayMap
<
T
>
mean_arr
{
mean
->
data
<
T
>
(),
ic
};
ConstEigenVectorArrayMap
<
T
>
variance_arr
{
variance
->
data
<
T
>
(),
ic
};
EigenVectorArrayMap
<
T
>
running_mean_arr
(
mean_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
ic
);
EigenVectorArrayMap
<
T
>
running_var_arr
(
variance_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
ic
);
auto
one_minus_momentum
=
1.
-
momentum
;
auto
one_minus_momentum
=
1.
-
momentum
;
running_mean_arr
=
running_mean_e
=
mean_e
*
momentum
+
batch_mean_e
*
one_minus_momentum
;
mean_arr
*
momentum
+
saved_mean_e
*
one_minus_momentum
;
running_variance_e
=
running_var_arr
=
variance_e
*
momentum
+
batch_variance_e
*
one_minus_momentum
;
variance_arr
*
momentum
+
saved_variance_e
*
one_minus_momentum
;
}
}
y
->
set_layout
(
DataLayout
::
kMKLDNN
);
y
->
set_format
(
(
memory
::
format
)
dst_memory
.
get_primitive_desc
().
desc
().
data
.
format
);
}
}
};
};
...
@@ -217,11 +212,6 @@ template <typename T>
...
@@ -217,11 +212,6 @@ template <typename T>
class
BatchNormMKLDNNGradOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
class
BatchNormMKLDNNGradOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
data_layout_str
=
ctx
.
Attr
<
std
::
string
>
(
"data_layout"
);
auto
data_layout
=
framework
::
StringToDataLayout
(
data_layout_str
);
PADDLE_ENFORCE
(
data_layout
==
framework
::
DataLayout
::
kNCHW
,
"MKLDNN batch normalization handles only NCHW data layout"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
auto
mkldnn_engine
=
dev_ctx
.
GetEngine
();
auto
mkldnn_engine
=
dev_ctx
.
GetEngine
();
...
@@ -238,88 +228,132 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -238,88 +228,132 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto
*
diff_scale
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Scale"
));
auto
*
diff_scale
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Scale"
));
auto
*
diff_shift
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
diff_shift
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
diff_x
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
PADDLE_ENFORCE
(
diff_y
->
layout
()
==
DataLayout
::
kMKLDNN
&&
diff_scale
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
diff_y
->
format
()
!=
memory
::
format
::
format_undef
,
diff_shift
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
"Wrong layout/format set for Input diff_y tensor"
);
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
diff_y_data
=
diff_y
->
data
<
T
>
();
const
T
*
batch_mean_data
=
batch_mean
->
data
<
T
>
();
const
T
*
batch_variance_data
=
batch_variance
->
data
<
T
>
();
const
T
*
scale_data
=
scale
->
data
<
T
>
();
const
T
*
shift_data
=
shift
->
data
<
T
>
();
T
*
diff_x_data
=
diff_x
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
diff_scale_data
=
diff_scale
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
diff_shift_data
=
diff_shift
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
src_tz
=
paddle
::
framework
::
vectorize2int
(
x
->
dims
());
auto
diff_src_tz
=
src_tz
;
auto
dst_tz
=
src_tz
;
auto
diff_dst_tz
=
dst_tz
;
auto
scale_tz
=
paddle
::
framework
::
vectorize2int
(
scale
->
dims
());
PADDLE_ENFORCE
(
scale_tz
.
size
()
==
1
,
"Dims of scale tensor is NOT 1"
);
const
unsigned
int
ic
=
scale_tz
[
0
];
// Retrieve bn_fwd_pd from device context
const
std
::
string
key
=
ctx
.
op
().
Input
(
"SavedMean"
);
const
std
::
string
key_batch_norm_fwd_pd
=
key
+
"@bn_fwd_pd"
;
auto
batch_norm_fwd_pd
=
std
::
static_pointer_cast
<
batch_norm_fwd
::
primitive_desc
>
(
dev_ctx
.
GetBlob
(
key_batch_norm_fwd_pd
));
PADDLE_ENFORCE
(
batch_norm_fwd_pd
!=
nullptr
,
"Fail to find batch_norm_fwd_pd in device context"
);
auto
dims
=
paddle
::
framework
::
vectorize2int
(
x
->
dims
());
using
bn_bwd_types
=
bn_type_traits
<
mkldnn
::
batch_normalization_backward
>
;
unsigned
flags
=
mkldnn
::
use_scale_shift
|
!
mkldnn
::
use_global_stats
;
auto
src_md
=
// create mkldnn memory from input diff_y tensor
MKLDNNMemDesc
(
dims
,
memory
::
data_type
::
f32
,
memory
::
format
::
nchw
);
auto
user_diff_dst_memory
=
auto
dst_md
=
memory
({{{
diff_dst_tz
},
memory
::
data_type
::
f32
,
diff_y
->
format
()},
MKLDNNMemDesc
(
dims
,
memory
::
data_type
::
f32
,
memory
::
format
::
nchw
);
mkldnn_engine
},
auto
diff_src_md
=
to_void_cast
(
diff_y_data
));
MKLDNNMemDesc
(
dims
,
memory
::
data_type
::
f32
,
memory
::
format
::
nchw
);
auto
diff_dst_md
=
MKLDNNMemDesc
(
dims
,
memory
::
data_type
::
f32
,
memory
::
format
::
nchw
);
using
bn_bwd_types
=
bn_type_traits
<
mkldnn
::
batch_normalization_backward
>
;
// create mkldnn memory from input x tensor
using
bn_fwd_types
=
bn_type_traits
<
mkldnn
::
batch_normalization_forward
>
;
auto
src_memory
=
memory
({{{
src_tz
},
memory
::
data_type
::
f32
,
x
->
format
()},
mkldnn_engine
},
to_void_cast
(
x_data
));
auto
batch_norm_fwd_desc
=
bn_fwd_types
::
op_desc
{
// for diff_dst, try to use same format as dst in forward pass
mkldnn
::
prop_kind
::
forward_training
,
src_md
,
epsilon
,
flags
};
auto
diff_dst_pd
=
batch_norm_fwd_pd
.
get
()
->
dst_primitive_desc
();
auto
batch_norm_fwd_pd
=
auto
diff_dst_md
=
diff_dst_pd
.
desc
();
bn_fwd_types
::
op_prim
{
batch_norm_fwd_desc
,
mkldnn_engine
};
// create primitive descriptor for batch norm backward
unsigned
flags
=
mkldnn
::
use_scale_shift
;
auto
batch_norm_bwd_desc
=
bn_bwd_types
::
op_desc
{
auto
batch_norm_bwd_desc
=
bn_bwd_types
::
op_desc
{
mkldnn
::
prop_kind
::
backward
,
diff_dst_md
,
dst_md
,
epsilon
,
flags
};
mkldnn
::
prop_kind
::
backward
,
diff_dst_md
,
src_memory
.
get_primitive_desc
().
desc
(),
epsilon
,
flags
};
auto
batch_norm_bwd_pd
=
bn_bwd_types
::
op_prim
{
auto
batch_norm_bwd_pd
=
bn_bwd_types
::
op_prim
{
batch_norm_bwd_desc
,
mkldnn_engine
,
batch_norm_fwd_pd
};
batch_norm_bwd_desc
,
mkldnn_engine
,
*
batch_norm_fwd_pd
};
auto
src
=
mkldnn
::
memory
{{
src_md
,
mkldnn_engine
},
// reorder user_diff_dst if it's not in preferred format
cast_const_to_void
(
x
->
data
<
T
>
())};
auto
diff_dst_memory
=
user_diff_dst_memory
;
primitive
reorder_diff_dst
;
auto
mean
=
mkldnn
::
memory
{
batch_norm_bwd_pd
.
mean_primitive_desc
(),
bool
is_diff_dst_reordered
=
false
;
cast_const_to_void
(
batch_mean
->
data
<
T
>
())};
if
(
diff_dst_pd
!=
user_diff_dst_memory
.
get_primitive_desc
())
{
diff_dst_memory
=
memory
(
diff_dst_pd
);
auto
variance
=
reorder_diff_dst
=
reorder
(
user_diff_dst_memory
,
diff_dst_memory
);
mkldnn
::
memory
{
batch_norm_bwd_pd
.
variance_primitive_desc
(),
is_diff_dst_reordered
=
true
;
cast_const_to_void
(
batch_variance
->
data
<
T
>
())};
}
auto
diff_dst
=
mkldnn
::
memory
{{
diff_dst_md
,
mkldnn_engine
},
cast_const_to_void
(
diff_y
->
data
<
T
>
())};
const
unsigned
int
ic
=
dims
[
1
];
// create mkldnn memory for input tensors (src/mean/variance)
auto
mean_memory
=
memory
(
batch_norm_bwd_pd
.
mean_primitive_desc
(),
to_void_cast
(
batch_mean_data
));
auto
variance_memory
=
memory
(
batch_norm_bwd_pd
.
variance_primitive_desc
(),
to_void_cast
(
batch_variance_data
));
// MKLDNN requires a single piece of memory for scale and shift/bias data
const
size_t
scaleshift_size
=
2
*
ic
;
const
size_t
scaleshift_size
=
2
*
ic
;
std
::
vector
<
T
>
scaleshift_data
;
std
::
vector
<
T
>
scaleshift_data
;
scaleshift_data
.
reserve
(
scaleshift_size
);
scaleshift_data
.
reserve
(
scaleshift_size
);
copy_to_weights
(
scale
->
data
<
T
>
(),
scale
->
data
<
T
>
()
+
ic
,
shift
->
data
<
T
>
()
,
copy_to_weights
(
scale
_data
,
scale_data
+
ic
,
shift_data
,
shift_data
+
ic
,
shift
->
data
<
T
>
()
+
ic
,
&
scaleshift_data
);
&
scaleshift_data
);
auto
scaleshift_memory
=
mkldnn
::
memory
{
// create mkldnn memory for input tensors (scale/shift)
batch_norm_bwd_pd
.
weights_primitive_desc
(),
scaleshift_data
.
data
()};
auto
scaleshift_memory
=
memory
(
batch_norm_bwd_pd
.
weights_primitive_desc
(),
scaleshift_data
.
data
());
// create mkldnn memory for output diff weights (combined scale/shift)
std
::
vector
<
T
>
diff_scaleshift_data
;
std
::
vector
<
T
>
diff_scaleshift_data
;
diff_scaleshift_data
.
reserve
(
scaleshift_size
);
diff_scaleshift_data
.
reserve
(
scaleshift_size
);
copy_to_weights
(
diff_scale
->
data
<
T
>
(),
diff_scale
->
data
<
T
>
()
+
ic
,
diff_shift
->
data
<
T
>
(),
diff_shift
->
data
<
T
>
()
+
ic
,
&
diff_scaleshift_data
);
auto
diff_scaleshift_memory
=
auto
diff_scaleshift_memory
=
mkldnn
::
memory
{
batch_norm_bwd_pd
.
diff_weights_primitive_desc
(),
memory
(
batch_norm_bwd_pd
.
diff_weights_primitive_desc
(),
diff_scaleshift_data
.
data
()};
diff_scaleshift_data
.
data
());
auto
diff_src
=
mkldnn
::
memory
{{
diff_src_md
,
mkldnn_engine
},
// here assume diff_src is in the same format of src
static_cast
<
void
*>
(
diff_x
->
data
<
T
>
())};
auto
diff_src_memory
=
memory
(
src_memory
.
get_primitive_desc
(),
diff_x_data
);
run_batch_norm_op
<
bn_bwd_types
::
op_type
>
(
// finally create batch_norm backward primitive
batch_norm_bwd_pd
,
src
,
mean
,
variance
,
diff_dst
,
scaleshift_memory
,
auto
batch_norm_bwd_prim
=
diff_src
,
diff_scaleshift_memory
);
batch_norm_bwd
(
batch_norm_bwd_pd
,
src_memory
,
mean_memory
,
variance_memory
,
diff_dst_memory
,
scaleshift_memory
,
diff_src_memory
,
diff_scaleshift_memory
);
// execute optional reorder and batch_norm backward primitive
std
::
vector
<
primitive
>
pipeline
;
if
(
is_diff_dst_reordered
)
pipeline
.
push_back
(
reorder_diff_dst
);
pipeline
.
push_back
(
batch_norm_bwd_prim
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
// copy back diff scale/shift to output tensors (diff scale/shift)
diff_scaleshift_data
.
resize
(
scaleshift_size
);
auto
it
=
std
::
begin
(
diff_scaleshift_data
);
auto
it
=
std
::
begin
(
diff_scaleshift_data
);
std
::
copy
(
it
,
std
::
next
(
it
,
ic
),
diff_scale
->
data
<
T
>
()
);
std
::
copy
(
it
,
std
::
next
(
it
,
ic
),
diff_scale
_data
);
std
::
copy
(
std
::
next
(
it
,
ic
),
std
::
end
(
diff_scaleshift_data
),
std
::
copy
(
std
::
next
(
it
,
ic
),
std
::
end
(
diff_scaleshift_data
),
diff_shift
->
data
<
T
>
());
diff_shift_data
);
// set layout/format of output tensors
diff_x
->
set_layout
(
DataLayout
::
kMKLDNN
);
diff_x
->
set_format
((
memory
::
format
)
diff_src_memory
.
get_primitive_desc
()
.
desc
()
.
data
.
format
);
}
}
};
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_KERNEL
(
batch_norm
,
MKLDNN
,
paddle
::
platform
::
CPUPlace
,
REGISTER_OP_KERNEL
(
batch_norm
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
BatchNormMKLDNNOpKernel
<
float
>
);
ops
::
BatchNormMKLDNNOpKernel
<
float
>
);
REGISTER_OP_KERNEL
(
batch_norm_grad
,
MKLDNN
,
paddle
::
platform
::
CPUPlace
,
REGISTER_OP_KERNEL
(
batch_norm_grad
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
BatchNormMKLDNNGradOpKernel
<
float
>
);
ops
::
BatchNormMKLDNNGradOpKernel
<
float
>
);
paddle/fluid/operators/batch_norm_op.cc
浏览文件 @
cdd55dbc
...
@@ -110,19 +110,19 @@ class BatchNormOp : public framework::OperatorWithKernel {
...
@@ -110,19 +110,19 @@ class BatchNormOp : public framework::OperatorWithKernel {
ctx
.
Input
<
Tensor
>
(
"Variance"
)
->
type
()),
ctx
.
Input
<
Tensor
>
(
"Variance"
)
->
type
()),
"Variance input should be of float type"
);
"Variance input should be of float type"
);
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
LibraryType
library
=
framework
::
LibraryType
::
kPlain
;
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
if
(
library
_
==
framework
::
LibraryType
::
kPlain
&&
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
_
=
framework
::
LibraryType
::
kMKLDNN
;
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
}
#endif
#endif
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout
,
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout
,
library
_
);
library
);
}
}
};
};
...
@@ -370,19 +370,21 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
...
@@ -370,19 +370,21 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
PADDLE_THROW
(
"can't find Y@GRAD"
);
PADDLE_THROW
(
"can't find Y@GRAD"
);
}
}
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
DataLayout
::
kAnyLayout
;
framework
::
LibraryType
library
=
framework
::
LibraryType
::
kPlain
;
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
if
(
library
_
==
framework
::
LibraryType
::
kPlain
&&
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
_
=
framework
::
LibraryType
::
kMKLDNN
;
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
_
=
framework
::
DataLayout
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
}
#endif
#endif
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout
_
,
library_
);
layout
,
library
);
}
}
};
};
...
...
paddle/fluid/operators/reader/create_batch_reader_op.cc
浏览文件 @
cdd55dbc
...
@@ -20,7 +20,7 @@ namespace reader {
...
@@ -20,7 +20,7 @@ namespace reader {
class
BatchReader
:
public
framework
::
DecoratedReader
{
class
BatchReader
:
public
framework
::
DecoratedReader
{
public:
public:
BatchReader
(
ReaderBase
*
reader
,
int
batch_size
)
BatchReader
(
const
std
::
shared_ptr
<
ReaderBase
>&
reader
,
int
batch_size
)
:
DecoratedReader
(
reader
),
batch_size_
(
batch_size
)
{
:
DecoratedReader
(
reader
),
batch_size_
(
batch_size
)
{
buffer_
.
reserve
(
batch_size_
);
buffer_
.
reserve
(
batch_size_
);
}
}
...
...
paddle/fluid/operators/reader/create_custom_reader_op.cc
浏览文件 @
cdd55dbc
...
@@ -22,7 +22,8 @@ namespace reader {
...
@@ -22,7 +22,8 @@ namespace reader {
class
CustomReader
:
public
framework
::
DecoratedReader
{
class
CustomReader
:
public
framework
::
DecoratedReader
{
public:
public:
CustomReader
(
ReaderBase
*
reader
,
const
framework
::
BlockDesc
&
sub_block
,
CustomReader
(
const
std
::
shared_ptr
<
ReaderBase
>&
reader
,
const
framework
::
BlockDesc
&
sub_block
,
const
std
::
vector
<
std
::
string
>&
source_var_names
,
const
std
::
vector
<
std
::
string
>&
source_var_names
,
const
std
::
vector
<
std
::
string
>&
sink_var_names
)
const
std
::
vector
<
std
::
string
>&
sink_var_names
)
:
DecoratedReader
(
reader
),
:
DecoratedReader
(
reader
),
...
...
paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
浏览文件 @
cdd55dbc
...
@@ -34,7 +34,8 @@ static constexpr size_t kChannelSize = 1; // kCacheSize - 2
...
@@ -34,7 +34,8 @@ static constexpr size_t kChannelSize = 1; // kCacheSize - 2
class
DoubleBufferReader
:
public
framework
::
DecoratedReader
{
class
DoubleBufferReader
:
public
framework
::
DecoratedReader
{
public:
public:
explicit
DoubleBufferReader
(
explicit
DoubleBufferReader
(
ReaderBase
*
reader
,
platform
::
Place
target_place
=
platform
::
CPUPlace
())
const
std
::
shared_ptr
<
ReaderBase
>&
reader
,
platform
::
Place
target_place
=
platform
::
CPUPlace
())
:
DecoratedReader
(
reader
),
place_
(
target_place
)
{
:
DecoratedReader
(
reader
),
place_
(
target_place
)
{
cpu_tensor_cache_
.
resize
(
kCacheSize
);
cpu_tensor_cache_
.
resize
(
kCacheSize
);
gpu_tensor_cache_
.
resize
(
kCacheSize
);
gpu_tensor_cache_
.
resize
(
kCacheSize
);
...
...
paddle/fluid/operators/reader/create_multi_pass_reader_op.cc
浏览文件 @
cdd55dbc
...
@@ -21,7 +21,7 @@ namespace reader {
...
@@ -21,7 +21,7 @@ namespace reader {
class
MultiPassReader
:
public
framework
::
DecoratedReader
{
class
MultiPassReader
:
public
framework
::
DecoratedReader
{
public:
public:
MultiPassReader
(
ReaderBase
*
reader
,
int
pass_num
)
MultiPassReader
(
const
std
::
shared_ptr
<
ReaderBase
>&
reader
,
int
pass_num
)
:
DecoratedReader
(
reader
),
pass_num_
(
pass_num
),
pass_count_
(
0
)
{}
:
DecoratedReader
(
reader
),
pass_num_
(
pass_num
),
pass_count_
(
0
)
{}
void
ReadNext
(
std
::
vector
<
framework
::
LoDTensor
>*
out
)
override
{
void
ReadNext
(
std
::
vector
<
framework
::
LoDTensor
>*
out
)
override
{
...
...
paddle/fluid/operators/reader/create_shuffle_reader_op.cc
浏览文件 @
cdd55dbc
...
@@ -23,7 +23,8 @@ namespace reader {
...
@@ -23,7 +23,8 @@ namespace reader {
class
ShuffleReader
:
public
framework
::
DecoratedReader
{
class
ShuffleReader
:
public
framework
::
DecoratedReader
{
public:
public:
ShuffleReader
(
ReaderBase
*
reader
,
size_t
buffer_size
,
size_t
seed
=
0
)
ShuffleReader
(
const
std
::
shared_ptr
<
ReaderBase
>&
reader
,
size_t
buffer_size
,
size_t
seed
=
0
)
:
DecoratedReader
(
reader
),
buffer_size_
(
buffer_size
),
seed_
(
seed
)
{
:
DecoratedReader
(
reader
),
buffer_size_
(
buffer_size
),
seed_
(
seed
)
{
VLOG
(
10
)
<<
"Create shuffle reader of "
<<
reader_
;
VLOG
(
10
)
<<
"Create shuffle reader of "
<<
reader_
;
if
(
seed_
==
0
)
{
if
(
seed_
==
0
)
{
...
...
paddle/fluid/operators/reader/create_threaded_reader_op.cc
浏览文件 @
cdd55dbc
...
@@ -21,7 +21,8 @@ namespace reader {
...
@@ -21,7 +21,8 @@ namespace reader {
class
ThreadedReader
:
public
framework
::
DecoratedReader
{
class
ThreadedReader
:
public
framework
::
DecoratedReader
{
public:
public:
explicit
ThreadedReader
(
ReaderBase
*
reader
)
:
DecoratedReader
(
reader
)
{}
explicit
ThreadedReader
(
const
std
::
shared_ptr
<
ReaderBase
>&
reader
)
:
DecoratedReader
(
reader
)
{}
void
ReadNext
(
std
::
vector
<
framework
::
LoDTensor
>*
out
)
override
{
void
ReadNext
(
std
::
vector
<
framework
::
LoDTensor
>*
out
)
override
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
...
...
paddle/fluid/platform/cpu_info.cc
浏览文件 @
cdd55dbc
...
@@ -21,12 +21,17 @@ limitations under the License. */
...
@@ -21,12 +21,17 @@ limitations under the License. */
#include <unistd.h>
#include <unistd.h>
#endif
#endif
#include <algorithm>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
DEFINE_double
(
fraction_of_cpu_memory_to_use
,
1
,
DEFINE_double
(
fraction_of_cpu_memory_to_use
,
1
,
"Default use 100% of CPU memory for PaddlePaddle,"
"Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc"
);
"reserve the rest for page tables, etc"
);
DEFINE_uint64
(
initial_cpu_memory_in_mb
,
500
,
"Default initial 500MB of CPU memory for PaddlePaddle, in MD unit."
);
DEFINE_double
(
DEFINE_double
(
fraction_of_cuda_pinned_memory_to_use
,
0.5
,
fraction_of_cuda_pinned_memory_to_use
,
0.5
,
"Default use 50% of CPU memory as the pinned_memory for PaddlePaddle,"
"Default use 50% of CPU memory as the pinned_memory for PaddlePaddle,"
...
@@ -54,7 +59,10 @@ inline size_t CpuTotalPhysicalMemory() {
...
@@ -54,7 +59,10 @@ inline size_t CpuTotalPhysicalMemory() {
size_t
CpuMaxAllocSize
()
{
size_t
CpuMaxAllocSize
()
{
// For distributed systems, it requires configuring and limiting
// For distributed systems, it requires configuring and limiting
// the fraction of memory to use.
// the fraction of memory to use.
return
FLAGS_fraction_of_cpu_memory_to_use
*
CpuTotalPhysicalMemory
();
return
std
::
min
(
static_cast
<
size_t
>
(
FLAGS_fraction_of_cpu_memory_to_use
*
CpuTotalPhysicalMemory
()),
static_cast
<
size_t
>
(
FLAGS_initial_cpu_memory_in_mb
*
1
<<
20
));
}
}
size_t
CpuMinChunkSize
()
{
size_t
CpuMinChunkSize
()
{
...
...
python/paddle/fluid/framework.py
浏览文件 @
cdd55dbc
...
@@ -382,7 +382,7 @@ class Operator(object):
...
@@ -382,7 +382,7 @@ class Operator(object):
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'ncclInit'
,
'channel_create'
,
'channel_close'
,
'channel_send'
,
'ncclInit'
,
'channel_create'
,
'channel_close'
,
'channel_send'
,
'channel_recv'
,
'select'
'channel_recv'
,
'select'
,
'gen_nccl_id'
}
}
def
__init__
(
self
,
def
__init__
(
self
,
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
cdd55dbc
...
@@ -261,9 +261,10 @@ def embedding(input,
...
@@ -261,9 +261,10 @@ def embedding(input,
return
tmp
return
tmp
# TODO(qijun): expose H0 and C0
def
dynamic_lstm
(
input
,
def
dynamic_lstm
(
input
,
size
,
size
,
h_0
=
None
,
c_0
=
None
,
param_attr
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
bias_attr
=
None
,
use_peepholes
=
True
,
use_peepholes
=
True
,
...
@@ -324,6 +325,13 @@ def dynamic_lstm(input,
...
@@ -324,6 +325,13 @@ def dynamic_lstm(input,
(T X 4D), where T is the total time steps in this
(T X 4D), where T is the total time steps in this
mini-batch, D is the hidden size.
mini-batch, D is the hidden size.
size(int): 4 * hidden size.
size(int): 4 * hidden size.
h_0(Variable): The initial hidden state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size and D is the hidden size.
c_0(Variable): The initial cell state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size and D is the hidden size. `h_0` and `c_0` can be None, but only both at the same time.
param_attr(ParamAttr|None): The parameter attribute for the learnable
param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weights.
hidden-hidden weights.
...
@@ -387,12 +395,20 @@ def dynamic_lstm(input,
...
@@ -387,12 +395,20 @@ def dynamic_lstm(input,
cell
=
helper
.
create_tmp_variable
(
dtype
)
cell
=
helper
.
create_tmp_variable
(
dtype
)
batch_gate
=
helper
.
create_tmp_variable
(
dtype
)
batch_gate
=
helper
.
create_tmp_variable
(
dtype
)
batch_cell_pre_act
=
helper
.
create_tmp_variable
(
dtype
)
batch_cell_pre_act
=
helper
.
create_tmp_variable
(
dtype
)
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
batch_size
=
input
.
shape
[
0
]
if
h_0
:
assert
h_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of h0 should be (batch_size, %d)'
%
size
inputs
[
'H0'
]
=
h_0
if
c_0
:
assert
c_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of c0 should be (batch_size, %d)'
%
size
inputs
[
'C0'
]
=
c_0
helper
.
append_op
(
helper
.
append_op
(
type
=
'lstm'
,
type
=
'lstm'
,
inputs
=
{
'Input'
:
input
,
inputs
=
inputs
,
'Weight'
:
weight
,
'Bias'
:
bias
},
outputs
=
{
outputs
=
{
'Hidden'
:
hidden
,
'Hidden'
:
hidden
,
'Cell'
:
cell
,
'Cell'
:
cell
,
...
@@ -677,11 +693,13 @@ def dynamic_gru(input,
...
@@ -677,11 +693,13 @@ def dynamic_gru(input,
attr
=
helper
.
param_attr
,
shape
=
[
size
,
3
*
size
],
dtype
=
dtype
)
attr
=
helper
.
param_attr
,
shape
=
[
size
,
3
*
size
],
dtype
=
dtype
)
bias
=
helper
.
create_parameter
(
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
[
1
,
3
*
size
],
dtype
=
dtype
,
is_bias
=
True
)
attr
=
helper
.
bias_attr
,
shape
=
[
1
,
3
*
size
],
dtype
=
dtype
,
is_bias
=
True
)
batch_size
=
input
.
shape
[
0
]
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
if
h_0
!=
None
:
if
h_0
!=
None
:
assert
h_0
.
shape
==
(
assert
h_0
.
shape
==
(
size
,
size
),
'The shape of h0 should be(%d, %d)'
%
(
size
,
size
)
batch_size
,
size
inputs
[
'h0'
]
=
h_0
),
'The shape of h0 should be(batch_size, %d)'
%
size
inputs
[
'H0'
]
=
h_0
hidden
=
helper
.
create_tmp_variable
(
dtype
)
hidden
=
helper
.
create_tmp_variable
(
dtype
)
batch_gate
=
helper
.
create_tmp_variable
(
dtype
)
batch_gate
=
helper
.
create_tmp_variable
(
dtype
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录