Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
6d8d3d4c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6d8d3d4c
编写于
11月 17, 2020
作者:
J
Jacek Czaja
提交者:
GitHub
11月 17, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[oneDNN] Layer norm bf16 kernel (#28619)
上级
cdc4e662
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
528 addition
and
6 deletion
+528
-6
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+2
-2
paddle/fluid/operators/layer_norm_op.cc
paddle/fluid/operators/layer_norm_op.cc
+35
-0
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
+177
-0
paddle/fluid/platform/mkldnn_reuse.h
paddle/fluid/platform/mkldnn_reuse.h
+6
-0
python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py
.../tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py
+146
-0
python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py
...fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py
+151
-0
python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py
...e/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_layer_norm_op.py
python/paddle/fluid/tests/unittests/test_layer_norm_op.py
+8
-3
tools/static_mode_white_list.py
tools/static_mode_white_list.py
+2
-0
未找到文件。
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
6d8d3d4c
...
...
@@ -2102,8 +2102,8 @@ PDNode *patterns::Bfloat16Placement::operator()(
const
std
::
unordered_set
<
std
::
string
>
&
bfloat16_enabled_op_types
)
{
std
::
unordered_set
<
std
::
string
>
supported_op_types
=
std
::
unordered_set
<
std
::
string
>
({
"concat"
,
"conv2d"
,
"fusion_gru"
,
"gelu"
,
"
reshape2"
,
"softmax"
,
"sum
"
,
"transpose2"
});
"
layer_norm"
,
"reshape2"
,
"softmax
"
,
"
sum"
,
"
transpose2"
});
if
(
!
bfloat16_enabled_op_types
.
empty
())
{
supported_op_types
=
bfloat16_enabled_op_types
;
}
...
...
paddle/fluid/operators/layer_norm_op.cc
浏览文件 @
6d8d3d4c
...
...
@@ -15,6 +15,10 @@ limitations under the License. */
#include "paddle/fluid/operators/layer_norm_op.h"
#include <memory>
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace
paddle
{
namespace
operators
{
...
...
@@ -91,6 +95,25 @@ class LayerNormOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"Variance"
,
{
left
});
ctx
->
ShareLoD
(
"X"
,
"Y"
);
}
protected:
// Chooses the kernel variant for layer_norm. The plain library with any
// layout is the default; when built with PADDLE_WITH_MKLDNN and
// platform::CanMKLDNNBeUsed(ctx) accepts this context, the MKLDNN library and
// layout are requested instead. The kernel data type follows input "X".
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext &ctx) const {
  auto library = framework::LibraryType::kPlain;
  auto layout = framework::DataLayout::kAnyLayout;

#ifdef PADDLE_WITH_MKLDNN
  // `library` is still kPlain here, so only the runtime capability check
  // decides whether the MKLDNN kernel is selected.
  if (platform::CanMKLDNNBeUsed(ctx)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
  }
#endif

  const auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
  return framework::OpKernelType(data_type, ctx.GetPlace(), layout, library);
}
};
class
LayerNormOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
...
...
@@ -134,6 +157,18 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
"greater than zero. But received [%d]."
,
begin_norm_axis
));
});
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"mkldnn_data_type"
,
"(string, default
\"
float32
\"
). Data type of mkldnn kernel"
)
.
SetDefault
(
"float32"
)
.
InEnum
({
"float32"
,
"bfloat16"
});
AddAttr
<
bool
>
(
"is_test"
,
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Assume feature vectors exist on dimensions
...
...
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
0 → 100644
浏览文件 @
6d8d3d4c
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/layer_norm_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace
paddle
{
namespace
operators
{
// Handler for the oneDNN layer_normalization_forward primitive. It creates the
// forward primitive descriptor when the base handler reports that nothing is
// cached for its key, and hands out the memory objects (scale/shift, mean,
// variance) that the layer_norm kernel binds to the primitive.
//
// NOTE(review): the key passed to the base handler is built only from `dims`
// and `uniq_name`; epsilon, flags and is_test are not part of it. Confirm that
// a cached entry cannot be picked up with different values of those.
template <typename T>
class LayerNormMKLDNNHandler
    : public platform::MKLDNNHandlerT<T, dnnl::layer_normalization_forward> {
 public:
  // dims      - shape of the input tensor; normalization runs over the last
  //             axis (the statistics descriptor drops that axis).
  // epsilon   - value forwarded to the primitive descriptor.
  // flags     - oneDNN normalization flags (e.g. use_scale_shift).
  // is_test   - true selects forward_inference, false forward_training.
  // fmt       - memory format of the input tensor.
  // dev_ctx   - device context providing the engine and the blob cache.
  // cpu_place - place used for tensor allocations made by this handler.
  // uniq_name - name combined with `dims` to form the cache key.
  LayerNormMKLDNNHandler(const std::vector<int64_t>& dims, const float& epsilon,
                         const dnnl::normalization_flags& flags,
                         const bool& is_test, const MKLDNNMemoryFormat fmt,
                         const platform::MKLDNNDeviceContext& dev_ctx,
                         platform::Place cpu_place,
                         const std::string& uniq_name)
      : platform::MKLDNNHandlerT<T, dnnl::layer_normalization_forward>(
            dev_ctx, dev_ctx.GetEngine(), cpu_place,
            platform::CreateKey(dims, uniq_name)) {
    if (this->isCached()) {
      return;
    }

    auto md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
    if (is_test) {
      this->AcquireForwardPrimitiveDescriptor(
          dnnl::prop_kind::forward_inference, md, epsilon, flags);
      return;
    }

    // TODO(grygielski) Delete forcing stats_md after DNNL 1.2 is introduced
    // Statistics are always float and have one value per normalized row,
    // hence the shape is `dims` without its last axis.
    auto stats_md = dnnl::memory::desc(
        {begin(dims), end(dims) - 1}, platform::MKLDNNGetDataType<float>(),
        platform::MKLDNNFormatForSize(dims.size() - 1,
                                      MKLDNNMemoryFormat::nchw));
    this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training,
                                            md, stats_md, epsilon, flags);
  }

  // Returns the scale/shift memory stored under this handler's key, as
  // produced by the base-class lookup; callers compare the result against
  // nullptr to detect that nothing has been stored yet.
  std::shared_ptr<dnnl::memory> AcquireScaleShiftMemory() {
    return this->AcquireMemoryFromPrimitive("@scaleshift_mem_p");
  }

  // Creates (or reuses) the scale/shift memory described by the primitive's
  // weights descriptor and fills it with `scaleshift_data`.
  std::shared_ptr<dnnl::memory> AcquireScaleShiftMemory(
      std::vector<float>& scaleshift_data) {
    // scaleshift_data comes from temporary buffer so we need to copy it into
    // created memory primitive
    auto scaleshift_mem = this->AcquireMemoryFromPrimitive(
        this->fwd_pd_->weights_desc(), "@scaleshift_mem_p");
    auto data_ptr = scaleshift_mem->get_data_handle();
    const std::size_t num_bytes = scaleshift_data.size() * sizeof(float);
    std::memcpy(data_ptr, scaleshift_data.data(), num_bytes);
    return scaleshift_mem;
  }

  // Allocates `mean` and wraps it in a oneDNN memory object. The statistics
  // descriptor is created with a float data type regardless of T, so the
  // tensor is allocated as float to keep its dtype consistent with the data
  // the primitive writes (previously it was tagged as T, e.g. bfloat16).
  std::shared_ptr<dnnl::memory> AcquireMeanMemory(framework::Tensor* mean) {
    float* mean_data = mean->mutable_data<float>(
        this->place_, this->fwd_pd_->mean_desc().get_size());
    return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(),
                                            mean_data, "@mean_mem_p");
  }

  // Allocates `variance` as float (see AcquireMeanMemory for the rationale)
  // and wraps it in a oneDNN memory object.
  std::shared_ptr<dnnl::memory> AcquireVarianceMemory(
      framework::Tensor* variance) {
    float* variance_data = variance->mutable_data<float>(
        this->place_, this->fwd_pd_->variance_desc().get_size());
    return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(),
                                            variance_data, "@variance_mem_p");
  }
};
// oneDNN (MKL-DNN) forward kernel for layer_norm.
//
// Reads X (and Scale/Bias when both are provided), normalizes over the last
// axis and writes Y. When is_test is false, Mean and Variance are produced as
// well. Only begin_norm_axis == rank(X) - 1 is accepted.
template <typename T>
class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* scale = ctx.Input<Tensor>("Scale");
    auto* bias = ctx.Input<Tensor>("Bias");
    auto* y = ctx.Output<Tensor>("Y");

    const float epsilon = ctx.Attr<float>("epsilon");
    const auto begin_norm_axis = ctx.Attr<int>("begin_norm_axis");
    const bool is_test = ctx.Attr<bool>("is_test");

    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();

    auto src_tz = paddle::framework::vectorize(x->dims());
    // Compute the last axis as a signed int so the comparison below does not
    // mix signed and unsigned operands and the "%d" argument is an int.
    const int last_axis = static_cast<int>(src_tz.size()) - 1;
    PADDLE_ENFORCE_EQ(begin_norm_axis, last_axis,
                      platform::errors::InvalidArgument(
                          "MKL-DNN Layer Norm supports only last logical "
                          "axis:%d as begin_norm_axis.",
                          last_axis));

    y->mutable_data<T>(ctx.GetPlace());

    // Scale/shift is applied only when both inputs are present.
    const bool with_scaleshift = (scale && bias);
    dnnl::normalization_flags flags{};
    if (with_scaleshift) {
      flags |= dnnl::normalization_flags::use_scale_shift;
    }

    LayerNormMKLDNNHandler<T> handler(src_tz, epsilon, flags, is_test,
                                      x->format(), dev_ctx, ctx.GetPlace(),
                                      ctx.OutputName("Y"));

    auto src_memory = handler.AcquireSrcMemory(x);
    auto dst_memory = handler.AcquireDstMemory(y);
    auto layer_norm_p = handler.AcquireForwardPrimitive();

    dnnl::stream astream(dev_ctx.GetEngine());
    std::unordered_map<int, dnnl::memory> args;
    args.insert({DNNL_ARG_SRC, *src_memory});
    args.insert({DNNL_ARG_DST, *dst_memory});

    if (!is_test) {
      auto* mean = ctx.Output<Tensor>("Mean");
      auto* var = ctx.Output<Tensor>("Variance");

      // The handler allocates both tensors itself (sized from the primitive's
      // mean/variance descriptors), so no separate mutable_data call is
      // needed here.
      auto mean_memory = handler.AcquireMeanMemory(mean);
      auto variance_memory = handler.AcquireVarianceMemory(var);

      args.insert({DNNL_ARG_MEAN, *mean_memory});
      args.insert({DNNL_ARG_VARIANCE, *variance_memory});
    }

    if (with_scaleshift) {
      auto scaleshift_memory = handler.AcquireScaleShiftMemory();
      // Rebuild the packed buffer when nothing is cached yet, and on every
      // run outside of inference mode.
      if (scaleshift_memory == nullptr || !is_test) {
        auto scale_tz = paddle::framework::vectorize(scale->dims());
        const auto C = static_cast<std::size_t>(scale_tz[0]);

        // MKLDNN requires a single piece of memory for scale and shift/bias
        // data
        const float* scale_data = scale->data<float>();
        const float* bias_data = bias->data<float>();
        std::vector<float> scaleshift_data;
        scaleshift_data.reserve(2 * C);
        scaleshift_data.insert(scaleshift_data.end(), scale_data,
                               scale_data + C);
        scaleshift_data.insert(scaleshift_data.end(), bias_data,
                               bias_data + C);

        scaleshift_memory = handler.AcquireScaleShiftMemory(scaleshift_data);
      }
      args.insert({DNNL_ARG_SCALE_SHIFT, *scaleshift_memory});
    }

    layer_norm_p->execute(astream, args);
    astream.wait();

    y->set_layout(DataLayout::kMKLDNN);
    y->set_format(platform::GetMKLDNNFormat(*dst_memory));
  }
};
}
// namespace operators
}
// namespace paddle
// TODO(jczaja): Enable FP32 when performance is good
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_KERNEL
(
layer_norm
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
LayerNormMKLDNNOpKernel
<
paddle
::
platform
::
bfloat16
>
);
paddle/fluid/platform/mkldnn_reuse.h
浏览文件 @
6d8d3d4c
...
...
@@ -190,6 +190,12 @@ class MKLDNNHandlerT {
}
}
// Fetches the blob stored in the device context under `key_ + suffix` and
// returns it cast to a mkldnn::memory shared pointer.
std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
    const std::string& suffix) {
  const auto blob_key = key_ + suffix;
  auto blob = dev_ctx_.GetBlob(blob_key);
  return std::static_pointer_cast<mkldnn::memory>(blob);
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireMemoryFromPrimitive
(
mkldnn
::
memory
::
desc
md
,
void
*
ptr
,
const
std
::
string
&
suffix
)
{
const
auto
local_key
=
key_
+
suffix
;
...
...
python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py
0 → 100644
浏览文件 @
6d8d3d4c
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# from paddle.fluid.tests.unittests.test_layer_norm_op import *
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
operator
import
mul
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
from
paddle
import
enable_static
from
functools
import
reduce
from
paddle.fluid.tests.unittests.mkldnn.test_layer_norm_mkldnn_op
import
TestLayerNormMKLDNNOp
from
paddle.fluid.tests.unittests.mkldnn.test_layer_norm_mkldnn_op
import
_reference_layer_norm_naive
from
paddle.fluid.tests.unittests.op_test
import
OpTest
,
convert_float_to_uint16
from
paddle.fluid.tests.unittests.op_test
import
_set_use_system_allocator
# Seed NumPy's global RNG so test data is reproducible. The previous call,
# np.random.random(123), only drew and discarded 123 samples.
np.random.seed(123)
_set_use_system_allocator
(
True
)
@unittest.skipIf(not core.supports_bfloat16(),
                 "place does not support BF16 evaluation")
class TestLayerNormBF16MKLDNNOp(TestLayerNormMKLDNNOp):
    """Forward check of layer_norm with use_mkldnn=True on bf16 data.

    X and Y are fed/fetched as uint16 arrays produced by
    convert_float_to_uint16; scale, bias, mean and variance stay float32.
    """

    def __assert_close(self, tensor, np_array, msg, rtol=2e-02, atol=2):
        """Assert that `tensor` and `np_array` agree within rtol/atol."""
        is_close = np.allclose(
            np.array(tensor), np_array, rtol=rtol, atol=atol)
        self.assertTrue(is_close, msg)

    def check_forward(self,
                      shape,
                      begin_norm_axis,
                      with_scale_bias=True,
                      with_is_test=False):
        """Run layer_norm once and compare it with the NumPy reference.

        shape: shape of the input X.
        begin_norm_axis: first axis that is normalized.
        with_scale_bias: feed Scale and Bias inputs when True.
        with_is_test: value of the op's "is_test" attribute; Mean and
            Variance are only compared when it is False.
        """
        # attr
        epsilon = 0.00001
        x_shape = shape
        D = reduce(mul, x_shape[begin_norm_axis:], 1)
        scale_shape = [D]

        np.random.seed(123)
        x = np.random.random_sample(x_shape).astype(np.float32)
        x_bf16 = convert_float_to_uint16(x)

        if with_scale_bias:
            scale = np.random.random_sample(scale_shape).astype(np.float32)
            bias = np.random.random_sample(scale_shape).astype(np.float32)
        else:
            scale = np.array([])
            bias = np.array([])

        # reference forward & backward
        y, mean, variance = _reference_layer_norm_naive(
            x, scale, bias, epsilon, begin_norm_axis)
        y_bf16 = convert_float_to_uint16(y)

        # Variables declared in the program, in declaration order.
        ground_truth = {
            'x_bf16': x_bf16,
            'mean': mean,
            'variance': variance,
            'y_bf16': y_bf16,
        }
        feed = {'x_bf16': x_bf16}
        if with_scale_bias:
            ground_truth['scale'] = scale
            ground_truth['bias'] = bias
            feed['scale'] = scale
            feed['bias'] = bias

        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()

            # scale and bias are fp32 and other vars are of bf16
            for name, value in ground_truth.items():
                if name in ('x_bf16', 'y_bf16'):
                    var_dtype = 'uint16'
                else:
                    var_dtype = 'float32'
                block.create_var(
                    name=name, dtype=var_dtype, shape=value.shape)

            inputs = {"X": block.var('x_bf16')}
            if with_scale_bias:
                inputs["Scale"] = block.var('scale')
                inputs["Bias"] = block.var('bias')

            outputs = {
                "Y": block.var('y_bf16'),
                "Mean": block.var('mean'),  # share the same memory
                "Variance": block.var('variance'),  # share the same memory
            }
            attrs = {
                "epsilon": epsilon,
                "begin_norm_axis": begin_norm_axis,
                "use_mkldnn": True,
                "is_test": with_is_test,
            }
            block.append_op(
                type="layer_norm",
                inputs=inputs,
                outputs=outputs,
                attrs=attrs)

            exe = fluid.Executor(core.CPUPlace())
            out = exe.run(program,
                          feed=feed,
                          fetch_list=['y_bf16', 'mean', 'variance'])

            # NOTE(review): the fourth argument of __assert_close in this
            # class is rtol (the base class takes atol there). The values
            # below are kept as they were passed positionally, but rtol=2 on
            # uint16 bit patterns and atol=2 on float statistics make these
            # checks very loose - confirm the intended tolerances.
            self.__assert_close(y_bf16, out[0], "y_bf16", rtol=2)
            if not with_is_test:
                self.__assert_close(mean, out[1], "mean")
                self.__assert_close(variance, out[2], "variance", rtol=1e-3)

    def test_check_forward_with_is_test(self):
        self.check_forward(
            shape=[2, 3, 4, 5], begin_norm_axis=3, with_is_test=True)

    # TODO (jczaja): Enable those to test when enabling training using bf16
    def test_check_forward_with_scale_and_bias(self):
        pass

    def test_check_forward_without_scale_and_bias(self):
        pass
if
__name__
==
"__main__"
:
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py
0 → 100644
浏览文件 @
6d8d3d4c
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# from paddle.fluid.tests.unittests.test_layer_norm_op import *
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
operator
import
mul
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
from
paddle
import
enable_static
from
functools
import
reduce
from
paddle.fluid.tests.unittests.op_test
import
_set_use_system_allocator
# Seed NumPy's global RNG so test data is reproducible. The previous call,
# np.random.random(123), only drew and discarded 123 samples.
np.random.seed(123)
_set_use_system_allocator
(
True
)
def
_reference_layer_norm_naive
(
x
,
scale
,
beta
,
epsilon
,
begin_norm_axis
=
1
):
x_shape
=
x
.
shape
N
=
reduce
(
mul
,
x_shape
[
0
:
begin_norm_axis
],
1
)
D
=
reduce
(
mul
,
x_shape
[
begin_norm_axis
:
len
(
x_shape
)],
1
)
x
.
shape
=
[
N
,
D
]
if
scale
.
size
==
0
and
beta
.
size
==
0
:
scale
=
np
.
ones
([
1
,
D
])
beta
=
np
.
zeros
([
1
,
D
])
else
:
scale
=
scale
.
reshape
([
1
,
D
])
beta
=
beta
.
reshape
([
1
,
D
])
mean
=
np
.
mean
(
x
,
axis
=
1
)
var
=
np
.
var
(
x
,
axis
=
1
)
+
epsilon
output
=
scale
*
np
.
divide
((
x
-
mean
.
reshape
([
N
,
1
])),
(
np
.
sqrt
(
var
)).
reshape
([
N
,
1
]))
+
beta
x
.
shape
,
output
.
shape
=
x_shape
,
x_shape
return
output
,
mean
,
var
class TestLayerNormMKLDNNOp(unittest.TestCase):
    """Forward checks of the layer_norm op run with use_mkldnn=True.

    Results are compared against _reference_layer_norm_naive.
    """

    def setUp(self):
        self.use_mkldnn = True

    def __assert_close(self, tensor, np_array, msg, atol=1e-4):
        """Assert that `tensor` and `np_array` agree within `atol`."""
        is_close = np.allclose(np.array(tensor), np_array, atol=atol)
        self.assertTrue(is_close, msg)

    def check_forward(self,
                      shape,
                      begin_norm_axis,
                      with_scale_bias=True,
                      with_is_test=False):
        """Run layer_norm once and compare it with the NumPy reference.

        shape: shape of the input X.
        begin_norm_axis: first axis that is normalized.
        with_scale_bias: feed Scale and Bias inputs when True.
        with_is_test: value of the op's "is_test" attribute; Mean and
            Variance are only compared when it is False.
        """
        # attr
        epsilon = 0.00001
        x_shape = shape
        D = reduce(mul, x_shape[begin_norm_axis:], 1)
        scale_shape = [D]

        np.random.seed(123)
        x = np.random.random_sample(x_shape).astype(np.float32)

        if with_scale_bias:
            scale = np.random.random_sample(scale_shape).astype(np.float32)
            bias = np.random.random_sample(scale_shape).astype(np.float32)
        else:
            scale = np.array([])
            bias = np.array([])

        # reference forward & backward
        y, mean, variance = _reference_layer_norm_naive(
            x, scale, bias, epsilon, begin_norm_axis)

        # Variables declared in the program, in declaration order.
        ground_truth = {'x': x, 'mean': mean, 'variance': variance, 'y': y}
        feed = {'x': x}
        if with_scale_bias:
            ground_truth['scale'] = scale
            ground_truth['bias'] = bias
            feed['scale'] = scale
            feed['bias'] = bias

        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()

            for name, value in ground_truth.items():
                block.create_var(
                    name=name, dtype='float32', shape=value.shape)

            inputs = {"X": block.var('x')}
            if with_scale_bias:
                inputs["Scale"] = block.var('scale')
                inputs["Bias"] = block.var('bias')

            outputs = {
                "Y": block.var('y'),
                "Mean": block.var('mean'),  # share the same memory
                "Variance": block.var('variance'),  # share the same memory
            }
            attrs = {
                "epsilon": epsilon,
                "begin_norm_axis": begin_norm_axis,
                "use_mkldnn": True,
                "is_test": with_is_test,
            }
            block.append_op(
                type="layer_norm",
                inputs=inputs,
                outputs=outputs,
                attrs=attrs)

            exe = fluid.Executor(core.CPUPlace())
            out = exe.run(program,
                          feed=feed,
                          fetch_list=['y', 'mean', 'variance'])

            self.__assert_close(y, out[0], "y")
            if not with_is_test:
                self.__assert_close(mean, out[1], "mean")
                self.__assert_close(variance, out[2], "variance", 1e-3)

    def test_check_forward_with_scale_and_bias(self):
        self.check_forward(shape=[2, 3, 4, 5], begin_norm_axis=3)

    def test_check_forward_without_scale_and_bias(self):
        self.check_forward(
            shape=[2, 3, 4, 5], begin_norm_axis=3, with_scale_bias=False)

    def test_check_forward_with_is_test(self):
        self.check_forward(
            shape=[2, 3, 4, 5], begin_norm_axis=3, with_is_test=True)
if
__name__
==
"__main__"
:
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py
浏览文件 @
6d8d3d4c
...
...
@@ -25,7 +25,7 @@ import paddle.fluid.op as fluid_op
@
unittest
.
skipIf
(
not
core
.
supports_bfloat16
(),
"place does not support BF16 evaluation"
)
class
TestSumMKLDNN
(
TestSumOp
):
class
TestSum
BF16
MKLDNN
(
TestSumOp
):
def
setUp
(
self
):
self
.
op_type
=
"sum"
self
.
use_mkldnn
=
True
...
...
python/paddle/fluid/tests/unittests/test_layer_norm_op.py
浏览文件 @
6d8d3d4c
...
...
@@ -117,8 +117,12 @@ class TestLayerNormOp(unittest.TestCase):
begin_norm_axis
,
has_scale
=
True
,
has_bias
=
True
,
y_grad_scale
=
1.0
):
def
test_with_place
(
place
,
shape
,
begin_norm_axis
):
y_grad_scale
=
1.0
,
use_mkldnn
=
False
):
def
test_with_place
(
place
,
shape
,
begin_norm_axis
,
use_mkldnn
=
use_mkldnn
):
# attr
epsilon
=
0.00001
x_shape
=
shape
...
...
@@ -181,7 +185,8 @@ class TestLayerNormOp(unittest.TestCase):
},
attrs
=
{
"epsilon"
:
epsilon
,
"begin_norm_axis"
:
begin_norm_axis
"begin_norm_axis"
:
begin_norm_axis
,
"use_mkldnn"
:
use_mkldnn
})
# generate backward op_desc
grad_op_desc_list
,
op_grad_to_var
=
core
.
get_grad_op_desc
(
...
...
tools/static_mode_white_list.py
浏览文件 @
6d8d3d4c
...
...
@@ -293,6 +293,8 @@ STATIC_MODE_TESTING_LIST = [
'test_label_smooth_op'
,
'test_lamb_op'
,
'test_layer_norm_op'
,
'test_layer_norm_mkldnn_op'
,
'test_layer_norm_bf16_mkldnn_op'
,
'test_layer_norm_op_v2'
,
'test_learning_rate_scheduler'
,
'test_linear_interp_op'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录