Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
a4a2b77d
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a4a2b77d
编写于
3月 19, 2021
作者:
A
Adam Osewski
提交者:
GitHub
3月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[oneDNN] lookup_table op with support for BF16 data type. (#31558)
上级
c86e771e
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
213 addition
and
10 deletion
+213
-10
paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
.../fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
+2
-2
paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h
...e/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h
+1
-1
paddle/fluid/operators/lookup_table_op.cc
paddle/fluid/operators/lookup_table_op.cc
+5
-2
paddle/fluid/operators/lookup_table_op.h
paddle/fluid/operators/lookup_table_op.h
+4
-2
paddle/fluid/operators/math/blas_impl.h
paddle/fluid/operators/math/blas_impl.h
+11
-0
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+13
-3
python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py
...paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py
+176
-0
tools/static_mode_white_list.py
tools/static_mode_white_list.py
+1
-0
未找到文件。
paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
浏览文件 @
a4a2b77d
...
...
@@ -53,7 +53,7 @@ void CPUBfloat16PlacementPass::SetMkldnnDataType(
gpd
(
graph
,
handler
);
}
void
CPUBfloat16PlacementPass
::
RemoveOrhanedOperators
(
void
CPUBfloat16PlacementPass
::
RemoveOr
p
hanedOperators
(
ir
::
Graph
*
graph
,
int
*
bfloat16_operators
)
const
{
// find orphaned bfloat16 operator that is between two float32 operators
// revert mkldnn_data_type attr to float32
...
...
@@ -74,7 +74,7 @@ void CPUBfloat16PlacementPass::RemoveOrhanedOperators(
void
CPUBfloat16PlacementPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
int
bfloat16_operators
=
0
;
SetMkldnnDataType
(
graph
,
&
bfloat16_operators
);
RemoveOrhanedOperators
(
graph
,
&
bfloat16_operators
);
RemoveOr
p
hanedOperators
(
graph
,
&
bfloat16_operators
);
PrettyLogDetail
(
"--- marked %d operators to bfloat16 "
,
bfloat16_operators
);
}
...
...
paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h
浏览文件 @
a4a2b77d
...
...
@@ -28,7 +28,7 @@ class CPUBfloat16PlacementPass : public Pass {
protected:
void
SetMkldnnDataType
(
ir
::
Graph
*
graph
,
int
*
bfloat16_operators
)
const
;
void
RemoveOrhanedOperators
(
ir
::
Graph
*
graph
,
int
*
bfloat16_operators
)
const
;
void
RemoveOr
p
hanedOperators
(
ir
::
Graph
*
graph
,
int
*
bfloat16_operators
)
const
;
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
};
...
...
paddle/fluid/operators/lookup_table_op.cc
浏览文件 @
a4a2b77d
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/platform/bfloat16.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -222,9 +223,11 @@ REGISTER_OPERATOR(lookup_table_grad, ops::LookupTableOpGrad,
REGISTER_OP_CPU_KERNEL
(
lookup_table
,
ops
::
LookupTableKernel
<
float
>
,
ops
::
LookupTableKernel
<
double
>
,
ops
::
LookupTableKernel
<
int8_t
>
);
ops
::
LookupTableKernel
<
int8_t
>
,
ops
::
LookupTableKernel
<
paddle
::
platform
::
bfloat16
>
);
REGISTER_OP_CPU_KERNEL
(
lookup_table_grad
,
ops
::
LookupTableGradKernel
<
float
>
,
ops
::
LookupTableGradKernel
<
double
>
);
ops
::
LookupTableGradKernel
<
double
>
,
ops
::
LookupTableGradKernel
<
paddle
::
platform
::
bfloat16
>
);
/* ========================== register checkpoint ===========================*/
...
...
paddle/fluid/operators/lookup_table_op.h
浏览文件 @
a4a2b77d
...
...
@@ -102,7 +102,8 @@ class LookupTableKernel : public framework::OpKernel<T> {
auto
id_index
=
table_t
.
GetIndexFromId
(
ids
[
i
]);
if
(
id_index
!=
-
1
)
{
if
(
input_data_type
==
framework
::
proto
::
VarType
::
INT8
)
{
if
(
input_data_type
==
framework
::
proto
::
VarType
::
INT8
||
input_data_type
==
framework
::
proto
::
VarType
::
BF16
)
{
memcpy
(
output
+
i
*
row_width
,
table
+
id_index
*
row_width
,
row_width
*
sizeof
(
T
));
}
else
{
...
...
@@ -128,7 +129,8 @@ class LookupTableKernel : public framework::OpKernel<T> {
"the input key should be exists. But received %d."
,
id_index
));
if
(
input_data_type
==
framework
::
proto
::
VarType
::
INT8
)
{
if
(
input_data_type
==
framework
::
proto
::
VarType
::
INT8
||
input_data_type
==
framework
::
proto
::
VarType
::
BF16
)
{
memcpy
(
output
+
i
*
row_width
,
table
+
id_index
*
row_width
,
row_width
*
sizeof
(
T
));
}
else
{
...
...
paddle/fluid/operators/math/blas_impl.h
浏览文件 @
a4a2b77d
...
...
@@ -21,6 +21,7 @@
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex64.h"
...
...
@@ -40,6 +41,16 @@ struct CBlas<int8_t> {
}
};
// CBlas specialization for platform::bfloat16.
//
// The only member provided here is VCOPY, and it is a stub: whatever
// arguments it is given, it hands an Unimplemented error to PADDLE_THROW.
template <>
struct CBlas<platform::bfloat16> {
  // Accepts any argument list so that generic callers still compile; the
  // arguments are never used.
  template <typename... ARGS>
  static void VCOPY(ARGS... args) {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Blas VCOPY is not supported on CPU with bfloat16,"
        " please check your code"));
  }
};
#ifdef PADDLE_WITH_MKLML
template
<
>
struct
CBlas
<
float
>
{
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
a4a2b77d
...
...
@@ -33,10 +33,19 @@ from paddle.fluid.backward import append_backward
from
paddle.fluid.op
import
Operator
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.framework
import
Program
,
OpProtoHolder
,
Variable
from
testsuite
import
create_op
,
set_input
,
append_input_output
,
append_loss_ops
from
paddle.fluid.tests.unittests.testsuite
import
(
create_op
,
set_input
,
append_input_output
,
append_loss_ops
,
)
from
paddle.fluid
import
unique_name
from
white_list
import
op_accuracy_white_list
,
check_shape_white_list
,
compile_vs_runtime_white_list
,
no_check_set_white_list
from
white_list
import
op_threshold_white_list
,
no_grad_set_white_list
from
paddle.fluid.tests.unittests.white_list
import
(
op_accuracy_white_list
,
check_shape_white_list
,
compile_vs_runtime_white_list
,
no_check_set_white_list
,
op_threshold_white_list
,
no_grad_set_white_list
,
)
def
check_out_dtype
(
api_fn
,
in_specs
,
expect_dtypes
,
target_index
=
0
,
**
configs
):
...
...
@@ -1452,6 +1461,7 @@ class OpTest(unittest.TestCase):
analytic_grads
=
self
.
_get_gradient
(
inputs_to_check
,
place
,
output_names
,
no_grad_set
,
user_defined_grad_outputs
)
# comparison of bf16 results will happen as fp32
# loop over list of grads and convert bf16 to fp32
fp32_grads
=
[]
...
...
python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py
0 → 100644
浏览文件 @
a4a2b77d
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.tests.unittests.op_test
import
(
OpTest
,
convert_float_to_uint16
,
convert_uint16_to_float
,
skip_check_grad_ci
)
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
from
paddle
import
enable_static
def
_lookup
(
weights
,
ids
,
flat_ids
):
w_shape
=
weights
.
shape
out_shape
=
list
(
ids
.
shape
[:
-
1
])
out_shape
.
append
(
w_shape
[
-
1
])
out
=
weights
[
flat_ids
].
reshape
(
out_shape
)
return
out
def
_get_grad
(
weights
,
ids
,
flat_ids
):
w_shape
=
weights
.
shape
w_grad
=
np
.
zeros
((
w_shape
),
dtype
=
weights
.
dtype
)
out_grad_shape
=
(
np
.
prod
(
ids
.
shape
[:
-
1
]),
w_shape
[
-
1
])
out_grad
=
weights
[
flat_ids
].
reshape
(
out_grad_shape
)
for
i
,
idx
in
enumerate
(
flat_ids
):
w_grad
[
idx
,
:]
+=
out_grad
[
i
]
return
w_grad
@unittest.skipIf(not core.supports_bfloat16(),
                 "place does not support BF16 evaluation")
class TestLookupTableBF16Op(OpTest):
    """Output and gradient checks for ``lookup_table`` with a uint16 table.

    The table is generated as float32, converted with
    ``convert_float_to_uint16`` and fed as ``W``; the expected output is the
    float32 lookup of the same ids.
    """

    # Shape of the ``Ids`` input. Subclasses override this attribute instead
    # of reassigning ``self.ids`` after ``setUp``: by then ``inputs`` and
    # ``outputs`` have already been built from the old ids.
    ids_shape = (4, 1)

    def setUp(self):
        self.op_type = "lookup_table"
        self.dtype = np.uint16

        table = np.random.random((17, 31)).astype("float32")
        self.ids = np.random.randint(0, 17, self.ids_shape).astype("int64")
        self.flat_ids = self.ids.flatten()

        self.w_bf16 = convert_float_to_uint16(table)
        self.out_bf16 = _lookup(self.w_bf16, self.ids, self.flat_ids)
        self.out_fp32 = _lookup(table, self.ids, self.flat_ids)
        self.w_grad_fp32 = _get_grad(table, self.ids, self.flat_ids)

        self.inputs = {'W': self.w_bf16, 'Ids': self.ids}
        self.outputs = {'Out': self.out_fp32}

    def test_check_output(self):
        self.check_output_with_place(core.CPUPlace(), check_dygraph=False)

    def test_check_grad(self):
        self.check_grad_with_place(
            core.CPUPlace(), ['W'],
            'Out',
            # A set literal is required here: set('Ids') would iterate the
            # string and produce {'I', 'd', 's'}.
            no_grad_set={'Ids'},
            check_dygraph=False,
            max_relative_error=1.5e-2,
            user_defined_grads=[self.w_grad_fp32],
            user_defined_grad_outputs=[self.out_bf16])


@unittest.skipIf(not core.supports_bfloat16(),
                 "place does not support BF16 evaluation")
class TestLookupTableBF16OpIds4D(TestLookupTableBF16Op):
    """Runs the base checks with a 4-D ``Ids`` input."""

    ids_shape = (2, 4, 5, 1)
@unittest.skipIf(not core.supports_bfloat16(),
                 "place does not support BF16 evaluation")
class TestLookupTableBF16OpWIsSelectedRows(unittest.TestCase):
    """Runs ``lookup_table`` directly on a scope whose ``W`` is SelectedRows.

    The operator is created with ``Operator`` and executed on ``self.scope``;
    its output is converted with ``convert_uint16_to_float`` and compared
    with a float32 reference lookup.
    """

    def setUp(self):
        self.ids = np.random.randint(
            low=0, high=15, size=(10, 1)).astype("int64")
        self.flat_ids = self.ids.flatten()
        self.w_fp32 = np.random.random((15, 32)).astype("float32")
        self.w_bf16 = convert_float_to_uint16(self.w_fp32)
        self.scope = core.Scope()
        self.place = core.CPUPlace()

    def prepare_w(self):
        """Stores ``self.w_bf16`` in scope variable ``W`` as SelectedRows."""
        # One entry per table row: 0 .. number_of_rows - 1.
        rows = list(range(self.w_bf16.shape[0]))

        w_selected_rows = self.scope.var('W').get_selected_rows()
        w_selected_rows.set_height(len(rows))
        w_selected_rows.set_rows(rows)
        w_tensor = w_selected_rows.get_tensor()
        w_tensor.set(self.w_bf16, self.place)

    def prepare_ids(self):
        """Stores ``self.ids`` in scope variable ``Ids``."""
        ids_tensor = self.scope.var('Ids').get_tensor()
        ids_tensor.set(self.ids, self.place)

    def _check_output(self, reference, result_array):
        """Compares the converted operator output with ``reference``."""
        result_array_fp32 = convert_uint16_to_float(result_array)
        np.testing.assert_allclose(result_array_fp32, reference, rtol=1.5e-2)

    def test_check_output(self):
        self.prepare_ids()
        self.prepare_w()
        out_tensor = self.scope.var('Out').get_tensor()

        # create and run lookup_table operator
        lookup_table = Operator("lookup_table", W='W', Ids='Ids', Out='Out')
        lookup_table.run(self.scope, self.place)

        # get result from Out
        result_array = np.array(out_tensor)
        ref = _lookup(self.w_fp32, self.ids, self.flat_ids)
        self._check_output(ref, result_array)
@unittest.skipIf(not core.supports_bfloat16(),
                 "place does not support BF16 evaluation")
class TestLookupTableBF16OpWIsSelectedRows4DIds(
        TestLookupTableBF16OpWIsSelectedRows):
    """SelectedRows variant that feeds a 4-D ``Ids`` array."""

    def setUp(self):
        super(TestLookupTableBF16OpWIsSelectedRows4DIds, self).setUp()
        # Replace the ids created by the parent and keep flat_ids in step;
        # the scope variables are only filled later, inside the test method.
        ids_4d = np.random.randint(low=0, high=15, size=(3, 4, 5, 1))
        self.ids = ids_4d.astype("int64")
        self.flat_ids = self.ids.flatten()
@skip_check_grad_ci(
    reason="Since paddings are not trainable and fixed in forward,"
    "the gradient of paddings makes no sense and we don't "
    "test the gradient here.")
@unittest.skipIf(not core.supports_bfloat16(),
                 "place does not support BF16 evaluation")
class TestLookupTableBF16OpWithPadding(TestLookupTableBF16Op):
    """Output check with a ``padding_idx`` attribute set."""

    def test_check_output(self):
        squeezed_ids = np.squeeze(self.inputs['Ids'])
        # Use one of the ids that is actually present as the padding index.
        padding_idx = np.random.choice(squeezed_ids, 1)[0]

        # Rows looked up with the padding index are expected to be zeros.
        padded_rows = squeezed_ids == padding_idx
        self.outputs['Out'][padded_rows] = np.zeros(31)

        self.attrs = {'padding_idx': int(padding_idx)}
        self.check_output_with_place(core.CPUPlace(), check_dygraph=False)
@skip_check_grad_ci(
    reason="Since paddings are not trainable and fixed in forward,"
    "the gradient of paddings makes no sense and we don't "
    "test the gradient here.")
@unittest.skipIf(not core.supports_bfloat16(),
                 "place does not support BF16 evaluation")
class TestLookupTableBF16OpIds4DPadding(TestLookupTableBF16OpIds4D):
    """Padding output check built on the Ids4D test case."""

    def test_check_output(self):
        ids = self.inputs['Ids']
        # Use one of the ids that is actually present as the padding index.
        padding_idx = np.random.choice(ids.flatten(), 1)[0]

        # Drop the size-1 axes of the mask so it can index the rows of 'Out'.
        padded_rows = np.squeeze(ids == padding_idx)
        self.outputs['Out'][padded_rows] = np.zeros(31)

        self.attrs = {'padding_idx': int(padding_idx)}
        self.check_output_with_place(core.CPUPlace(), check_dygraph=False)
if __name__ == "__main__":
    # enable_static() is called first, then the unittest runner is started.
    enable_static()
    unittest.main()
tools/static_mode_white_list.py
浏览文件 @
a4a2b77d
...
...
@@ -21,6 +21,7 @@ STATIC_MODE_TESTING_LIST = [
'test_linear_chain_crf_op'
,
'test_lod_reset_op'
,
'test_lookup_table_op'
,
'test_lookup_table_bf16_op'
,
'test_pad2d_op'
,
'test_scatter_op'
,
'test_sequence_concat'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录