Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a214e5dc
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a214e5dc
编写于
1月 06, 2023
作者:
N
niuliling123
提交者:
GitHub
1月 06, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix inaccurate return of low precision op list (#49391)
上级
c7899074
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
92 addition
and
50 deletion
+92
-50
paddle/fluid/eager/amp_utils.h
paddle/fluid/eager/amp_utils.h
+0
-10
paddle/fluid/imperative/amp_auto_cast.cc
paddle/fluid/imperative/amp_auto_cast.cc
+0
-11
paddle/fluid/imperative/amp_auto_cast.h
paddle/fluid/imperative/amp_auto_cast.h
+0
-7
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-1
paddle/phi/api/yaml/generator/api_base.py
paddle/phi/api/yaml/generator/api_base.py
+3
-0
paddle/phi/api/yaml/generator/api_gen.py
paddle/phi/api/yaml/generator/api_gen.py
+1
-0
paddle/phi/api/yaml/generator/backward_api_gen.py
paddle/phi/api/yaml/generator/backward_api_gen.py
+1
-0
paddle/phi/api/yaml/generator/intermediate_api_gen.py
paddle/phi/api/yaml/generator/intermediate_api_gen.py
+2
-0
paddle/phi/api/yaml/generator/sparse_api_gen.py
paddle/phi/api/yaml/generator/sparse_api_gen.py
+5
-0
paddle/phi/api/yaml/generator/sparse_bw_api_gen.py
paddle/phi/api/yaml/generator/sparse_bw_api_gen.py
+2
-0
paddle/phi/api/yaml/generator/strings_api_gen.py
paddle/phi/api/yaml/generator/strings_api_gen.py
+5
-0
paddle/phi/core/flags.cc
paddle/phi/core/flags.cc
+10
-7
paddle/phi/core/kernel_factory.cc
paddle/phi/core/kernel_factory.cc
+25
-0
paddle/phi/core/kernel_factory.h
paddle/phi/core/kernel_factory.h
+10
-1
paddle/phi/tests/api/scale_api.h
paddle/phi/tests/api/scale_api.h
+9
-0
python/paddle/amp/auto_cast.py
python/paddle/amp/auto_cast.py
+17
-11
python/paddle/fluid/tests/unittests/test_low_precision_list.py
...n/paddle/fluid/tests/unittests/test_low_precision_list.py
+1
-2
未找到文件。
paddle/fluid/eager/amp_utils.h
浏览文件 @
a214e5dc
...
...
@@ -100,7 +100,6 @@ inline paddle::experimental::DataType GetAmpDestDtype(
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableAllowOps
()
->
count
(
op_name
))
{
paddle
::
imperative
::
AmpOperators
::
Instance
().
AddToAmpOpList
(
op_name
);
return
paddle
::
experimental
::
DataType
::
FLOAT16
;
}
else
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableBlockOps
()
...
...
@@ -118,8 +117,6 @@ inline paddle::experimental::DataType GetAmpDestDtype(
.
GetMutableUnsupportedFp16Ops
()
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
else
{
paddle
::
imperative
::
AmpOperators
::
Instance
().
AddToAmpOpList
(
op_name
);
}
return
dst_type
;
}
...
...
@@ -132,8 +129,6 @@ inline paddle::experimental::DataType GetAmpDestDtype(
.
GetMutableBlockOps
()
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
else
{
paddle
::
imperative
::
AmpOperators
::
Instance
().
AddToAmpOpList
(
op_name
);
}
return
dst_type
;
}
...
...
@@ -142,7 +137,6 @@ inline paddle::experimental::DataType GetAmpDestDtype(
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableAllowOps
()
->
count
(
op_name
))
{
paddle
::
imperative
::
AmpOperators
::
Instance
().
AddToAmpOpList
(
op_name
);
return
paddle
::
experimental
::
DataType
::
BFLOAT16
;
}
else
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableBlockOps
()
...
...
@@ -158,8 +152,6 @@ inline paddle::experimental::DataType GetAmpDestDtype(
.
GetMutableUnsupportedBf16Ops
()
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
else
{
paddle
::
imperative
::
AmpOperators
::
Instance
().
AddToAmpOpList
(
op_name
);
}
return
dst_type
;
}
...
...
@@ -172,8 +164,6 @@ inline paddle::experimental::DataType GetAmpDestDtype(
.
GetMutableBlockOps
()
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
else
{
paddle
::
imperative
::
AmpOperators
::
Instance
().
AddToAmpOpList
(
op_name
);
}
return
dst_type
;
}
...
...
paddle/fluid/imperative/amp_auto_cast.cc
浏览文件 @
a214e5dc
...
...
@@ -22,7 +22,6 @@
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/imperative/var_helper.h"
DECLARE_bool
(
low_precision_op_list
);
namespace
paddle
{
namespace
imperative
{
...
...
@@ -194,16 +193,6 @@ AmpOperators::GetMutableUnsupportedBf16Ops() {
return
unsupported_bf16_ops_
;
}
// Bumps the usage counter kept for `op_name` in current_amp_ops_.
// The call is a no-op unless FLAGS_low_precision_op_list is set.
void AmpOperators::AddToAmpOpList(const std::string& op_name) {
  if (!FLAGS_low_precision_op_list) {
    return;
  }
  // operator[] value-initializes a missing entry to 0 before the increment.
  ++current_amp_ops_[op_name];
}
// Returns a copy of current_amp_ops_, i.e. the op name -> counter map that
// AddToAmpOpList fills in.
std::map<const std::string, int> AmpOperators::GetAmpOpList() {
  std::map<const std::string, int> snapshot = current_amp_ops_;
  return snapshot;
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
AmpOperators
&
ops
)
{
os
<<
"allow ops: "
;
auto
allow_ops
=
ops
.
GetMutableAllowOps
();
...
...
paddle/fluid/imperative/amp_auto_cast.h
浏览文件 @
a214e5dc
...
...
@@ -60,10 +60,6 @@ class AmpOperators {
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
GetMutableUnsupportedBf16Ops
();
void
AddToAmpOpList
(
const
std
::
string
&
op_name
);
std
::
map
<
const
std
::
string
,
int
>
GetAmpOpList
();
private:
AmpOperators
();
// forbid calling default constructor
...
...
@@ -80,9 +76,6 @@ class AmpOperators {
// The set of ops that have no bf16 CUDA kernel.
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
unsupported_bf16_ops_
;
// The amp op list of current module.
std
::
map
<
const
std
::
string
,
int
>
current_amp_ops_
;
};
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
AmpOperators
&
ops
);
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
a214e5dc
...
...
@@ -2546,7 +2546,7 @@ All parameter, weight, gradient are variables in Paddle.
[]
{
return
phi
::
autotune
::
AutoTuneStatus
::
Instance
().
Update
();
});
m
.
def
(
"get_low_precision_op_list"
,
[]
{
return
p
addle
::
imperative
::
AmpOperators
::
Instance
().
GetAmpOp
List
();
return
p
hi
::
KernelFactory
::
Instance
().
GetLowPrecisionKernel
List
();
});
m
.
def
(
"autotune_status"
,
[]
{
...
...
paddle/phi/api/yaml/generator/api_base.py
浏览文件 @
a214e5dc
...
...
@@ -1200,6 +1200,9 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
{
code_indent
}
auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{
code_indent
}
"
{
kernel_name
}
", {{kernel_backend, kernel_layout, kernel_data_type}});
{
code_indent
}
const auto& kernel = kernel_result.kernel;
{
code_indent
}
if (FLAGS_low_precision_op_list) {{
{
code_indent
}
phi::KernelFactory::Instance().AddToLowPrecisionKernelList("
{
self
.
api
}
", kernel_data_type);
{
code_indent
}
}}
{
code_indent
}
VLOG(6) << "
{
kernel_name
}
kernel: " << kernel;
{
code_indent
}
auto* dev_ctx = GetDeviceContextByBackend(kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);
{
input_tensors
}
...
...
paddle/phi/api/yaml/generator/api_gen.py
浏览文件 @
a214e5dc
...
...
@@ -347,6 +347,7 @@ def source_include(header_file_path):
#include "paddle/fluid/platform/profiler/supplement_tracing.h"
DECLARE_bool(conv2d_disable_cudnn);
DECLARE_int32(low_precision_op_list);
"""
...
...
paddle/phi/api/yaml/generator/backward_api_gen.py
浏览文件 @
a214e5dc
...
...
@@ -290,6 +290,7 @@ def source_include(header_file_path):
#include "paddle/fluid/platform/profiler/supplement_tracing.h"
DECLARE_bool(conv2d_disable_cudnn);
DECLARE_int32(low_precision_op_list);
"""
...
...
paddle/phi/api/yaml/generator/intermediate_api_gen.py
浏览文件 @
a214e5dc
...
...
@@ -54,6 +54,8 @@ def source_include(header_file_path):
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/fluid/platform/profiler/supplement_tracing.h"
DECLARE_int32(low_precision_op_list);
"""
...
...
paddle/phi/api/yaml/generator/sparse_api_gen.py
浏览文件 @
a214e5dc
...
...
@@ -221,6 +221,9 @@ class SparseAPI(ForwardAPI):
auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"
{
kernel_name
}
", {{kernel_backend, kernel_layout, kernel_data_type}});
const auto& phi_kernel = kernel_result.kernel;
if (FLAGS_low_precision_op_list) {{
phi::KernelFactory::Instance().AddToLowPrecisionKernelList("
{
self
.
api
}
", kernel_data_type);
}}
VLOG(6) << "
{
self
.
api
}
api sparse kernel: " << phi_kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);
...
...
@@ -324,6 +327,8 @@ def source_include(header_file_path):
#include "paddle/phi/infermeta/sparse/unary.h"
#include "paddle/phi/infermeta/sparse/binary.h"
#include "paddle/phi/infermeta/sparse/multiary.h"
DECLARE_int32(low_precision_op_list);
"""
...
...
paddle/phi/api/yaml/generator/sparse_bw_api_gen.py
浏览文件 @
a214e5dc
...
...
@@ -134,6 +134,8 @@ def source_include(header_file_path):
#include "paddle/phi/infermeta/sparse/unary.h"
#include "paddle/phi/infermeta/sparse/binary.h"
#include "paddle/phi/infermeta/sparse/backward.h"
DECLARE_int32(low_precision_op_list);
"""
...
...
paddle/phi/api/yaml/generator/strings_api_gen.py
浏览文件 @
a214e5dc
...
...
@@ -210,6 +210,9 @@ class StringsAPI(ForwardAPI):
VLOG(6) << "
{
self
.
api
}
api strings kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"
{
self
.
kernel
[
'func'
][
0
]
}
", {{kernel_backend, kernel_layout, kernel_data_type}});
if (FLAGS_low_precision_op_list) {{
phi::KernelFactory::Instance().AddToLowPrecisionKernelList("
{
self
.
api
}
", kernel_data_type);
}}
const auto& kernel = kernel_result.kernel;
VLOG(6) << "
{
self
.
api
}
api strings kernel: " << kernel;
...
...
@@ -334,6 +337,8 @@ def source_include(header_file_path):
#include "paddle/phi/api/lib/api_registry.h"
#include "paddle/phi/api/lib/kernel_dispatch.h"
#include "paddle/phi/core/kernel_registry.h"
DECLARE_int32(low_precision_op_list);
"""
...
...
paddle/phi/core/flags.cc
浏览文件 @
a214e5dc
...
...
@@ -55,16 +55,19 @@ PADDLE_DEFINE_EXPORTED_int32(paddle_num_threads,
/**
* Low Precision Op related FLAG
* Name: FLAGS_low_precision_op_list
* Since Version:
0.13
.0
* Value Range:
bool, default=false
* Since Version:
2.5
.0
* Value Range:
int32, default=0
* Example:
* Note: Used to debug. Get the low precision op list of current module.
* FLAGS_check_nan_inf is set.
* - 1, return the low precision op list of current module.
* - 2, return the op list of current module.
*/
PADDLE_DEFINE_EXPORTED_
bool
(
low_precision_op_list
,
false
,
"Checking whether get the low precision op list of
"
"current module. It will b
e "
"rerun the low precision list after
module."
);
PADDLE_DEFINE_EXPORTED_
int32
(
low_precision_op_list
,
0
,
"Setting the level of low precision op
"
"list printing. It will be return th
e "
"low precision op list of current
module."
);
/**
* Operator related FLAG
...
...
paddle/phi/core/kernel_factory.cc
浏览文件 @
a214e5dc
...
...
@@ -23,6 +23,7 @@
#include "paddle/phi/core/compat/op_utils.h"
#include "paddle/utils/string/string_helper.h"
DECLARE_int32
(
low_precision_op_list
);
DECLARE_bool
(
enable_api_kernel_fallback
);
namespace
phi
{
...
...
@@ -106,9 +107,33 @@ bool KernelFactory::HasKernel(const std::string& kernel_name,
return
true
;
}
// Counts one invocation of the API `name` in low_precision_kernels_, keyed by
// its fluid op name. Behavior depends on FLAGS_low_precision_op_list:
//   < 1   : nothing is recorded;
//   == 1  : recorded only when kernel_key_type is FLOAT16 or BFLOAT16;
//   other : recorded regardless of kernel_key_type.
// Ops whose fluid name contains "_grad" are never recorded.
void KernelFactory::AddToLowPrecisionKernelList(
    const std::string& name,
    const paddle::experimental::DataType& kernel_key_type) {
  if (FLAGS_low_precision_op_list < 1) {
    return;
  }

  const auto op_name = phi::TransToFluidOpName(name);
  // Only forward APIs are tracked, so anything that looks like a grad op is
  // dropped here.
  if (op_name.find("_grad") != std::string::npos) {
    return;
  }

  using KernelDataType = paddle::experimental::DataType;
  const bool low_precision = kernel_key_type == KernelDataType::FLOAT16 ||
                             kernel_key_type == KernelDataType::BFLOAT16;

  // Level 1 restricts recording to low precision kernels; every other
  // (positive) level records all kernels.
  if (FLAGS_low_precision_op_list != 1 || low_precision) {
    ++low_precision_kernels_[op_name];
  }
}
// Returns a copy of low_precision_kernels_, i.e. the op name -> counter map
// that AddToLowPrecisionKernelList fills in.
std::map<const std::string, int> KernelFactory::GetLowPrecisionKernelList() {
  std::map<const std::string, int> snapshot = low_precision_kernels_;
  return snapshot;
}
KernelResult
KernelFactory
::
SelectKernelOrThrowError
(
const
std
::
string
&
kernel_name
,
const
KernelKey
&
const_kernel_key
)
const
{
auto
iter
=
kernels_
.
find
(
kernel_name
);
PADDLE_ENFORCE_NE
(
iter
,
kernels_
.
end
(),
...
...
paddle/phi/core/kernel_factory.h
浏览文件 @
a214e5dc
...
...
@@ -14,12 +14,12 @@
#pragma once
#include <map>
#include <ostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/layout.h"
...
...
@@ -305,10 +305,19 @@ class KernelFactory {
const
KernelArgsDef
&
GetFirstKernelArgsDef
(
const
std
::
string
&
kernel_name
)
const
;
void
AddToLowPrecisionKernelList
(
const
std
::
string
&
name
,
const
paddle
::
experimental
::
DataType
&
kernel_key_type
);
std
::
map
<
const
std
::
string
,
int
>
GetLowPrecisionKernelList
();
private:
KernelFactory
()
=
default
;
KernelNameMap
kernels_
;
// Get the low precision kernel list of current module.
std
::
map
<
const
std
::
string
,
int
>
low_precision_kernels_
;
};
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
KernelKey
&
kernel_key
)
{
...
...
paddle/phi/tests/api/scale_api.h
浏览文件 @
a214e5dc
...
...
@@ -25,6 +25,7 @@
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/scale_kernel.h"
DECLARE_int32
(
low_precision_op_list
);
namespace
paddle
{
namespace
experimental
{
...
...
@@ -54,6 +55,10 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
auto
kernel_result
=
phi
::
KernelFactory
::
Instance
().
SelectKernelOrThrowError
(
"scale"
,
{
kernel_backend
,
kernel_layout
,
kernel_data_type
});
const
auto
&
kernel
=
kernel_result
.
kernel
;
if
(
FLAGS_low_precision_op_list
)
{
phi
::
KernelFactory
::
Instance
().
AddToLowPrecisionKernelList
(
"scale"
,
kernel_data_type
);
}
VLOG
(
6
)
<<
"scale API kernel key: ["
<<
kernel_backend
<<
", "
<<
kernel_layout
<<
", "
<<
kernel_data_type
<<
"]"
;
VLOG
(
6
)
<<
"scale API kernel: "
<<
kernel
;
...
...
@@ -225,6 +230,10 @@ Tensor scale_switch_case(const Tensor& x,
auto
kernel_result
=
phi
::
KernelFactory
::
Instance
().
SelectKernelOrThrowError
(
"scale"
,
{
kernel_backend
,
kernel_layout
,
kernel_data_type
});
const
auto
&
kernel
=
kernel_result
.
kernel
;
if
(
FLAGS_low_precision_op_list
)
{
phi
::
KernelFactory
::
Instance
().
AddToLowPrecisionKernelList
(
"scale"
,
kernel_data_type
);
}
VLOG
(
6
)
<<
"scale API kernel key: ["
<<
kernel_backend
<<
", "
<<
kernel_layout
<<
", "
<<
kernel_data_type
<<
"]"
;
VLOG
(
6
)
<<
"scale API kernel: "
<<
kernel
;
...
...
python/paddle/amp/auto_cast.py
浏览文件 @
a214e5dc
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
import
copy
import
os
import
warnings
import
paddle
...
...
@@ -94,18 +95,23 @@ _g_amp_state_ = None
def low_precision_op_list():
    """Print the op statistics gathered under ``FLAGS_low_precision_op_list``.

    The level is read from the ``FLAGS_low_precision_op_list`` environment
    variable and converted with ``int``:

    * unset, or ``0``: nothing is printed and the function returns ``None``;
    * ``1``: the table is printed under a "low precision op list" banner;
    * any other value: the table is printed under an "op list" banner.

    The rows come from ``paddle.fluid.core.get_low_precision_op_list()``, one
    line per op name with its count, followed by the number of distinct ops.
    """
    # Read the variable once so the None check and the int() conversion are
    # guaranteed to look at the same value.
    raw_level = os.getenv("FLAGS_low_precision_op_list")
    if raw_level is None:
        return
    level = int(raw_level)
    if level == 0:
        return

    if level == 1:
        print('<{:-^60}>'.format(" low precision op list "))
    else:
        print('<{:-^60}>'.format(" op list "))

    op_list = paddle.fluid.core.get_low_precision_op_list()
    op_count = 0
    print(
        '<{:-^40}'.format(" op_name "), '|', '{:-^17}>'.format(" op count ")
    )
    for x in op_list:
        print(' %-40s| %-15d' % (x, op_list[x]))
        op_count += 1
    print('<{:-^60}>'.format(" op count: " + str(op_count) + " "))
def
amp_state
():
...
...
python/paddle/fluid/tests/unittests/test_low_precision_list.py
浏览文件 @
a214e5dc
...
...
@@ -25,12 +25,11 @@ class TestAMPList(unittest.TestCase):
b
=
paddle
.
rand
([
2
,
3
])
# amp list conv2d, cast
with
paddle
.
amp
.
auto_cast
():
with
paddle
.
amp
.
auto_cast
(
enable
=
True
,
level
=
'O2'
):
conv
=
conv2d
(
data
)
c
=
a
+
b
paddle
.
amp
.
low_precision_op_list
()
op_list
=
paddle
.
fluid
.
core
.
get_low_precision_op_list
()
print
(
conv
.
dtype
)
if
conv
.
dtype
==
paddle
.
float16
:
self
.
assertTrue
(
'elementwise_add'
in
op_list
)
self
.
assertTrue
(
'conv2d'
in
op_list
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录