Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c5affb78
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c5affb78
编写于
2月 09, 2022
作者:
L
Leo Chen
提交者:
GitHub
2月 09, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[pten] fit pten for amp (#39403)
* fit pten for amp * fix typo
上级
db7d129e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
101 addition
and
76 deletion
+101
-76
paddle/fluid/imperative/amp_auto_cast.cc
paddle/fluid/imperative/amp_auto_cast.cc
+93
-15
paddle/fluid/imperative/amp_auto_cast.h
paddle/fluid/imperative/amp_auto_cast.h
+6
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+2
-61
未找到文件。
paddle/fluid/imperative/amp_auto_cast.cc
浏览文件 @
c5affb78
...
@@ -25,6 +25,80 @@ namespace imperative {
...
@@ -25,6 +25,80 @@ namespace imperative {
class
VarBase
;
class
VarBase
;
// According to the input `place` and `dtype`, this function returns a tuple
// consists of three sets:
// 1) All operators registered in the Paddle framework.
// 2) All operators supported for `place` and `dtype`.
// 3) All operators unsupported for `place` and `dtype`.
// The input `place` is a type of string, which can only be `GPU` or `CPU`.
// The input `dtype` is a type of paddle::framework::proto::VarType::Type,
// which can be paddle::framework::proto::VarType::FP16,
// paddle::framework::proto::VarType::FP32 and so on.
std
::
tuple
<
std
::
unordered_set
<
std
::
string
>
,
std
::
unordered_set
<
std
::
string
>
,
std
::
unordered_set
<
std
::
string
>>
OpSupportedInfos
(
const
std
::
string
&
place
,
framework
::
proto
::
VarType
::
Type
dtype
)
{
std
::
string
query_place
;
std
::
transform
(
place
.
begin
(),
place
.
end
(),
std
::
back_inserter
(
query_place
),
[](
unsigned
char
c
)
{
return
std
::
toupper
(
c
);
});
using
fn_type
=
std
::
add_pointer
<
bool
(
const
platform
::
Place
&
)
>::
type
;
std
::
unordered_map
<
std
::
string
,
fn_type
>
is_target_place
{
{
"GPU"
,
&
platform
::
is_gpu_place
},
{
"CPU"
,
&
platform
::
is_cpu_place
},
{
"XPU"
,
&
platform
::
is_xpu_place
},
{
"NPU"
,
&
platform
::
is_npu_place
},
{
"MLU"
,
&
platform
::
is_mlu_place
},
};
PADDLE_ENFORCE_NE
(
is_target_place
.
count
(
query_place
),
0
,
platform
::
errors
::
InvalidArgument
(
"The argument `place` should be 'GPU', 'CPU', 'XPU', "
"'NPU', 'MLU', but got '%s'."
,
place
));
std
::
unordered_set
<
std
::
string
>
all_ops
;
const
auto
&
op_info
=
framework
::
OpInfoMap
::
Instance
().
map
();
for
(
auto
it
=
op_info
.
begin
();
it
!=
op_info
.
end
();
it
++
)
{
all_ops
.
emplace
(
it
->
first
);
}
std
::
unordered_set
<
std
::
string
>
supported_ops
;
auto
&
all_kernels
=
framework
::
OperatorWithKernel
::
AllOpKernels
();
for
(
auto
it
=
all_kernels
.
begin
();
it
!=
all_kernels
.
end
();
it
++
)
{
for
(
auto
&
kernel_type
:
it
->
second
)
{
if
(
is_target_place
[
query_place
](
kernel_type
.
first
.
place_
)
&&
kernel_type
.
first
.
data_type_
==
dtype
)
{
supported_ops
.
emplace
(
it
->
first
);
}
}
}
auto
pten_kernels
=
pten
::
KernelFactory
::
Instance
().
kernels
();
for
(
auto
&
kernel_pair
:
pten_kernels
)
{
auto
op_type
=
pten
::
TransToFluidOpName
(
kernel_pair
.
first
);
for
(
auto
&
info_pair
:
kernel_pair
.
second
)
{
framework
::
OpKernelType
kernel_type
=
framework
::
TransPtenKernelKeyToOpKernelType
(
info_pair
.
first
);
if
(
is_target_place
[
query_place
](
kernel_type
.
place_
)
&&
kernel_type
.
data_type_
==
dtype
&&
all_ops
.
count
(
op_type
))
{
VLOG
(
4
)
<<
op_type
<<
" "
<<
supported_ops
.
size
();
supported_ops
.
emplace
(
op_type
);
}
}
}
std
::
unordered_set
<
std
::
string
>
unsupported_ops
;
for
(
auto
&
op
:
all_ops
)
{
if
(
!
supported_ops
.
count
(
op
))
{
unsupported_ops
.
emplace
(
op
);
}
}
VLOG
(
4
)
<<
"-- The size of all_ops: "
<<
all_ops
.
size
()
<<
" --"
;
VLOG
(
4
)
<<
"-- The size of supported_ops: "
<<
supported_ops
.
size
()
<<
" --"
;
VLOG
(
4
)
<<
"-- The size of unsupported_ops: "
<<
unsupported_ops
.
size
()
<<
" --"
;
return
std
::
make_tuple
(
std
::
move
(
all_ops
),
std
::
move
(
supported_ops
),
std
::
move
(
unsupported_ops
));
}
AutoCastGuard
::
AutoCastGuard
(
std
::
shared_ptr
<
Tracer
>
tracer
,
AmpLevel
level
)
AutoCastGuard
::
AutoCastGuard
(
std
::
shared_ptr
<
Tracer
>
tracer
,
AmpLevel
level
)
:
tracer_
(
tracer
)
{
:
tracer_
(
tracer
)
{
pre_amp_level_
=
tracer_
->
GetAmpLevel
();
pre_amp_level_
=
tracer_
->
GetAmpLevel
();
...
@@ -40,21 +114,25 @@ AmpOperators::AmpOperators()
...
@@ -40,21 +114,25 @@ AmpOperators::AmpOperators()
:
allow_ops_
(
new
std
::
unordered_set
<
std
::
string
>
()),
:
allow_ops_
(
new
std
::
unordered_set
<
std
::
string
>
()),
block_ops_
(
new
std
::
unordered_set
<
std
::
string
>
()),
block_ops_
(
new
std
::
unordered_set
<
std
::
string
>
()),
unsupported_fp16_ops_
(
new
std
::
unordered_set
<
std
::
string
>
())
{
unsupported_fp16_ops_
(
new
std
::
unordered_set
<
std
::
string
>
())
{
auto
&
all_kernels
=
framework
::
OperatorWithKernel
::
AllOpKernels
();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto
fp16_dtype
=
framework
::
proto
::
VarType
::
FP16
;
auto
unsupported_ops_gpu
=
std
::
get
<
2
>
(
for
(
auto
it
=
all_kernels
.
begin
();
it
!=
all_kernels
.
end
();
it
++
)
{
OpSupportedInfos
(
"GPU"
,
paddle
::
framework
::
proto
::
VarType
::
FP16
));
bool
supported
=
false
;
unsupported_fp16_ops_
->
insert
(
unsupported_ops_gpu
.
begin
(),
for
(
auto
&
kernel_type
:
it
->
second
)
{
unsupported_ops_gpu
.
end
());
if
((
platform
::
is_gpu_place
(
kernel_type
.
first
.
place_
)
||
// NOTE: GPU/NPU/XPU are compiled separately.
platform
::
is_xpu_place
(
kernel_type
.
first
.
place_
))
&&
#elif defined(PADDLE_WITH_ASCEND_CL)
kernel_type
.
first
.
data_type_
==
fp16_dtype
)
{
auto
unsupported_ops_npu
=
std
::
get
<
2
>
(
supported
=
true
;
OpSupportedInfos
(
"NPU"
,
paddle
::
framework
::
proto
::
VarType
::
FP16
));
}
unsupported_fp16_ops_
->
insert
(
unsupported_ops_npu
.
begin
(),
}
unsupported_ops_npu
.
end
());
if
(
!
supported
)
{
#elif defined(PADDLE_WITH_XPU)
unsupported_fp16_ops_
->
insert
(
it
->
first
);
auto
unsupported_ops_xpu
=
std
::
get
<
2
>
(
}
OpSupportedInfos
(
"XPU"
,
paddle
::
framework
::
proto
::
VarType
::
FP16
));
}
unsupported_fp16_ops_
->
insert
(
unsupported_ops_xpu
.
begin
(),
unsupported_ops_xpu
.
end
());
#endif
VLOG
(
4
)
<<
allow_ops_
->
size
()
<<
" "
<<
block_ops_
->
size
()
<<
" "
<<
unsupported_fp16_ops_
->
size
();
}
}
AmpOperators
::~
AmpOperators
()
{}
AmpOperators
::~
AmpOperators
()
{}
...
...
paddle/fluid/imperative/amp_auto_cast.h
浏览文件 @
c5affb78
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
#include <tuple>
#include <tuple>
#include <unordered_set>
#include <unordered_set>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/imperative/type_defs.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -32,6 +33,11 @@ enum class AmpLevel {
...
@@ -32,6 +33,11 @@ enum class AmpLevel {
O3
,
// fp16
O3
,
// fp16
};
};
// Declaration of OpSupportedInfos.
// Given a device `place` name and a variable `dtype`, returns a tuple of
// three sets of operator names, in this order:
//   1) all registered operators,
//   2) operators that have a kernel for `place` and `dtype`,
//   3) operators that do not.
// `place` is passed as a string and `dtype` as a
// framework::proto::VarType::Type value.
std
::
tuple
<
std
::
unordered_set
<
std
::
string
>
,
std
::
unordered_set
<
std
::
string
>
,
std
::
unordered_set
<
std
::
string
>>
OpSupportedInfos
(
const
std
::
string
&
place
,
framework
::
proto
::
VarType
::
Type
dtype
);
class
Tracer
;
class
Tracer
;
// Singleton implementation with C++ 11
// Singleton implementation with C++ 11
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
c5affb78
...
@@ -59,6 +59,7 @@ limitations under the License. */
...
@@ -59,6 +59,7 @@ limitations under the License. */
#include "paddle/fluid/framework/trainer.h"
#include "paddle/fluid/framework/trainer.h"
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/imperative/amp_auto_cast.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/mmap_allocator.h"
#include "paddle/fluid/memory/allocation/mmap_allocator.h"
...
@@ -304,66 +305,6 @@ bool SupportsVNNI() {
...
@@ -304,66 +305,6 @@ bool SupportsVNNI() {
#endif
#endif
}
}
// According to the input `place` and `dtype`, this function returns a tuple
// consists of three sets:
// 1) All operators registered in the Paddle framework.
// 2) All operators supported for `place` and `dtype`.
// 3) All operators unsupported for `place` and `dtype`.
// The input `place` is a type of string, which can only be `GPU` or `CPU`.
// The input `dtype` is a type of paddle::framework::proto::VarType::Type,
// which can be paddle::framework::proto::VarType::FP16,
// paddle::framework::proto::VarType::FP32 and so on.
std
::
tuple
<
std
::
unordered_set
<
std
::
string
>
,
std
::
unordered_set
<
std
::
string
>
,
std
::
unordered_set
<
std
::
string
>>
OpSupportedInfos
(
const
std
::
string
&
place
,
framework
::
proto
::
VarType
::
Type
dtype
)
{
std
::
string
query_place
;
std
::
transform
(
place
.
begin
(),
place
.
end
(),
std
::
back_inserter
(
query_place
),
[](
unsigned
char
c
)
{
return
std
::
toupper
(
c
);
});
using
fn_type
=
std
::
add_pointer
<
bool
(
const
platform
::
Place
&
)
>::
type
;
std
::
unordered_map
<
std
::
string
,
fn_type
>
is_target_place
{
{
"GPU"
,
&
platform
::
is_gpu_place
},
{
"CPU"
,
&
platform
::
is_cpu_place
},
{
"XPU"
,
&
platform
::
is_xpu_place
},
{
"NPU"
,
&
platform
::
is_npu_place
},
{
"MLU"
,
&
platform
::
is_mlu_place
},
};
PADDLE_ENFORCE_NE
(
is_target_place
.
count
(
query_place
),
0
,
platform
::
errors
::
InvalidArgument
(
"The argument `place` should be 'GPU' or 'CPU', but get '%s'."
,
place
));
std
::
unordered_set
<
std
::
string
>
all_ops
;
const
auto
&
op_info
=
framework
::
OpInfoMap
::
Instance
().
map
();
for
(
auto
it
=
op_info
.
begin
();
it
!=
op_info
.
end
();
it
++
)
{
all_ops
.
emplace
(
it
->
first
);
}
std
::
unordered_set
<
std
::
string
>
supported_ops
;
auto
&
all_kernels
=
framework
::
OperatorWithKernel
::
AllOpKernels
();
for
(
auto
it
=
all_kernels
.
begin
();
it
!=
all_kernels
.
end
();
it
++
)
{
for
(
auto
&
kernel_type
:
it
->
second
)
{
if
(
is_target_place
[
query_place
](
kernel_type
.
first
.
place_
)
&&
kernel_type
.
first
.
data_type_
==
dtype
)
{
supported_ops
.
emplace
(
it
->
first
);
}
}
}
std
::
unordered_set
<
std
::
string
>
unsupported_ops
;
for
(
auto
&
op
:
all_ops
)
{
if
(
!
supported_ops
.
count
(
op
))
{
unsupported_ops
.
emplace
(
op
);
}
}
VLOG
(
4
)
<<
"-- The size of all_ops: "
<<
all_ops
.
size
()
<<
" --"
;
VLOG
(
4
)
<<
"-- The size of supported_ops: "
<<
supported_ops
.
size
()
<<
" --"
;
VLOG
(
4
)
<<
"-- The size of unsupported_ops: "
<<
unsupported_ops
.
size
()
<<
" --"
;
return
std
::
make_tuple
(
std
::
move
(
all_ops
),
std
::
move
(
supported_ops
),
std
::
move
(
unsupported_ops
));
}
bool
IsCompiledWithBrpc
()
{
bool
IsCompiledWithBrpc
()
{
#ifndef PADDLE_WITH_DISTRIBUTE
#ifndef PADDLE_WITH_DISTRIBUTE
return
false
;
return
false
;
...
@@ -2449,7 +2390,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -2449,7 +2390,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"supports_bfloat16_fast_performance"
,
SupportsBfloat16FastPerformance
);
m
.
def
(
"supports_bfloat16_fast_performance"
,
SupportsBfloat16FastPerformance
);
m
.
def
(
"supports_int8"
,
SupportsInt8
);
m
.
def
(
"supports_int8"
,
SupportsInt8
);
m
.
def
(
"supports_vnni"
,
SupportsVNNI
);
m
.
def
(
"supports_vnni"
,
SupportsVNNI
);
m
.
def
(
"op_supported_infos"
,
OpSupportedInfos
);
m
.
def
(
"op_supported_infos"
,
imperative
::
OpSupportedInfos
);
m
.
def
(
"is_compiled_with_brpc"
,
IsCompiledWithBrpc
);
m
.
def
(
"is_compiled_with_brpc"
,
IsCompiledWithBrpc
);
m
.
def
(
"is_compiled_with_dist"
,
IsCompiledWithDIST
);
m
.
def
(
"is_compiled_with_dist"
,
IsCompiledWithDIST
);
m
.
def
(
"_cuda_synchronize"
,
[](
const
platform
::
CUDAPlace
&
place
)
{
m
.
def
(
"_cuda_synchronize"
,
[](
const
platform
::
CUDAPlace
&
place
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录