PaddlePaddle / Paddle
Commit 0d45ac73
Authored May 11, 2023 by 张春乔; committed via GitHub on May 11, 2023.

Retire Ascend and Cambricon related code: NPU-related code removal, part 2 (#53568)

Parent commit: 00ded2ea
Showing 37 changed files with 55 additions and 411 deletions (+55 -411).
paddle/fluid/distributed/fleet_executor/message_bus.cc  +4 -6
paddle/fluid/framework/dlpack_tensor.cc  +0 -5
paddle/fluid/framework/executor_cache.cc  +0 -4
paddle/fluid/framework/new_executor/interpreter/data_transfer.cc  +2 -2
paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc  +1 -1
paddle/fluid/framework/parallel_executor.cc  +2 -2
paddle/fluid/imperative/amp_auto_cast.cc  +7 -7
paddle/fluid/inference/analysis/argument.h  +0 -4
paddle/fluid/inference/api/analysis_config.cc  +0 -26
paddle/fluid/inference/api/analysis_predictor.cc  +2 -5
paddle/fluid/inference/api/details/zero_copy_tensor.cc  +3 -3
paddle/fluid/inference/api/paddle_analysis_config.h  +0 -16
paddle/fluid/inference/api/paddle_api.h  +0 -1
paddle/fluid/inference/api/paddle_pass_builder.h  +0 -20
paddle/fluid/inference/capi_exp/pd_config.cc  +0 -9
paddle/fluid/inference/capi_exp/pd_config.h  +0 -16
paddle/fluid/inference/goapi/config.go  +0 -18
paddle/fluid/platform/monitor.h  +0 -10
paddle/fluid/platform/place.h  +0 -6
paddle/fluid/pybind/inference_api.cc  +0 -3
paddle/fluid/pybind/place.cc  +0 -1
paddle/phi/backends/cpu/cpu_info.cc  +0 -17
paddle/phi/backends/cpu/cpu_info.h  +0 -9
paddle/phi/common/place.cc  +0 -3
paddle/phi/common/place.h  +0 -10
paddle/phi/kernels/funcs/math_function.cc  +0 -6
python/paddle/distributed/fleet/layers/mpu/mp_ops.py  +1 -6
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py  +1 -7
python/paddle/fluid/executor.py  +6 -16
python/paddle/fluid/framework.py  +2 -17
python/paddle/fluid/optimizer.py  +5 -11
python/paddle/nn/functional/conv.py  +0 -15
python/paddle/nn/functional/loss.py  +13 -81
python/paddle/static/amp/decorator.py  +6 -33
python/paddle/static/amp/fp16_lists.py  +0 -2
python/paddle/static/nn/common.py  +0 -7
tools/timeline.py  +0 -6
paddle/fluid/distributed/fleet_executor/message_bus.cc
@@ -111,8 +111,7 @@ bool MessageBus::Send(int64_t dst_rank,
 #else
   PADDLE_THROW(platform::errors::Unavailable(
       "Fleet executor does not support sending message between different "
-      "ranks when Paddle is compiled with npu or "
-      "isn't compiled with distributed for now."));
+      "ranks when Paddle isn't compiled with distributed for now."));
 #endif
   return true;
 }
@@ -202,10 +201,9 @@ void MessageBus::ListenPort() {
   }
   LOG(INFO) << "Message bus's listen port thread starts successful.";
 #else
   LOG(WARNING)
-      << "Fleet executor's ListenPort() is a fake function when Paddle is "
-         "compiled with npu or Paddle isn't compiled "
+      << "Fleet executor's ListenPort() is a fake function when "
+         "Paddle isn't compiled "
          "with distributed for now.";
 #endif
 }

paddle/fluid/framework/dlpack_tensor.cc
@@ -89,11 +89,6 @@ struct DLDeviceVisitor
         platform::errors::Unimplemented("platform::XPUPlace is not supported"));
   }
-
-  inline ::DLDevice operator()(const platform::NPUPinnedPlace &place) const {
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "platform::NPUPinnedPlace is not supported"));
-  }
   inline ::DLDevice operator()(const platform::CustomPlace &place) const {
     PADDLE_THROW(platform::errors::Unimplemented(
         "platform::CustomPlace is not supported"));

paddle/fluid/framework/executor_cache.cc
@@ -50,10 +50,6 @@ static ExecutionStrategy GetExecutionStrategy(const platform::Place &place) {
       execution_strategy.num_threads_ = 1;
       break;
     }
-    case platform::DeviceType::NPU: {
-      execution_strategy.num_threads_ = 1;
-      break;
-    }
     case platform::DeviceType::CUSTOM_DEVICE: {
       execution_strategy.num_threads_ = 1;
       break;

paddle/fluid/framework/new_executor/interpreter/data_transfer.cc
@@ -196,7 +196,7 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
             ? OpFuncType::kGpuSync
             : OpFuncType::kGpuAsync;
   } else {
-    // Memcpy in npu and custom devices is asynchronous
+    // Memcpy in custom devices is asynchronous
     new_op_func_node.type_ = OpFuncType::kGpuAsync;
   }
@@ -225,7 +225,7 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
     }
   }
-  // NOTE(winter-wang): in npu and custom device, D2H kernel is asynchronous.
+  // NOTE(winter-wang): in custom device, D2H kernel is asynchronous.
   // need to explicit synchronization.
   if ((platform::is_custom_place(place)) && op_type == kMemcpyD2H) {
     dev_ctx->Wait();

paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc
@@ -150,7 +150,7 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext(
   DeviceContext* dev_ctx = nullptr;
-  // only gpu/npu need update. xpu not need, because xpu memcpy op kernel is
+  // only gpu needs update. xpu not need, because xpu memcpy op kernel is
   // synchronous.
   if (platform::is_gpu_place(place_) || platform::is_custom_place(place_)) {
     VLOG(6) << "Parse DeviceContext for " << op_type

paddle/fluid/framework/parallel_executor.cc
@@ -1331,8 +1331,8 @@ void ParallelExecutor::InitExecutorPrivateMemberInfo(
     device_name = "XPU";
   } else {
     PADDLE_THROW(
-        platform::errors::Unavailable("Only CPU/CUDA/NPU/XPU is supportted. "
-                                      "please use CPU/CUDA/NPU/XPU backend."));
+        platform::errors::Unavailable("Only CPU/CUDA/XPU is supportted. "
+                                      "please use CPU/CUDA/XPU backend."));
   }

   VLOG(1) << string::Sprintf(

paddle/fluid/imperative/amp_auto_cast.cc
@@ -52,12 +52,12 @@ OpSupportedInfos(const std::string& place,
       {"CPU", &platform::is_cpu_place},
       {"XPU", &platform::is_xpu_place},
   };
-  PADDLE_ENFORCE_NE(is_target_place.count(query_place),
-                    0,
-                    platform::errors::InvalidArgument(
-                        "The argument `place` should be 'GPU', 'CPU', 'XPU', "
-                        "'NPU', but got '%s'.",
-                        place));
+  PADDLE_ENFORCE_NE(
+      is_target_place.count(query_place),
+      0,
+      platform::errors::InvalidArgument(
+          "The argument `place` should be 'GPU', 'CPU', 'XPU', but got '%s'.",
+          place));

   std::unordered_set<std::string> all_ops;
   const auto& op_info = framework::OpInfoMap::Instance().map();
@@ -147,7 +147,7 @@ AmpOperators::AmpOperators()
       OpSupportedInfos("GPU", paddle::framework::proto::VarType::BF16));
   unsupported_bf16_ops_->insert(unsupported_ops_gpu_bf16.begin(),
                                 unsupported_ops_gpu_bf16.end());
-// NOTE: GPU/NPU/XPU is compiled seperatly.
+// NOTE: GPU/XPU is compiled seperatly.
 #elif defined(PADDLE_WITH_XPU)
   auto unsupported_ops_xpu_fp16 = std::get<2>(
       OpSupportedInfos("XPU", paddle::framework::proto::VarType::FP16));

paddle/fluid/inference/analysis/argument.h
@@ -364,10 +364,6 @@ struct Argument {
                       IpuEnableModelRuntimeExecutor,
                       bool);

-  // npu related
-  DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
-  DECL_ARGUMENT_FIELD(npu_device_id, NPUDeviceId, int);
-
   // mixed precision related
   DECL_ARGUMENT_FIELD(model_precision, ModelPrecision, int);
   DECL_ARGUMENT_FIELD(mixed_black_list,

paddle/fluid/inference/api/analysis_config.cc
@@ -56,8 +56,6 @@ PassStrategy *AnalysisConfig::pass_builder() const {
       pass_builder_.reset(new GpuPassStrategy);
     } else if (use_xpu_) {
       pass_builder_.reset(new XpuPassStrategy);
-    } else if (use_npu_) {
-      pass_builder_.reset(new NpuPassStrategy);
     } else if (use_ipu_) {
       LOG(INFO) << "Create IPU IR passes";
       pass_builder_.reset(new IpuPassStrategy);
@@ -506,8 +504,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_opencl_);
   // NPU related.
-  CP_MEMBER(use_npu_);
-  CP_MEMBER(npu_device_id_);
   CP_MEMBER(nnadapter_config_);

   // profile related.
@@ -574,9 +570,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   } else if (use_custom_device_) {
     pass_builder_.reset(new CustomDevicePassStrategy(
         *static_cast<CustomDevicePassStrategy *>(other.pass_builder())));
-  } else if (use_npu_) {
-    pass_builder_.reset(new NpuPassStrategy(
-        *static_cast<NpuPassStrategy *>(other.pass_builder())));
   } else {
     pass_builder_.reset(new CpuPassStrategy(
         *static_cast<CpuPassStrategy *>(other.pass_builder())));
@@ -827,7 +820,6 @@ void AnalysisConfig::Update() {
   // Transfer pass_builder and copy the existing compatible passes.
   if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
       ((use_xpu() ^ pass_builder_->use_xpu())) ||
-      ((use_npu() ^ pass_builder_->use_npu())) ||
       ((use_ipu() ^ pass_builder_->use_ipu())) ||
       ((use_custom_device() ^ pass_builder_->use_custom_device()))) {
     if (use_gpu()) {
@@ -841,13 +833,6 @@ void AnalysisConfig::Update() {
               platform::errors::InvalidArgument(
                   "Only one choice can be made between CPU and XPU."));
       pass_builder_.reset(new XpuPassStrategy);
-    } else if (use_npu()) {
-      PADDLE_ENFORCE_EQ(
-          use_gpu(),
-          false,
-          platform::errors::InvalidArgument(
-              "Only one choice can be made between GPU and NPU."));
-      pass_builder_.reset(new NpuPassStrategy);
     } else if (use_custom_device()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(),
@@ -875,14 +860,6 @@ void AnalysisConfig::Update() {
                   "Only one choice can be made between CPU and XPU."));
       pass_builder_.reset(new XpuPassStrategy(
           *static_cast<XpuPassStrategy *>(pass_builder_.get())));
-    } else if (use_npu()) {
-      PADDLE_ENFORCE_EQ(
-          use_gpu(),
-          false,
-          platform::errors::InvalidArgument(
-              "Only one choice can be made between GPU and NPU."));
-      pass_builder_.reset(new NpuPassStrategy(
-          *static_cast<NpuPassStrategy *>(pass_builder_.get())));
     } else if (use_custom_device()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(),
@@ -1114,9 +1091,6 @@ std::string AnalysisConfig::SerializeInfoCache() {
     ss << op_type;
   }
-  ss << use_npu_;
-  ss << npu_device_id_;
-
   ss << thread_local_stream_;
   ss << use_ipu_;

paddle/fluid/inference/api/analysis_predictor.cc
@@ -148,8 +148,8 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) {
       return phi::Backend::CUSTOM;
     default:
       PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-          "Paddle Inference not support backend, we now only support GPU, XPU, "
-          "NPU and CPU."));
+          "Paddle Inference not support backend, we now only support GPU, XPU "
+          "and CPU."));
       return phi::Backend::CPU;
   }
 }
@@ -1432,9 +1432,6 @@ void AnalysisPredictor::PrepareArgument() {
   argument_->SetIpuCustomPatterns(config_.ipu_custom_patterns_);
 #endif

-  argument_->SetUseNpu(config_.use_npu_);
-  argument_->SetNPUDeviceId(config_.npu_device_id());
-
   if (config_.use_mkldnn_) {
     LOG(INFO) << "MKLDNN is enabled";
     argument_->SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);

paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -130,7 +130,7 @@ T *Tensor::mutable_data(PlaceType place) {
     }
     default:
       PADDLE_THROW(paddle::platform::errors::Unavailable(
-          "Only CPU / CUDA / XPU / NPU places is supported. The place `%d` is "
+          "Only CPU / CUDA / XPU places is supported. The place `%d` is "
           "not supported.",
           static_cast<int>(place)));
       break;
@@ -261,7 +261,7 @@ void Tensor::CopyFromCpu(const T *data) {
                                       dev_ctx->stream());
 #else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+        "The analysis predictor supports CPU, GPU and XPU now."));
 #endif
   }
 }
@@ -468,7 +468,7 @@ void Tensor::CopyToCpuImpl(T *data,
     dev_ctx->GetStream()->Synchronize();
 #else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+        "The analysis predictor supports CPU, GPU and XPU now."));
 #endif
   }
 }

paddle/fluid/inference/api/paddle_analysis_config.h
@@ -414,12 +414,6 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return bool Whether the XPU is turned on.
   ///
   bool use_xpu() const { return use_xpu_; }
   ///
-  /// \brief A boolean state telling whether the NPU is turned on.
-  ///
-  /// \return bool Whether the NPU is turned on.
-  ///
-  bool use_npu() const { return use_npu_; }
-  ///
   /// \brief A boolean state telling whether the IPU is turned on.
   ///
   /// \return bool Whether the IPU is turned on.
@@ -461,12 +455,6 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return int The XPU device id.
   ///
   int xpu_device_id() const { return xpu_device_id_; }
   ///
-  /// \brief Get the NPU device id.
-  ///
-  /// \return int The NPU device id.
-  ///
-  int npu_device_id() const { return npu_device_id_; }
-  ///
   /// \brief Get the number of IPU device .
   ///
   /// \return int The number of IPU device.
@@ -1083,10 +1071,6 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_external_stream_{false};
   void* exec_stream_{nullptr};

-  // NPU related
-  bool use_npu_{false};
-  int npu_device_id_{0};
-
   // CustomDevice related
   bool use_custom_device_{false};
   int custom_device_id_{0};

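Note (not part of the diff): with use_npu() and npu_device_id() gone from AnalysisConfig, and the NpuPassStrategy removed further below, NPU inference is expected to go through Paddle's custom-device plugin path. The Python sketch below only illustrates that migration under stated assumptions: it assumes a Paddle build where an "npu" CustomDevice plugin is registered and assumes the enable_custom_device binding exposed by recent paddle.inference releases; the model directory is a placeholder.

# Hypothetical migration sketch: select an NPU via the custom-device path
# instead of the removed NPU-specific config methods.
import paddle
from paddle.inference import Config, create_predictor

config = Config("./model_dir")  # placeholder model directory
if paddle.device.is_compiled_with_custom_device("npu"):
    # Assumed to be the Python binding of AnalysisConfig::EnableCustomDevice;
    # verify the method name against your Paddle version.
    config.enable_custom_device("npu", 0)
else:
    config.disable_gpu()  # fall back to CPU

predictor = create_predictor(config)
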
paddle/fluid/inference/api/paddle_api.h
@@ -360,7 +360,6 @@ struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config {
   /// GPU related fields.
   bool use_xpu{false};
   bool use_gpu{false};
-  bool use_npu{false};
   int device{0};
   float fraction_of_gpu_memory{
       -1.f};  ///< Change to a float in (0,1] if needed.

paddle/fluid/inference/api/paddle_pass_builder.h
@@ -162,10 +162,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in xpu mode.
   bool use_xpu() const { return use_xpu_; }

-  /// \brief Check if we are using npu.
-  /// \return A bool variable implying whether we are in npu mode.
-  bool use_npu() const { return use_npu_; }
-
   /// \brief Check if we are using ipu.
   /// \return A bool variable implying whether we are in ipu mode.
   bool use_ipu() const { return use_ipu_; }
@@ -181,7 +177,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \cond Protected
   bool use_xpu_{false};
   bool use_gpu_{false};
-  bool use_npu_{false};
   bool use_ipu_{false};
   bool use_mkldnn_{false};
   bool use_custom_device_{false};
@@ -293,21 +288,6 @@ class PD_INFER_DECL XpuPassStrategy final : public PassStrategy {
   XpuPassStrategy();
 };

-/// \class NpuPassStrategy
-/// \brief The NPU passes controller, it is used in AnalysisPredictor with NPU
-/// mode.
-class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
- public:
-  NpuPassStrategy() : PassStrategy({}) { use_npu_ = true; }
-
-  /// \brief Construct by copying another NpuPassStrategy object.
-  /// \param[in] other The NpuPassStrategy object we want to copy.
-  explicit NpuPassStrategy(const NpuPassStrategy &other)
-      : PassStrategy(other.AllPasses()) {
-    use_npu_ = true;
-  }
-};
-
 /// \class CustomDevicePassStrategy
 /// \brief The CustomDevice passes controller, it is used in AnalysisPredictor
 /// with CustomDevice

paddle/fluid/inference/capi_exp/pd_config.cc
@@ -176,11 +176,6 @@ PD_Bool PD_ConfigUseXpu(__pd_keep PD_Config* pd_config) {
   return config->use_xpu();
 }

-PD_Bool PD_ConfigUseNpu(__pd_keep PD_Config* pd_config) {
-  CHECK_AND_CONVERT_PD_CONFIG;
-  return config->use_npu();
-}
-
 int32_t PD_ConfigGpuDeviceId(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->gpu_device_id();
@@ -189,10 +184,6 @@ int32_t PD_ConfigXpuDeviceId(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->xpu_device_id();
 }
-int32_t PD_ConfigNpuDeviceId(__pd_keep PD_Config* pd_config) {
-  CHECK_AND_CONVERT_PD_CONFIG;
-  return config->npu_device_id();
-}
 void PD_ConfigEnableCustomDevice(__pd_keep PD_Config* pd_config,
                                  char* device_type,

paddle/fluid/inference/capi_exp/pd_config.h
@@ -222,14 +222,6 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
 PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseXpu(
     __pd_keep PD_Config* pd_config);
 ///
-/// \brief A boolean state telling whether the NPU is turned on.
-///
-/// \param[in] pd_onfig config
-/// \return Whether the NPU is turned on.
-///
-PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseNpu(
-    __pd_keep PD_Config* pd_config);
-///
 /// \brief Get the GPU device id.
 ///
 /// \param[in] pd_onfig config
@@ -246,14 +238,6 @@ PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGpuDeviceId(
 PADDLE_CAPI_EXPORT extern int32_t PD_ConfigXpuDeviceId(
     __pd_keep PD_Config* pd_config);
 ///
-/// \brief Get the NPU device id.
-///
-/// \param[in] pd_onfig config
-/// \return The NPU device id.
-///
-PADDLE_CAPI_EXPORT extern int32_t PD_ConfigNpuDeviceId(
-    __pd_keep PD_Config* pd_config);
-///
 /// \brief Turn on custome device.
 ///
 /// \param[in] pd_config config

paddle/fluid/inference/goapi/config.go
@@ -230,15 +230,6 @@ func (config *Config) UseXpu() bool {
 	return cvtPDBoolToGo(C.PD_ConfigUseXpu(config.c))
 }

-///
-/// \brief A boolean state telling whether the NPU is turned on.
-///
-/// \return bool Whether the NPU is turned on.
-///
-func (config *Config) UseNpu() bool {
-	return cvtPDBoolToGo(C.PD_ConfigUseNpu(config.c))
-}
-
 ///
 /// \brief Get the GPU device id.
 ///
@@ -257,15 +248,6 @@ func (config *Config) XpuDeviceId() int32 {
 	return int32(C.PD_ConfigXpuDeviceId(config.c))
 }

-///
-/// \brief Get the NPU device id.
-///
-/// \return int32 The NPU device id.
-///
-func (config *Config) NpuDeviceId() int32 {
-	return int32(C.PD_ConfigNpuDeviceId(config.c))
-}
-
 ///
 /// \brief Get the initial size in MB of the GPU memory pool.
 ///

paddle/fluid/platform/monitor.h
@@ -190,13 +190,3 @@ class StatRegistry {
   USE_INT_STAT(STAT_gpu13_mem_size); \
   USE_INT_STAT(STAT_gpu14_mem_size); \
   USE_INT_STAT(STAT_gpu15_mem_size)
-
-#define USE_NPU_MEM_STAT            \
-  USE_INT_STAT(STAT_npu0_mem_size); \
-  USE_INT_STAT(STAT_npu1_mem_size); \
-  USE_INT_STAT(STAT_npu2_mem_size); \
-  USE_INT_STAT(STAT_npu3_mem_size); \
-  USE_INT_STAT(STAT_npu4_mem_size); \
-  USE_INT_STAT(STAT_npu5_mem_size); \
-  USE_INT_STAT(STAT_npu6_mem_size); \
-  USE_INT_STAT(STAT_npu7_mem_size)

paddle/fluid/platform/place.h
@@ -28,7 +28,6 @@ using Place = phi::Place;
 using CPUPlace = phi::CPUPlace;
 using CUDAPlace = phi::GPUPlace;
 using CUDAPinnedPlace = phi::GPUPinnedPlace;
-using NPUPinnedPlace = phi::NPUPinnedPlace;
 using XPUPlace = phi::XPUPlace;
 using IPUPlace = phi::IPUPlace;
 using CustomPlace = phi::CustomPlace;
@@ -87,11 +86,6 @@ typename Visitor::result_type VisitPlace(const Place &place,
       return typename Visitor::result_type();
 #endif
     }
-    case phi::AllocationType::NPUPINNED: {
-      PADDLE_THROW(platform::errors::Unavailable(
-          "Paddle is not compiled with NPU. Cannot visit npu_pinned"));
-      return typename Visitor::result_type();
-    }
     case phi::AllocationType::IPU: {
 #ifdef PADDLE_WITH_IPU
       platform::IPUPlace p(place.GetDeviceId());

paddle/fluid/pybind/inference_api.cc
@@ -673,7 +673,6 @@ void BindNativeConfig(py::module *m) {
       .def(py::init<>())
      .def_readwrite("use_gpu", &NativeConfig::use_gpu)
      .def_readwrite("use_xpu", &NativeConfig::use_xpu)
-      .def_readwrite("use_npu", &NativeConfig::use_npu)
      .def_readwrite("device", &NativeConfig::device)
      .def_readwrite("fraction_of_gpu_memory",
                     &NativeConfig::fraction_of_gpu_memory)
@@ -805,10 +804,8 @@ void BindAnalysisConfig(py::module *m) {
      .def("enable_ort_optimization", &AnalysisConfig::EnableORTOptimization)
      .def("use_gpu", &AnalysisConfig::use_gpu)
      .def("use_xpu", &AnalysisConfig::use_xpu)
-      .def("use_npu", &AnalysisConfig::use_npu)
      .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
      .def("xpu_device_id", &AnalysisConfig::xpu_device_id)
-      .def("npu_device_id", &AnalysisConfig::npu_device_id)
      .def("memory_pool_init_size_mb",
           &AnalysisConfig::memory_pool_init_size_mb)
      .def("fraction_of_gpu_memory_for_pool",

paddle/fluid/pybind/place.cc
@@ -629,7 +629,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
           [](platform::Place &self) { return platform::is_custom_place(self); })
      .def("gpu_device_id", [](platform::Place &self) { return self.device; })
      .def("xpu_device_id", [](platform::Place &self) { return self.device; })
-      .def("npu_device_id", [](platform::Place &self) { return self.device; })
      .def("ipu_device_id", [](platform::Place &self) { return self.device; })
      .def("custom_device_id",
           [](platform::Place &self) { return self.device; })

paddle/phi/backends/cpu/cpu_info.cc
@@ -110,23 +110,6 @@ size_t CUDAPinnedMaxChunkSize() {
   return CUDAPinnedMaxAllocSize() / 256;
 }

-size_t NPUPinnedMaxAllocSize() {
-  // For distributed systems, it requires configuring and limiting
-  // the fraction of memory to use.
-  return FLAGS_fraction_of_cuda_pinned_memory_to_use * CpuTotalPhysicalMemory();
-}
-
-size_t NPUPinnedMinChunkSize() {
-  // Allow to allocate the minimum chunk size is 64 KB.
-  return 1 << 16;
-}
-
-size_t NPUPinnedMaxChunkSize() {
-  // Allow to allocate the maximum chunk size is roughly 1/256 of NPU_PINNED
-  // memory.
-  return NPUPinnedMaxAllocSize() / 256;
-}
-
 #ifdef PADDLE_WITH_XBYAK
 static Xbyak::util::Cpu cpu;
 bool MayIUse(const cpu_isa_t cpu_isa) {

paddle/phi/backends/cpu/cpu_info.h
@@ -75,15 +75,6 @@ size_t CUDAPinnedMinChunkSize();
 //! Get the maximum chunk size for buddy allocator.
 size_t CUDAPinnedMaxChunkSize();

-//! Get the maximum allocation size for a machine.
-size_t NPUPinnedMaxAllocSize();
-
-//! Get the minimum chunk size for buddy allocator.
-size_t NPUPinnedMinChunkSize();
-
-//! Get the maximum chunk size for buddy allocator.
-size_t NPUPinnedMaxChunkSize();
-
 typedef enum {
   isa_any,
   sse42,

paddle/phi/common/place.cc
@@ -35,8 +35,6 @@ const char *AllocationTypeStr(AllocationType type) {
       return "gpu_pinned";
     case AllocationType::XPU:
       return "xpu";
-    case AllocationType::NPUPINNED:
-      return "npu_pinned";
     case AllocationType::IPU:
       return "ipu";
     default:
@@ -55,7 +53,6 @@ std::string Place::DebugString() const {
     os << AllocationTypeStr(alloc_type_);
   }
   if (alloc_type_ == AllocationType::GPUPINNED ||
-      alloc_type_ == AllocationType::NPUPINNED ||
       alloc_type_ == AllocationType::CPU) {
     os << ")";
   } else {

paddle/phi/common/place.h
@@ -32,7 +32,6 @@ enum class AllocationType : int8_t {
   GPUPINNED = 3,
   XPU = 4,
   NPU = 5,
-  NPUPINNED = 6,
   IPU = 7,
   CUSTOM = 9,
 };
@@ -163,15 +162,6 @@ class XPUPlace : public Place {
       : Place(AllocationType::XPU, place.GetDeviceId()) {}
 };

-class NPUPinnedPlace : public Place {
- public:
-  NPUPinnedPlace() : Place(AllocationType::NPUPINNED) {}
-
-  NPUPinnedPlace(const NPUPinnedPlace &) = default;
-  NPUPinnedPlace(const Place &place UNUSED)  // NOLINT
-      : Place(AllocationType::NPUPINNED) {}
-};
-
 class IPUPlace : public Place {
  public:
   IPUPlace() : Place(AllocationType::IPU, 0) {}

paddle/phi/kernels/funcs/math_function.cc
@@ -161,12 +161,6 @@ void set_constant_with_place<phi::XPUPlace>(const phi::DeviceContext& context,
 #endif
 }

-template <>
-void set_constant_with_place<phi::NPUPinnedPlace>(
-    const phi::DeviceContext& context, phi::DenseTensor* tensor, float value) {
-  PADDLE_THROW(
-      phi::errors::Unimplemented("NPUPinnedPlace is not supported"));
-}
-
 template <>
 void set_constant_with_place<phi::IPUPlace>(const phi::DeviceContext& context,
                                             phi::DenseTensor* tensor,

python/paddle/distributed/fleet/layers/mpu/mp_ops.py
@@ -15,7 +15,6 @@
 import paddle
 from paddle import _legacy_C_ops
 from paddle.distributed import collective
-from paddle.fluid import core
 from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype
 from paddle.framework import LayerHelper, _create_tensor, in_dygraph_mode
 from paddle.nn import Layer
@@ -551,11 +550,7 @@ def _parallel_linear(
         )

         # NOTE: npu linear function use matmul_v2 but linear use matmul
-        linear_function = (
-            _linear
-            if core.is_compiled_with_custom_device('npu')
-            else paddle.nn.functional.linear
-        )
+        linear_function = paddle.nn.functional.linear
         linear_out = linear_function(
             x,
             linear.weight,

python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
@@ -595,9 +595,6 @@ class ShardingOptimizer(MetaOptimizerBase):
             # amp inf_var & clip global_norm_var
             rings = [self.mp_ring_id, self.pp_ring_id]
-            # FIXME(wangxi): some problem with NPU found_finite, need sync with DP
-            if core.is_compiled_with_custom_device('npu'):
-                rings += [self.dp_ring_id]
             FP16Utils.sync_amp_check_nan_inf(main_block, rings)

             gradientclip_helper = GradientClipHelper(None)
@@ -719,10 +716,7 @@ class ShardingOptimizer(MetaOptimizerBase):
         self._recreate_not_persist_param_as_var()
         self._dump_program_for_debug()

-        # GPU need to wait server ready, GPU and NPU is Layered connection
-        if not core.is_compiled_with_custom_device('npu'):
-            self._wait()
-
+        self._wait()
         return optimize_ops, params_grads

     def _init_pair_comm(self, pair, ring_id):

python/paddle/fluid/executor.py
@@ -1988,14 +1988,9 @@ class Executor:
             for var in program.global_block().vars.values():
                 if var.is_data:
                     data_vars.append(var)
-            if core.is_compiled_with_custom_device('npu'):
-                dataset = paddle.fluid.DatasetFactory().create_dataset(
-                    'InMemoryDataset'
-                )
-            else:
-                dataset = paddle.fluid.DatasetFactory().create_dataset(
-                    'FileInstantDataset'
-                )
+            dataset = paddle.fluid.DatasetFactory().create_dataset(
+                'FileInstantDataset'
+            )
             dataset.set_batch_size(1)
             dataset.set_thread(1)
             dataset.set_filelist(['None'])
@@ -2165,14 +2160,9 @@ class Executor:
             for var in program.global_block().vars.values():
                 if var.is_data:
                     data_vars.append(var)
-            if core.is_compiled_with_custom_device('npu'):
-                dataset = paddle.fluid.DatasetFactory().create_dataset(
-                    'InMemoryDataset'
-                )
-            else:
-                dataset = paddle.fluid.DatasetFactory().create_dataset(
-                    'FileInstantDataset'
-                )
+            dataset = paddle.fluid.DatasetFactory().create_dataset(
+                'FileInstantDataset'
+            )
             dataset.set_batch_size(1)
             dataset.set_thread(1)
             dataset.set_filelist(['None'])

python/paddle/fluid/framework.py
@@ -597,21 +597,6 @@ def _current_expected_place():
                 "You are using XPU version Paddle, but your XPU device is not set properly. CPU device will be used by default."
             )
             _global_expected_place_ = core.CPUPlace()
-    elif core.is_compiled_with_custom_device("npu"):
-        # TODO(duanyanhui): Optimize DeviceManager and Return all expected places when device registered in DeviceManager is greater than 1.
-        try:
-            device_count = core.get_custom_device_count("npu")
-        except Exception as e:
-            device_count = 0
-        if device_count > 0:
-            _global_expected_place_ = core.CustomPlace(
-                "npu", _custom_device_ids("npu")[0]
-            )
-        else:
-            warnings.warn(
-                "You are using NPU version Paddle, but your NPU device is not set properly. CPU device will be used by default."
-            )
-            _global_expected_place_ = core.CPUPlace()
     else:
         _global_expected_place_ = core.CPUPlace()
@@ -7454,9 +7439,9 @@ def device_guard(device=None):
         device, index = device.split(':')
         if device == 'cpu':
             raise ValueError("Should not set device id for cpu.")
-    if device not in ['cpu', 'gpu', 'xpu', 'npu', '', None]:
+    if device not in ['cpu', 'gpu', 'xpu', '', None]:
         raise ValueError(
-            "The Attr(device) should be 'cpu' 'npu' or 'gpu', and it can also be empty string or None "
+            "The Attr(device) should be 'cpu' or 'gpu', and it can also be empty string or None "
             "when there is no need to specify device. But received %s" % device
         )
     if index:

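Note (not part of the diff): after this change device_guard() no longer accepts 'npu', and _current_expected_place() no longer probes the custom 'npu' device. The sketch below only illustrates how a script can still target a plugin NPU explicitly; it assumes a build with an 'npu' CustomDevice plugin registered and reuses only calls that appear elsewhere in this commit (paddle.CustomPlace and is_compiled_with_custom_device).

# Illustrative sketch, assuming an 'npu' CustomDevice plugin is installed.
import paddle
from paddle.device import is_compiled_with_custom_device

if is_compiled_with_custom_device('npu'):
    # Pick the plugin device explicitly instead of relying on the removed
    # NPU branch in _current_expected_place().
    place = paddle.CustomPlace('npu', 0)
else:
    place = paddle.CPUPlace()

x = paddle.to_tensor([1.0, 2.0, 3.0], place=place)

# device_guard() in static graphs now only accepts 'cpu', 'gpu', 'xpu',
# '' or None; passing 'npu' raises ValueError after this commit.
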
python/paddle/fluid/optimizer.py
@@ -4554,9 +4554,7 @@ class PipelineOptimizer:
     def __init__(self, optimizer, num_microbatches=1, start_cpu_core_id=0):
         self._device = 'cpu'
-        if core.is_compiled_with_custom_device('npu'):
-            self._device = "npu"
-        elif core.is_compiled_with_cuda():
+        if core.is_compiled_with_cuda():
             self._device = "gpu"
         if in_dygraph_mode():
             raise Exception("In dygraph, don't support PipelineOptimizer.")
@@ -4945,8 +4943,8 @@ class PipelineOptimizer:
             else None
         )
         if device:
-            assert device[0:3] == 'gpu' or device[0:3] == 'npu', (
-                "Now, only gpu and npu devices are "
+            assert device[0:3] == 'gpu', (
+                "Now, only gpu devices are "
                 "supported in pipeline parallemism."
             )
         return device
@@ -5148,8 +5146,8 @@ class PipelineOptimizer:
                 continue
             dev_type = device.split(':')[0]
-            assert dev_type == "gpu" or dev_type == 'npu', (
-                "Now only gpu and npu devices are supported "
+            assert dev_type == "gpu", (
+                "Now only gpu devices are supported "
                 "for pipeline parallelism."
             )
@@ -6388,8 +6386,6 @@ class PipelineOptimizer:
             dev_index = int(dev.split(":")[1])
             if core.is_compiled_with_cuda():
                 place_list.append(core.CUDAPlace(dev_index % 1))
-            elif paddle.is_compiled_with_custom_device('npu'):
-                place_list.append(paddle.CustomPlace('npu', dev_index % 1))

         # Step6: Split startup program
         new_startup_program = self._split_startup_program(
@@ -6412,8 +6408,6 @@ class PipelineOptimizer:
         if core.is_compiled_with_cuda():
             place_id = int(os.getenv("FLAGS_selected_gpus", "0"))
-        elif core.is_compiled_with_custom_device('npu'):
-            place_id = int(os.getenv("FLAGS_selected_npus", "0"))
         # A pass to move the recv op to the beginning of
         # the forward/backward phase
         self._mv_head_recv(program_list[self.local_rank])

python/paddle/nn/functional/conv.py
@@ -16,7 +16,6 @@ from paddle import _C_ops, _legacy_C_ops, get_flags, in_dynamic_mode
 from paddle.device import (
     get_all_custom_device_type,
     is_compiled_with_cuda,
-    is_compiled_with_custom_device,
     is_compiled_with_rocm,
 )
 from paddle.fluid.framework import _global_flags, in_dygraph_mode
@@ -465,13 +464,6 @@ def conv1d(
             l_type = 'depthwise_conv2d'
             use_cudnn = False

-    # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
-    if is_compiled_with_custom_device('npu'):
-        if num_channels == groups and num_channels == num_filters:
-            l_type = 'depthwise_conv2d'
-        else:
-            l_type = 'conv2d'
-
     squeeze_aixs = -3 if channel_last else -2
     x = unsqueeze(x, axis=[squeeze_aixs])
@@ -755,13 +747,6 @@ def conv2d(
     use_mkldnn = _global_flags()["FLAGS_use_mkldnn"]

-    # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
-    if is_compiled_with_custom_device('npu'):
-        if num_channels == groups and num_channels == num_filters:
-            l_type = 'depthwise_conv2d'
-        else:
-            l_type = 'conv2d'
-
     if (
         is_compiled_with_cuda()
         and get_flags("FLAGS_conv2d_disable_cudnn")[

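Note (not part of the diff): the branches removed from conv1d/conv2d only switched the internal op name to 'depthwise_conv2d' on NPU builds when input_channel == output_channel == groups; the public API is unchanged. A minimal sketch of the depthwise case those branches targeted, using the standard functional API:

# Depthwise convolution: in_channels == out_channels == groups.
import paddle
import paddle.nn.functional as F

x = paddle.randn([1, 8, 32, 32])   # NCHW input with 8 channels
w = paddle.randn([8, 1, 3, 3])     # one 3x3 filter per input channel
y = F.conv2d(x, w, stride=1, padding=1, groups=8)
print(y.shape)  # [1, 8, 32, 32]
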
python/paddle/nn/functional/loss.py
@@ -16,7 +16,7 @@ import math
 # TODO: define loss functions of neural network
 import paddle
-from paddle import _C_ops, _legacy_C_ops, fluid, in_dynamic_mode
+from paddle import _C_ops, fluid, in_dynamic_mode
 from paddle.framework import core
 from paddle.static.nn.control_flow import Assert
 from paddle.utils import deprecated
@@ -269,51 +269,15 @@ def fluid_softmax_with_cross_entropy(
     if input_dims - 1 == label_dims:
         label = paddle.unsqueeze(label, axis=axis)
     if in_dygraph_mode():
-        if core.is_compiled_with_custom_device("npu"):
-            if not soft_label:
-                valid_label = (
-                    paddle.cast(label != ignore_index, dtype=label.dtype)
-                    * label
-                )
-                softmax, loss = _legacy_C_ops.softmax_with_cross_entropy(
-                    logits, valid_label,
-                    'soft_label', soft_label,
-                    'ignore_index', ignore_index,
-                    'numeric_stable_mode', numeric_stable_mode,
-                    'axis', axis,
-                    'use_softmax', True,
-                )
-            else:
-                softmax, loss = _legacy_C_ops.softmax_with_cross_entropy(
-                    logits, label,
-                    'soft_label', soft_label,
-                    'ignore_index', ignore_index,
-                    'numeric_stable_mode', numeric_stable_mode,
-                    'axis', axis,
-                    'use_softmax', True,
-                )
-        else:
-            softmax, loss = _C_ops.cross_entropy_with_softmax(
-                logits,
-                label,
-                soft_label,
-                True,
-                numeric_stable_mode,
-                ignore_index,
-                axis,
-            )
+        softmax, loss = _C_ops.cross_entropy_with_softmax(
+            logits,
+            label,
+            soft_label,
+            True,
+            numeric_stable_mode,
+            ignore_index,
+            axis,
+        )
         if not return_softmax:
             return loss
         else:
@@ -2734,41 +2698,9 @@ def cross_entropy(
             valid_label = (
                 paddle.cast(label != ignore_index, dtype=label.dtype) * label
             )
-            if core.is_compiled_with_custom_device("npu"):
-                if not soft_label:
-                    _, out = _legacy_C_ops.softmax_with_cross_entropy(
-                        input, valid_label,
-                        'soft_label', soft_label,
-                        'ignore_index', ignore_index,
-                        'numeric_stable_mode', True,
-                        'axis', axis,
-                        'use_softmax', use_softmax,
-                    )
-                else:
-                    _, out = _legacy_C_ops.softmax_with_cross_entropy(
-                        input, label,
-                        'soft_label', soft_label,
-                        'ignore_index', ignore_index,
-                        'numeric_stable_mode', True,
-                        'axis', axis,
-                        'use_softmax', use_softmax,
-                    )
-            else:
-                _, out = _C_ops.cross_entropy_with_softmax(
-                    input, label, soft_label, use_softmax, True, ignore_index, axis
-                )
+            _, out = _C_ops.cross_entropy_with_softmax(
+                input, label, soft_label, use_softmax, True, ignore_index, axis
+            )
         if weight is not None:

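Note (not part of the diff): after this change the dygraph path always calls _C_ops.cross_entropy_with_softmax; the NPU-only _legacy_C_ops.softmax_with_cross_entropy branch is gone, and user code going through the public API is unaffected. A small illustrative sketch of that public entry point, with the same keyword arguments that appear in the diff:

# Public API that fronts cross_entropy_with_softmax in dygraph mode.
import paddle
import paddle.nn.functional as F

logits = paddle.randn([4, 10])
labels = paddle.randint(0, 10, shape=[4], dtype='int64')

# soft_label / ignore_index / use_softmax mirror the arguments seen above.
loss = F.cross_entropy(
    logits, labels,
    soft_label=False, ignore_index=-100, use_softmax=True, reduction='mean'
)
print(loss)
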
python/paddle/static/amp/decorator.py
@@ -220,24 +220,7 @@ class OptimizerWithMixedPrecision:
         """
         train_program = loss.block.program
         self._train_program = train_program
-
-        # NOTE(zhiqiu): _float_status is only used for NPU.
-        if core.is_compiled_with_custom_device('npu'):
-            float_status = paddle.static.data(
-                name="float_status", shape=[8], dtype='float32'
-            )
-            self._train_program.global_block().append_op(
-                type="alloc_float_status",
-                outputs={"FloatStatus": float_status},
-            )
-            self._train_program.global_block().append_op(
-                type="clear_float_status",
-                inputs={"FloatStatus": float_status},
-                outputs={"FloatStatusOut": float_status},
-            )
-            self._float_status = float_status
-        else:
-            self._float_status = None
+        self._float_status = None

         with program_guard(self._train_program, startup_program):
             self._init_amp_var()
@@ -476,27 +459,17 @@ class OptimizerWithMixedPrecision:
         if self._is_distributed:
             # if distributed, split check_finite_and_unscale to overlap
             # unscale with communication
-            if core.is_compiled_with_custom_device('npu'):
-                with self._train_program._optimized_guard(grads):
-                    _, found_inf = check_finite_and_unscale(
-                        grads,
-                        self._loss_scaling,
-                        name="find_infinite_scale",
-                        float_status=self._float_status,
-                    )
-                    found_infs.append(found_inf)
-            else:
-                for p, g in params_grads:
-                    with self._train_program._optimized_guard([p, g]):
-                        _, found_inf = check_finite_and_unscale(
-                            [g],
-                            self._loss_scaling,
-                            name="find_infinite_scale",
-                            float_status=self._float_status,
-                        )
-                        found_infs.append(found_inf)
+            for p, g in params_grads:
+                with self._train_program._optimized_guard([p, g]):
+                    _, found_inf = check_finite_and_unscale(
+                        [g],
+                        self._loss_scaling,
+                        name="find_infinite_scale",
+                        float_status=self._float_status,
+                    )
+                    found_infs.append(found_inf)
         elif self._use_pure_fp16:
             if fp32_grads:
                 with self._train_program._optimized_guard(fp32_grads):

浏览文件 @
0d45ac73
...
@@ -97,8 +97,6 @@ def _get_sys_unsupported_list(dtype):
...
@@ -97,8 +97,6 @@ def _get_sys_unsupported_list(dtype):
device
=
None
device
=
None
if
core
.
is_compiled_with_xpu
():
if
core
.
is_compiled_with_xpu
():
device
=
'XPU'
device
=
'XPU'
elif
core
.
is_compiled_with_custom_device
(
'npu'
):
device
=
'NPU'
else
:
else
:
device
=
'GPU'
device
=
'GPU'
_
,
_
,
sys_unsupported_list
=
core
.
op_supported_infos
(
device
,
var_type
)
_
,
_
,
sys_unsupported_list
=
core
.
op_supported_infos
(
device
,
var_type
)
...
...
python/paddle/static/nn/common.py
@@ -948,13 +948,6 @@ def conv2d(
     ):
         l_type = 'depthwise_conv2d'

-    # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
-    if core.is_compiled_with_custom_device('npu'):
-        if num_channels == groups and num_channels == num_filters:
-            l_type = 'depthwise_conv2d'
-        else:
-            l_type = 'conv2d'
-
     helper = LayerHelper(l_type, **locals())
     dtype = helper.input_dtype()

tools/timeline.py
@@ -212,12 +212,6 @@ class Timeline:
                     self._chrome_trace.emit_pid(
                         "memory usage on %s:cudapinnedplace:%d" % (k, 0), pid
                     )
-                if (k, 0, "NPU") not in self._mem_devices:
-                    pid = self._allocate_pid()
-                    self._mem_devices[(k, 0, "NPU")] = pid
-                    self._chrome_trace.emit_pid(
-                        "memory usage on %s:npu:%d" % (k, 0), pid
-                    )

     def _allocate_events(self):
         for k, profile_pb in self._profile_dict.items():