PaddlePaddle / Paddle

Commit 0d45ac73 (unverified)
Authored May 11, 2023 by 张春乔; committed via GitHub on May 11, 2023.

Retire Ascend (昇腾) and Cambricon (寒武纪) related code: NPU-related code removal, part 2 (#53568)

Parent: 00ded2ea
Showing 37 changed files, with 55 additions and 411 deletions (+55 −411).
paddle/fluid/distributed/fleet_executor/message_bus.cc (+4 −6)
paddle/fluid/framework/dlpack_tensor.cc (+0 −5)
paddle/fluid/framework/executor_cache.cc (+0 −4)
paddle/fluid/framework/new_executor/interpreter/data_transfer.cc (+2 −2)
paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc (+1 −1)
paddle/fluid/framework/parallel_executor.cc (+2 −2)
paddle/fluid/imperative/amp_auto_cast.cc (+7 −7)
paddle/fluid/inference/analysis/argument.h (+0 −4)
paddle/fluid/inference/api/analysis_config.cc (+0 −26)
paddle/fluid/inference/api/analysis_predictor.cc (+2 −5)
paddle/fluid/inference/api/details/zero_copy_tensor.cc (+3 −3)
paddle/fluid/inference/api/paddle_analysis_config.h (+0 −16)
paddle/fluid/inference/api/paddle_api.h (+0 −1)
paddle/fluid/inference/api/paddle_pass_builder.h (+0 −20)
paddle/fluid/inference/capi_exp/pd_config.cc (+0 −9)
paddle/fluid/inference/capi_exp/pd_config.h (+0 −16)
paddle/fluid/inference/goapi/config.go (+0 −18)
paddle/fluid/platform/monitor.h (+0 −10)
paddle/fluid/platform/place.h (+0 −6)
paddle/fluid/pybind/inference_api.cc (+0 −3)
paddle/fluid/pybind/place.cc (+0 −1)
paddle/phi/backends/cpu/cpu_info.cc (+0 −17)
paddle/phi/backends/cpu/cpu_info.h (+0 −9)
paddle/phi/common/place.cc (+0 −3)
paddle/phi/common/place.h (+0 −10)
paddle/phi/kernels/funcs/math_function.cc (+0 −6)
python/paddle/distributed/fleet/layers/mpu/mp_ops.py (+1 −6)
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py (+1 −7)
python/paddle/fluid/executor.py (+6 −16)
python/paddle/fluid/framework.py (+2 −17)
python/paddle/fluid/optimizer.py (+5 −11)
python/paddle/nn/functional/conv.py (+0 −15)
python/paddle/nn/functional/loss.py (+13 −81)
python/paddle/static/amp/decorator.py (+6 −33)
python/paddle/static/amp/fp16_lists.py (+0 −2)
python/paddle/static/nn/common.py (+0 −7)
tools/timeline.py (+0 −6)
paddle/fluid/distributed/fleet_executor/message_bus.cc

@@ -111,8 +111,7 @@ bool MessageBus::Send(int64_t dst_rank,
 #else
   PADDLE_THROW(platform::errors::Unavailable(
       "Fleet executor does not support sending message between different "
-      "ranks when Paddle is compiled with npu or "
-      "isn't compiled with distributed for now."));
+      "ranks when Paddle isn't compiled with distributed for now."));
 #endif
   return true;
 }

@@ -202,10 +201,9 @@ void MessageBus::ListenPort() {
   }
   LOG(INFO) << "Message bus's listen port thread starts successful.";
 #else
-  LOG(WARNING) << "Fleet executor's ListenPort() is a fake function when Paddle is "
-                  "compiled with npu or Paddle isn't compiled "
-                  "with distributed for now.";
+  LOG(WARNING) << "Fleet executor's ListenPort() is a fake function when "
+                  "Paddle isn't compiled "
+                  "with distributed for now.";
 #endif
 }
paddle/fluid/framework/dlpack_tensor.cc

@@ -89,11 +89,6 @@ struct DLDeviceVisitor
         platform::errors::Unimplemented("platform::XPUPlace is not supported"));
   }

-  inline ::DLDevice operator()(const platform::NPUPinnedPlace &place) const {
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "platform::NPUPinnedPlace is not supported"));
-  }
-
   inline ::DLDevice operator()(const platform::CustomPlace &place) const {
     PADDLE_THROW(platform::errors::Unimplemented(
         "platform::CustomPlace is not supported"));
paddle/fluid/framework/executor_cache.cc

@@ -50,10 +50,6 @@ static ExecutionStrategy GetExecutionStrategy(const platform::Place &place) {
       execution_strategy.num_threads_ = 1;
       break;
     }
-    case platform::DeviceType::NPU: {
-      execution_strategy.num_threads_ = 1;
-      break;
-    }
     case platform::DeviceType::CUSTOM_DEVICE: {
       execution_strategy.num_threads_ = 1;
       break;
paddle/fluid/framework/new_executor/interpreter/data_transfer.cc

@@ -196,7 +196,7 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
             ? OpFuncType::kGpuSync
             : OpFuncType::kGpuAsync;
   } else {
-    // Memcpy in npu and custom devices is asynchronous
+    // Memcpy in custom devices is asynchronous
     new_op_func_node.type_ = OpFuncType::kGpuAsync;
   }

@@ -225,7 +225,7 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
     }
   }
-  // NOTE(winter-wang): in npu and custom device, D2H kernel is asynchronous.
+  // NOTE(winter-wang): in custom device, D2H kernel is asynchronous.
   // need to explicit synchronization.
   if ((platform::is_custom_place(place)) && op_type == kMemcpyD2H) {
     dev_ctx->Wait();
paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc

@@ -150,7 +150,7 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext(
   DeviceContext* dev_ctx = nullptr;
-  // only gpu/npu need update. xpu not need, because xpu memcpy op kernel is
+  // only gpu needs update. xpu not need, because xpu memcpy op kernel is
   // synchronous.
   if (platform::is_gpu_place(place_) || platform::is_custom_place(place_)) {
     VLOG(6) << "Parse DeviceContext for " << op_type
paddle/fluid/framework/parallel_executor.cc

@@ -1331,8 +1331,8 @@ void ParallelExecutor::InitExecutorPrivateMemberInfo(
     device_name = "XPU";
   } else {
     PADDLE_THROW(platform::errors::Unavailable(
-        "Only CPU/CUDA/NPU/XPU is supportted. "
-        "please use CPU/CUDA/NPU/XPU backend."));
+        "Only CPU/CUDA/XPU is supportted. "
+        "please use CPU/CUDA/XPU backend."));
   }

   VLOG(1) << string::Sprintf(
paddle/fluid/imperative/amp_auto_cast.cc

@@ -52,12 +52,12 @@ OpSupportedInfos(const std::string& place,
       {"CPU", &platform::is_cpu_place},
       {"XPU", &platform::is_xpu_place},
   };
-  PADDLE_ENFORCE_NE(is_target_place.count(query_place),
-                    0,
-                    platform::errors::InvalidArgument(
-                        "The argument `place` should be 'GPU', 'CPU', 'XPU', "
-                        "'NPU', but got '%s'.",
-                        place));
+  PADDLE_ENFORCE_NE(is_target_place.count(query_place),
+                    0,
+                    platform::errors::InvalidArgument(
+                        "The argument `place` should be 'GPU', 'CPU', 'XPU', "
+                        "but got '%s'.",
+                        place));

   std::unordered_set<std::string> all_ops;
   const auto& op_info = framework::OpInfoMap::Instance().map();

@@ -147,7 +147,7 @@ AmpOperators::AmpOperators()
       OpSupportedInfos("GPU", paddle::framework::proto::VarType::BF16));
   unsupported_bf16_ops_->insert(unsupported_ops_gpu_bf16.begin(),
                                 unsupported_ops_gpu_bf16.end());
-// NOTE: GPU/NPU/XPU is compiled seperatly.
+// NOTE: GPU/XPU is compiled seperatly.
 #elif defined(PADDLE_WITH_XPU)
   auto unsupported_ops_xpu_fp16 = std::get<2>(
       OpSupportedInfos("XPU", paddle::framework::proto::VarType::FP16));
paddle/fluid/inference/analysis/argument.h

@@ -364,10 +364,6 @@ struct Argument {
                       IpuEnableModelRuntimeExecutor,
                       bool);

-  // npu related
-  DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
-  DECL_ARGUMENT_FIELD(npu_device_id, NPUDeviceId, int);
-
   // mixed precision related
   DECL_ARGUMENT_FIELD(model_precision, ModelPrecision, int);
   DECL_ARGUMENT_FIELD(mixed_black_list,
paddle/fluid/inference/api/analysis_config.cc

@@ -56,8 +56,6 @@ PassStrategy *AnalysisConfig::pass_builder() const {
     pass_builder_.reset(new GpuPassStrategy);
   } else if (use_xpu_) {
     pass_builder_.reset(new XpuPassStrategy);
-  } else if (use_npu_) {
-    pass_builder_.reset(new NpuPassStrategy);
   } else if (use_ipu_) {
     LOG(INFO) << "Create IPU IR passes";
     pass_builder_.reset(new IpuPassStrategy);

@@ -506,8 +504,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_opencl_);
   // NPU related.
-  CP_MEMBER(use_npu_);
-  CP_MEMBER(npu_device_id_);
   CP_MEMBER(nnadapter_config_);

   // profile related.

@@ -574,9 +570,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   } else if (use_custom_device_) {
     pass_builder_.reset(new CustomDevicePassStrategy(
         *static_cast<CustomDevicePassStrategy *>(other.pass_builder())));
-  } else if (use_npu_) {
-    pass_builder_.reset(new NpuPassStrategy(
-        *static_cast<NpuPassStrategy *>(other.pass_builder())));
   } else {
     pass_builder_.reset(new CpuPassStrategy(
         *static_cast<CpuPassStrategy *>(other.pass_builder())));

@@ -827,7 +820,6 @@ void AnalysisConfig::Update() {
   // Transfer pass_builder and copy the existing compatible passes.
   if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
       ((use_xpu() ^ pass_builder_->use_xpu())) ||
-      ((use_npu() ^ pass_builder_->use_npu())) ||
       ((use_ipu() ^ pass_builder_->use_ipu())) ||
       ((use_custom_device() ^ pass_builder_->use_custom_device()))) {
     if (use_gpu()) {

@@ -841,13 +833,6 @@ void AnalysisConfig::Update() {
           platform::errors::InvalidArgument(
               "Only one choice can be made between CPU and XPU."));
       pass_builder_.reset(new XpuPassStrategy);
-    } else if (use_npu()) {
-      PADDLE_ENFORCE_EQ(
-          use_gpu(),
-          false,
-          platform::errors::InvalidArgument(
-              "Only one choice can be made between GPU and NPU."));
-      pass_builder_.reset(new NpuPassStrategy);
     } else if (use_custom_device()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(),

@@ -875,14 +860,6 @@ void AnalysisConfig::Update() {
               "Only one choice can be made between CPU and XPU."));
       pass_builder_.reset(new XpuPassStrategy(
           *static_cast<XpuPassStrategy *>(pass_builder_.get())));
-    } else if (use_npu()) {
-      PADDLE_ENFORCE_EQ(
-          use_gpu(),
-          false,
-          platform::errors::InvalidArgument(
-              "Only one choice can be made between GPU and NPU."));
-      pass_builder_.reset(new NpuPassStrategy(
-          *static_cast<NpuPassStrategy *>(pass_builder_.get())));
     } else if (use_custom_device()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(),

@@ -1114,9 +1091,6 @@ std::string AnalysisConfig::SerializeInfoCache() {
     ss << op_type;
   }
-  ss << use_npu_;
-  ss << npu_device_id_;
   ss << thread_local_stream_;

   ss << use_ipu_;
paddle/fluid/inference/api/analysis_predictor.cc

@@ -148,8 +148,8 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) {
       return phi::Backend::CUSTOM;
     default:
       PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-          "Paddle Inference not support backend, we now only support GPU, XPU, "
-          "NPU and CPU."));
+          "Paddle Inference not support backend, we now only support GPU, XPU "
+          "and CPU."));
       return phi::Backend::CPU;
   }
 }

@@ -1432,9 +1432,6 @@ void AnalysisPredictor::PrepareArgument() {
   argument_->SetIpuCustomPatterns(config_.ipu_custom_patterns_);
 #endif

-  argument_->SetUseNpu(config_.use_npu_);
-  argument_->SetNPUDeviceId(config_.npu_device_id());
-
   if (config_.use_mkldnn_) {
     LOG(INFO) << "MKLDNN is enabled";
     argument_->SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);
paddle/fluid/inference/api/details/zero_copy_tensor.cc

@@ -130,7 +130,7 @@ T *Tensor::mutable_data(PlaceType place) {
     }
     default:
       PADDLE_THROW(paddle::platform::errors::Unavailable(
-          "Only CPU / CUDA / XPU / NPU places is supported. The place `%d` is "
+          "Only CPU / CUDA / XPU places is supported. The place `%d` is "
          "not supported.",
          static_cast<int>(place)));
      break;

@@ -261,7 +261,7 @@ void Tensor::CopyFromCpu(const T *data) {
         dev_ctx->stream());
 #else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+        "The analysis predictor supports CPU, GPU and XPU now."));
 #endif
   }
 }

@@ -468,7 +468,7 @@ void Tensor::CopyToCpuImpl(T *data,
     dev_ctx->GetStream()->Synchronize();
 #else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+        "The analysis predictor supports CPU, GPU and XPU now."));
 #endif
   }
 }
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -414,12 +414,6 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return bool Whether the XPU is turned on.
   ///
   bool use_xpu() const { return use_xpu_; }
-  ///
-  /// \brief A boolean state telling whether the NPU is turned on.
-  ///
-  /// \return bool Whether the NPU is turned on.
-  ///
-  bool use_npu() const { return use_npu_; }
   /// \brief A boolean state telling whether the IPU is turned on.
   ///
   /// \return bool Whether the IPU is turned on.

@@ -461,12 +455,6 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return int The XPU device id.
   ///
   int xpu_device_id() const { return xpu_device_id_; }
-  ///
-  /// \brief Get the NPU device id.
-  ///
-  /// \return int The NPU device id.
-  ///
-  int npu_device_id() const { return npu_device_id_; }
   /// \brief Get the number of IPU device .
   ///
   /// \return int The number of IPU device.

@@ -1083,10 +1071,6 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_external_stream_{false};
   void* exec_stream_{nullptr};

-  // NPU related
-  bool use_npu_{false};
-  int npu_device_id_{0};
-
   // CustomDevice related
   bool use_custom_device_{false};
   int custom_device_id_{0};
paddle/fluid/inference/api/paddle_api.h

@@ -360,7 +360,6 @@ struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config {
   /// GPU related fields.
   bool use_xpu{false};
   bool use_gpu{false};
-  bool use_npu{false};
   int device{0};
   float fraction_of_gpu_memory{-1.f};  ///< Change to a float in (0,1] if needed.
paddle/fluid/inference/api/paddle_pass_builder.h

@@ -162,10 +162,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in xpu mode.
   bool use_xpu() const { return use_xpu_; }

-  /// \brief Check if we are using npu.
-  /// \return A bool variable implying whether we are in npu mode.
-  bool use_npu() const { return use_npu_; }
-
   /// \brief Check if we are using ipu.
   /// \return A bool variable implying whether we are in ipu mode.
   bool use_ipu() const { return use_ipu_; }

@@ -181,7 +177,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \cond Protected
   bool use_xpu_{false};
   bool use_gpu_{false};
-  bool use_npu_{false};
   bool use_ipu_{false};
   bool use_mkldnn_{false};
   bool use_custom_device_{false};

@@ -293,21 +288,6 @@ class PD_INFER_DECL XpuPassStrategy final : public PassStrategy {
   XpuPassStrategy();
 };

-/// \class NpuPassStrategy
-/// \brief The NPU passes controller, it is used in AnalysisPredictor with NPU
-/// mode.
-class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
- public:
-  NpuPassStrategy() : PassStrategy({}) { use_npu_ = true; }
-
-  /// \brief Construct by copying another NpuPassStrategy object.
-  /// \param[in] other The NpuPassStrategy object we want to copy.
-  explicit NpuPassStrategy(const NpuPassStrategy &other)
-      : PassStrategy(other.AllPasses()) {
-    use_npu_ = true;
-  }
-};
-
 /// \class CustomDevicePassStrategy
 /// \brief The CustomDevice passes controller, it is used in AnalysisPredictor
 /// with CustomDevice
paddle/fluid/inference/capi_exp/pd_config.cc

@@ -176,11 +176,6 @@ PD_Bool PD_ConfigUseXpu(__pd_keep PD_Config* pd_config) {
   return config->use_xpu();
 }
-
-PD_Bool PD_ConfigUseNpu(__pd_keep PD_Config* pd_config) {
-  CHECK_AND_CONVERT_PD_CONFIG;
-  return config->use_npu();
-}
 int32_t PD_ConfigGpuDeviceId(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->gpu_device_id();

@@ -189,10 +184,6 @@ int32_t PD_ConfigXpuDeviceId(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->xpu_device_id();
 }
-int32_t PD_ConfigNpuDeviceId(__pd_keep PD_Config* pd_config) {
-  CHECK_AND_CONVERT_PD_CONFIG;
-  return config->npu_device_id();
-}
 void PD_ConfigEnableCustomDevice(__pd_keep PD_Config* pd_config,
                                  char* device_type,
paddle/fluid/inference/capi_exp/pd_config.h

@@ -222,14 +222,6 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
 PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseXpu(
     __pd_keep PD_Config* pd_config);
-///
-/// \brief A boolean state telling whether the NPU is turned on.
-///
-/// \param[in] pd_onfig config
-/// \return Whether the NPU is turned on.
-///
-PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseNpu(
-    __pd_keep PD_Config* pd_config);
 ///
 /// \brief Get the GPU device id.
 ///
 /// \param[in] pd_onfig config

@@ -246,14 +238,6 @@ PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGpuDeviceId(
 PADDLE_CAPI_EXPORT extern int32_t PD_ConfigXpuDeviceId(
     __pd_keep PD_Config* pd_config);
-///
-/// \brief Get the NPU device id.
-///
-/// \param[in] pd_onfig config
-/// \return The NPU device id.
-///
-PADDLE_CAPI_EXPORT extern int32_t PD_ConfigNpuDeviceId(
-    __pd_keep PD_Config* pd_config);
 ///
 /// \brief Turn on custome device.
 ///
 /// \param[in] pd_config config
paddle/fluid/inference/goapi/config.go

@@ -230,15 +230,6 @@ func (config *Config) UseXpu() bool {
 	return cvtPDBoolToGo(C.PD_ConfigUseXpu(config.c))
 }

-///
-/// \brief A boolean state telling whether the NPU is turned on.
-///
-/// \return bool Whether the NPU is turned on.
-///
-func (config *Config) UseNpu() bool {
-	return cvtPDBoolToGo(C.PD_ConfigUseNpu(config.c))
-}
-
 ///
 /// \brief Get the GPU device id.
 ///

@@ -257,15 +248,6 @@ func (config *Config) XpuDeviceId() int32 {
 	return int32(C.PD_ConfigXpuDeviceId(config.c))
 }

-///
-/// \brief Get the NPU device id.
-///
-/// \return int32 The NPU device id.
-///
-func (config *Config) NpuDeviceId() int32 {
-	return int32(C.PD_ConfigNpuDeviceId(config.c))
-}
-
 ///
 /// \brief Get the initial size in MB of the GPU memory pool.
 ///
paddle/fluid/platform/monitor.h

@@ -190,13 +190,3 @@ class StatRegistry {
   USE_INT_STAT(STAT_gpu13_mem_size); \
   USE_INT_STAT(STAT_gpu14_mem_size); \
   USE_INT_STAT(STAT_gpu15_mem_size)
-
-#define USE_NPU_MEM_STAT              \
-  USE_INT_STAT(STAT_npu0_mem_size);   \
-  USE_INT_STAT(STAT_npu1_mem_size);   \
-  USE_INT_STAT(STAT_npu2_mem_size);   \
-  USE_INT_STAT(STAT_npu3_mem_size);   \
-  USE_INT_STAT(STAT_npu4_mem_size);   \
-  USE_INT_STAT(STAT_npu5_mem_size);   \
-  USE_INT_STAT(STAT_npu6_mem_size);   \
-  USE_INT_STAT(STAT_npu7_mem_size)
paddle/fluid/platform/place.h

@@ -28,7 +28,6 @@ using Place = phi::Place;
 using CPUPlace = phi::CPUPlace;
 using CUDAPlace = phi::GPUPlace;
 using CUDAPinnedPlace = phi::GPUPinnedPlace;
-using NPUPinnedPlace = phi::NPUPinnedPlace;
 using XPUPlace = phi::XPUPlace;
 using IPUPlace = phi::IPUPlace;
 using CustomPlace = phi::CustomPlace;

@@ -87,11 +86,6 @@ typename Visitor::result_type VisitPlace(const Place &place,
       return typename Visitor::result_type();
 #endif
     }
-    case phi::AllocationType::NPUPINNED: {
-      PADDLE_THROW(platform::errors::Unavailable(
-          "Paddle is not compiled with NPU. Cannot visit npu_pinned"));
-      return typename Visitor::result_type();
-    }
     case phi::AllocationType::IPU: {
 #ifdef PADDLE_WITH_IPU
       platform::IPUPlace p(place.GetDeviceId());
paddle/fluid/pybind/inference_api.cc

@@ -673,7 +673,6 @@ void BindNativeConfig(py::module *m) {
       .def(py::init<>())
       .def_readwrite("use_gpu", &NativeConfig::use_gpu)
      .def_readwrite("use_xpu", &NativeConfig::use_xpu)
-      .def_readwrite("use_npu", &NativeConfig::use_npu)
       .def_readwrite("device", &NativeConfig::device)
       .def_readwrite("fraction_of_gpu_memory",
                      &NativeConfig::fraction_of_gpu_memory)

@@ -805,10 +804,8 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_ort_optimization", &AnalysisConfig::EnableORTOptimization)
       .def("use_gpu", &AnalysisConfig::use_gpu)
       .def("use_xpu", &AnalysisConfig::use_xpu)
-      .def("use_npu", &AnalysisConfig::use_npu)
       .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
       .def("xpu_device_id", &AnalysisConfig::xpu_device_id)
-      .def("npu_device_id", &AnalysisConfig::npu_device_id)
       .def("memory_pool_init_size_mb",
            &AnalysisConfig::memory_pool_init_size_mb)
       .def("fraction_of_gpu_memory_for_pool",
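With the `use_npu` and `npu_device_id` bindings dropped, the Python inference Config only exposes the remaining backends. A minimal sketch of the Python-side configuration that still works after this commit (the model path is a placeholder; `enable_use_gpu` takes the initial memory pool size in MB plus a device id):

    from paddle.inference import Config

    config = Config("./model_dir")   # placeholder path to a saved inference model
    config.enable_use_gpu(256, 0)    # 256 MB initial pool on GPU 0
    assert config.use_gpu()          # still bound in the chain above
    # config.use_npu() / config.npu_device_id() are no longer bound.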
paddle/fluid/pybind/place.cc

@@ -629,7 +629,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
            [](platform::Place &self) { return platform::is_custom_place(self); })
       .def("gpu_device_id", [](platform::Place &self) { return self.device; })
       .def("xpu_device_id", [](platform::Place &self) { return self.device; })
-      .def("npu_device_id", [](platform::Place &self) { return self.device; })
       .def("ipu_device_id", [](platform::Place &self) { return self.device; })
       .def("custom_device_id",
            [](platform::Place &self) { return self.device; })
paddle/phi/backends/cpu/cpu_info.cc

@@ -110,23 +110,6 @@ size_t CUDAPinnedMaxChunkSize() {
   return CUDAPinnedMaxAllocSize() / 256;
 }

-size_t NPUPinnedMaxAllocSize() {
-  // For distributed systems, it requires configuring and limiting
-  // the fraction of memory to use.
-  return FLAGS_fraction_of_cuda_pinned_memory_to_use * CpuTotalPhysicalMemory();
-}
-
-size_t NPUPinnedMinChunkSize() {
-  // Allow to allocate the minimum chunk size is 64 KB.
-  return 1 << 16;
-}
-
-size_t NPUPinnedMaxChunkSize() {
-  // Allow to allocate the maximum chunk size is roughly 1/256 of NPU_PINNED
-  // memory.
-  return NPUPinnedMaxAllocSize() / 256;
-}
-
 #ifdef PADDLE_WITH_XBYAK
 static Xbyak::util::Cpu cpu;
 bool MayIUse(const cpu_isa_t cpu_isa) {
paddle/phi/backends/cpu/cpu_info.h

@@ -75,15 +75,6 @@ size_t CUDAPinnedMinChunkSize();
 //! Get the maximum chunk size for buddy allocator.
 size_t CUDAPinnedMaxChunkSize();

-//! Get the maximum allocation size for a machine.
-size_t NPUPinnedMaxAllocSize();
-
-//! Get the minimum chunk size for buddy allocator.
-size_t NPUPinnedMinChunkSize();
-
-//! Get the maximum chunk size for buddy allocator.
-size_t NPUPinnedMaxChunkSize();
-
 typedef enum {
   isa_any,
   sse42,
paddle/phi/common/place.cc

@@ -35,8 +35,6 @@ const char *AllocationTypeStr(AllocationType type) {
       return "gpu_pinned";
     case AllocationType::XPU:
       return "xpu";
-    case AllocationType::NPUPINNED:
-      return "npu_pinned";
     case AllocationType::IPU:
       return "ipu";
     default:

@@ -55,7 +53,6 @@ std::string Place::DebugString() const {
     os << AllocationTypeStr(alloc_type_);
   }
   if (alloc_type_ == AllocationType::GPUPINNED ||
-      alloc_type_ == AllocationType::NPUPINNED ||
       alloc_type_ == AllocationType::CPU) {
     os << ")";
   } else {
paddle/phi/common/place.h

@@ -32,7 +32,6 @@ enum class AllocationType : int8_t {
   GPUPINNED = 3,
   XPU = 4,
-  NPU = 5,
   NPUPINNED = 6,
   IPU = 7,
   CUSTOM = 9,
 };

@@ -163,15 +162,6 @@ class XPUPlace : public Place {
       : Place(AllocationType::XPU, place.GetDeviceId()) {}
 };

-class NPUPinnedPlace : public Place {
- public:
-  NPUPinnedPlace() : Place(AllocationType::NPUPINNED) {}
-  NPUPinnedPlace(const NPUPinnedPlace &) = default;
-  NPUPinnedPlace(const Place &place UNUSED)  // NOLINT
-      : Place(AllocationType::NPUPINNED) {}
-};
-
 class IPUPlace : public Place {
  public:
   IPUPlace() : Place(AllocationType::IPU, 0) {}
paddle/phi/kernels/funcs/math_function.cc

@@ -161,12 +161,6 @@ void set_constant_with_place<phi::XPUPlace>(const phi::DeviceContext& context,
 #endif
 }

-template <>
-void set_constant_with_place<phi::NPUPinnedPlace>(
-    const phi::DeviceContext& context, phi::DenseTensor* tensor, float value) {
-  PADDLE_THROW(phi::errors::Unimplemented("NPUPinnedPlace is not supported"));
-}
-
 template <>
 void set_constant_with_place<phi::IPUPlace>(
     const phi::DeviceContext& context, phi::DenseTensor* tensor,
python/paddle/distributed/fleet/layers/mpu/mp_ops.py

@@ -15,7 +15,6 @@
 import paddle
-from paddle import _legacy_C_ops
 from paddle.distributed import collective
 from paddle.fluid import core
 from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype
 from paddle.framework import LayerHelper, _create_tensor, in_dygraph_mode
 from paddle.nn import Layer

@@ -551,11 +550,7 @@ def _parallel_linear(
     )
-    # NOTE: npu linear function use matmul_v2 but linear use matmul
-    linear_function = (
-        _linear
-        if core.is_compiled_with_custom_device('npu')
-        else paddle.nn.functional.linear
-    )
+    linear_function = paddle.nn.functional.linear
     linear_out = linear_function(
         x,
         linear.weight,
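`_parallel_linear` now calls `paddle.nn.functional.linear` unconditionally instead of switching to a matmul_v2-based `_linear` on NPU builds. A minimal standalone sketch of the call it always makes now:

    import paddle

    x = paddle.randn([2, 4])
    w = paddle.randn([4, 3])                    # weight shape: [in_features, out_features]
    b = paddle.zeros([3])
    y = paddle.nn.functional.linear(x, w, b)    # y.shape == [2, 3]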
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py

@@ -595,9 +595,6 @@ class ShardingOptimizer(MetaOptimizerBase):
             # amp inf_var & clip global_norm_var
             rings = [self.mp_ring_id, self.pp_ring_id]
-            # FIXME(wangxi): some problem with NPU found_finite, need sync with DP
-            if core.is_compiled_with_custom_device('npu'):
-                rings += [self.dp_ring_id]
             FP16Utils.sync_amp_check_nan_inf(main_block, rings)

         gradientclip_helper = GradientClipHelper(None)

@@ -719,10 +716,7 @@ class ShardingOptimizer(MetaOptimizerBase):
         self._recreate_not_persist_param_as_var()
         self._dump_program_for_debug()

-        # GPU need to wait server ready, GPU and NPU is Layered connection
-        if not core.is_compiled_with_custom_device('npu'):
-            self._wait()
+        self._wait()
         return optimize_ops, params_grads

     def _init_pair_comm(self, pair, ring_id):
python/paddle/fluid/executor.py

@@ -1988,14 +1988,9 @@ class Executor:
         for var in program.global_block().vars.values():
             if var.is_data:
                 data_vars.append(var)
-        if core.is_compiled_with_custom_device('npu'):
-            dataset = paddle.fluid.DatasetFactory().create_dataset(
-                'InMemoryDataset'
-            )
-        else:
-            dataset = paddle.fluid.DatasetFactory().create_dataset(
-                'FileInstantDataset'
-            )
+        dataset = paddle.fluid.DatasetFactory().create_dataset(
+            'FileInstantDataset'
+        )
         dataset.set_batch_size(1)
         dataset.set_thread(1)
         dataset.set_filelist(['None'])

@@ -2165,14 +2160,9 @@ class Executor:
         for var in program.global_block().vars.values():
             if var.is_data:
                 data_vars.append(var)
-        if core.is_compiled_with_custom_device('npu'):
-            dataset = paddle.fluid.DatasetFactory().create_dataset(
-                'InMemoryDataset'
-            )
-        else:
-            dataset = paddle.fluid.DatasetFactory().create_dataset(
-                'FileInstantDataset'
-            )
+        dataset = paddle.fluid.DatasetFactory().create_dataset(
+            'FileInstantDataset'
+        )
         dataset.set_batch_size(1)
         dataset.set_thread(1)
         dataset.set_filelist(['None'])
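Both call sites now always build a 'FileInstantDataset'; the compiled-with-npu switch to 'InMemoryDataset' is gone. The surviving pattern, shown standalone as a sketch (this uses the legacy `paddle.fluid` API, as the surrounding file does):

    import paddle.fluid as fluid

    dataset = fluid.DatasetFactory().create_dataset('FileInstantDataset')
    dataset.set_batch_size(1)
    dataset.set_thread(1)
    dataset.set_filelist(['None'])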
python/paddle/fluid/framework.py

@@ -597,21 +597,6 @@ def _current_expected_place():
                 "You are using XPU version Paddle, but your XPU device is not set properly. CPU device will be used by default."
             )
             _global_expected_place_ = core.CPUPlace()
-    elif core.is_compiled_with_custom_device("npu"):
-        # TODO(duanyanhui): Optimize DeviceManager and Return all expected places when device registered in DeviceManager is greater than 1.
-        try:
-            device_count = core.get_custom_device_count("npu")
-        except Exception as e:
-            device_count = 0
-        if device_count > 0:
-            _global_expected_place_ = core.CustomPlace(
-                "npu", _custom_device_ids("npu")[0]
-            )
-        else:
-            warnings.warn(
-                "You are using NPU version Paddle, but your NPU device is not set properly. CPU device will be used by default."
-            )
-            _global_expected_place_ = core.CPUPlace()
     else:
         _global_expected_place_ = core.CPUPlace()

@@ -7454,9 +7439,9 @@ def device_guard(device=None):
         device, index = device.split(':')
         if device == 'cpu':
             raise ValueError("Should not set device id for cpu.")
-    if device not in ['cpu', 'gpu', 'xpu', 'npu', '', None]:
+    if device not in ['cpu', 'gpu', 'xpu', '', None]:
         raise ValueError(
-            "The Attr(device) should be 'cpu' 'npu' or 'gpu', and it can also be empty string or None "
+            "The Attr(device) should be 'cpu' or 'gpu', and it can also be empty string or None "
             "when there is no need to specify device. But received %s" % device
         )
     if index:
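After the whitelist change, `device_guard` accepts only 'cpu', 'gpu', 'xpu', the empty string, or None; 'npu:0' now raises ValueError. A short usage sketch, assuming static-graph mode where the guard applies (the same function is exposed as `paddle.static.device_guard`):

    import paddle

    paddle.enable_static()
    with paddle.static.device_guard("gpu:0"):         # "npu:0" would now be rejected
        x = paddle.full(shape=[2, 2], fill_value=1.0)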
python/paddle/fluid/optimizer.py

@@ -4554,9 +4554,7 @@ class PipelineOptimizer:
     def __init__(self, optimizer, num_microbatches=1, start_cpu_core_id=0):
         self._device = 'cpu'
-        if core.is_compiled_with_custom_device('npu'):
-            self._device = "npu"
-        elif core.is_compiled_with_cuda():
+        if core.is_compiled_with_cuda():
             self._device = "gpu"
         if in_dygraph_mode():
             raise Exception("In dygraph, don't support PipelineOptimizer.")

@@ -4945,8 +4943,8 @@ class PipelineOptimizer:
             else None
         )
         if device:
-            assert device[0:3] == 'gpu' or device[0:3] == 'npu', (
-                "Now, only gpu and npu devices are "
+            assert device[0:3] == 'gpu', (
+                "Now, only gpu devices are "
                 "supported in pipeline parallemism."
             )
         return device

@@ -5148,8 +5146,8 @@ class PipelineOptimizer:
                 continue
             dev_type = device.split(':')[0]
-            assert dev_type == "gpu" or dev_type == 'npu', (
-                "Now only gpu and npu devices are supported "
+            assert dev_type == "gpu", (
+                "Now only gpu devices are supported "
                 "for pipeline parallelism."
             )

@@ -6388,8 +6386,6 @@ class PipelineOptimizer:
             dev_index = int(dev.split(":")[1])
             if core.is_compiled_with_cuda():
                 place_list.append(core.CUDAPlace(dev_index % 1))
-            elif paddle.is_compiled_with_custom_device('npu'):
-                place_list.append(paddle.CustomPlace('npu', dev_index % 1))

         # Step6: Split startup program
         new_startup_program = self._split_startup_program(

@@ -6412,8 +6408,6 @@ class PipelineOptimizer:
         if core.is_compiled_with_cuda():
             place_id = int(os.getenv("FLAGS_selected_gpus", "0"))
-        elif core.is_compiled_with_custom_device('npu'):
-            place_id = int(os.getenv("FLAGS_selected_npus", "0"))

         # A pass to move the recv op to the beginning of
         # the forward/backward phase
         self._mv_head_recv(program_list[self.local_rank])
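PipelineOptimizer's device selection collapses to a two-way choice between CPU and CUDA builds. The new `__init__` logic, extracted as an equivalent sketch:

    from paddle.fluid import core

    device = 'gpu' if core.is_compiled_with_cuda() else 'cpu'   # the 'npu' branch is gone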
python/paddle/nn/functional/conv.py

@@ -16,7 +16,6 @@ from paddle import _C_ops, _legacy_C_ops, get_flags, in_dynamic_mode
 from paddle.device import (
     get_all_custom_device_type,
     is_compiled_with_cuda,
-    is_compiled_with_custom_device,
     is_compiled_with_rocm,
 )
 from paddle.fluid.framework import _global_flags, in_dygraph_mode

@@ -465,13 +464,6 @@ def conv1d(
             l_type = 'depthwise_conv2d'
             use_cudnn = False

-    # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
-    if is_compiled_with_custom_device('npu'):
-        if num_channels == groups and num_channels == num_filters:
-            l_type = 'depthwise_conv2d'
-        else:
-            l_type = 'conv2d'
-
     squeeze_aixs = -3 if channel_last else -2
     x = unsqueeze(x, axis=[squeeze_aixs])

@@ -755,13 +747,6 @@ def conv2d(
         use_mkldnn = _global_flags()["FLAGS_use_mkldnn"]

-    # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
-    if is_compiled_with_custom_device('npu'):
-        if num_channels == groups and num_channels == num_filters:
-            l_type = 'depthwise_conv2d'
-        else:
-            l_type = 'conv2d'
-
     if (
         is_compiled_with_cuda()
         and get_flags("FLAGS_conv2d_disable_cudnn")[
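With the NPU special case removed, depthwise dispatch in conv1d/conv2d depends only on the channel/group relationship (plus the CUDA checks kept above). A runnable sketch of the depthwise shape the removed branch used to test for, i.e. input_channel == output_channel == groups:

    import paddle

    x = paddle.randn([4, 8, 16])    # NCL: batch 4, 8 channels, length 16
    w = paddle.randn([8, 1, 3])     # [out_channels, in_channels // groups, kernel]
    y = paddle.nn.functional.conv1d(x, w, groups=8, padding=1)   # y.shape == [4, 8, 16]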
python/paddle/nn/functional/loss.py

@@ -16,7 +16,7 @@ import math
 # TODO: define loss functions of neural network
 import paddle
-from paddle import _C_ops, _legacy_C_ops, fluid, in_dynamic_mode
+from paddle import _C_ops, fluid, in_dynamic_mode
 from paddle.framework import core
 from paddle.static.nn.control_flow import Assert
 from paddle.utils import deprecated

@@ -269,51 +269,15 @@ def fluid_softmax_with_cross_entropy(
     if input_dims - 1 == label_dims:
         label = paddle.unsqueeze(label, axis=axis)
     if in_dygraph_mode():
-        if core.is_compiled_with_custom_device("npu"):
-            if not soft_label:
-                valid_label = (
-                    paddle.cast(label != ignore_index, dtype=label.dtype)
-                    * label
-                )
-                softmax, loss = _legacy_C_ops.softmax_with_cross_entropy(
-                    logits,
-                    valid_label,
-                    'soft_label',
-                    soft_label,
-                    'ignore_index',
-                    ignore_index,
-                    'numeric_stable_mode',
-                    numeric_stable_mode,
-                    'axis',
-                    axis,
-                    'use_softmax',
-                    True,
-                )
-            else:
-                softmax, loss = _legacy_C_ops.softmax_with_cross_entropy(
-                    logits,
-                    label,
-                    'soft_label',
-                    soft_label,
-                    'ignore_index',
-                    ignore_index,
-                    'numeric_stable_mode',
-                    numeric_stable_mode,
-                    'axis',
-                    axis,
-                    'use_softmax',
-                    True,
-                )
-        else:
-            softmax, loss = _C_ops.cross_entropy_with_softmax(
-                logits,
-                label,
-                soft_label,
-                True,
-                numeric_stable_mode,
-                ignore_index,
-                axis,
-            )
+        softmax, loss = _C_ops.cross_entropy_with_softmax(
+            logits,
+            label,
+            soft_label,
+            True,
+            numeric_stable_mode,
+            ignore_index,
+            axis,
+        )
     if not return_softmax:
         return loss
     else:

@@ -2734,41 +2698,9 @@ def cross_entropy(
             valid_label = (
                 paddle.cast(label != ignore_index, dtype=label.dtype) * label
             )
-            if core.is_compiled_with_custom_device("npu"):
-                if not soft_label:
-                    _, out = _legacy_C_ops.softmax_with_cross_entropy(
-                        input,
-                        valid_label,
-                        'soft_label',
-                        soft_label,
-                        'ignore_index',
-                        ignore_index,
-                        'numeric_stable_mode',
-                        True,
-                        'axis',
-                        axis,
-                        'use_softmax',
-                        use_softmax,
-                    )
-                else:
-                    _, out = _legacy_C_ops.softmax_with_cross_entropy(
-                        input,
-                        label,
-                        'soft_label',
-                        soft_label,
-                        'ignore_index',
-                        ignore_index,
-                        'numeric_stable_mode',
-                        True,
-                        'axis',
-                        axis,
-                        'use_softmax',
-                        use_softmax,
-                    )
-            else:
-                _, out = _C_ops.cross_entropy_with_softmax(
-                    input, label, soft_label, use_softmax, True, ignore_index, axis
-                )
+            _, out = _C_ops.cross_entropy_with_softmax(
+                input, label, soft_label, use_softmax, True, ignore_index, axis
+            )
         if weight is not None:
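Both loss entry points now dispatch straight to `_C_ops.cross_entropy_with_softmax` under dygraph, with no `_legacy_C_ops` fallback. The public API is unchanged; a quick sketch of the call that lands in this code:

    import paddle

    logits = paddle.to_tensor([[2.0, 1.0, 0.1]])
    label = paddle.to_tensor([0])                               # int64 class index
    loss = paddle.nn.functional.cross_entropy(logits, label)    # mean softmax cross entropy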
python/paddle/static/amp/decorator.py

@@ -220,24 +220,7 @@ class OptimizerWithMixedPrecision:
         """
         train_program = loss.block.program
         self._train_program = train_program
-        # NOTE(zhiqiu): _float_status is only used for NPU.
-        if core.is_compiled_with_custom_device('npu'):
-            float_status = paddle.static.data(
-                name="float_status", shape=[8], dtype='float32'
-            )
-            self._train_program.global_block().append_op(
-                type="alloc_float_status",
-                outputs={"FloatStatus": float_status},
-            )
-            self._train_program.global_block().append_op(
-                type="clear_float_status",
-                inputs={"FloatStatus": float_status},
-                outputs={"FloatStatusOut": float_status},
-            )
-            self._float_status = float_status
-        else:
-            self._float_status = None
+        self._float_status = None

         with program_guard(self._train_program, startup_program):
             self._init_amp_var()

@@ -476,27 +459,17 @@ class OptimizerWithMixedPrecision:
         if self._is_distributed:
             # if distributed, split check_finite_and_unscale to overlap
             # unscale with communication
-            if core.is_compiled_with_custom_device('npu'):
-                with self._train_program._optimized_guard(grads):
-                    _, found_inf = check_finite_and_unscale(
-                        grads,
-                        self._loss_scaling,
-                        name="find_infinite_scale",
-                        float_status=self._float_status,
-                    )
-                    found_infs.append(found_inf)
-            else:
-                for p, g in params_grads:
-                    with self._train_program._optimized_guard([p, g]):
-                        _, found_inf = check_finite_and_unscale(
-                            [
-                                g,
-                            ],
-                            self._loss_scaling,
-                            name="find_infinite_scale",
-                            float_status=self._float_status,
-                        )
-                        found_infs.append(found_inf)
+            for p, g in params_grads:
+                with self._train_program._optimized_guard([p, g]):
+                    _, found_inf = check_finite_and_unscale(
+                        [
+                            g,
+                        ],
+                        self._loss_scaling,
+                        name="find_infinite_scale",
+                        float_status=self._float_status,
+                    )
+                    found_infs.append(found_inf)
         elif self._use_pure_fp16:
             if fp32_grads:
                 with self._train_program._optimized_guard(fp32_grads):
python/paddle/static/amp/fp16_lists.py

@@ -97,8 +97,6 @@ def _get_sys_unsupported_list(dtype):
     device = None
     if core.is_compiled_with_xpu():
         device = 'XPU'
-    elif core.is_compiled_with_custom_device('npu'):
-        device = 'NPU'
     else:
         device = 'GPU'
     _, _, sys_unsupported_list = core.op_supported_infos(device, var_type)
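`_get_sys_unsupported_list` now resolves the query device with a two-way check; the new logic, extracted as a sketch:

    from paddle.fluid import core

    device = 'XPU' if core.is_compiled_with_xpu() else 'GPU'    # 'NPU' option removed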
python/paddle/static/nn/common.py

@@ -948,13 +948,6 @@ def conv2d(
     ):
         l_type = 'depthwise_conv2d'

-    # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
-    if core.is_compiled_with_custom_device('npu'):
-        if num_channels == groups and num_channels == num_filters:
-            l_type = 'depthwise_conv2d'
-        else:
-            l_type = 'conv2d'
-
     helper = LayerHelper(l_type, **locals())
     dtype = helper.input_dtype()
tools/timeline.py

@@ -212,12 +212,6 @@ class Timeline:
                     self._chrome_trace.emit_pid(
                         "memory usage on %s:cudapinnedplace:%d" % (k, 0), pid
                     )
-                if (k, 0, "NPU") not in self._mem_devices:
-                    pid = self._allocate_pid()
-                    self._mem_devices[(k, 0, "NPU")] = pid
-                    self._chrome_trace.emit_pid(
-                        "memory usage on %s:npu:%d" % (k, 0), pid
-                    )

     def _allocate_events(self):
         for k, profile_pb in self._profile_dict.items():