Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
3ab39705
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3ab39705
编写于
3月 29, 2021
作者:
A
An Improved PeleeNet Algorithm with Feature Pyramid Networks for Image Detection
提交者:
GitHub
3月 29, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adapter npu (#31926)
Co-authored-by:
N
baiyangfan
<
baiyangfan@baidu.com
>
上级
ac89174e
变更
12
隐藏空白更改
内联
并排
Showing
12 changed files
with
27 additions
and
9 deletions
+27
-9
paddle/fluid/framework/device_worker.h
paddle/fluid/framework/device_worker.h
+1
-1
paddle/fluid/framework/device_worker_factory.cc
paddle/fluid/framework/device_worker_factory.cc
+1
-1
paddle/fluid/framework/pipeline_trainer.cc
paddle/fluid/framework/pipeline_trainer.cc
+5
-1
paddle/fluid/framework/section_worker.cc
paddle/fluid/framework/section_worker.cc
+1
-1
paddle/fluid/framework/trainer.h
paddle/fluid/framework/trainer.h
+1
-1
paddle/fluid/operators/cast_op_npu.cc
paddle/fluid/operators/cast_op_npu.cc
+1
-0
paddle/fluid/operators/expand_op_npu.cc
paddle/fluid/operators/expand_op_npu.cc
+1
-0
paddle/fluid/operators/lookup_table_v2_op_npu.cc
paddle/fluid/operators/lookup_table_v2_op_npu.cc
+2
-0
paddle/fluid/operators/slice_op_npu.cc
paddle/fluid/operators/slice_op_npu.cc
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
...e/distributed/fleet/meta_optimizers/sharding_optimizer.py
+3
-1
python/paddle/fluid/device_worker.py
python/paddle/fluid/device_worker.py
+1
-1
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+8
-2
未找到文件。
paddle/fluid/framework/device_worker.h
浏览文件 @
3ab39705
...
...
@@ -634,7 +634,7 @@ class PSGPUWorker : public HogwildWorker {
};
#endif
#if
defined(PADDLE_WITH_NC
CL)
#if
(defined PADDLE_WITH_NCCL) || (defined WITH_ASCEND_
CL)
class
SectionWorker
:
public
DeviceWorker
{
public:
SectionWorker
()
{}
...
...
paddle/fluid/framework/device_worker_factory.cc
浏览文件 @
3ab39705
...
...
@@ -76,7 +76,7 @@ REGISTER_DEVICE_WORKER_CLASS(HeterBoxWorker);
REGISTER_DEVICE_WORKER_CLASS
(
PSGPUWorker
);
#endif
#if
defined(PADDLE_WITH_NC
CL)
#if
(defined PADDLE_WITH_NCCL) || (defined WITH_ASCEND_
CL)
REGISTER_DEVICE_WORKER_CLASS
(
SectionWorker
);
#endif
}
// namespace framework
...
...
paddle/fluid/framework/pipeline_trainer.cc
浏览文件 @
3ab39705
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#if
defined(PADDLE_WITH_NC
CL)
#if
(defined PADDLE_WITH_NCCL) || (defined WITH_ASCEND_
CL)
#include <map>
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/device_worker_factory.h"
...
...
@@ -35,7 +35,11 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
ParseDumpConfig
(
trainer_desc
);
const
auto
&
section_config
=
section_params
.
section_config
();
int
place_id
=
section_config
.
place_id
();
#if (defined PADDLE_WITH_NCCL)
place_
=
platform
::
CUDAPlace
(
place_id
);
#elif (defined WITH_ASCEND_CL)
place_
=
platform
::
NPUPlace
(
place_id
);
#endif
worker_
=
DeviceWorkerFactory
::
CreateDeviceWorker
(
trainer_desc
.
device_worker_name
());
auto
this_worker
=
...
...
paddle/fluid/framework/section_worker.cc
浏览文件 @
3ab39705
...
...
@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if
defined(PADDLE_WITH_NC
CL)
#if
(defined PADDLE_WITH_NCCL) || (defined WITH_ASCEND_
CL)
#include <float.h>
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
...
...
paddle/fluid/framework/trainer.h
浏览文件 @
3ab39705
...
...
@@ -320,7 +320,7 @@ class PSGPUTrainer : public TrainerBase {
};
#endif
#if
defined(PADDLE_WITH_NC
CL)
#if
(defined PADDLE_WITH_NCCL) || (defined WITH_ASCEND_
CL)
class
PipelineTrainer
:
public
TrainerBase
{
public:
PipelineTrainer
()
{}
...
...
paddle/fluid/operators/cast_op_npu.cc
浏览文件 @
3ab39705
...
...
@@ -83,6 +83,7 @@ REGISTER_OP_NPU_KERNEL(
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int16_t
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int32_t
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int64_t
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
bool
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
double
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/expand_op_npu.cc
浏览文件 @
3ab39705
...
...
@@ -76,6 +76,7 @@ class ExpandNPUKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_NPU_KERNEL
(
expand
,
ops
::
ExpandNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
ExpandNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
ExpandNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
...
...
paddle/fluid/operators/lookup_table_v2_op_npu.cc
浏览文件 @
3ab39705
...
...
@@ -82,9 +82,11 @@ namespace ops = paddle::operators;
REGISTER_OP_NPU_KERNEL
(
lookup_table_v2
,
ops
::
LookupTableV2NPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
LookupTableV2NPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
LookupTableV2NPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_NPU_KERNEL
(
lookup_table_v2_grad
,
ops
::
LookupTableV2GradNPUKernel
<
float
>
,
ops
::
LookupTableV2GradNPUKernel
<
int
>
,
ops
::
LookupTableV2GradNPUKernel
<
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/slice_op_npu.cc
浏览文件 @
3ab39705
...
...
@@ -124,11 +124,13 @@ namespace ops = paddle::operators;
REGISTER_OP_NPU_KERNEL
(
slice
,
ops
::
SliceNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
SliceNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
SliceNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_NPU_KERNEL
(
slice_grad
,
ops
::
SliceGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
SliceGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
SliceGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
浏览文件 @
3ab39705
...
...
@@ -103,6 +103,8 @@ class ShardingOptimizer(MetaOptimizerBase):
self
.
pp_bz
=
self
.
user_defined_strategy
.
sharding_configs
[
"pp_bz"
]
self
.
pp_allreduce_in_optimize
=
self
.
user_defined_strategy
.
sharding_configs
[
"pp_allreduce_in_optimize"
]
self
.
optimize_offload
=
self
.
user_defined_strategy
.
sharding_configs
[
"optimize_offload"
]
if
self
.
inner_opt
is
None
:
raise
ValueError
(
...
...
@@ -238,7 +240,7 @@ class ShardingOptimizer(MetaOptimizerBase):
#check_allreduce_sum(main_block, self._shard, self.sharding_ring_id,
# self.dp_ring_id)
#check_allreduce_sum(main_block, self._shard, self.dp_ring_id)
self
.
_wait
()
#
self._wait()
return
optimize_ops
,
params_grads
def
_set_up
(
self
,
params_grads
):
...
...
python/paddle/fluid/device_worker.py
浏览文件 @
3ab39705
...
...
@@ -424,7 +424,7 @@ class Section(DeviceWorker):
# cfg.program_desc.CopyFrom(program.program._get_desc())
place
=
pipeline_opt
[
"place"
]
place_id
=
pipeline_opt
[
"place_id"
]
assert
isinstance
(
place
,
core
.
CUDAPlace
)
#
assert isinstance(place, core.CUDAPlace)
cfg
.
place
=
cfg
.
CUDAPlace
cfg
.
place_id
=
place_id
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
3ab39705
...
...
@@ -5272,7 +5272,10 @@ class PipelineOptimizer(object):
place_list
=
[]
for
dev
in
device_list
:
dev_index
=
int
(
dev
.
split
(
":"
)[
1
])
place_list
.
append
(
core
.
CUDAPlace
(
dev_index
%
8
))
if
core
.
is_compiled_with_cuda
():
place_list
.
append
(
core
.
CUDAPlace
(
dev_index
%
1
))
elif
core
.
is_compiled_with_npu
():
place_list
.
append
(
core
.
NPUPlace
(
dev_index
%
1
))
# Step6: Split startup program
new_startup_program
=
self
.
_split_startup_program
(
startup_program
,
...
...
@@ -5295,7 +5298,10 @@ class PipelineOptimizer(object):
self
.
_accumulate_gradients
(
real_block
)
real_block
.
_sync_with_cpp
()
place_id
=
int
(
os
.
getenv
(
"FLAGS_selected_gpus"
,
"0"
))
if
core
.
is_compiled_with_cuda
():
place_id
=
int
(
os
.
getenv
(
"FLAGS_selected_gpus"
,
"0"
))
elif
core
.
is_compiled_with_npu
():
place_id
=
int
(
os
.
getenv
(
"FLAGS_selected_npus"
,
"0"
))
main_program
.
_pipeline_opt
=
{
"trainer"
:
"PipelineTrainer"
,
"device_worker"
:
"Section"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录