Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
0e74eea2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0e74eea2
编写于
4月 23, 2021
作者:
B
Baibaifan
提交者:
GitHub
4月 23, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
solve hccl communicate conflict (#32447)
solve hccl communicate conflict (#32447)
上级
2b108a04
变更
17
隐藏空白更改
内联
并排
Showing
17 changed files
with
47 additions
and
13 deletions
+47
-13
paddle/fluid/framework/device_worker.h
paddle/fluid/framework/device_worker.h
+2
-1
paddle/fluid/framework/device_worker_factory.cc
paddle/fluid/framework/device_worker_factory.cc
+2
-1
paddle/fluid/framework/pipeline_trainer.cc
paddle/fluid/framework/pipeline_trainer.cc
+6
-1
paddle/fluid/framework/section_worker.cc
paddle/fluid/framework/section_worker.cc
+2
-1
paddle/fluid/framework/trainer.h
paddle/fluid/framework/trainer.h
+2
-1
paddle/fluid/operators/cast_op_npu.cc
paddle/fluid/operators/cast_op_npu.cc
+1
-0
paddle/fluid/operators/expand_op_npu.cc
paddle/fluid/operators/expand_op_npu.cc
+1
-0
paddle/fluid/operators/lookup_table_v2_op_npu.cc
paddle/fluid/operators/lookup_table_v2_op_npu.cc
+2
-0
paddle/fluid/operators/slice_op_npu.cc
paddle/fluid/operators/slice_op_npu.cc
+2
-0
python/paddle/distributed/fleet/meta_optimizers/common.py
python/paddle/distributed/fleet/meta_optimizers/common.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py
...e/distributed/fleet/meta_optimizers/pipeline_optimizer.py
+1
-0
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
...e/distributed/fleet/meta_optimizers/sharding_optimizer.py
+2
-2
python/paddle/fluid/device_worker.py
python/paddle/fluid/device_worker.py
+4
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+6
-2
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+8
-2
python/paddle/fluid/transpiler/collective.py
python/paddle/fluid/transpiler/collective.py
+3
-0
python/paddle/hapi/model.py
python/paddle/hapi/model.py
+1
-1
未找到文件。
paddle/fluid/framework/device_worker.h
浏览文件 @
0e74eea2
...
...
@@ -638,7 +638,8 @@ class PSGPUWorker : public HogwildWorker {
};
#endif
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
class
SectionWorker
:
public
DeviceWorker
{
public:
SectionWorker
()
{}
...
...
paddle/fluid/framework/device_worker_factory.cc
浏览文件 @
0e74eea2
...
...
@@ -79,7 +79,8 @@ REGISTER_DEVICE_WORKER_CLASS(HeterBoxWorker);
REGISTER_DEVICE_WORKER_CLASS
(
PSGPUWorker
);
#endif
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
REGISTER_DEVICE_WORKER_CLASS
(
SectionWorker
);
#endif
}
// namespace framework
...
...
paddle/fluid/framework/pipeline_trainer.cc
浏览文件 @
0e74eea2
...
...
@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h"
...
...
@@ -34,7 +35,11 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
ParseDumpConfig
(
trainer_desc
);
const
auto
&
section_config
=
section_params
.
section_config
();
int
place_id
=
section_config
.
place_id
();
#if (defined PADDLE_WITH_NCCL)
place_
=
platform
::
CUDAPlace
(
place_id
);
#elif (defined WITH_ASCEND_CL)
place_
=
platform
::
NPUPlace
(
place_id
);
#endif
worker_
=
DeviceWorkerFactory
::
CreateDeviceWorker
(
trainer_desc
.
device_worker_name
());
auto
this_worker
=
...
...
paddle/fluid/framework/section_worker.cc
浏览文件 @
0e74eea2
...
...
@@ -9,7 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
#include <float.h>
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
...
...
paddle/fluid/framework/trainer.h
浏览文件 @
0e74eea2
...
...
@@ -332,7 +332,8 @@ class PSGPUTrainer : public TrainerBase {
};
#endif
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
class
PipelineTrainer
:
public
TrainerBase
{
public:
PipelineTrainer
()
{}
...
...
paddle/fluid/operators/cast_op_npu.cc
浏览文件 @
0e74eea2
...
...
@@ -92,6 +92,7 @@ REGISTER_OP_NPU_KERNEL(
cast
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int16_t
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int32_t
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int64_t
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
bool
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
double
>
,
ops
::
CastNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/expand_op_npu.cc
浏览文件 @
0e74eea2
...
...
@@ -79,6 +79,7 @@ class ExpandNPUKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_NPU_KERNEL
(
expand
,
ops
::
ExpandNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
ExpandNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
ExpandNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
...
...
paddle/fluid/operators/lookup_table_v2_op_npu.cc
浏览文件 @
0e74eea2
...
...
@@ -86,9 +86,11 @@ namespace ops = paddle::operators;
REGISTER_OP_NPU_KERNEL
(
lookup_table_v2
,
ops
::
LookupTableV2NPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
LookupTableV2NPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
LookupTableV2NPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_NPU_KERNEL
(
lookup_table_v2_grad
,
ops
::
LookupTableV2GradNPUKernel
<
float
>
,
ops
::
LookupTableV2GradNPUKernel
<
int
>
,
ops
::
LookupTableV2GradNPUKernel
<
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/slice_op_npu.cc
浏览文件 @
0e74eea2
...
...
@@ -124,11 +124,13 @@ namespace ops = paddle::operators;
REGISTER_OP_NPU_KERNEL
(
slice
,
ops
::
SliceNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
SliceNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
SliceNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_NPU_KERNEL
(
slice_grad
,
ops
::
SliceGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
float
>
,
ops
::
SliceGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
int
>
,
ops
::
SliceGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
python/paddle/distributed/fleet/meta_optimizers/common.py
浏览文件 @
0e74eea2
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
from
__future__
import
print_function
import
os
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
,
unique_name
...
...
@@ -77,6 +78,7 @@ class CollectiveHelper(object):
nranks
=
len
(
endpoints
)
other_endpoints
=
endpoints
[:]
other_endpoints
.
remove
(
current_endpoint
)
if
rank
==
0
and
wait_port
:
wait_server_ready
(
other_endpoints
)
...
...
python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py
浏览文件 @
0e74eea2
...
...
@@ -13,6 +13,7 @@
from
__future__
import
print_function
from
__future__
import
division
import
os
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
,
unique_name
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
浏览文件 @
0e74eea2
...
...
@@ -365,8 +365,8 @@ class ShardingOptimizer(MetaOptimizerBase):
'w'
)
as
f
:
f
.
writelines
(
str
(
main_block
.
program
))
self
.
_wait
()
if
core
.
is_compiled_with_cuda
():
self
.
_wait
()
return
optimize_ops
,
params_grads
def
_init_comm
(
self
):
...
...
python/paddle/fluid/device_worker.py
浏览文件 @
0e74eea2
...
...
@@ -433,7 +433,10 @@ class Section(DeviceWorker):
# cfg.program_desc.CopyFrom(program.program._get_desc())
place
=
pipeline_opt
[
"place"
]
place_id
=
pipeline_opt
[
"place_id"
]
assert
isinstance
(
place
,
core
.
CUDAPlace
)
if
core
.
is_compiled_with_cuda
():
assert
isinstance
(
place
,
core
.
CUDAPlace
)
elif
core
.
is_compiled_with_npu
():
assert
isinstance
(
place
,
core
.
NPUPlace
)
cfg
.
place
=
cfg
.
CUDAPlace
cfg
.
place_id
=
place_id
...
...
python/paddle/fluid/executor.py
浏览文件 @
0e74eea2
...
...
@@ -1451,8 +1451,12 @@ class Executor(object):
for
var
in
program
.
global_block
().
vars
.
values
():
if
var
.
is_data
:
data_vars
.
append
(
var
)
dataset
=
paddle
.
fluid
.
DatasetFactory
().
create_dataset
(
'FileInstantDataset'
)
if
core
.
is_compiled_with_npu
():
dataset
=
paddle
.
fluid
.
DatasetFactory
().
create_dataset
(
'InMemoryDataset'
)
else
:
dataset
=
paddle
.
fluid
.
DatasetFactory
().
create_dataset
(
'FileInstantDataset'
)
dataset
.
set_batch_size
(
1
)
dataset
.
set_thread
(
1
)
dataset
.
set_filelist
([
'None'
])
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
0e74eea2
...
...
@@ -4818,7 +4818,10 @@ class PipelineOptimizer(object):
place_list
=
[]
for
dev
in
device_list
:
dev_index
=
int
(
dev
.
split
(
":"
)[
1
])
place_list
.
append
(
core
.
CUDAPlace
(
0
))
if
core
.
is_compiled_with_cuda
():
place_list
.
append
(
core
.
CUDAPlace
(
dev_index
%
1
))
elif
core
.
is_compiled_with_npu
():
place_list
.
append
(
core
.
NPUPlace
(
dev_index
%
1
))
# Step6: Split startup program
new_startup_program
=
self
.
_split_startup_program
(
startup_program
,
...
...
@@ -4837,7 +4840,10 @@ class PipelineOptimizer(object):
self
.
_accumulate_gradients
(
real_block
)
real_block
.
_sync_with_cpp
()
place_id
=
int
(
os
.
getenv
(
"FLAGS_selected_gpus"
,
"0"
))
if
core
.
is_compiled_with_cuda
():
place_id
=
int
(
os
.
getenv
(
"FLAGS_selected_gpus"
,
"0"
))
elif
core
.
is_compiled_with_npu
():
place_id
=
int
(
os
.
getenv
(
"FLAGS_selected_npus"
,
"0"
))
main_program
.
_pipeline_opt
=
{
"trainer"
:
"PipelineTrainer"
,
"device_worker"
:
"Section"
,
...
...
python/paddle/fluid/transpiler/collective.py
浏览文件 @
0e74eea2
...
...
@@ -17,6 +17,7 @@ from __future__ import print_function
import
sys
import
math
from
functools
import
reduce
import
os
import
collections
import
six
...
...
@@ -101,6 +102,8 @@ class Collective(object):
nranks
=
len
(
endpoints
)
other_endpoints
=
endpoints
[:]
other_endpoints
.
remove
(
current_endpoint
)
block
=
program
.
global_block
()
if
rank
==
0
and
wait_port
:
wait_server_ready
(
other_endpoints
)
...
...
python/paddle/hapi/model.py
浏览文件 @
0e74eea2
...
...
@@ -133,9 +133,9 @@ def init_communicator(program, rank, nranks, wait_port, current_endpoint,
return
other_endpoints
=
endpoints
[:]
other_endpoints
.
remove
(
current_endpoint
)
block
=
program
.
global_block
()
if
rank
==
0
and
wait_port
:
wait_server_ready
(
other_endpoints
)
block
=
program
.
global_block
()
if
core
.
is_compiled_with_cuda
():
nccl_id_var
=
block
.
create_var
(
name
=
fluid
.
unique_name
.
generate
(
'nccl_id'
),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录