Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
ERNIE
提交
ad3547c0
E
ERNIE
项目概览
PaddlePaddle
/
ERNIE
大约 1 年 前同步成功
通知
109
Star
5997
Fork
1270
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
29
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
ERNIE
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
29
Issue
29
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ad3547c0
编写于
5月 17, 2019
作者:
C
chengduozh
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add multi process implementation
上级
3a543ca8
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
68 additions
and
2 deletions
+68
-2
BERT/dist_utils.py
BERT/dist_utils.py
+45
-0
BERT/run_classifier.py
BERT/run_classifier.py
+23
-2
未找到文件。
BERT/dist_utils.py
0 → 100644
浏览文件 @
ad3547c0
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
paddle.fluid
as
fluid
def nccl2_prepare(trainer_id, startup_prog, main_prog):
    """Transpile the given programs in place for NCCL2 distributed training.

    The trainer endpoint list and this process's endpoint are read from the
    PADDLE_TRAINER_ENDPOINTS and PADDLE_CURRENT_ENDPOINT environment
    variables (set by the multi-process launcher).

    Args:
        trainer_id: Integer rank of the current trainer.
        startup_prog: Startup program to transpile.
        main_prog: Main (training) program to transpile.
    """
    transpiler_config = fluid.DistributeTranspilerConfig()
    transpiler_config.mode = "nccl2"

    transpiler = fluid.DistributeTranspiler(config=transpiler_config)
    transpiler.transpile(
        trainer_id,
        trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'),
        current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'),
        startup_program=startup_prog,
        program=main_prog)
def prepare_for_multi_process(exe, build_strategy, train_prog, startup_prog):
    """Configure ``build_strategy`` for multi-process training and, when more
    than one trainer is present, transpile the programs for NCCL2 and run the
    startup program.

    Trainer rank and count come from the PADDLE_TRAINER_ID and
    PADDLE_TRAINERS_NUM environment variables, defaulting to a single local
    trainer (id 0, count 1).

    Args:
        exe: Executor used to run ``startup_prog`` (only touched when
            there is more than one trainer).
        build_strategy: BuildStrategy whose ``num_trainers`` / ``trainer_id``
            fields are set in place.
        train_prog: Main training program.
        startup_prog: Startup program.
    """
    env = os.environ
    trainer_id = int(env.get('PADDLE_TRAINER_ID', 0))
    num_trainers = int(env.get('PADDLE_TRAINERS_NUM', 1))
    print("PADDLE_TRAINERS_NUM", num_trainers)
    print("PADDLE_TRAINER_ID", trainer_id)

    build_strategy.num_trainers = num_trainers
    build_strategy.trainer_id = trainer_id

    # NOTE(zcd): use multi processes to train the model,
    # and each process use one GPU card.
    if num_trainers > 1:
        nccl2_prepare(trainer_id, startup_prog, train_prog)
        # the startup_prog are run two times, but it doesn't matter.
        exe.run(startup_prog)
BERT/run_classifier.py
浏览文件 @
ad3547c0
...
...
@@ -32,6 +32,8 @@ from model.classifier import create_model
from
optimization
import
optimization
from
utils.args
import
ArgumentGroup
,
print_arguments
from
utils.init
import
init_pretraining_params
,
init_checkpoint
import
dist_utils
# yapf: disable
parser
=
argparse
.
ArgumentParser
(
__doc__
)
...
...
@@ -107,6 +109,21 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
(
eval_phase
,
np
.
sum
(
total_cost
)
/
np
.
sum
(
total_num_seqs
),
np
.
sum
(
total_acc
)
/
np
.
sum
(
total_num_seqs
),
time_end
-
time_begin
))
def get_device_num():
    """Return the number of GPU devices this process should use.

    Under multi-process training (PADDLE_TRAINERS_NUM > 1) every process
    drives exactly one card, so 1 is returned. Otherwise the count is taken
    from CUDA_VISIBLE_DEVICES when it is set and non-empty, falling back to
    counting the lines of ``nvidia-smi -L`` output.
    """
    visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
    # NOTE(zcd): use multi processes to train the model,
    # and each process use one GPU card.
    if int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) > 1:
        return 1
    if visible_device:
        return len(visible_device.split(','))
    # No explicit device list: ask the driver how many GPUs exist.
    return subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n')
def update_lr(args):
    """Divide ``args.learning_rate`` in place by the number of trainers.

    The trainer count is read from the PADDLE_TRAINERS_NUM environment
    variable and defaults to 1, in which case the learning rate is left
    numerically unchanged.

    Args:
        args: Parsed argument namespace with a ``learning_rate`` attribute.
    """
    trainer_count = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    args.learning_rate /= trainer_count
def
main
(
args
):
bert_config
=
BertConfig
(
args
.
bert_config_path
)
...
...
@@ -114,12 +131,14 @@ def main(args):
if
args
.
use_cuda
:
place
=
fluid
.
CUDAPlace
(
int
(
os
.
getenv
(
'FLAGS_selected_gpus'
,
'0'
)))
dev_count
=
fluid
.
core
.
get_cuda_device_count
()
dev_count
=
get_device_num
()
#
fluid.core.get_cuda_device_count()
else
:
place
=
fluid
.
CPUPlace
()
dev_count
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
exe
=
fluid
.
Executor
(
place
)
update_lr
(
args
)
task_name
=
args
.
task_name
.
lower
()
processors
=
{
'xnli'
:
reader
.
XnliProcessor
,
...
...
@@ -250,7 +269,9 @@ def main(args):
exec_strategy
.
num_threads
=
dev_count
exec_strategy
.
num_iteration_per_drop_scope
=
args
.
num_iteration_per_drop_scope
build_strategy
=
fluid
.
BuildStrategy
()
dist_utils
.
prepare_for_multi_process
(
exe
,
build_strategy
,
train_program
,
startup_prog
)
train_exe
=
fluid
.
ParallelExecutor
(
use_cuda
=
args
.
use_cuda
,
loss_name
=
loss
.
name
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录