未验证 提交 fecd1a3e 编写于 作者: C cifar10 提交者: GitHub

[MLU] add mlu detection config for develop branch (#7115)

上级 88e75af9
use_gpu: true use_gpu: true
use_xpu: false use_xpu: false
use_mlu: false
log_iter: 20 log_iter: 20
save_dir: output save_dir: output
snapshot_epoch: 1 snapshot_epoch: 1
......
...@@ -411,14 +411,14 @@ class Trainer(object): ...@@ -411,14 +411,14 @@ class Trainer(object):
model = self.model model = self.model
sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
self.cfg.use_gpu and self._nranks > 1) (self.cfg.use_gpu or self.cfg.use_mlu) and self._nranks > 1)
if sync_bn: if sync_bn:
model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model) model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
# enable auto mixed precision mode # enable auto mixed precision mode
if self.use_amp: if self.use_amp:
scaler = paddle.amp.GradScaler( scaler = paddle.amp.GradScaler(
enable=self.cfg.use_gpu or self.cfg.use_npu, enable=self.cfg.use_gpu or self.cfg.use_npu or self.cfg.use_mlu,
init_loss_scaling=self.cfg.get('init_loss_scaling', 1024)) init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
# get distributed model # get distributed model
if self.cfg.get('fleet', False): if self.cfg.get('fleet', False):
...@@ -474,7 +474,7 @@ class Trainer(object): ...@@ -474,7 +474,7 @@ class Trainer(object):
DataParallel) and use_fused_allreduce_gradients: DataParallel) and use_fused_allreduce_gradients:
with model.no_sync(): with model.no_sync():
with paddle.amp.auto_cast( with paddle.amp.auto_cast(
enable=self.cfg.use_gpu, enable=self.cfg.use_gpu or self.cfg.use_mlu,
custom_white_list=self.custom_white_list, custom_white_list=self.custom_white_list,
custom_black_list=self.custom_black_list, custom_black_list=self.custom_black_list,
level=self.amp_level): level=self.amp_level):
...@@ -488,7 +488,7 @@ class Trainer(object): ...@@ -488,7 +488,7 @@ class Trainer(object):
list(model.parameters()), None) list(model.parameters()), None)
else: else:
with paddle.amp.auto_cast( with paddle.amp.auto_cast(
enable=self.cfg.use_gpu, enable=self.cfg.use_gpu or self.cfg.use_mlu,
custom_white_list=self.custom_white_list, custom_white_list=self.custom_white_list,
custom_black_list=self.custom_black_list, custom_black_list=self.custom_black_list,
level=self.amp_level): level=self.amp_level):
...@@ -602,7 +602,7 @@ class Trainer(object): ...@@ -602,7 +602,7 @@ class Trainer(object):
# forward # forward
if self.use_amp: if self.use_amp:
with paddle.amp.auto_cast( with paddle.amp.auto_cast(
enable=self.cfg.use_gpu, enable=self.cfg.use_gpu or self.cfg.use_mlu,
custom_white_list=self.custom_white_list, custom_white_list=self.custom_white_list,
custom_black_list=self.custom_black_list, custom_black_list=self.custom_black_list,
level=self.amp_level): level=self.amp_level):
...@@ -669,7 +669,7 @@ class Trainer(object): ...@@ -669,7 +669,7 @@ class Trainer(object):
# forward # forward
if self.use_amp: if self.use_amp:
with paddle.amp.auto_cast( with paddle.amp.auto_cast(
enable=self.cfg.use_gpu, enable=self.cfg.use_gpu or self.cfg.use_mlu,
custom_white_list=self.custom_white_list, custom_white_list=self.custom_white_list,
custom_black_list=self.custom_black_list, custom_black_list=self.custom_black_list,
level=self.amp_level): level=self.amp_level):
......
...@@ -26,10 +26,30 @@ from .logger import setup_logger ...@@ -26,10 +26,30 @@ from .logger import setup_logger
logger = setup_logger(__name__) logger = setup_logger(__name__)
__all__ = [ __all__ = [
'check_gpu', 'check_npu', 'check_xpu', 'check_version', 'check_config' 'check_gpu', 'check_npu', 'check_xpu', 'check_mlu', 'check_version',
'check_config'
] ]
def check_mlu(use_mlu):
    """
    Log an error and exit when ``use_mlu`` is set to true but the installed
    paddlepaddle build was compiled without MLU support (i.e. a
    cpu/gpu/xpu/npu build).

    Args:
        use_mlu (bool): the ``use_mlu`` flag from the config.
    """
    err = "Config use_mlu cannot be set as true while you are " \
          "using paddlepaddle cpu/gpu/xpu/npu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-mlu to run model on MLU \n" \
          "\t2. Set use_mlu as false in config file to run " \
          "model on CPU/GPU/XPU/NPU"
    try:
        if use_mlu and not paddle.is_compiled_with_mlu():
            logger.error(err)
            sys.exit(1)
    except Exception:
        # Best-effort check: older paddle builds may not expose
        # is_compiled_with_mlu() (AttributeError). Deliberately swallow the
        # failure instead of crashing, matching check_npu/check_xpu style.
        pass
def check_npu(use_npu): def check_npu(use_npu):
""" """
Log error and exit when set use_npu=true in paddlepaddle Log error and exit when set use_npu=true in paddlepaddle
......
...@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore') ...@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
import paddle import paddle
from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import load_config, merge_config
from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser, merge_args from ppdet.utils.cli import ArgsParser, merge_args
from ppdet.engine import Trainer, init_parallel_env from ppdet.engine import Trainer, init_parallel_env
from ppdet.metrics.coco_utils import json_eval_results from ppdet.metrics.coco_utils import json_eval_results
...@@ -171,12 +171,18 @@ def main(): ...@@ -171,12 +171,18 @@ def main():
if 'use_gpu' not in cfg: if 'use_gpu' not in cfg:
cfg.use_gpu = False cfg.use_gpu = False
# disable mlu in config by default
if 'use_mlu' not in cfg:
cfg.use_mlu = False
if cfg.use_gpu: if cfg.use_gpu:
place = paddle.set_device('gpu') place = paddle.set_device('gpu')
elif cfg.use_npu: elif cfg.use_npu:
place = paddle.set_device('npu') place = paddle.set_device('npu')
elif cfg.use_xpu: elif cfg.use_xpu:
place = paddle.set_device('xpu') place = paddle.set_device('xpu')
elif cfg.use_mlu:
place = paddle.set_device('mlu')
else: else:
place = paddle.set_device('cpu') place = paddle.set_device('cpu')
...@@ -187,6 +193,7 @@ def main(): ...@@ -187,6 +193,7 @@ def main():
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
check_npu(cfg.use_npu) check_npu(cfg.use_npu)
check_xpu(cfg.use_xpu) check_xpu(cfg.use_xpu)
check_mlu(cfg.use_mlu)
check_version() check_version()
run(FLAGS, cfg) run(FLAGS, cfg)
......
...@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore') ...@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
import paddle import paddle
from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import load_config, merge_config
from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
from ppdet.engine import Tracker from ppdet.engine import Tracker
...@@ -115,12 +115,18 @@ def main(): ...@@ -115,12 +115,18 @@ def main():
if 'use_gpu' not in cfg: if 'use_gpu' not in cfg:
cfg.use_gpu = False cfg.use_gpu = False
# disable mlu in config by default
if 'use_mlu' not in cfg:
cfg.use_mlu = False
if cfg.use_gpu: if cfg.use_gpu:
place = paddle.set_device('gpu') place = paddle.set_device('gpu')
elif cfg.use_npu: elif cfg.use_npu:
place = paddle.set_device('npu') place = paddle.set_device('npu')
elif cfg.use_xpu: elif cfg.use_xpu:
place = paddle.set_device('xpu') place = paddle.set_device('xpu')
elif cfg.use_mlu:
place = paddle.set_device('mlu')
else: else:
place = paddle.set_device('cpu') place = paddle.set_device('cpu')
...@@ -128,6 +134,7 @@ def main(): ...@@ -128,6 +134,7 @@ def main():
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
check_npu(cfg.use_npu) check_npu(cfg.use_npu)
check_xpu(cfg.use_xpu) check_xpu(cfg.use_xpu)
check_mlu(cfg.use_mlu)
check_version() check_version()
run(FLAGS, cfg) run(FLAGS, cfg)
......
...@@ -32,7 +32,7 @@ import ast ...@@ -32,7 +32,7 @@ import ast
import paddle import paddle
from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import load_config, merge_config
from ppdet.engine import Trainer from ppdet.engine import Trainer
from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser, merge_args from ppdet.utils.cli import ArgsParser, merge_args
from ppdet.slim import build_slim_model from ppdet.slim import build_slim_model
...@@ -204,12 +204,18 @@ def main(): ...@@ -204,12 +204,18 @@ def main():
if 'use_gpu' not in cfg: if 'use_gpu' not in cfg:
cfg.use_gpu = False cfg.use_gpu = False
# disable mlu in config by default
if 'use_mlu' not in cfg:
cfg.use_mlu = False
if cfg.use_gpu: if cfg.use_gpu:
place = paddle.set_device('gpu') place = paddle.set_device('gpu')
elif cfg.use_npu: elif cfg.use_npu:
place = paddle.set_device('npu') place = paddle.set_device('npu')
elif cfg.use_xpu: elif cfg.use_xpu:
place = paddle.set_device('xpu') place = paddle.set_device('xpu')
elif cfg.use_mlu:
place = paddle.set_device('mlu')
else: else:
place = paddle.set_device('cpu') place = paddle.set_device('cpu')
...@@ -220,6 +226,7 @@ def main(): ...@@ -220,6 +226,7 @@ def main():
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
check_npu(cfg.use_npu) check_npu(cfg.use_npu)
check_xpu(cfg.use_xpu) check_xpu(cfg.use_xpu)
check_mlu(cfg.use_mlu)
check_version() check_version()
run(FLAGS, cfg) run(FLAGS, cfg)
......
...@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore') ...@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
import paddle import paddle
from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import load_config, merge_config
from ppdet.engine import Tracker from ppdet.engine import Tracker
from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
...@@ -127,12 +127,18 @@ def main(): ...@@ -127,12 +127,18 @@ def main():
if 'use_gpu' not in cfg: if 'use_gpu' not in cfg:
cfg.use_gpu = False cfg.use_gpu = False
# disable mlu in config by default
if 'use_mlu' not in cfg:
cfg.use_mlu = False
if cfg.use_gpu: if cfg.use_gpu:
place = paddle.set_device('gpu') place = paddle.set_device('gpu')
elif cfg.use_npu: elif cfg.use_npu:
place = paddle.set_device('npu') place = paddle.set_device('npu')
elif cfg.use_xpu: elif cfg.use_xpu:
place = paddle.set_device('xpu') place = paddle.set_device('xpu')
elif cfg.use_mlu:
place = paddle.set_device('mlu')
else: else:
place = paddle.set_device('cpu') place = paddle.set_device('cpu')
...@@ -140,6 +146,7 @@ def main(): ...@@ -140,6 +146,7 @@ def main():
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
check_npu(cfg.use_npu) check_npu(cfg.use_npu)
check_xpu(cfg.use_xpu) check_xpu(cfg.use_xpu)
check_mlu(cfg.use_mlu)
check_version() check_version()
run(FLAGS, cfg) run(FLAGS, cfg)
......
...@@ -149,12 +149,18 @@ def main(): ...@@ -149,12 +149,18 @@ def main():
if 'use_gpu' not in cfg: if 'use_gpu' not in cfg:
cfg.use_gpu = False cfg.use_gpu = False
# disable mlu in config by default
if 'use_mlu' not in cfg:
cfg.use_mlu = False
if cfg.use_gpu: if cfg.use_gpu:
place = paddle.set_device('gpu') place = paddle.set_device('gpu')
elif cfg.use_npu: elif cfg.use_npu:
place = paddle.set_device('npu') place = paddle.set_device('npu')
elif cfg.use_xpu: elif cfg.use_xpu:
place = paddle.set_device('xpu') place = paddle.set_device('xpu')
elif cfg.use_mlu:
place = paddle.set_device('mlu')
else: else:
place = paddle.set_device('cpu') place = paddle.set_device('cpu')
...@@ -167,6 +173,7 @@ def main(): ...@@ -167,6 +173,7 @@ def main():
check.check_gpu(cfg.use_gpu) check.check_gpu(cfg.use_gpu)
check.check_npu(cfg.use_npu) check.check_npu(cfg.use_npu)
check.check_xpu(cfg.use_xpu) check.check_xpu(cfg.use_xpu)
check.check_mlu(cfg.use_mlu)
check.check_version() check.check_version()
run(FLAGS, cfg) run(FLAGS, cfg)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册