diff --git a/configs/runtime.yml b/configs/runtime.yml
index f601433afc13619183da153cbb09afe9f1332fbe..7ccf0c97a10557f8f1754714569497488b29d93b 100644
--- a/configs/runtime.yml
+++ b/configs/runtime.yml
@@ -1,5 +1,6 @@
 use_gpu: true
 use_xpu: false
+use_mlu: false
 log_iter: 20
 save_dir: output
 snapshot_epoch: 1
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index f720d84773544f23c29a3ab5fbff9aa4dfb0e3ae..066cb5f670eadc0ee1a74ef0ca34d4a86d40ea4a 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -411,14 +411,14 @@ class Trainer(object):
 
         model = self.model
         sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
-                   self.cfg.use_gpu and self._nranks > 1)
+                   (self.cfg.use_gpu or self.cfg.use_mlu) and self._nranks > 1)
         if sync_bn:
             model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
 
         # enabel auto mixed precision mode
         if self.use_amp:
             scaler = paddle.amp.GradScaler(
-                enable=self.cfg.use_gpu or self.cfg.use_npu,
+                enable=self.cfg.use_gpu or self.cfg.use_npu or self.cfg.use_mlu,
                 init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
         # get distributed model
         if self.cfg.get('fleet', False):
@@ -474,7 +474,7 @@ class Trainer(object):
                     DataParallel) and use_fused_allreduce_gradients:
                 with model.no_sync():
                     with paddle.amp.auto_cast(
-                            enable=self.cfg.use_gpu,
+                            enable=self.cfg.use_gpu or self.cfg.use_mlu,
                             custom_white_list=self.custom_white_list,
                             custom_black_list=self.custom_black_list,
                             level=self.amp_level):
@@ -488,7 +488,7 @@ class Trainer(object):
                                 list(model.parameters()), None)
                 else:
                     with paddle.amp.auto_cast(
-                            enable=self.cfg.use_gpu,
+                            enable=self.cfg.use_gpu or self.cfg.use_mlu,
                             custom_white_list=self.custom_white_list,
                             custom_black_list=self.custom_black_list,
                             level=self.amp_level):
@@ -602,7 +602,7 @@ class Trainer(object):
             # forward
             if self.use_amp:
                 with paddle.amp.auto_cast(
-                        enable=self.cfg.use_gpu,
+                        enable=self.cfg.use_gpu or self.cfg.use_mlu,
                         custom_white_list=self.custom_white_list,
                         custom_black_list=self.custom_black_list,
                         level=self.amp_level):
@@ -669,7 +669,7 @@ class Trainer(object):
             # forward
             if self.use_amp:
                 with paddle.amp.auto_cast(
-                        enable=self.cfg.use_gpu,
+                        enable=self.cfg.use_gpu or self.cfg.use_mlu,
                         custom_white_list=self.custom_white_list,
                         custom_black_list=self.custom_black_list,
                         level=self.amp_level):
diff --git a/ppdet/utils/check.py b/ppdet/utils/check.py
index 52df359db486ebb569663c0ba6536bf66b0dce24..5235e0ebe79a2097b8f059b071e28e680955e823 100644
--- a/ppdet/utils/check.py
+++ b/ppdet/utils/check.py
@@ -26,10 +26,30 @@ from .logger import setup_logger
 logger = setup_logger(__name__)
 
 __all__ = [
-    'check_gpu', 'check_npu', 'check_xpu', 'check_version', 'check_config'
+    'check_gpu', 'check_npu', 'check_xpu', 'check_mlu', 'check_version',
+    'check_config'
 ]
 
 
+def check_mlu(use_mlu):
+    """
+    Log error and exit when set use_mlu=true in paddlepaddle
+    cpu/gpu/xpu/npu version.
+    """
+    err = "Config use_mlu cannot be set as true while you are " \
+          "using paddlepaddle cpu/gpu/xpu/npu version ! \nPlease try: \n" \
+          "\t1. Install paddlepaddle-mlu to run model on MLU \n" \
+          "\t2. Set use_mlu as false in config file to run " \
+          "model on CPU/GPU/XPU/NPU"
+
+    try:
+        if use_mlu and not paddle.is_compiled_with_mlu():
+            logger.error(err)
+            sys.exit(1)
+    except Exception as e:
+        pass
+
+
 def check_npu(use_npu):
     """
     Log error and exit when set use_npu=true in paddlepaddle
diff --git a/tools/eval.py b/tools/eval.py
index 38ac7f9ac66afa2b02f3783aab1f93a485af6f5c..fc4b95b90e90b883f753c7f3e4965b2f04e9f3be 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
 import paddle
 
 from ppdet.core.workspace import load_config, merge_config
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
 from ppdet.utils.cli import ArgsParser, merge_args
 from ppdet.engine import Trainer, init_parallel_env
 from ppdet.metrics.coco_utils import json_eval_results
@@ -171,12 +171,18 @@ def main():
     if 'use_gpu' not in cfg:
         cfg.use_gpu = False
 
+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
     if cfg.use_gpu:
         place = paddle.set_device('gpu')
     elif cfg.use_npu:
         place = paddle.set_device('npu')
     elif cfg.use_xpu:
         place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
     else:
         place = paddle.set_device('cpu')
 
@@ -187,6 +193,7 @@
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
     check_version()
 
     run(FLAGS, cfg)
diff --git a/tools/eval_mot.py b/tools/eval_mot.py
index 49d7d2850de35c49b070492aa125ecb2092611c1..9f09533a29f111cbe2fff1922a4eba99ec0fad57 100644
--- a/tools/eval_mot.py
+++ b/tools/eval_mot.py
@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
 import paddle
 
 from ppdet.core.workspace import load_config, merge_config
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
 from ppdet.utils.cli import ArgsParser
 from ppdet.engine import Tracker
 
@@ -115,12 +115,18 @@ def main():
     if 'use_gpu' not in cfg:
         cfg.use_gpu = False
 
+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
     if cfg.use_gpu:
         place = paddle.set_device('gpu')
     elif cfg.use_npu:
         place = paddle.set_device('npu')
     elif cfg.use_xpu:
         place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
     else:
         place = paddle.set_device('cpu')
 
@@ -128,6 +134,7 @@
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
     check_version()
 
     run(FLAGS, cfg)
diff --git a/tools/infer.py b/tools/infer.py
index d1e9ed7139c0b6184ba4168bc3d5c91ff09e13e9..2b92d7ba12bbcb5148d0ba9a668c1f7fd7d7ce47 100755
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -32,7 +32,7 @@ import ast
 import paddle
 from ppdet.core.workspace import load_config, merge_config
 from ppdet.engine import Trainer
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
 from ppdet.utils.cli import ArgsParser, merge_args
 from ppdet.slim import build_slim_model
@@ -204,12 +204,18 @@ def main():
     if 'use_gpu' not in cfg:
         cfg.use_gpu = False
 
+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
     if cfg.use_gpu:
         place = paddle.set_device('gpu')
     elif cfg.use_npu:
         place = paddle.set_device('npu')
     elif cfg.use_xpu:
         place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
     else:
         place = paddle.set_device('cpu')
 
@@ -220,6 +226,7 @@
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
     check_version()
 
     run(FLAGS, cfg)
diff --git a/tools/infer_mot.py b/tools/infer_mot.py
index 8d54a4dcba8eacedf9711451adac338b8cc3c547..c93c47b63d883fab88a1144c922555f51c503e2d 100644
--- a/tools/infer_mot.py
+++ b/tools/infer_mot.py
@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
 import paddle
 
 from ppdet.core.workspace import load_config, merge_config
 from ppdet.engine import Tracker
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
 from ppdet.utils.cli import ArgsParser
@@ -127,12 +127,18 @@ def main():
     if 'use_gpu' not in cfg:
         cfg.use_gpu = False
 
+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
     if cfg.use_gpu:
         place = paddle.set_device('gpu')
     elif cfg.use_npu:
         place = paddle.set_device('npu')
     elif cfg.use_xpu:
         place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
     else:
         place = paddle.set_device('cpu')
 
@@ -140,6 +146,7 @@
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
     check_version()
 
     run(FLAGS, cfg)
diff --git a/tools/train.py b/tools/train.py
index 289f292c9d8c2c2c077c8f9638ad65261a5eea8a..7b91aafa5db96e0849bfaa3dc83ce1af934f9b17 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -149,12 +149,18 @@ def main():
     if 'use_gpu' not in cfg:
         cfg.use_gpu = False
 
+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
     if cfg.use_gpu:
         place = paddle.set_device('gpu')
    elif cfg.use_npu:
         place = paddle.set_device('npu')
     elif cfg.use_xpu:
         place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
     else:
         place = paddle.set_device('cpu')
 
@@ -167,6 +173,7 @@
     check.check_gpu(cfg.use_gpu)
     check.check_npu(cfg.use_npu)
     check.check_xpu(cfg.use_xpu)
+    check.check_mlu(cfg.use_mlu)
     check.check_version()
 
     run(FLAGS, cfg)
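
For reference, a minimal standalone sketch (not part of the patch) of the select-then-check flow that the tools/*.py entry points follow after this change. It uses only APIs already referenced in the diff (paddle.set_device, paddle.is_compiled_with_mlu); the cfg dict below is a stand-in for the config object that load_config() returns in the real tools.

import sys

import paddle

# Stand-in for the merged config; in the tools it comes from load_config()
# and merge_config(), with use_mlu defaulting to False when absent.
cfg = {'use_gpu': False, 'use_npu': False, 'use_xpu': False, 'use_mlu': True}

# Device selection mirrors main() in tools/train.py after this patch:
# the first enabled flag wins, in GPU > NPU > XPU > MLU > CPU order.
if cfg['use_gpu']:
    place = paddle.set_device('gpu')
elif cfg['use_npu']:
    place = paddle.set_device('npu')
elif cfg['use_xpu']:
    place = paddle.set_device('xpu')
elif cfg['use_mlu']:
    place = paddle.set_device('mlu')
else:
    place = paddle.set_device('cpu')

# check_mlu() (added in ppdet/utils/check.py) performs this guard: if
# use_mlu is true but the installed PaddlePaddle build was not compiled
# with MLU support, it logs an error and exits before training starts.
if cfg['use_mlu'] and not paddle.is_compiled_with_mlu():
    sys.exit(1)

print(place)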