Unverified commit fecd1a3e, authored by cifar10 and committed by GitHub

[MLU] add mlu detection config for develop branch (#7115)

Parent 88e75af9
use_gpu: true
use_xpu: false
+use_mlu: false
log_iter: 20
save_dir: output
snapshot_epoch: 1
@@ -411,14 +411,14 @@ class Trainer(object):
        model = self.model
        sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
-                  self.cfg.use_gpu and self._nranks > 1)
+                  (self.cfg.use_gpu or self.cfg.use_mlu) and self._nranks > 1)
        if sync_bn:
            model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)

        # enable auto mixed precision mode
        if self.use_amp:
            scaler = paddle.amp.GradScaler(
-               enable=self.cfg.use_gpu or self.cfg.use_npu,
+               enable=self.cfg.use_gpu or self.cfg.use_npu or self.cfg.use_mlu,
                init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
        # get distributed model
        if self.cfg.get('fleet', False):
@@ -474,7 +474,7 @@ class Trainer(object):
                          DataParallel) and use_fused_allreduce_gradients:
                with model.no_sync():
                    with paddle.amp.auto_cast(
-                           enable=self.cfg.use_gpu,
+                           enable=self.cfg.use_gpu or self.cfg.use_mlu,
                            custom_white_list=self.custom_white_list,
                            custom_black_list=self.custom_black_list,
                            level=self.amp_level):
@@ -488,7 +488,7 @@ class Trainer(object):
                        list(model.parameters()), None)
            else:
                with paddle.amp.auto_cast(
-                       enable=self.cfg.use_gpu,
+                       enable=self.cfg.use_gpu or self.cfg.use_mlu,
                        custom_white_list=self.custom_white_list,
                        custom_black_list=self.custom_black_list,
                        level=self.amp_level):
@@ -602,7 +602,7 @@ class Trainer(object):
            # forward
            if self.use_amp:
                with paddle.amp.auto_cast(
-                       enable=self.cfg.use_gpu,
+                       enable=self.cfg.use_gpu or self.cfg.use_mlu,
                        custom_white_list=self.custom_white_list,
                        custom_black_list=self.custom_black_list,
                        level=self.amp_level):
@@ -669,7 +669,7 @@ class Trainer(object):
            # forward
            if self.use_amp:
                with paddle.amp.auto_cast(
-                       enable=self.cfg.use_gpu,
+                       enable=self.cfg.use_gpu or self.cfg.use_mlu,
                        custom_white_list=self.custom_white_list,
                        custom_black_list=self.custom_black_list,
                        level=self.amp_level):
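For context, here is a self-contained sketch (not part of the commit) of the GradScaler/auto_cast pattern that the Trainer changes above extend to MLU. The flags are illustrative stand-ins for self.cfg.use_gpu / use_npu / use_mlu, and AMP simply stays disabled on a plain CPU build.

import paddle

# Illustrative flags; in the Trainer they come from the config object (self.cfg).
use_gpu = paddle.is_compiled_with_cuda()
use_mlu = getattr(paddle, 'is_compiled_with_mlu', lambda: False)()
enable_amp = use_gpu or use_mlu   # mirrors enable=self.cfg.use_gpu or self.cfg.use_mlu

model = paddle.nn.Linear(16, 4)
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
scaler = paddle.amp.GradScaler(enable=enable_amp, init_loss_scaling=1024)

x = paddle.randn([8, 16])
with paddle.amp.auto_cast(enable=enable_amp):
    loss = model(x).mean()
scaled = scaler.scale(loss)     # pass-through when enable_amp is False
scaled.backward()
scaler.minimize(opt, scaled)    # unscales gradients if needed and runs the optimizer step
opt.clear_grad()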
@@ -26,10 +26,30 @@ from .logger import setup_logger
logger = setup_logger(__name__)

__all__ = [
-    'check_gpu', 'check_npu', 'check_xpu', 'check_version', 'check_config'
+    'check_gpu', 'check_npu', 'check_xpu', 'check_mlu', 'check_version',
+    'check_config'
]


+def check_mlu(use_mlu):
+    """
+    Log error and exit when set use_mlu=true in paddlepaddle
+    cpu/gpu/xpu/npu version.
+    """
+    err = "Config use_mlu cannot be set as true while you are " \
+          "using paddlepaddle cpu/gpu/xpu/npu version ! \nPlease try: \n" \
+          "\t1. Install paddlepaddle-mlu to run model on MLU \n" \
+          "\t2. Set use_mlu as false in config file to run " \
+          "model on CPU/GPU/XPU/NPU"
+
+    try:
+        if use_mlu and not paddle.is_compiled_with_mlu():
+            logger.error(err)
+            sys.exit(1)
+    except Exception as e:
+        pass
+
+
def check_npu(use_npu):
    """
    Log error and exit when set use_npu=true in paddlepaddle
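For reference, a minimal usage sketch (not part of the commit) of the new check_mlu guard, mirroring how the tools/*.py changes below wire it up; the config path is only an illustrative placeholder.

import paddle

from ppdet.core.workspace import load_config, merge_config
from ppdet.utils.check import check_mlu

# Placeholder config; any PaddleDetection config with the runtime keys works here.
cfg = load_config('configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml')
merge_config({'use_mlu': True, 'use_gpu': False})   # flip the new runtime flag

check_mlu(cfg.use_mlu)   # exits with a clear error on a non-MLU paddle build
place = paddle.set_device('mlu' if cfg.use_mlu else 'cpu')

From the command line, the same override is typically passed to the tools through the -o option (for example -o use_mlu=true use_gpu=false), which is what merge_config consumes in the scripts below.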
@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
import paddle

from ppdet.core.workspace import load_config, merge_config
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser, merge_args
from ppdet.engine import Trainer, init_parallel_env
from ppdet.metrics.coco_utils import json_eval_results
@@ -171,12 +171,18 @@ def main():
    if 'use_gpu' not in cfg:
        cfg.use_gpu = False

+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
    if cfg.use_gpu:
        place = paddle.set_device('gpu')
    elif cfg.use_npu:
        place = paddle.set_device('npu')
    elif cfg.use_xpu:
        place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
    else:
        place = paddle.set_device('cpu')
@@ -187,6 +193,7 @@ def main():
    check_gpu(cfg.use_gpu)
    check_npu(cfg.use_npu)
    check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
    check_version()

    run(FLAGS, cfg)
@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
import paddle

from ppdet.core.workspace import load_config, merge_config
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser
from ppdet.engine import Tracker
@@ -115,12 +115,18 @@ def main():
    if 'use_gpu' not in cfg:
        cfg.use_gpu = False

+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
    if cfg.use_gpu:
        place = paddle.set_device('gpu')
    elif cfg.use_npu:
        place = paddle.set_device('npu')
    elif cfg.use_xpu:
        place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
    else:
        place = paddle.set_device('cpu')
@@ -128,6 +134,7 @@ def main():
    check_gpu(cfg.use_gpu)
    check_npu(cfg.use_npu)
    check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
    check_version()

    run(FLAGS, cfg)
@@ -32,7 +32,7 @@ import ast
import paddle

from ppdet.core.workspace import load_config, merge_config
from ppdet.engine import Trainer
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser, merge_args
from ppdet.slim import build_slim_model
@@ -204,12 +204,18 @@ def main():
    if 'use_gpu' not in cfg:
        cfg.use_gpu = False

+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
    if cfg.use_gpu:
        place = paddle.set_device('gpu')
    elif cfg.use_npu:
        place = paddle.set_device('npu')
    elif cfg.use_xpu:
        place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
    else:
        place = paddle.set_device('cpu')
@@ -220,6 +226,7 @@ def main():
    check_gpu(cfg.use_gpu)
    check_npu(cfg.use_npu)
    check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
    check_version()

    run(FLAGS, cfg)
@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
import paddle

from ppdet.core.workspace import load_config, merge_config
from ppdet.engine import Tracker
-from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_version, check_config
+from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser
@@ -127,12 +127,18 @@ def main():
    if 'use_gpu' not in cfg:
        cfg.use_gpu = False

+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
    if cfg.use_gpu:
        place = paddle.set_device('gpu')
    elif cfg.use_npu:
        place = paddle.set_device('npu')
    elif cfg.use_xpu:
        place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
    else:
        place = paddle.set_device('cpu')
@@ -140,6 +146,7 @@ def main():
    check_gpu(cfg.use_gpu)
    check_npu(cfg.use_npu)
    check_xpu(cfg.use_xpu)
+    check_mlu(cfg.use_mlu)
    check_version()

    run(FLAGS, cfg)
@@ -149,12 +149,18 @@ def main():
    if 'use_gpu' not in cfg:
        cfg.use_gpu = False

+    # disable mlu in config by default
+    if 'use_mlu' not in cfg:
+        cfg.use_mlu = False
+
    if cfg.use_gpu:
        place = paddle.set_device('gpu')
    elif cfg.use_npu:
        place = paddle.set_device('npu')
    elif cfg.use_xpu:
        place = paddle.set_device('xpu')
+    elif cfg.use_mlu:
+        place = paddle.set_device('mlu')
    else:
        place = paddle.set_device('cpu')
@@ -167,6 +173,7 @@ def main():
    check.check_gpu(cfg.use_gpu)
    check.check_npu(cfg.use_npu)
    check.check_xpu(cfg.use_xpu)
+    check.check_mlu(cfg.use_mlu)
    check.check_version()

    run(FLAGS, cfg)