未验证 提交 69d21d88 编写于 作者: Q Qi Li 提交者: GitHub

[NPU] add npu support for yolov3, test=develop (#4344)

上级 1755a2b2
......@@ -79,3 +79,6 @@ dataset/wider_face/WIDER_val
dataset/wider_face/wider_face_split
ppdet/version.py
# NPU meta folder
kernel_meta/
......@@ -25,7 +25,26 @@ import paddle.version as fluid_version
from .logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['check_gpu', 'check_version', 'check_config']
__all__ = ['check_gpu', 'check_npu', 'check_version', 'check_config']
def check_npu(use_npu):
    """
    Log an error and exit when ``use_npu`` is set to true but the installed
    paddlepaddle build was not compiled with NPU support.

    Args:
        use_npu (bool): value of the ``use_npu`` option from the config.
    """
    err = "Config use_npu cannot be set as true while you are " \
          "using paddlepaddle cpu/gpu/xpu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-npu to run model on NPU \n" \
          "\t2. Set use_npu as false in config file to run " \
          "model on CPU/GPU/XPU"
    try:
        if use_npu and not paddle.is_compiled_with_npu():
            logger.error(err)
            sys.exit(1)
    except AttributeError:
        # Older paddle releases do not expose is_compiled_with_npu();
        # treat them as best-effort and continue silently instead of
        # swallowing every possible exception.
        pass
def check_gpu(use_gpu):
......
......@@ -303,8 +303,14 @@ class YOLOv3Head(object):
return route, tip
def _upsample(self, input, scale=2, name=None):
align_corners = True
if fluid.core.is_compiled_with_npu():
align_corners = False
out = fluid.layers.resize_nearest(
input=input, scale=float(scale), name=name)
input=input,
scale=float(scale),
name=name,
align_corners=align_corners)
return out
def _parse_anchors(self, anchors):
......@@ -520,8 +526,14 @@ class YOLOv4Head(YOLOv3Head):
self.spp_stage = spp_stage
def _upsample(self, input, scale=2, name=None):
align_corners = True
if fluid.core.is_compiled_with_npu():
align_corners = False
out = fluid.layers.resize_nearest(
input=input, scale=float(scale), name=name)
input=input,
scale=float(scale),
name=name,
align_corners=align_corners)
return out
def max_pool(self, input, size):
......
......@@ -29,6 +29,7 @@ logger = logging.getLogger(__name__)
__all__ = [
'check_gpu',
'check_xpu',
'check_npu',
'check_version',
'check_config',
'check_py_func',
......@@ -54,6 +55,25 @@ def check_xpu(use_xpu):
pass
def check_npu(use_npu):
    """
    Log an error and exit when ``use_npu`` is set to true but the installed
    paddlepaddle build was not compiled with NPU support.

    Args:
        use_npu (bool): value of the ``use_npu`` option from the config.
    """
    err = "Config use_npu cannot be set as true while you are " \
          "using paddlepaddle cpu/gpu/xpu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-npu to run model on NPU \n" \
          "\t2. Set use_npu as false in config file to run " \
          "model on CPU/GPU/XPU"
    try:
        if use_npu and not fluid.is_compiled_with_npu():
            logger.error(err)
            sys.exit(1)
    except AttributeError:
        # Older paddle releases do not expose is_compiled_with_npu();
        # treat them as best-effort and continue silently instead of
        # swallowing every possible exception.
        pass
def check_gpu(use_gpu):
"""
Log error and exit when set use_gpu=true in paddlepaddle
......
......@@ -31,6 +31,19 @@ def nccl2_prepare(trainer_id, startup_prog, main_prog):
program=main_prog)
def collective_prepare(trainer_id, startup_prog, main_prog):
    """
    Transpile the given programs for collective (grad-allreduce)
    distributed training.

    Trainer endpoints and the current endpoint are read from the
    PADDLE_TRAINER_ENDPOINTS / PADDLE_CURRENT_ENDPOINT environment
    variables set by the distributed launcher.

    Args:
        trainer_id (int): index of this trainer in the job.
        startup_prog: the fluid startup program to transpile.
        main_prog: the fluid main program to transpile.
    """
    transpiler_config = fluid.DistributeTranspilerConfig()
    transpiler_config.mode = "collective"
    transpiler_config.collective_mode = "grad_allreduce"
    transpiler = fluid.DistributeTranspiler(config=transpiler_config)
    transpiler.transpile(
        trainer_id,
        trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'),
        current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'),
        startup_program=startup_prog,
        program=main_prog)
def prepare_for_multi_process(exe, build_strategy, startup_prog, main_prog):
trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0))
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
......@@ -38,4 +51,7 @@ def prepare_for_multi_process(exe, build_strategy, startup_prog, main_prog):
return
build_strategy.num_trainers = num_trainers
build_strategy.trainer_id = trainer_id
nccl2_prepare(trainer_id, startup_prog, main_prog)
if fluid.core.is_compiled_with_npu():
collective_prepare(trainer_id, startup_prog, main_prog)
else:
nccl2_prepare(trainer_id, startup_prog, main_prog)
......@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)
try:
from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results
import ppdet.utils.checkpoint as checkpoint
from ppdet.utils.check import check_gpu, check_xpu, check_version, check_config, enable_static_mode
from ppdet.utils.check import check_gpu, check_xpu, check_npu, check_version, check_config, enable_static_mode
from ppdet.data.reader import create_reader
......@@ -63,6 +63,10 @@ def main():
check_config(cfg)
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu(cfg.use_gpu)
# disable npu in config by default and check use_npu
if 'use_npu' not in cfg:
cfg.use_npu = False
check_npu(cfg.use_npu)
use_xpu = False
if hasattr(cfg, 'use_xpu'):
check_xpu(cfg.use_xpu)
......@@ -73,6 +77,9 @@ def main():
assert not (use_xpu and cfg.use_gpu), \
'Can not run on both XPU and GPU'
assert not (cfg.use_npu and cfg.use_gpu), \
'Can not run on both NPU and GPU'
main_arch = cfg.architecture
multi_scale_test = getattr(cfg, 'MultiScaleTEST', None)
......@@ -80,6 +87,8 @@ def main():
# define executor
if cfg.use_gpu:
place = fluid.CUDAPlace(0)
elif cfg.use_npu:
place = fluid.NPUPlace(0)
elif use_xpu:
place = fluid.XPUPlace(0)
else:
......@@ -117,7 +126,7 @@ def main():
return
compile_program = fluid.CompiledProgram(eval_prog).with_data_parallel()
if use_xpu:
if use_xpu or cfg.use_npu:
compile_program = eval_prog
assert cfg.metric != 'OID', "eval process of OID dataset \
......
......@@ -41,7 +41,7 @@ try:
from ppdet.utils.eval_utils import parse_fetches
from ppdet.utils.cli import ArgsParser
from ppdet.utils.check import check_gpu, check_version, check_config, enable_static_mode
from ppdet.utils.check import check_gpu, check_npu, check_version, check_config, enable_static_mode
from ppdet.utils.visualizer import visualize_results
import ppdet.utils.checkpoint as checkpoint
......@@ -109,6 +109,10 @@ def main():
check_config(cfg)
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu(cfg.use_gpu)
# disable npu in config by default and check use_npu
if 'use_npu' not in cfg:
cfg.use_npu = False
check_npu(cfg.use_npu)
# check if paddlepaddle version is satisfied
check_version()
......@@ -119,7 +123,12 @@ def main():
test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
dataset.set_images(test_images)
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
if cfg.use_gpu:
place = fluid.CUDAPlace(0)
elif cfg.use_npu:
place = fluid.NPUPlace(0)
else:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
model = create(main_arch)
......
......@@ -50,7 +50,7 @@ try:
from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results
from ppdet.utils.stats import TrainingStats
from ppdet.utils.cli import ArgsParser
from ppdet.utils.check import check_gpu, check_xpu, check_version, check_config, enable_static_mode
from ppdet.utils.check import check_gpu, check_xpu, check_npu, check_version, check_config, enable_static_mode
import ppdet.utils.checkpoint as checkpoint
except ImportError as e:
if sys.argv[0].find('static') >= 0:
......@@ -87,6 +87,10 @@ def main():
check_config(cfg)
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu(cfg.use_gpu)
# disable npu in config by default and check use_npu
if 'use_npu' not in cfg:
cfg.use_npu = False
check_npu(cfg.use_npu)
use_xpu = False
if hasattr(cfg, 'use_xpu'):
check_xpu(cfg.use_xpu)
......@@ -97,6 +101,9 @@ def main():
assert not (use_xpu and cfg.use_gpu), \
'Can not run on both XPU and GPU'
assert not (cfg.use_npu and cfg.use_gpu), \
'Can not run on both NPU and GPU'
save_only = getattr(cfg, 'save_prediction_only', False)
if save_only:
raise NotImplementedError('The config file only support prediction,'
......@@ -105,6 +112,8 @@ def main():
if cfg.use_gpu:
devices_num = fluid.core.get_cuda_device_count()
if cfg.use_npu:
devices_num = fluid.core.get_npu_device_count()
elif use_xpu:
# ToDo(qingshu): XPU only support single card now
devices_num = 1
......@@ -113,6 +122,8 @@ def main():
if cfg.use_gpu and 'FLAGS_selected_gpus' in env:
device_id = int(env['FLAGS_selected_gpus'])
elif cfg.use_npu and 'FLAGS_selected_npus' in env:
device_id = int(env['FLAGS_selected_npus'])
elif use_xpu and 'FLAGS_selected_xpus' in env:
device_id = int(env['FLAGS_selected_xpus'])
else:
......@@ -120,6 +131,8 @@ def main():
if cfg.use_gpu:
place = fluid.CUDAPlace(device_id)
elif cfg.use_npu:
place = fluid.NPUPlace(device_id)
elif use_xpu:
place = fluid.XPUPlace(device_id)
else:
......@@ -216,12 +229,12 @@ def main():
loss_name=loss.name,
build_strategy=build_strategy,
exec_strategy=exec_strategy)
if use_xpu:
if use_xpu or cfg.use_npu:
compiled_train_prog = train_prog
if FLAGS.eval:
compiled_eval_prog = fluid.CompiledProgram(eval_prog)
if use_xpu:
if use_xpu or cfg.use_npu:
compiled_eval_prog = eval_prog
fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
......
......@@ -30,7 +30,7 @@ warnings.filterwarnings('ignore')
import paddle
from ppdet.core.workspace import load_config, merge_config
from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.check import check_gpu, check_npu, check_version, check_config
from ppdet.utils.cli import ArgsParser
from ppdet.engine import Trainer, init_parallel_env
from ppdet.metrics.coco_utils import json_eval_results
......@@ -116,7 +116,16 @@ def main():
cfg['save_prediction_only'] = FLAGS.save_prediction_only
merge_config(FLAGS.opt)
place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
# disable npu in config by default
if 'use_npu' not in cfg:
cfg.use_npu = False
if cfg.use_gpu:
place = paddle.set_device('gpu')
elif cfg.use_npu:
place = paddle.set_device('npu')
else:
place = paddle.set_device('cpu')
if 'norm_type' in cfg and cfg['norm_type'] == 'sync_bn' and not cfg.use_gpu:
cfg['norm_type'] = 'bn'
......@@ -126,6 +135,7 @@ def main():
check_config(cfg)
check_gpu(cfg.use_gpu)
check_npu(cfg.use_npu)
check_version()
run(FLAGS, cfg)
......
......@@ -31,7 +31,7 @@ import glob
import paddle
from ppdet.core.workspace import load_config, merge_config
from ppdet.engine import Trainer
from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.check import check_gpu, check_npu, check_version, check_config
from ppdet.utils.cli import ArgsParser
from ppdet.slim import build_slim_model
......@@ -141,7 +141,16 @@ def main():
cfg['vdl_log_dir'] = FLAGS.vdl_log_dir
merge_config(FLAGS.opt)
place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
# disable npu in config by default
if 'use_npu' not in cfg:
cfg.use_npu = False
if cfg.use_gpu:
place = paddle.set_device('gpu')
elif cfg.use_npu:
place = paddle.set_device('npu')
else:
place = paddle.set_device('cpu')
if 'norm_type' in cfg and cfg['norm_type'] == 'sync_bn' and not cfg.use_gpu:
cfg['norm_type'] = 'bn'
......@@ -151,6 +160,7 @@ def main():
check_config(cfg)
check_gpu(cfg.use_gpu)
check_npu(cfg.use_npu)
check_version()
run(FLAGS, cfg)
......
......@@ -127,7 +127,16 @@ def main():
cfg['profiler_options'] = FLAGS.profiler_options
merge_config(FLAGS.opt)
place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
# disable npu in config by default
if 'use_npu' not in cfg:
cfg.use_npu = False
if cfg.use_gpu:
place = paddle.set_device('gpu')
elif cfg.use_npu:
place = paddle.set_device('npu')
else:
place = paddle.set_device('cpu')
if 'norm_type' in cfg and cfg['norm_type'] == 'sync_bn' and not cfg.use_gpu:
cfg['norm_type'] = 'bn'
......@@ -139,6 +148,7 @@ def main():
merge_config(FLAGS.opt)
check.check_config(cfg)
check.check_gpu(cfg.use_gpu)
check.check_npu(cfg.use_npu)
check.check_version()
run(FLAGS, cfg)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册