From 991c94f135339edf8f00dfa4aa78376060b147bc Mon Sep 17 00:00:00 2001
From: Jiabin Yang
Date: Wed, 19 Jun 2019 15:06:08 +0800
Subject: [PATCH] test=develop, add add_multi_gpu_install_check (#18157)

* test=develop, add add_multi_gpu_install_check

* test=develop, refine warning doc

* test=develop, refine warning doc

* test=develop, refine warning doc

* test=develop, support multi cpu
---
 python/paddle/fluid/install_check.py          | 117 ++++++++++++++----
 .../fluid/tests/unittests/CMakeLists.txt      |   4 +
 2 files changed, 99 insertions(+), 22 deletions(-)

diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py
index dd1725b45ac..e4077e73dfb 100644
--- a/python/paddle/fluid/install_check.py
+++ b/python/paddle/fluid/install_check.py
@@ -19,9 +19,12 @@ from . import layers
 from . import backward
 from .dygraph import Layer, nn
 from . import executor
-
+from . import optimizer
 from . import core
+from . import compiler
+import logging
 import numpy as np
+import os
 
 __all__ = ['run_check']
 
@@ -45,25 +48,95 @@ def run_check():
     This func should not be called only if you need to verify installation
     '''
     print("Running Verify Fluid Program ... ")
-    prog = Program()
-    startup_prog = Program()
-    scope = core.Scope()
-    with executor.scope_guard(scope):
-        with program_guard(prog, startup_prog):
-            with unique_name.guard():
-                np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
-                inp = layers.data(
-                    name="inp", shape=[2, 2], append_batch_size=False)
-                simple_layer = SimpleLayer("simple_layer")
-                out = simple_layer(inp)
-                param_grads = backward.append_backward(
-                    out, parameter_list=[simple_layer._fc1._w.name])[0]
-                exe = executor.Executor(core.CPUPlace(
-                ) if not core.is_compiled_with_cuda() else core.CUDAPlace(0))
-                exe.run(default_startup_program())
-                exe.run(feed={inp.name: np_inp},
-                        fetch_list=[out.name, param_grads[1].name])
+    use_cuda = core.is_compiled_with_cuda()
+    place = core.CPUPlace() if not core.is_compiled_with_cuda(
+    ) else core.CUDAPlace(0)
+    np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+
+    if use_cuda:
+        if core.get_cuda_device_count() > 1:
+            os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
+        else:
+            os.environ['CUDA_VISIBLE_DEVICES'] = "0"
+
+    def test_parallel_exe():
+        train_prog = Program()
+        startup_prog = Program()
+        scope = core.Scope()
+        if not use_cuda:
+            os.environ['CPU_NUM'] = "2"
+        with executor.scope_guard(scope):
+            with program_guard(train_prog, startup_prog):
+                with unique_name.guard():
+                    places = []
+                    build_strategy = compiler.BuildStrategy()
+                    build_strategy.enable_inplace = True
+                    build_strategy.memory_optimize = True
+                    inp = layers.data(
+                        name="inp", shape=[2, 2], append_batch_size=False)
+                    simple_layer = SimpleLayer("simple_layer")
+                    out = simple_layer(inp)
+                    exe = executor.Executor(place)
+                    if use_cuda:
+                        places = [core.CUDAPlace(0), core.CUDAPlace(1)]
+                    else:
+                        places = [core.CPUPlace(), core.CPUPlace()]
+                    loss = layers.mean(out)
+                    loss.persistable = True
+                    optimizer.SGD(learning_rate=0.01).minimize(loss)
+                    startup_prog.random_seed = 1
+                    compiled_prog = compiler.CompiledProgram(
+                        train_prog).with_data_parallel(
+                            build_strategy=build_strategy,
+                            loss_name=loss.name,
+                            places=places)
+                    exe.run(startup_prog)
+
+                    exe.run(compiled_prog,
+                            feed={inp.name: np_inp},
+                            fetch_list=[loss.name])
+
+    def test_simple_exe():
+        train_prog = Program()
+        startup_prog = Program()
+        scope = core.Scope()
+        if not use_cuda:
+            os.environ['CPU_NUM'] = "1"
+        with executor.scope_guard(scope):
+            with program_guard(train_prog, startup_prog):
+                with unique_name.guard():
+                    inp0 = layers.data(
+                        name="inp", shape=[2, 2], append_batch_size=False)
+                    simple_layer0 = SimpleLayer("simple_layer")
+                    out0 = simple_layer0(inp0)
+                    param_grads = backward.append_backward(
+                        out0, parameter_list=[simple_layer0._fc1._w.name])[0]
+                    exe0 = executor.Executor(core.CPUPlace()
+                                             if not core.is_compiled_with_cuda()
+                                             else core.CUDAPlace(0))
+                    exe0.run(startup_prog)
+                    exe0.run(feed={inp0.name: np_inp},
+                             fetch_list=[out0.name, param_grads[1].name])
+
+    test_simple_exe()
+
+    print("Your Paddle Fluid works well on a SINGLE GPU or CPU.")
+    try:
+        test_parallel_exe()
+        print("Your Paddle Fluid works well on MULTIPLE GPUs or CPUs.")
+        print(
+            "Your Paddle Fluid is installed successfully! Let's start deep learning with Paddle Fluid now"
+        )
+    except Exception as e:
+        logging.warning(
+            "Your Paddle Fluid has some problem with multiple GPUs. This may be caused by:"
+            "\n 1. There is only 1 GPU visible on your device;"
+            "\n 2. The first or the second GPU (or both of them) is occupied now;"
+            "\n 3. Wrong installation of NVIDIA-NCCL2; please follow the instructions at https://github.com/NVIDIA/nccl-tests"
+            "\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html"
+        )
 
-    print(
-        "Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now"
-    )
+        print("\n Original error is: {}".format(e))
+        print(
+            "Your Paddle Fluid is installed successfully ONLY for a SINGLE GPU or CPU! "
+            "\n Let's start deep learning with Paddle Fluid now")
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 15569b339df..ebcad7d5e51 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -116,6 +116,7 @@ list(REMOVE_ITEM TEST_OPS test_imperative_mnist)
 list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer)
 list(REMOVE_ITEM TEST_OPS test_layers)
 list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model)
+list(REMOVE_ITEM TEST_OPS test_install_check)
 
 # Some ops need to check results when gc is enabled
 # Currently, only ops that register NoNeedBufferVarsInference need to do this test
@@ -172,6 +173,9 @@ py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mn
 py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS
     FLAGS_cudnn_deterministic=1 SERIAL)
 set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+py_test_modules(test_install_check MODULES test_install_check ENVS
+    FLAGS_cudnn_deterministic=1 SERIAL)
+set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST")
 
 if(WITH_DISTRIBUTE)
     py_test_modules(test_dist_train MODULES test_dist_train)
-- 
GitLab
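
For reference, a minimal way to exercise the check this patch adds; this is a sketch assuming a PaddlePaddle build of this era (roughly 1.5), where run_check is exported from paddle.fluid.install_check:

    # A minimal sketch; nothing beyond a standard paddle install is assumed.
    import paddle.fluid as fluid

    # run_check() first runs the single-device program, then attempts the
    # data-parallel CompiledProgram path; if the multi-device step fails,
    # it logs the NCCL/GPU hints from this patch instead of raising.
    fluid.install_check.run_check()

On a CUDA build with at least two visible GPUs this exercises NCCL across devices 0 and 1; on a CPU-only build the same graph runs in parallel with CPU_NUM set to 2.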