未验证 提交 991c94f1 编写于 作者: J Jiabin Yang 提交者: GitHub

test=develop, add add_multi_gpu_install_check (#18157)

* test=develop, add add_multi_gpu_install_check

* test=develop, refine warning doc

* test=develop, refine warning doc

* test=develop, refine warning doc

* test=develop, support multi cpu
上级 bbc29292
...@@ -19,9 +19,12 @@ from . import layers ...@@ -19,9 +19,12 @@ from . import layers
from . import backward from . import backward
from .dygraph import Layer, nn from .dygraph import Layer, nn
from . import executor from . import executor
from . import optimizer
from . import core from . import core
from . import compiler
import logging
import numpy as np import numpy as np
import os
__all__ = ['run_check'] __all__ = ['run_check']
...@@ -45,25 +48,95 @@ def run_check(): ...@@ -45,25 +48,95 @@ def run_check():
This func should not be called only if you need to verify installation This func should not be called only if you need to verify installation
''' '''
print("Running Verify Fluid Program ... ") print("Running Verify Fluid Program ... ")
prog = Program() use_cuda = False if not core.is_compiled_with_cuda() else True
startup_prog = Program() place = core.CPUPlace() if not core.is_compiled_with_cuda(
scope = core.Scope() ) else core.CUDAPlace(0)
with executor.scope_guard(scope): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with program_guard(prog, startup_prog):
with unique_name.guard(): if use_cuda:
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) if core.get_cuda_device_count() > 1:
inp = layers.data( os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
name="inp", shape=[2, 2], append_batch_size=False) else:
simple_layer = SimpleLayer("simple_layer") os.environ['CUDA_VISIBLE_DEVICES'] = "0"
out = simple_layer(inp)
param_grads = backward.append_backward( def test_parallerl_exe():
out, parameter_list=[simple_layer._fc1._w.name])[0] train_prog = Program()
exe = executor.Executor(core.CPUPlace( startup_prog = Program()
) if not core.is_compiled_with_cuda() else core.CUDAPlace(0)) scope = core.Scope()
exe.run(default_startup_program()) if not use_cuda:
exe.run(feed={inp.name: np_inp}, os.environ['CPU_NUM'] = "2"
fetch_list=[out.name, param_grads[1].name]) with executor.scope_guard(scope):
with program_guard(train_prog, startup_prog):
with unique_name.guard():
places = []
build_strategy = compiler.BuildStrategy()
build_strategy.enable_inplace = True
build_strategy.memory_optimize = True
inp = layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
simple_layer = SimpleLayer("simple_layer")
out = simple_layer(inp)
exe = executor.Executor(place)
if use_cuda:
places = [core.CUDAPlace(0), core.CUDAPlace(1)]
else:
places = [core.CPUPlace(), core.CPUPlace()]
loss = layers.mean(out)
loss.persistable = True
optimizer.SGD(learning_rate=0.01).minimize(loss)
startup_prog.random_seed = 1
compiled_prog = compiler.CompiledProgram(
train_prog).with_data_parallel(
build_strategy=build_strategy,
loss_name=loss.name,
places=places)
exe.run(startup_prog)
exe.run(compiled_prog,
feed={inp.name: np_inp},
fetch_list=[loss.name])
def test_simple_exe():
    """Run one forward/backward pass of ``SimpleLayer`` with a plain Executor.

    Builds a fresh main/startup program pair inside a private scope, feeds
    the enclosing function's ``np_inp`` array and fetches the layer output
    together with one gradient variable. Any exception raised while building
    or running the program propagates to the caller.

    Relies on ``use_cuda`` and ``np_inp`` from the enclosing scope.
    """
    main_program = Program()
    init_program = Program()
    local_scope = core.Scope()

    if not use_cuda:
        # CPU-only path: this check pins CPU_NUM to "1" before running.
        os.environ['CPU_NUM'] = "1"

    # Same nesting order as the original three ``with`` statements:
    # scope guard -> program guard -> unique-name guard.
    with executor.scope_guard(local_scope), \
            program_guard(main_program, init_program), \
            unique_name.guard():
        data_var = layers.data(
            name="inp", shape=[2, 2], append_batch_size=False)
        layer = SimpleLayer("simple_layer")
        result_var = layer(data_var)

        # Only the first entry of append_backward's result is kept; its
        # element at index 1 is fetched below (presumably the gradient of
        # the (parameter, gradient) pair -- confirm against the Paddle API).
        first_entry = backward.append_backward(
            result_var, parameter_list=[layer._fc1._w.name])[0]

        # Pick the device the same way the original inline conditional did.
        if not core.is_compiled_with_cuda():
            device = core.CPUPlace()
        else:
            device = core.CUDAPlace(0)
        runner = executor.Executor(device)

        runner.run(init_program)
        # No program is passed explicitly here, exactly as in the original.
        runner.run(
            feed={data_var.name: np_inp},
            fetch_list=[result_var.name, first_entry[1].name])
test_simple_exe()
print("Your Paddle Fluid works well on SINGLE GPU or CPU.")
try:
test_parallerl_exe()
print("Your Paddle Fluid works well on MUTIPLE GPU or CPU.")
print(
"Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now"
)
except Exception as e:
logging.warning(
"Your Paddle Fluid has some problem with multiple GPU. This may be caused by:"
"\n 1. There is only 1 GPU visible on your Device;"
"\n 2. No.1 or No.2 GPU or both of them are occupied now"
"\n 3. Wrong installation of NVIDIA-NCCL2, please follow instruction on https://github.com/NVIDIA/nccl-tests "
"\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html"
)
print( print("\n Original Error is: {}".format(e))
"Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now" print(
) "Your Paddle Fluid is installed successfully ONLY for SINGLE GPU or CPU! "
"\n Let's start deep Learning with Paddle Fluid now")
...@@ -116,6 +116,7 @@ list(REMOVE_ITEM TEST_OPS test_imperative_mnist) ...@@ -116,6 +116,7 @@ list(REMOVE_ITEM TEST_OPS test_imperative_mnist)
list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer) list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer)
list(REMOVE_ITEM TEST_OPS test_layers) list(REMOVE_ITEM TEST_OPS test_layers)
list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model) list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model)
list(REMOVE_ITEM TEST_OPS test_install_check)
# Some ops need to check results when gc is enabled # Some ops need to check results when gc is enabled
# Currently, only ops that register NoNeedBufferVarsInference need to do this test # Currently, only ops that register NoNeedBufferVarsInference need to do this test
...@@ -172,6 +173,9 @@ py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mn ...@@ -172,6 +173,9 @@ py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mn
py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS
FLAGS_cudnn_deterministic=1 SERIAL) FLAGS_cudnn_deterministic=1 SERIAL)
set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
py_test_modules(test_install_check MODULES test_install_check ENVS
FLAGS_cudnn_deterministic=1 SERIAL)
set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST")
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
py_test_modules(test_dist_train MODULES test_dist_train) py_test_modules(test_dist_train MODULES test_dist_train)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册