未验证 提交 cf4533d0 编写于 作者: J Jiabin Yang 提交者: GitHub

Cherry pick install check (#18326)

* test=release/1.5, add multi-GPU install check

* test=develop, refine code to use cuda_devices
上级 c8d00cb2
......@@ -12,15 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .framework import Program, program_guard, unique_name, default_startup_program
import os
from .framework import Program, program_guard, unique_name, cuda_places, cpu_places
from .param_attr import ParamAttr
from .initializer import Constant
from . import layers
from . import backward
from .dygraph import Layer, nn
from . import executor
from . import optimizer
from . import core
from . import compiler
import logging
import numpy as np
__all__ = ['run_check']
......@@ -45,25 +48,97 @@ def run_check():
This function should be called only if you need to verify the installation.
'''
print("Running Verify Fluid Program ... ")
prog = Program()
device_list = []
if core.is_compiled_with_cuda():
try:
core.get_cuda_device_count()
except Exception as e:
logging.warning(
"You are using GPU version Paddle Fluid, But Your CUDA Device is not set properly"
"\n Original Error is {}".format(e))
return 0
device_list = cuda_places()
else:
device_list = [core.CPUPlace(), core.CPUPlace()]
np_inp_single = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
inp = []
for i in range(len(device_list)):
inp.append(np_inp_single)
np_inp_muti = np.array(inp)
np_inp_muti = np_inp_muti.reshape(len(device_list), 2, 2)
def test_parallerl_exe():
# Multi-device smoke test: builds a small training program around
# SimpleLayer, compiles it for data-parallel execution over `device_list`,
# and runs one step that fetches the mean loss.
# NOTE(review): this text is a flattened diff (indentation lost, removed and
# added lines interleaved without +/- markers), so some statements below
# appear twice in slightly different forms -- confirm against the repository.
train_prog = Program()
startup_prog = Program()
# A fresh Scope is created and installed via scope_guard for this check.
scope = core.Scope()
with executor.scope_guard(scope):
# NOTE(review): `prog` is not defined inside this function; the next line
# looks like the pre-change side of the diff, superseded by the
# `train_prog` guard that follows -- TODO confirm.
with program_guard(prog, startup_prog):
with program_guard(train_prog, startup_prog):
with unique_name.guard():
# NOTE(review): the `np_inp` array and the `append_batch_size=False`
# data layer below also look like pre-change lines (this function feeds
# `np_inp_muti`, not `np_inp`) -- TODO confirm.
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
inp = layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
# Build strategy with in-place reuse and memory optimization switched on.
build_strategy = compiler.BuildStrategy()
build_strategy.enable_inplace = True
build_strategy.memory_optimize = True
# Input declared with shape [2, 2]; the fed array carries one 2x2 sample
# per entry of `device_list` (see the reshape that builds `np_inp_muti`).
inp = layers.data(name="inp", shape=[2, 2])
simple_layer = SimpleLayer("simple_layer")
out = simple_layer(inp)
# Executor runs on CUDAPlace(0) only when Paddle is compiled with CUDA and
# at least one CUDA device is counted; otherwise it runs on CPUPlace.
exe = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and
(core.get_cuda_device_count() > 0) else core.CPUPlace())
# Mean of the layer output is the loss; it is marked persistable and
# minimized with SGD (learning rate 0.01).
loss = layers.mean(out)
loss.persistable = True
optimizer.SGD(learning_rate=0.01).minimize(loss)
# Fixed seed on the startup program.
startup_prog.random_seed = 1
# Compile the training program for data-parallel execution on every place
# in `device_list`, using `loss` as the loss variable.
compiled_prog = compiler.CompiledProgram(
train_prog).with_data_parallel(
build_strategy=build_strategy,
loss_name=loss.name,
places=device_list)
# Run the startup program first, then one step of the compiled program.
exe.run(startup_prog)
exe.run(compiled_prog,
feed={inp.name: np_inp_muti},
fetch_list=[loss.name])
def test_simple_exe():
# Single-device smoke test: builds SimpleLayer on a 2x2 input, appends the
# backward pass for the `_fc1._w` parameter, and runs one step fetching the
# layer output and the second element of the first (param, grad) pair.
# NOTE(review): this text is a flattened diff (indentation lost, removed and
# added lines interleaved without +/- markers) -- confirm the surviving
# lines against the repository.
train_prog = Program()
startup_prog = Program()
# A fresh Scope is created and installed via scope_guard for this check.
scope = core.Scope()
with executor.scope_guard(scope):
with program_guard(train_prog, startup_prog):
with unique_name.guard():
# Input is declared as exactly [2, 2] (append_batch_size=False).
inp0 = layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
simple_layer0 = SimpleLayer("simple_layer")
out0 = simple_layer0(inp0)
# NOTE(review): the call opened on the next line is followed by two
# alternative continuations. The lines using `out`, `simple_layer`, `exe`,
# `default_startup_program()`, `inp` and `np_inp` reference names this
# function never defines and look like the pre-change side of the diff;
# the `out0` / `simple_layer0` / `exe0` lines look like the post-change
# side -- TODO confirm.
param_grads = backward.append_backward(
out, parameter_list=[simple_layer._fc1._w.name])[0]
exe = executor.Executor(core.CPUPlace(
) if not core.is_compiled_with_cuda() else core.CUDAPlace(0))
exe.run(default_startup_program())
exe.run(feed={inp.name: np_inp},
fetch_list=[out.name, param_grads[1].name])
out0, parameter_list=[simple_layer0._fc1._w.name])[0]
# Executor runs on CUDAPlace(0) only when Paddle is compiled with CUDA and
# at least one CUDA device is counted; otherwise it runs on CPUPlace.
exe0 = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and
(core.get_cuda_device_count() > 0) else core.CPUPlace())
# Run the startup program, then one step fed with the single 2x2 array.
# No program is passed to the second run() call; presumably it uses the
# default main program set by program_guard -- verify against the
# Executor.run documentation.
exe0.run(startup_prog)
exe0.run(feed={inp0.name: np_inp_single},
fetch_list=[out0.name, param_grads[1].name])
test_simple_exe()
print("Your Paddle Fluid works well on SINGLE GPU or CPU.")
try:
test_parallerl_exe()
print("Your Paddle Fluid works well on MUTIPLE GPU or CPU.")
print(
"Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now!"
"Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now"
)
except Exception as e:
logging.warning(
"Your Paddle Fluid has some problem with multiple GPU. This may be caused by:"
"\n 1. There is only 1 or 0 GPU visible on your Device;"
"\n 2. No.1 or No.2 GPU or both of them are occupied now"
"\n 3. Wrong installation of NVIDIA-NCCL2, please follow instruction on https://github.com/NVIDIA/nccl-tests "
"\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html"
)
print("\n Original Error is: {}".format(e))
print(
"Your Paddle Fluid is installed successfully ONLY for SINGLE GPU or CPU! "
"\n Let's start deep Learning with Paddle Fluid now")
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册