# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import numpy as np

import paddle

# Empty on purpose: nothing is exported through ``from <module> import *``.
__all__ = []


def _simple_network():
    """
    Build a minimal static-graph network: one linear layer followed by a sum.

    The data placeholder is named "input" with shape [None, 2, 2] (float32).
    The weight has shape [2, 3] and is initialized to the constant 0.1; the
    bias has shape [3] and uses the default initializer.

    Returns:
        tuple: ``(input, out, weight)`` - the data placeholder, the sum over
        the linear layer's output, and the weight parameter.
    """
    # Creation order (data, weight, bias) is kept as-is.
    data = paddle.static.data(name="input", shape=[None, 2, 2], dtype="float32")

    weight_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Constant(0.1)
    )
    weight = paddle.create_parameter(
        shape=[2, 3], dtype="float32", attr=weight_attr
    )
    bias = paddle.create_parameter(shape=[3], dtype="float32")

    projected = paddle.nn.functional.linear(x=data, weight=weight, bias=bias)
    return data, paddle.tensor.sum(projected), weight


def _prepare_data():
    """
    Build the feed data for the simple network.

    Returns:
        numpy.ndarray: float32 array of shape [1, 2, 2] holding the values
        1.0 through 4.0 in row-major order.
    """
    # One sample: lay the four values out flat, then give them the
    # [batch, 2, 2] shape expected by the network input.
    flat_values = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
    return flat_values.reshape(1, 2, 2)
50 51 52 53 54 55 56 57 58 59 60 61 62


def _is_cuda_available():
    """
    Check whether CUDA is available.

    Returns:
        bool: True if ``paddle.static.cuda_places()`` reports at least one
        device. False, after logging a warning, if it reports none or raises.
    """
    try:
        # Explicit check rather than ``assert``: assertions are stripped under
        # ``python -O``, which would make an empty device list return True.
        if len(paddle.static.cuda_places()) > 0:
            return True
        error = "no CUDA device was found"
    except Exception as e:
        error = e
    logging.warning(
        "You are using GPU version PaddlePaddle, but there is no GPU "
        "detected on your machine. Maybe CUDA devices is not set properly."
        "\n Original Error is {}".format(error)
    )
    return False


def _is_npu_available():
    """
    Check whether NPU is available.

    Returns:
        bool: True if ``paddle.static.npu_places()`` reports at least one
        device. False, after logging a warning, if it reports none or raises.
    """
    try:
        # Explicit check rather than ``assert``: assertions are stripped under
        # ``python -O``, which would make an empty device list return True.
        if len(paddle.static.npu_places()) > 0:
            return True
        error = "no NPU device was found"
    except Exception as e:
        error = e
    logging.warning(
        "You are using NPU version PaddlePaddle, but there is no NPU "
        "detected on your machine. Maybe NPU devices is not set properly."
        "\n Original Error is {}".format(error)
    )
    return False


def _is_xpu_available():
    """
    Check whether XPU is available.

    Returns:
        bool: True if ``paddle.static.xpu_places()`` reports at least one
        device. False, after logging a warning, if it reports none or raises.
    """
    try:
        # Explicit check rather than ``assert``: assertions are stripped under
        # ``python -O``, which would make an empty device list return True.
        if len(paddle.static.xpu_places()) > 0:
            return True
        error = "no XPU device was found"
    except Exception as e:
        error = e
    logging.warning(
        "You are using XPU version PaddlePaddle, but there is no XPU "
        "detected on your machine. Maybe XPU devices is not set properly."
        "\n Original Error is {}".format(error)
    )
    return False


def _run_dygraph_single(use_cuda, use_xpu, use_npu):
    """
    Run one forward/backward/optimizer step of a small linear layer in
    dygraph mode on a single CPU/GPU/XPU/NPU.

    The first flag that is set, checked in the order CUDA, XPU, NPU, picks
    the device; if none is set the CPU is used.

    Args:
        use_cuda (bool): Whether running with CUDA.
        use_xpu (bool): Whether running with XPU.
        use_npu (bool): Whether running with NPU.
    """
    paddle.disable_static()

    if use_cuda:
        device = 'gpu'
    elif use_xpu:
        device = 'xpu'
    elif use_npu:
        device = 'npu'
    else:
        device = 'cpu'
    paddle.set_device(device)

    # Constant initializers: weight filled with 0.5, bias with 1.0.
    layer = paddle.nn.Linear(
        2,
        4,
        weight_attr=paddle.ParamAttr(
            name="weight",
            initializer=paddle.nn.initializer.Constant(value=0.5),
        ),
        bias_attr=paddle.ParamAttr(
            name="bias",
            initializer=paddle.nn.initializer.Constant(value=1.0),
        ),
    )

    features = paddle.to_tensor(_prepare_data())
    total = paddle.tensor.sum(layer(features))
    total.backward()

    optimizer = paddle.optimizer.Adam(
        learning_rate=0.001, parameters=layer.parameters()
    )
    optimizer.step()


def _run_static_single(use_cuda, use_xpu, use_npu):
    """
    Test the simple network by running it directly with an executor in
    static mode, using one CPU/GPU/XPU/NPU.

    The first flag that is set, checked in the order CUDA, XPU, NPU, picks
    the place (device index 0); if none is set the CPU is used. Static mode
    is switched off again before returning, including when the run raises.

    Args:
        use_cuda (bool): Whether running with CUDA.
        use_xpu (bool): Whether running with XPU.
        use_npu (bool): Whether running with NPU.
    """
    paddle.enable_static()
    try:
        # Fresh scope and programs so the check does not touch global state.
        with paddle.static.scope_guard(paddle.static.Scope()):
            train_prog = paddle.static.Program()
            startup_prog = paddle.static.Program()
            startup_prog.random_seed = 1
            with paddle.static.program_guard(train_prog, startup_prog):
                data, out, weight = _simple_network()
                # First (parameter, gradient) pair for the weight.
                param_grads = paddle.static.append_backward(
                    out, parameter_list=[weight.name]
                )[0]

            if use_cuda:
                place = paddle.CUDAPlace(0)
            elif use_xpu:
                place = paddle.XPUPlace(0)
            elif use_npu:
                place = paddle.NPUPlace(0)
            else:
                place = paddle.CPUPlace()

            exe = paddle.static.Executor(place)
            exe.run(startup_prog)
            exe.run(
                train_prog,
                feed={data.name: _prepare_data()},
                fetch_list=[out.name, param_grads[1].name],
            )
    finally:
        # Always leave static mode, even if building or running the program
        # failed; otherwise the process would be left in static mode.
        paddle.disable_static()


def train_for_run_parallel():
    """
    Training script for the parallel training check.

    Initializes the parallel environment, wraps a small two-layer linear
    network in ``paddle.DataParallel`` and runs a single training step
    (forward, MSE loss, backward, Adam step, gradient clearing) on random
    data.
    """

    # Declared inside the function; per the original note this is done to
    # avoid a cyclic import.
    class LinearNet(paddle.nn.Layer):
        """
        Simple fully connected network for the parallel training check.
        """

        def __init__(self):
            super().__init__()
            self._linear1 = paddle.nn.Linear(10, 10)
            self._linear2 = paddle.nn.Linear(10, 1)

        def forward(self, x):
            """
            Apply the two linear layers in sequence.
            """
            hidden = self._linear1(x)
            return self._linear2(hidden)

    paddle.distributed.init_parallel_env()

    model = paddle.DataParallel(LinearNet())
    criterion = paddle.nn.MSELoss()
    optimizer = paddle.optimizer.Adam(
        learning_rate=0.001, parameters=model.parameters()
    )

    # Random inputs are drawn before the forward pass and random labels
    # after it, matching the original call order.
    features = paddle.randn([10, 10], 'float32')
    predictions = model(features)
    targets = paddle.randn([10, 1], 'float32')

    loss = criterion(predictions, targets)
    loss.backward()
    optimizer.step()
    optimizer.clear_grad()


def _run_parallel(device_list):
    """
    Test the simple network in data parallel mode by spawning one training
    process per entry of ``device_list``.

    Args:
        device_list (list): The specified devices. Only its length is read
            here, as the number of processes to spawn.
    """
    process_count = len(device_list)
    paddle.distributed.spawn(train_for_run_parallel, nprocs=process_count)
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252


def run_check():
    """
    Check whether PaddlePaddle is installed correctly and running successfully
    on your system.

    The check first runs a small network on a single device in static and
    dygraph mode. If more than one device is found, it then runs a data
    parallel training step. A failure in the parallel step is logged and
    re-raised.

    Examples:
        .. code-block:: python

            import paddle

            paddle.utils.run_check()
            # Running verify PaddlePaddle program ...
            # W1010 07:21:14.972093  8321 device_context.cc:338] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 11.0, Runtime API Version: 10.1
            # W1010 07:21:14.979770  8321 device_context.cc:346] device: 0, cuDNN Version: 7.6.
            # PaddlePaddle works well on 1 GPU.
            # PaddlePaddle works well on 8 GPUs.
            # PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now.
    """

    print("Running verify PaddlePaddle program ... ")

    # Only the backend this build was compiled with is probed.
    use_cuda = use_xpu = use_npu = False
    if paddle.is_compiled_with_cuda():
        use_cuda = _is_cuda_available()
    elif paddle.is_compiled_with_xpu():
        use_xpu = _is_xpu_available()
    elif paddle.is_compiled_with_npu():
        use_npu = _is_npu_available()

    # Fall back to a single CPU place when no accelerator is usable.
    if use_cuda:
        device_str, device_list = "GPU", paddle.static.cuda_places()
    elif use_xpu:
        device_str, device_list = "XPU", paddle.static.xpu_places()
    elif use_npu:
        device_str, device_list = "NPU", paddle.static.npu_places()
    else:
        device_str = "CPU"
        device_list = paddle.static.cpu_places(device_count=1)
    device_count = len(device_list)

    _run_static_single(use_cuda, use_xpu, use_npu)
    _run_dygraph_single(use_cuda, use_xpu, use_npu)
    print("PaddlePaddle works well on 1 {}.".format(device_str))

    try:
        if device_count > 1:
            _run_parallel(device_list)
            multi_device_report = "PaddlePaddle works well on {} {}s.".format(
                device_count, device_str
            )
            print(multi_device_report)
        print(
            "PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now."
        )
    except Exception as e:
        problem_report = (
            "PaddlePaddle meets some problem with {} {}s. This may be caused by:"
            "\n 1. There is not enough GPUs visible on your system"
            "\n 2. Some GPUs are occupied by other process now"
            "\n 3. NVIDIA-NCCL2 is not installed correctly on your system. Please follow instruction on https://github.com/NVIDIA/nccl-tests "
            "\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html"
        ).format(device_count, device_str)
        logging.warning(problem_report)

        logging.warning("\n Original Error is: {}".format(e))
        single_device_report = (
            "PaddlePaddle is installed successfully ONLY for single {}! "
            "Let's start deep learning with PaddlePaddle now."
        ).format(device_str)
        print(single_device_report)
        raise e