diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc index 76edb3910cccedf761721e7ca34e49fc1054e59b..c4ac5aa3046a9c4cf041668ddab94dc250d46fd2 100644 --- a/paddle/fluid/platform/gpu_info.cc +++ b/paddle/fluid/platform/gpu_info.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/platform/gpu_info.h" #include +#include +#include #include "gflags/gflags.h" #include "paddle/fluid/platform/cuda_device_guard.h" @@ -39,6 +41,10 @@ DECLARE_uint64(gpu_memory_limit_mb); constexpr static float fraction_reserve_gpu_memory = 0.05f; +static std::once_flag g_device_props_size_init_flag; +static std::vector> g_device_props_init_flags; +static std::vector g_device_props; + USE_GPU_MEM_STAT; namespace paddle { namespace platform { @@ -297,6 +303,44 @@ std::vector GetSelectedDevices() { return devices; } +const gpuDeviceProp &GetDeviceProperties(int id) { + std::call_once(g_device_props_size_init_flag, [&] { + int gpu_num = 0; + gpu_num = platform::GetCUDADeviceCount(); + g_device_props_init_flags.resize(gpu_num); + g_device_props.resize(gpu_num); + for (int i = 0; i < gpu_num; ++i) { + g_device_props_init_flags[i] = std::make_unique(); + } + }); + + if (id == -1) { + id = platform::GetCurrentDeviceId(); + } + + if (id < 0 || id >= static_cast(g_device_props.size())) { + PADDLE_THROW(platform::errors::OutOfRange( + "The device id %d is out of range [0, %d), where %d is the number of " + "devices on this machine. Because the device id should be greater than " + "or equal to zero and smaller than the number of gpus. 
Please input " + "appropriate device again!", + id, static_cast(g_device_props.size()), + static_cast(g_device_props.size()))); + } + + std::call_once(*(g_device_props_init_flags[id]), [&] { +#ifdef PADDLE_WITH_CUDA + PADDLE_ENFORCE_CUDA_SUCCESS( + cudaGetDeviceProperties(&g_device_props[id], id)); +#else + PADDLE_ENFORCE_CUDA_SUCCESS( + hipGetDeviceProperties(&g_device_props[id], id)); +#endif + }); + + return g_device_props[id]; +} + void SetDeviceId(int id) { // TODO(qijun): find a better way to cache the cuda device count PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h index ef7f93a61dbfb36ee4db5ee2f48879815414ecbc..401873dcd77da214662204247166bd14a55a1d9a 100644 --- a/paddle/fluid/platform/gpu_info.h +++ b/paddle/fluid/platform/gpu_info.h @@ -67,6 +67,9 @@ dim3 GetGpuMaxGridDimSize(int); //! Get a list of device ids from environment variable or use all. std::vector GetSelectedDevices(); +//! Get the properties of the ith GPU device. +const gpuDeviceProp &GetDeviceProperties(int id); + //! Set the GPU device id for next execution. 
void SetDeviceId(int device_id); diff --git a/paddle/fluid/platform/type_defs.h b/paddle/fluid/platform/type_defs.h index 31784a04265803a53d339f8c30eecee437f3294f..f46bd1a0bdfa4afbea19ff108aeaba56fa614f6c 100644 --- a/paddle/fluid/platform/type_defs.h +++ b/paddle/fluid/platform/type_defs.h @@ -27,11 +27,13 @@ namespace paddle { using gpuStream_t = hipStream_t; using gpuError_t = hipError_t; using gpuEvent_t = hipEvent_t; +using gpuDeviceProp = hipDeviceProp_t; #else #define gpuSuccess cudaSuccess using gpuStream_t = cudaStream_t; using gpuError_t = cudaError_t; using gpuEvent_t = cudaEvent_t; +using gpuDeviceProp = cudaDeviceProp; #endif } // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index c00f529f61793f4c06fec7f0e6ee41bd5aec7733..b27191734b034421686833eced56c880a3d335f7 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -2286,6 +2286,31 @@ All parameter, weight, gradient are variables in Paddle. #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) m.def("get_cuda_device_count", platform::GetCUDADeviceCount); m.def("cuda_empty_cache", platform::EmptyCache); + m.def("get_device_properties", + [](int id) -> const gpuDeviceProp & { + return platform::GetDeviceProperties(id); + }, + py::return_value_policy::copy); + + py::class_(m, "_gpuDeviceProperties") + .def_readonly("name", &gpuDeviceProp::name) + .def_readonly("major", &gpuDeviceProp::major) + .def_readonly("minor", &gpuDeviceProp::minor) + .def_readonly("is_multi_gpu_board", &gpuDeviceProp::isMultiGpuBoard) + .def_readonly("is_integrated", &gpuDeviceProp::integrated) + .def_readonly("multi_processor_count", + &gpuDeviceProp::multiProcessorCount) + .def_readonly("total_memory", &gpuDeviceProp::totalGlobalMem) + .def("__repr__", [](const gpuDeviceProp &gpu_device_prop) { + std::ostringstream stream; + stream << "_gpuDeviceProperties(name='" << gpu_device_prop.name + << "', major=" << gpu_device_prop.major + << ", minor=" << 
def get_device_properties(device=None):
    '''
    Return the properties of given device.

    Args:
        device(paddle.CUDAPlace or int or str): The device, the id of the device
            or the string name of device like 'gpu:x' which to get the properties of
            the device from. If device is None, the device is the current device.
            Default: None.

    Returns:
        _gpuDeviceProperties: the properties of the device which include ASCII string
        identifying device, major compute capability, minor compute capability, global
        memory available on device and the number of multiprocessors on the device.

    Raises:
        ValueError: If PaddlePaddle is not compiled with CUDA, if ``device`` is a
            string that is not of the form 'gpu:x' with an integer ``x``, or if
            ``device`` is not an int, str or paddle.CUDAPlace.

    Examples:

        .. code-block:: python

            # required: gpu

            import paddle
            paddle.device.cuda.get_device_properties()
            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)

            paddle.device.cuda.get_device_properties(0)
            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)

            paddle.device.cuda.get_device_properties('gpu:0')
            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)

            paddle.device.cuda.get_device_properties(paddle.CUDAPlace(0))
            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)

    '''

    if not core.is_compiled_with_cuda():
        raise ValueError(
            "The API paddle.device.cuda.get_device_properties is not supported in "
            "CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support "
            "to call this API.")

    if device is None:
        # -1 is passed through to the core binding to mean "current device".
        device_id = -1
    elif isinstance(device, int):
        device_id = device
    elif isinstance(device, core.CUDAPlace):
        device_id = device.get_device_id()
    elif isinstance(device, str):
        bad_string_msg = (
            "The current string {} is not expected. Because paddle.device."
            "cuda.get_device_properties only support string which is like 'gpu:x'. "
            "Please input appropriate string again!".format(device))
        if not device.startswith('gpu:'):
            raise ValueError(bad_string_msg)
        try:
            device_id = int(device[4:])
        except ValueError:
            # A non-integer suffix (e.g. 'gpu:x', 'gpu:') used to leak int()'s
            # "invalid literal" error; report the API-level message instead.
            raise ValueError(bad_string_msg)
    else:
        # Report the offending *type*, which is what the message talks about.
        raise ValueError(
            "The device type {} is not expected. Because paddle.device.cuda."
            "get_device_properties only support int, str or paddle.CUDAPlace. "
            "Please input appropriate device again!".format(
                type(device).__name__))

    return core.get_device_properties(device_id)
import paddle
import unittest
from paddle.fluid import core
from paddle.device.cuda import device_count, get_device_properties


class TestGetDeviceProperties(unittest.TestCase):
    """Each accepted way of naming a device must yield a non-None result.

    Every test is a no-op when Paddle is not compiled with CUDA.
    """

    def test_get_device_properties_default(self):
        # No argument: query the current device.
        if not core.is_compiled_with_cuda():
            return
        self.assertIsNotNone(get_device_properties())

    def test_get_device_properties_str(self):
        # 'gpu:x' string form.
        if not core.is_compiled_with_cuda():
            return
        self.assertIsNotNone(get_device_properties('gpu:0'))

    def test_get_device_properties_int(self):
        # Integer form, for every device reported by device_count().
        if not core.is_compiled_with_cuda():
            return
        for device_id in range(device_count()):
            self.assertIsNotNone(get_device_properties(device_id))

    def test_get_device_properties_CUDAPlace(self):
        # CUDAPlace form.
        if not core.is_compiled_with_cuda():
            return
        place = core.CUDAPlace(0)
        self.assertIsNotNone(get_device_properties(place))


class TestGetDevicePropertiesError(unittest.TestCase):
    """Invalid device arguments must raise the expected exception types."""

    def test_error_api(self):
        if not core.is_compiled_with_cuda():
            return

        # An id past the last device is rejected with IndexError.
        with self.assertRaises(IndexError):
            get_device_properties(device_count() + 1)

        # A string without the 'gpu:' prefix is rejected with ValueError.
        with self.assertRaises(ValueError):
            get_device_properties('gpu1')

        # A float is not an accepted device type.
        with self.assertRaises(ValueError):
            get_device_properties(float(device_count()))


if __name__ == "__main__":
    unittest.main()