diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index 76edb3910cccedf761721e7ca34e49fc1054e59b..c4ac5aa3046a9c4cf041668ddab94dc250d46fd2 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -14,6 +14,9 @@ limitations under the License. */
 
 #include "paddle/fluid/platform/gpu_info.h"
 #include <cstdlib>
+#include <memory>
+#include <mutex>
+#include <vector>
 
 #include "gflags/gflags.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
@@ -39,6 +42,13 @@ DECLARE_uint64(gpu_memory_limit_mb);
 
 constexpr static float fraction_reserve_gpu_memory = 0.05f;
 
+// Per-device property cache used by GetDeviceProperties(); slot i belongs to
+// GPU i. The single flag guards the sizing of both vectors, and each
+// per-device flag guards the one-time fill of the matching g_device_props slot.
+static std::vector<paddle::gpuDeviceProp> g_device_props;
+static std::vector<std::unique_ptr<std::once_flag>> g_device_props_init_flags;
+static std::once_flag g_device_props_size_init_flag;
+
 USE_GPU_MEM_STAT;
 namespace paddle {
 namespace platform {
@@ -297,6 +303,44 @@ std::vector<int> GetSelectedDevices() {
   return devices;
 }
 
+// Returns the cached properties of GPU `id`, querying the driver only on the
+// first request for that device. An id of -1 selects the current device; any
+// other id outside [0, device count) raises an OutOfRange error.
+const gpuDeviceProp &GetDeviceProperties(int id) {
+  // Size the cache and create one once_flag per device, exactly once.
+  std::call_once(g_device_props_size_init_flag, [] {
+    const int device_count = platform::GetCUDADeviceCount();
+    g_device_props.resize(device_count);
+    g_device_props_init_flags.resize(device_count);
+    for (auto &flag : g_device_props_init_flags) {
+      flag = std::make_unique<std::once_flag>();
+    }
+  });
+
+  const int device_id = (id == -1) ? platform::GetCurrentDeviceId() : id;
+  const int cached_count = static_cast<int>(g_device_props.size());
+  const bool in_range = device_id >= 0 && device_id < cached_count;
+  if (!in_range) {
+    PADDLE_THROW(platform::errors::OutOfRange(
+        "The device id %d is out of range [0, %d), where %d is the number of "
+        "devices on this machine. Because the device id should be greater than "
+        "or equal to zero and smaller than the number of gpus. Please input "
+        "appropriate device again!",
+        device_id, cached_count, cached_count));
+  }
+
+  gpuDeviceProp *prop = &g_device_props[device_id];
+  std::call_once(*g_device_props_init_flags[device_id], [prop, device_id] {
+#ifdef PADDLE_WITH_CUDA
+    PADDLE_ENFORCE_CUDA_SUCCESS(cudaGetDeviceProperties(prop, device_id));
+#else
+    PADDLE_ENFORCE_CUDA_SUCCESS(hipGetDeviceProperties(prop, device_id));
+#endif
+  });
+
+  return *prop;
+}
+
 void SetDeviceId(int id) {
   // TODO(qijun): find a better way to cache the cuda device count
   PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(),
diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h
index ef7f93a61dbfb36ee4db5ee2f48879815414ecbc..401873dcd77da214662204247166bd14a55a1d9a 100644
--- a/paddle/fluid/platform/gpu_info.h
+++ b/paddle/fluid/platform/gpu_info.h
@@ -67,6 +67,9 @@ dim3 GetGpuMaxGridDimSize(int);
 //! Get a list of device ids from environment variable or use all.
 std::vector<int> GetSelectedDevices();
 
+//! Get the cached properties of GPU `id`; id == -1 means the current device.
+const gpuDeviceProp &GetDeviceProperties(int id);
+
 //! Set the GPU device id for next execution.
 void SetDeviceId(int device_id);
 
diff --git a/paddle/fluid/platform/type_defs.h b/paddle/fluid/platform/type_defs.h
index 31784a04265803a53d339f8c30eecee437f3294f..f46bd1a0bdfa4afbea19ff108aeaba56fa614f6c 100644
--- a/paddle/fluid/platform/type_defs.h
+++ b/paddle/fluid/platform/type_defs.h
@@ -27,11 +27,13 @@ namespace paddle {
 using gpuStream_t = hipStream_t;
 using gpuError_t = hipError_t;
 using gpuEvent_t = hipEvent_t;
+using gpuDeviceProp = hipDeviceProp_t;  // filled by hipGetDeviceProperties
 #else
 #define gpuSuccess cudaSuccess
 using gpuStream_t = cudaStream_t;
 using gpuError_t = cudaError_t;
 using gpuEvent_t = cudaEvent_t;
+using gpuDeviceProp = cudaDeviceProp;  // filled by cudaGetDeviceProperties
 #endif
 
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index c00f529f61793f4c06fec7f0e6ee41bd5aec7733..b27191734b034421686833eced56c880a3d335f7 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -2286,6 +2286,31 @@ All parameter, weight, gradient are variables in Paddle.
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
   m.def("cuda_empty_cache", platform::EmptyCache);
+  // Bind the property struct before the function that returns it: pybind11
+  // renders a def()'s signature immediately and needs the type registered.
+  py::class_<gpuDeviceProp>(m, "_gpuDeviceProperties")
+      .def_readonly("name", &gpuDeviceProp::name)
+      .def_readonly("major", &gpuDeviceProp::major)
+      .def_readonly("minor", &gpuDeviceProp::minor)
+      .def_readonly("is_multi_gpu_board", &gpuDeviceProp::isMultiGpuBoard)
+      .def_readonly("is_integrated", &gpuDeviceProp::integrated)
+      .def_readonly("multi_processor_count",
+                    &gpuDeviceProp::multiProcessorCount)
+      .def_readonly("total_memory", &gpuDeviceProp::totalGlobalMem)
+      .def("__repr__", [](const gpuDeviceProp &gpu_device_prop) {
+        std::ostringstream stream;
+        stream << "_gpuDeviceProperties(name='" << gpu_device_prop.name
+               << "', major=" << gpu_device_prop.major
+               << ", minor=" << gpu_device_prop.minor << ", total_memory="
+               << gpu_device_prop.totalGlobalMem / (1024 * 1024)
+               << "MB, multi_processor_count="
+               << gpu_device_prop.multiProcessorCount << ")";
+        return stream.str();
+      });
+
+  // GetDeviceProperties returns a reference to cached data, so copy it out.
+  m.def("get_device_properties", platform::GetDeviceProperties,
+        py::return_value_policy::copy);
 
 #if !defined(PADDLE_WITH_HIP) && !defined(_WIN32)
   m.def("nvprof_init", platform::CudaProfilerInit);
diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py
index 4d1934aeed9fb5dfc0d9e2f03ac5c98eb6ab31d1..a559df21ad2413e049ac922f4a12e0e6ae4281ba 100644
--- a/python/paddle/device/cuda/__init__.py
+++ b/python/paddle/device/cuda/__init__.py
@@ -27,6 +27,7 @@ __all__ = [
     'device_count',
     'empty_cache',
     'stream_guard',
+    'get_device_properties',
 ]
 
 
@@ -204,3 +205,69 @@ def stream_guard(stream):
             yield
         finally:
             stream = _set_current_stream(pre_stream)
+
+
+def get_device_properties(device=None):
+    '''
+    Return the properties of given device.
+
+    Args:
+        device(paddle.CUDAPlace|int|str, optional): The device, the id of the
+            device or the string name of device like 'gpu:x' which to get the
+            properties of the device from. If device is None, the device is the
+            current device. Default: None.
+
+    Returns:
+        _gpuDeviceProperties: the properties of the device which include ASCII string
+        identifying device, major compute capability, minor compute capability, global
+        memory available on device and the number of multiprocessors on the device.
+
+    Raises:
+        ValueError: if Paddle is CPU-only or ``device`` has an unsupported type or form.
+
+    Examples:
+
+        .. code-block:: python
+
+            # required: gpu
+
+            import paddle
+            paddle.device.cuda.get_device_properties()
+            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)
+
+            paddle.device.cuda.get_device_properties(0)
+            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)
+
+            paddle.device.cuda.get_device_properties('gpu:0')
+            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)
+
+            paddle.device.cuda.get_device_properties(paddle.CUDAPlace(0))
+            # _gpuDeviceProperties(name='A100-SXM4-40GB', major=8, minor=0, total_memory=40536MB, multi_processor_count=108)
+    '''
+    if not core.is_compiled_with_cuda():
+        raise ValueError(
+            "The API paddle.device.cuda.get_device_properties is not supported in "
+            "CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support "
+            "to call this API.")
+
+    if device is None:
+        device_id = -1  # the C++ side resolves -1 to the current device
+    elif isinstance(device, int) and not isinstance(device, bool):
+        device_id = device
+    elif isinstance(device, core.CUDAPlace):
+        device_id = device.get_device_id()
+    elif isinstance(device, str):
+        # Only a plain non-negative index may follow 'gpu:' (rejects 'gpu:-1').
+        if not (device.startswith('gpu:') and device[4:].isdigit()):
+            raise ValueError(
+                "The current string {} is not expected. Because paddle.device."
+                "cuda.get_device_properties only support string which is like 'gpu:x'. "
+                "Please input appropriate string again!".format(device))
+        device_id = int(device[4:])
+    else:
+        raise ValueError(
+            "The device type {} is not expected. Because paddle.device.cuda."
+            "get_device_properties only support int, str or paddle.CUDAPlace. "
+            "Please input appropriate device again!".format(device))
+
+    return core.get_device_properties(device_id)
diff --git a/python/paddle/fluid/tests/unittests/test_get_device_properties.py b/python/paddle/fluid/tests/unittests/test_get_device_properties.py
new file mode 100644
index 0000000000000000000000000000000000000000..4cfb91bfae93e7e9520f565d35aaff878d608007
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_get_device_properties.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import unittest
+from paddle.fluid import core
+from paddle.device.cuda import device_count, get_device_properties
+
+
+# Skipped (rather than silently passing) when Paddle is built without CUDA.
+@unittest.skipUnless(core.is_compiled_with_cuda(),
+                     "get_device_properties requires a CUDA build")
+class TestGetDeviceProperties(unittest.TestCase):
+    """Each accepted form of ``device`` must yield a properties object."""
+
+    def test_get_device_properties_default(self):
+        # No argument: properties of the current device.
+        self.assertIsNotNone(get_device_properties())
+
+    def test_get_device_properties_str(self):
+        # String form 'gpu:x'.
+        self.assertIsNotNone(get_device_properties('gpu:0'))
+
+    def test_get_device_properties_int(self):
+        # Integer ids: query every device reported by device_count().
+        for device_id in range(device_count()):
+            self.assertIsNotNone(get_device_properties(device_id))
+
+    def test_get_device_properties_CUDAPlace(self):
+        # Place object form.
+        place = core.CUDAPlace(0)
+        self.assertIsNotNone(get_device_properties(place))
+
+
+# Skipped (rather than silently passing) when Paddle is built without CUDA.
+@unittest.skipUnless(core.is_compiled_with_cuda(),
+                     "get_device_properties requires a CUDA build")
+class TestGetDevicePropertiesError(unittest.TestCase):
+    """Invalid ``device`` arguments must raise the expected exceptions."""
+
+    def test_error_api(self):
+        # An id past the last device is reported as IndexError.
+        out_of_range_id = device_count() + 1
+        with self.assertRaises(IndexError):
+            get_device_properties(out_of_range_id)
+
+        # A string without the 'gpu:' prefix is rejected.
+        malformed_name = 'gpu1'
+        with self.assertRaises(ValueError):
+            get_device_properties(malformed_name)
+
+        # Only int, str and CUDAPlace are accepted; a float is not.
+        unsupported_type = float(device_count())
+        with self.assertRaises(ValueError):
+            get_device_properties(unsupported_type)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run the test cases defined in this module