From 9a2a4b5f65aa8d191cc07ab60a3d3452c1c8a1aa Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Wed, 14 Oct 2020 20:21:30 +0800
Subject: [PATCH] Support setting xpu place in dygraph mode (#27909)

* support setting xpu place

* add ut, test=kunlun
---
 .../operators/metrics/accuracy_op_xpu.cc     |  4 +-
 paddle/fluid/pybind/pybind.cc                |  4 ++
 python/paddle/device.py                      | 54 +++++++++++++------
 .../fluid/tests/unittests/test_device.py     | 24 +++++++++
 4 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc
index 294539b696..d73e46df34 100644
--- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc
+++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc
@@ -81,9 +81,9 @@ class AccuracyXPUKernel : public framework::OpKernel {
       memory::Copy(platform::CPUPlace(), label_int64_host,
                    BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
                    label_data, label_int64_size);
-      for (int i = 0; i < num_samples; ++i) {
+      for (size_t i = 0; i < num_samples; ++i) {
         label_int32_host[i] = label_int64_host[i];
-        for (int j = 0; j < class_dim; ++j) {
+        for (size_t j = 0; j < class_dim; ++j) {
           indices_int32_host[i * class_dim + j] =
               indices_int64_host[i * class_dim + j];
         }
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 619133c61e..3d9d204991 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -1468,6 +1468,7 @@ All parameter, weight, gradient are variables in Paddle.
              std::exit(-1);
 #endif
            })
+#ifdef PADDLE_WITH_XPU
       .def("_type", &PlaceIndex)
       .def("_equals", &IsSamePlace)
       .def("_equals", &IsSamePlace)
@@ -1475,6 +1476,9 @@ All parameter, weight, gradient are variables in Paddle.
       .def("_equals", &IsSamePlace)
       .def("_equals", &IsSamePlace)
+      .def("get_device_id",
+           [](const platform::XPUPlace &self) { return self.GetDeviceId(); })
+#endif
       .def("__str__", string::to_string);
 
   py::class_(m, "CPUPlace", R"DOC(
diff --git a/python/paddle/device.py b/python/paddle/device.py
index c2f331caa8..16bb1123e6 100644
--- a/python/paddle/device.py
+++ b/python/paddle/device.py
@@ -103,15 +103,15 @@ def get_cudnn_version():
 
 def set_device(device):
     """
-    Paddle supports running calculations on various types of devices, including CPU and GPU.
+    Paddle supports running calculations on various types of devices, including CPU, GPU and XPU.
     They are represented by string identifiers. This function can specify the global device
     which the OP will run.
 
     Parameters:
         device(str): This parameter determines the specific running device.
-            It can be ``cpu`` or ``gpu:0``. When ``device`` is ``cpu``, the
-            program is running on the cpu. When ``device`` is ``gpu``, the
-            program is running ont the gpu.
+            It can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
+            index of the GPUs or XPUs.
+
     Examples:
 
         .. code-block:: python
@@ -132,20 +132,37 @@ def set_device(device):
                 "The device should not be 'gpu', " \
                 "since PaddlePaddle is not compiled with CUDA")
         place = core.CUDAPlace(ParallelEnv().dev_id)
+    elif lower_device == 'xpu':
+        if not core.is_compiled_with_xpu():
+            raise ValueError(
+                "The device should not be 'xpu', " \
+                "since PaddlePaddle is not compiled with XPU")
+        place = core.XPUPlace(ParallelEnv().dev_id)
     else:
-        avaliable_device = re.match(r'gpu:\d+', lower_device)
-        if not avaliable_device:
+        avaliable_gpu_device = re.match(r'gpu:\d+', lower_device)
+        avaliable_xpu_device = re.match(r'xpu:\d+', lower_device)
+        if not avaliable_gpu_device and not avaliable_xpu_device:
             raise ValueError(
-                "The device must be a string which is like 'cpu', 'gpu' or 'gpu:0'"
+                "The device must be a string which is like 'cpu', 'gpu', 'gpu:x', 'xpu' or 'xpu:x'"
             )
-        if not core.is_compiled_with_cuda():
-            raise ValueError(
-                "The device should not be {}, since PaddlePaddle is " \
-                "not compiled with CUDA".format(avaliable_device))
-        device_info_list = device.split(':', 1)
-        device_id = device_info_list[1]
-        device_id = int(device_id)
-        place = core.CUDAPlace(device_id)
+        if avaliable_gpu_device:
+            if not core.is_compiled_with_cuda():
+                raise ValueError(
+                    "The device should not be {}, since PaddlePaddle is " \
+                    "not compiled with CUDA".format(avaliable_gpu_device))
+            device_info_list = device.split(':', 1)
+            device_id = device_info_list[1]
+            device_id = int(device_id)
+            place = core.CUDAPlace(device_id)
+        if avaliable_xpu_device:
+            if not core.is_compiled_with_xpu():
+                raise ValueError(
+                    "The device should not be {}, since PaddlePaddle is " \
+                    "not compiled with XPU".format(avaliable_xpu_device))
+            device_info_list = device.split(':', 1)
+            device_id = device_info_list[1]
+            device_id = int(device_id)
+            place = core.XPUPlace(device_id)
     framework._set_expected_place(place)
 
     return place
 
@@ -153,8 +170,8 @@ def set_device(device):
 
 def get_device():
     """
     This funciton can get the current global device of the program is running.
-    It's a string which is like 'cpu' and 'gpu:0'. if the global device is not
-    set, it will return a string which is 'gpu:0' when cuda is avaliable or it
+    It's a string which is like 'cpu', 'gpu:x' and 'xpu:x'. If the global device is not
+    set, it will return a string which is 'gpu:x' when cuda is available or it
     will return a string which is 'cpu' when cuda is not avaliable.
     Examples:
@@ -173,5 +190,8 @@ def get_device():
     elif isinstance(place, core.CUDAPlace):
         device_id = place.get_device_id()
         device = 'gpu:' + str(device_id)
+    elif isinstance(place, core.XPUPlace):
+        device_id = place.get_device_id()
+        device = 'xpu:' + str(device_id)
 
     return device
diff --git a/python/paddle/fluid/tests/unittests/test_device.py b/python/paddle/fluid/tests/unittests/test_device.py
index 0ab56f9244..195337e80d 100644
--- a/python/paddle/fluid/tests/unittests/test_device.py
+++ b/python/paddle/fluid/tests/unittests/test_device.py
@@ -51,6 +51,19 @@ class TestStaticDeviceManage(unittest.TestCase):
         self.assertEqual(isinstance(exe.place, core.CUDAPlace), True)
         self.assertEqual(device, "gpu:0")
 
+    def test_xpu_device(self):
+        if core.is_compiled_with_xpu():
+            out1 = paddle.zeros(shape=[1, 3], dtype='float32')
+            out2 = paddle.ones(shape=[1, 3], dtype='float32')
+            out3 = paddle.concat(x=[out1, out2], axis=0)
+            paddle.set_device('xpu:0')
+            exe = paddle.fluid.Executor()
+            exe.run(paddle.fluid.default_startup_program())
+            res = exe.run(fetch_list=[out3])
+            device = paddle.get_device()
+            self.assertEqual(isinstance(exe.place, core.XPUPlace), True)
+            self.assertEqual(device, "xpu:0")
+
 
 class TestImperativeDeviceManage(unittest.TestCase):
     def test_cpu(self):
@@ -78,6 +91,17 @@ class TestImperativeDeviceManage(unittest.TestCase):
                              core.CUDAPlace), True)
             self.assertEqual(device, "gpu:0")
 
+    def test_xpu(self):
+        if core.is_compiled_with_xpu():
+            with fluid.dygraph.guard():
+                out = paddle.to_tensor([1, 2])
+                device = paddle.get_device()
+                self.assertEqual(
+                    isinstance(framework._current_expected_place(),
+                               core.XPUPlace), True)
+                self.assertTrue(out.place.is_xpu_place())
+                self.assertEqual(device, "xpu:0")
+
 
 if __name__ == '__main__':
     unittest.main()
--
GitLab
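
Below is a minimal usage sketch of the Python API this patch extends, assuming a PaddlePaddle build compiled with XPU support (on a build without XPU, set_device('xpu:0') raises the ValueError added above); the tensor shape here is illustrative only:

    import paddle

    # Select the first XPU as the global device; OPs created afterwards run on it.
    paddle.set_device('xpu:0')

    # In dygraph mode, new tensors are now placed on the XPU.
    x = paddle.ones(shape=[1, 3], dtype='float32')

    # get_device() reports the current global device as a string, e.g. 'xpu:0'.
    print(paddle.get_device())

The same 'cpu', 'gpu:x' and 'xpu:x' string convention applies across backends, so switching devices only changes the argument passed to set_device().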