From c3bcd3c1e2f2e3a732f61f59db49ec1d15a22191 Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Wed, 8 Jan 2020 20:58:03 +0800
Subject: [PATCH] fix dygraph non zero gpu bug, test=develop (#22165)

---
 .../fluid/memory/allocation/cuda_allocator.cc |  2 +
 .../fluid/memory/allocation/cuda_allocator.h  |  2 +
 .../fluid/tests/unittests/CMakeLists.txt      |  2 +-
 .../test_imperative_using_non_zero_gpu.py     | 38 +++++++++++++++++++
 4 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc
index 6e0677dcb45..154ff1c87aa 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@@ -33,6 +33,8 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) {
 }
 
 Allocation* CUDAAllocator::AllocateImpl(size_t size) {
+  std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); });
+
   platform::CUDADeviceGuard guard(place_.device);
   void* ptr;
   auto result = cudaMalloc(&ptr, size);
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h
index 886f6e7a327..ae48d26ceeb 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_allocator.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/allocator.h"
 #include "paddle/fluid/platform/place.h"
 
@@ -33,6 +34,7 @@ class CUDAAllocator : public Allocator {
 
  private:
   platform::CUDAPlace place_;
+  std::once_flag once_flag_;
 };
 
 }  // namespace allocation
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 7ce08b58c1d..73e0c2394a6 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -330,5 +330,5 @@ set_tests_properties(test_parallel_executor_test_while_train test_parallel_execu
         test_parallel_executor_crf test_sync_batch_norm_op
         test_parallel_executor_feed_persistable_var
         test_parallel_executor_crf_auto_growth test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass
-        test_data_norm_op
+        test_data_norm_op test_imperative_using_non_zero_gpu
         test_buffer_shared_memory_reuse_pass PROPERTIES LABELS "RUN_TYPE=DIST")
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py
new file mode 100644
index 00000000000..0af8132acfd
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+import unittest
+from paddle.fluid.dygraph import to_variable, Embedding, guard
+import numpy as np
+
+
+class TestImperativeUsingNonZeroGpu(unittest.TestCase):
+    """Regression test for running dygraph code on a non-zero GPU.
+
+    Exercises CUDAPlace(1) and then CUDAPlace(0) in the same process.
+    """
+
+    def run_main(self, np_arr, place):
+        """Check that `np_arr` survives a to_variable()/numpy() round trip.
+
+        Args:
+            np_arr: numpy array to convert into a dygraph variable.
+            place: device place passed to `guard` for the dygraph scope.
+        """
+        with guard(place):
+            # The layer is never used afterwards; NOTE(review): presumably it
+            # is built only to force a parameter allocation on `place`.
+            embedding = Embedding(size=[10, 10])
+            var = to_variable(np_arr)
+            self.assertTrue(np.array_equal(np_arr, var.numpy()))
+
+    def test_non_zero_gpu(self):
+        """Run the round trip on GPU 1 first and then on GPU 0."""
+        if not fluid.is_compiled_with_cuda():
+            self.skipTest('PaddlePaddle is not compiled with CUDA')
+        # CUDAPlace(1) only exists when at least two devices are visible.
+        if fluid.core.get_cuda_device_count() < 2:
+            self.skipTest('requires at least 2 visible CUDA devices')
+
+        np_arr = np.random.random([11, 13]).astype('float32')
+        self.run_main(np_arr, fluid.CUDAPlace(1))
+        self.run_main(np_arr, fluid.CUDAPlace(0))
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab