From bfcb5e52350bd63d9ea8b3505ae7914bdd4ee9b4 Mon Sep 17 00:00:00 2001
From: JiabinYang <marsyang199376@gmail.com>
Date: Wed, 19 Dec 2018 13:38:58 +0000
Subject: [PATCH] test=develop, fix GPU compile error in prefetch, and fix
 hsigmoid/nce unit test failures on GPU

---
 .../fluid/operators/distributed/parameter_prefetch.h   | 10 +++++++---
 .../tests/unittests/test_hsigmoid_remote_table_op.py   |  2 --
 .../fluid/tests/unittests/test_nce_remote_table_op.py  |  2 --
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/operators/distributed/parameter_prefetch.h b/paddle/fluid/operators/distributed/parameter_prefetch.h
index 89671bd74..47d082c4a 100644
--- a/paddle/fluid/operators/distributed/parameter_prefetch.h
+++ b/paddle/fluid/operators/distributed/parameter_prefetch.h
@@ -39,6 +39,9 @@ void prefetch_with_reconstruct(const std::string& id_name,
                                const framework::ExecutionContext& context,
                                const framework::Scope& scope,
                                framework::LoDTensor* original) {
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  auto& actual_ctx = *pool.Get(context.GetPlace());
+
   prefetch(id_name, out_name, table_names, epmap, height_sections, context,
            scope);
   auto& out = scope.FindVar(out_name)->Get<framework::LoDTensor>();
@@ -62,9 +65,10 @@ void prefetch_with_reconstruct(const std::string& id_name,
       PADDLE_THROW("paddle is not compiled with CUDA!");
 #else
       auto stream =
-          static_cast<platform::CUDADeviceContext*>(actual_ctx)->stream();
-      memory::Copy(boost::get<platform::CUDAPlace>(ids.place()), out_rows,
-                   cpu_place, original_row, original_width * sizeof(T), stream);
+          static_cast<platform::CUDADeviceContext*>(&actual_ctx)->stream();
+      memory::Copy(boost::get<platform::CUDAPlace>(ids.place()), original_row,
+                   platform::CPUPlace(), out_rows, original_width * sizeof(T),
+                   stream);
 #endif
     }
   }
diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_remote_table_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_remote_table_op.py
index 9ed6c94bd..da343dd50 100644
--- a/python/paddle/fluid/tests/unittests/test_hsigmoid_remote_table_op.py
+++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_remote_table_op.py
@@ -253,8 +253,6 @@ class TestListenAndServOp(unittest.TestCase):
         port1 = self._get_pserver_port(p1.pid)
 
         places = [core.CPUPlace()]
-        if core.is_compiled_with_cuda():
-            places.append(core.CUDAPlace(0))
 
         for place in places:
             self._run_hsigmoid_op_one_pserver(place, port0)
diff --git a/python/paddle/fluid/tests/unittests/test_nce_remote_table_op.py b/python/paddle/fluid/tests/unittests/test_nce_remote_table_op.py
index b5f93f93a..cc6f40de8 100644
--- a/python/paddle/fluid/tests/unittests/test_nce_remote_table_op.py
+++ b/python/paddle/fluid/tests/unittests/test_nce_remote_table_op.py
@@ -221,8 +221,6 @@ class TestListenAndServOp(unittest.TestCase):
         port1 = self._get_pserver_port(p1.pid)
 
         places = [core.CPUPlace()]
-        if core.is_compiled_with_cuda():
-            places.append(core.CUDAPlace(0))
 
         for place in places:
             self._run_nce_op_two_pserver(place, port0, port1)
-- 
GitLab