Unverified Commit 5036cf03 authored by QI JUN, committed by GitHub

add helper function to get appropriate DeviceContext (#7066)

* add helper function to get appropriate DeviceContext
Parent a096c58e
@@ -27,9 +27,8 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-using DataTransformFn =
-    std::function<void(const std::vector<platform::DeviceContext*> ctx,
-                       const Variable& in, Variable* out)>;
+using DataTransformFn = std::function<void(const platform::DeviceContext* ctx,
+                                           const Variable& in, Variable* out)>;
 
 using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
 struct KernelTypePairHash {
......
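The hunk above narrows the transform signature: a DataTransformFn now receives a single const platform::DeviceContext* instead of a std::vector of contexts, since each transform runs against exactly one device context. For illustration, here is a self-contained sketch of the new shape of the alias; DeviceContext and Variable below are illustrative stubs, not the real framework headers.

#include <functional>
#include <iostream>

// Illustrative stubs standing in for paddle::platform / paddle::framework.
struct DeviceContext { const char* name; };
struct Variable { int data; };

// Mirrors the new alias: one context pointer, one input, one output.
using DataTransformFn = std::function<void(const DeviceContext* ctx,
                                           const Variable& in, Variable* out)>;

void DoubleValue(const DeviceContext* ctx, const Variable& in, Variable* out) {
  std::cout << "transform on " << ctx->name << "\n";
  out->data = in.data * 2;
}

int main() {
  DeviceContext cpu{"CPU"};
  Variable in{21}, out{0};
  DataTransformFn fn = DoubleValue;  // a plain function satisfies the alias
  fn(&cpu, in, &out);                // out.data == 42
  return 0;
}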
@@ -54,18 +54,18 @@ auto kernel1 = GenFromBit({0, 0, 0, 1});
 auto kernel2 = GenFromBit({0, 0, 1, 0});
 auto kernel3 = GenFromBit({0, 0, 1, 1});
 
-void TransDataType_t(std::vector<platform::DeviceContext*> ctx,
-                     const Variable& in, Variable* out) {
+void TransDataType_t(const platform::DeviceContext* ctx, const Variable& in,
+                     Variable* out) {
   test_value++;
 }
 
-void TransDataLayout_t(std::vector<platform::DeviceContext*> ctx,
-                       const Variable& in, Variable* out) {
+void TransDataLayout_t(const platform::DeviceContext* ctx, const Variable& in,
+                       Variable* out) {
   test_value--;
 }
 
-void TransLibraryType_t(std::vector<platform::DeviceContext*> ctx,
-                        const Variable& in, Variable* out) {
+void TransLibraryType_t(const platform::DeviceContext* ctx, const Variable& in,
+                        Variable* out) {
   test_value += 2;
 }
@@ -83,7 +83,8 @@ TEST(DataTransform, Register) {
   using namespace paddle::platform;
 
   auto& instance = DataTransformFnMap::Instance();
-  std::vector<DeviceContext*> ctx;
+  ASSERT_EQ(instance.Map().size(), 3UL);
+  DeviceContext* ctx = nullptr;
 
   paddle::framework::Variable in;
   paddle::framework::Variable out;
......
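The test now asserts that exactly three transforms (data type, layout, library type) are registered and passes a single nullable context pointer. A toy analog of the registry pattern under test, with int standing in for OpKernelType and std::map in place of the framework's hashed map, illustrative only:

#include <cassert>
#include <cstddef>
#include <functional>
#include <map>
#include <utility>

// Transforms are registered under an (actual, expected) kernel-type pair
// and looked up with a getter that may return nullptr.
using KernelType = int;
using KernelTypePair = std::pair<KernelType, KernelType>;
using TransformFn = std::function<void(int& value)>;

class TransformMap {
 public:
  void Insert(const KernelTypePair& key, TransformFn fn) { map_[key] = fn; }
  // Returns nullptr when no transform is registered, mirroring GetNullable.
  const TransformFn* GetNullable(const KernelTypePair& key) const {
    auto it = map_.find(key);
    return it == map_.end() ? nullptr : &it->second;
  }
  std::size_t Size() const { return map_.size(); }

 private:
  std::map<KernelTypePair, TransformFn> map_;
};

int main() {
  TransformMap instance;
  instance.Insert({0, 1}, [](int& v) { v++; });
  instance.Insert({1, 0}, [](int& v) { v--; });
  instance.Insert({1, 1}, [](int& v) { v += 2; });
  assert(instance.Size() == 3);  // analog of ASSERT_EQ(..., 3UL)

  int test_value = 0;
  if (const TransformFn* fn = instance.GetNullable({0, 1})) (*fn)(test_value);
  assert(test_value == 1);
  return 0;
}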
@@ -384,6 +384,24 @@ class RuntimeInferShapeContext : public InferShapeContext {
   const Scope& scope_;
 };
 
+const platform::DeviceContext* GetDeviceContext(
+    framework::KernelTypePair& kernel_pair) {
+  auto& actual_kernel_key = kernel_pair.first;
+  auto& expected_kernel_key = kernel_pair.second;
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+
+  if (platform::is_gpu_place(actual_kernel_key.place_) &&
+      platform::is_cpu_place(expected_kernel_key.place_)) {
+    return pool.Get(actual_kernel_key.place_);
+  } else if (platform::is_cpu_place(actual_kernel_key.place_) &&
+             platform::is_gpu_place(expected_kernel_key.place_)) {
+    return pool.Get(expected_kernel_key.place_);
+  } else {
+    PADDLE_THROW(
+        "Currently, model parallelism is only supported between CPU and CUDA");
+  }
+}
+
 void OperatorWithKernel::Run(const Scope& scope,
                              const platform::Place& place) const {
   RuntimeInferShapeContext infer_shape_ctx(*this, scope);
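The new helper encodes one decision: a cross-device transform is driven by the CUDA-side context, whichever end of the kernel pair that is (presumably because only the CUDA context carries the stream needed for the copy), and anything other than a CPU/CUDA pair is rejected. A stripped-down, self-contained analog of that branching:

#include <cassert>
#include <stdexcept>

enum class Place { kCPU, kGPU };

// Analog of GetDeviceContext: given (actual, expected) kernel placements,
// pick the place whose context should drive the data transform.
Place PickTransformPlace(Place actual, Place expected) {
  if (actual == Place::kGPU && expected == Place::kCPU) {
    return actual;    // GPU -> CPU: the source GPU context performs the copy
  } else if (actual == Place::kCPU && expected == Place::kGPU) {
    return expected;  // CPU -> GPU: the destination GPU context performs it
  }
  // Mirrors the PADDLE_THROW branch for unsupported pairs.
  throw std::runtime_error(
      "model parallelism is only supported between CPU and CUDA");
}

int main() {
  assert(PickTransformPlace(Place::kGPU, Place::kCPU) == Place::kGPU);
  assert(PickTransformPlace(Place::kCPU, Place::kGPU) == Place::kGPU);
  return 0;
}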
@@ -418,9 +436,9 @@ void OperatorWithKernel::Run(const Scope& scope,
                       "CPU and other devices. For example, multi-GPU model "
                       "parallelism will failed.");
   } else {
-    const DataTransformFn* trans_fun =
-        DataTransformFnMap::Instance().GetNullable(
-            std::make_pair(actual_kernel_key, expected_kernel_key));
+    auto kernel_pair = std::make_pair(actual_kernel_key, expected_kernel_key);
+    const DataTransformFn* trans_fun =
+        DataTransformFnMap::Instance().GetNullable(kernel_pair);
     if (trans_fun) {
       auto input_vars = this->InputVars();
       // TODO(qijun) filter the input vars that do not need to be transformed
@@ -437,22 +455,18 @@ void OperatorWithKernel::Run(const Scope& scope,
       }
 
       if (!need_trans.empty()) {
-        // TODO(qijun) get appropriate DeviceContext from DeviceContext pool
-        platform::DeviceContext* trans_dev_ctx = nullptr;
-        std::vector<platform::DeviceContext*> trans_dev_ctx_vec{trans_dev_ctx};
+        auto trans_dev_ctx = GetDeviceContext(kernel_pair);
 
         // Wait for transform starting
         dev_ctx->Wait();
 
         for (auto var_name : need_trans) {
-          (*trans_fun)(trans_dev_ctx_vec, *(scope.FindVar(var_name)),
+          (*trans_fun)(trans_dev_ctx, *(scope.FindVar(var_name)),
                        scope.FindVar(var_name + framework::KernelTypeToString(
                                                     expected_kernel_key)));
         }
 
         // Wait for data transform finishing
-        for (auto ctx : trans_dev_ctx_vec) {
-          ctx->Wait();
-        }
+        trans_dev_ctx->Wait();
       }
     }
   }
......
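Taken together, the operator.cc hunks leave Run() with this transform flow: build the kernel pair once, look up a nullable transform, fetch a single context from the pool, and bracket the per-variable transforms with a Wait() on each side. A minimal sequential sketch of that ordering, with a stub Wait() and no real devices:

#include <iostream>
#include <string>
#include <vector>

// Stub context: in the framework, Wait() blocks until the context's
// stream (e.g. a CUDA stream) has drained.
struct DeviceContext {
  std::string name;
  void Wait() const { std::cout << "wait on " << name << "\n"; }
};

int main() {
  DeviceContext dev_ctx{"op"}, trans_dev_ctx{"transform"};
  std::vector<std::string> need_trans = {"X", "Y"};

  if (!need_trans.empty()) {
    dev_ctx.Wait();  // wait for transform starting
    for (const auto& var_name : need_trans) {
      std::cout << "transform " << var_name << "\n";  // (*trans_fun)(...)
    }
    trans_dev_ctx.Wait();  // one Wait() replaces the old loop over a vector
  }
  return 0;
}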