optimize lod_reset op to avoid data transform

698b8b73 · Zhang Ting · GitHub · f0b15184 · 698b8b73 · 698b8b73
隐藏空白更改
内联并排

Showing 2 changed files with 10 additions and 1 deletion

paddle/fluid/operators/lod_reset_op.cc +9 -0

paddle/fluid/operators/lod_reset_op.h +1 -1

未找到文件。
--- a/paddle/fluid/operators/lod_reset_op.cc
+++ b/paddle/fluid/operators/lod_reset_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/lod_reset_op.h"
 #include <memory>
+#include <string>
 namespace paddle {
 namespace operators {
@@ -50,6 +51,14 @@ class LoDResetOp : public framework::OperatorWithKernel {
        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
        ctx.device_context());
  }
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string &var_name, const framework::Tensor &tensor,
+      const framework::OpKernelType &expected_kernel_type) const override {
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   expected_kernel_type.place_,
+                                   tensor.layout());
+  }
 };
 class LoDResetOpVarTypeInference : public framework::VarTypeInference {

--- a/paddle/fluid/operators/lod_reset_op.h
+++ b/paddle/fluid/operators/lod_reset_op.h
@@ -45,7 +45,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
        return;  // early return, since lod already set
      } else {
        auto* lod = lod_t->data<int>();
-        if (platform::is_gpu_place(ctx.GetPlace())) {
+        if (platform::is_gpu_place(lod_t->place())) {
          framework::Tensor lod_cpu;
          framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
          lod = lod_cpu.data<int>();