Avoid CPU -> CPU memory copy when start, end, and step are already on CPU. (#29088)

95122ebe · Yiqun Liu · GitHub · d815fbf9 · 95122ebe · 95122ebe
隐藏空白更改
内联并排

Showing 17 additions and 20 deletions

paddle/fluid/operators/range_op.cu paddle/fluid/operators/range_op.cu +4 -20

paddle/fluid/operators/utils.h paddle/fluid/operators/utils.h +13 -0

未找到文件。
--- a/paddle/fluid/operators/range_op.cu
+++ b/paddle/fluid/operators/range_op.cu
@@ -15,6 +15,7 @@ limitations under the License. */
 #include <algorithm>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/range_op.h"
+#include "paddle/fluid/operators/utils.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 namespace paddle {
@@ -34,26 +35,9 @@ class CUDARangeKernel : public framework::OpKernel<T> {
    auto* step_t = context.Input<framework::Tensor>("Step");
    auto* out = context.Output<framework::Tensor>("Out");
-    T start, end, step;
+    T start = GetValue<T>(start_t);
-    framework::Tensor n;
+    T end = GetValue<T>(end_t);
-    if (::paddle::platform::is_cpu_place(start_t->place())) {
+    T step = GetValue<T>(step_t);
-      start = start_t->data<T>()[0];
-    } else {
-      framework::TensorCopy(*start_t, platform::CPUPlace(), &n);
-      start = n.data<T>()[0];
-    }
-    if (::paddle::platform::is_cpu_place(end_t->place())) {
-      end = end_t->data<T>()[0];
-    } else {
-      framework::TensorCopy(*end_t, platform::CPUPlace(), &n);
-      end = n.data<T>()[0];
-    }
-    if (::paddle::platform::is_cpu_place(step_t->place())) {
-      step = step_t->data<T>()[0];
-    } else {
-      framework::TensorCopy(*step_t, platform::CPUPlace(), &n);
-      step = n.data<T>()[0];
-    }
    int64_t size = 0;
    GetSize(start, end, step, &size);

--- a/paddle/fluid/operators/utils.h
+++ b/paddle/fluid/operators/utils.h
@@ -108,5 +108,18 @@ inline framework::DDim GetShape(const framework::ExecutionContext& ctx) {
  return framework::make_ddim(vec_shape);
 }
+template <typename T>  // GetValue<T>: return element 0 of tensor `x` as a scalar of type T.
+inline T GetValue(const framework::Tensor* x) {  // `x` is dereferenced without a null or size check here.
+  T value = static_cast<T>(0);  // Placeholder; overwritten on both branches below.
+  if (!platform::is_cpu_place(x->place())) {  // Tensor is not on a CPU place.
+    framework::Tensor cpu_x;  // Temporary tensor that receives the CPU-side copy.
+    framework::TensorCopy(*x, platform::CPUPlace(), &cpu_x);  // NOTE(review): confirm this copy has completed before the read below; sync vs. async is not visible here.
+    value = cpu_x.data<T>()[0];  // Read the first element from the CPU copy.
+  } else {
+    value = x->data<T>()[0];  // Already on CPU: read element 0 directly, with no intermediate tensor copy.
+  }
+  return value;
+}
 }  // namespace operators
 }  // namespace paddle