From ec2160a6222db2f9dae665432370f7fd6839fdb8 Mon Sep 17 00:00:00 2001
From: OleNet
Date: Mon, 15 Mar 2021 10:51:48 +0800
Subject: [PATCH] [NPU] add range op (#31560)

* add range op

* fix codestyle; call GetSize directly

Co-authored-by: oyjxer <1728722986@qq.com>
---
 paddle/fluid/operators/CMakeLists.txt       | 11 ++-
 paddle/fluid/operators/range_op_npu.cc      | 78 +++++++++++++++++
 paddle/fluid/operators/range_op_npu_test.cc | 95 +++++++++++++++++++++
 3 files changed, 178 insertions(+), 6 deletions(-)
 create mode 100644 paddle/fluid/operators/range_op_npu.cc
 create mode 100644 paddle/fluid/operators/range_op_npu_test.cc

diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 0164b0262f..ad4e1cd55f 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -138,8 +138,8 @@ set(OPERATOR_DEPS ${OPERATOR_DEPS} ${COMMON_OP_DEPS})
 set(GLOB_OPERATOR_DEPS ${OPERATOR_DEPS} CACHE INTERNAL "Global Op dependencies")
 
 cc_test(test_common_infer_shape_functions SRCS test_common_infer_shape_functions.cc DEPS common_infer_shape_functions ${COMMON_OP_DEPS} activation_op elementwise_add_op softmax_op softmax)
-cc_test(assign_op_test SRCS assign_op_test.cc DEPS assign_op)
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
+cc_test(assign_op_test SRCS assign_op_test.cc DEPS assign_op)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor math_function)
 cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
 cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
@@ -152,16 +152,15 @@ else()
   cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3)
 endif()
 
-# ascend gather_op_npu unittest
-if (WITH_ASCEND_CL)
-  cc_test(gather_op_npu_test SRCS gather_op_npu_test.cc DEPS gather_op tensor op_registry scope device_context enforce executor)
-endif()
-
 cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS})
 if (WITH_PYTHON)
   cc_library(py_func_op SRCS py_func_op.cc DEPS op_registry python pybind)
 endif()
 
+if (WITH_ASCEND_CL)
+  cc_test(range_op_npu_test SRCS range_op_npu_test.cc DEPS op_registry range_op scope device_context enforce executor)
+endif()
+
 set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 
 add_subdirectory(benchmark)
diff --git a/paddle/fluid/operators/range_op_npu.cc b/paddle/fluid/operators/range_op_npu.cc
new file mode 100644
index 0000000000..acdc092ade
--- /dev/null
+++ b/paddle/fluid/operators/range_op_npu.cc
@@ -0,0 +1,78 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_ASCEND_CL
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/operators/range_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+#include "paddle/fluid/operators/utils.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
+#include "paddle/fluid/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+
+template <typename T>
+class RangeNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* start_t = context.Input<framework::Tensor>("Start");
+    auto* end_t = context.Input<framework::Tensor>("End");
+    auto* step_t = context.Input<framework::Tensor>("Step");
+    auto* out = context.Output<framework::Tensor>("Out");
+
+    framework::Tensor n;
+    framework::TensorCopySync(*start_t, platform::CPUPlace(), &n);
+    T start = n.data<T>()[0];
+    framework::TensorCopySync(*end_t, platform::CPUPlace(), &n);
+    T end = n.data<T>()[0];
+    framework::TensorCopySync(*step_t, platform::CPUPlace(), &n);
+    T step = n.data<T>()[0];
+
+    int64_t size = 0;
+    GetSize(start, end, step, &size);
+
+    out->Resize(framework::make_ddim({size}));
+    out->mutable_data<T>(context.GetPlace());
+
+    std::vector<T> odata;
+    T value = start;
+    for (int64_t i = 0; i < size; ++i) {
+      odata.push_back(value);
+      value += step;
+    }
+
+    framework::TensorFromVector(odata, context.device_context(), out);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_NPU_KERNEL(
+    range,
+    ops::RangeNPUKernel<int>,
+    ops::RangeNPUKernel<float>,
+    ops::RangeNPUKernel<double>)
+
+#endif
diff --git a/paddle/fluid/operators/range_op_npu_test.cc b/paddle/fluid/operators/range_op_npu_test.cc
new file mode 100644
index 0000000000..f4ec2fe715
--- /dev/null
+++ b/paddle/fluid/operators/range_op_npu_test.cc
@@ -0,0 +1,95 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <string>
+#include <thread>  // NOLINT
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"
+
+namespace f = paddle::framework;
+namespace p = paddle::platform;
+namespace m = paddle::operators::math;
+
+USE_OP(range);
+USE_OP_DEVICE_KERNEL(range, NPU);
+
+template <typename T>
+void Compare(f::Scope* scope, const p::DeviceContext& ctx,
+             std::string op_type) {
+  // init
+  auto start = scope->Var("Start");
+  auto tensor_start = start->GetMutable<f::LoDTensor>();
+  std::vector<T> init_start;
+  init_start.push_back(static_cast<T>(1));
+  TensorFromVector(init_start, ctx, tensor_start);
+  tensor_start->Resize({1});
+
+  auto end = scope->Var("End");
+  auto tensor_end = end->GetMutable<f::LoDTensor>();
+  std::vector<T> init_end;
+  init_end.push_back(static_cast<T>(10));
+  TensorFromVector(init_end, ctx, tensor_end);
+  tensor_end->Resize({1});
+
+  auto step = scope->Var("Step");
+  auto tensor_step = step->GetMutable<f::LoDTensor>();
+  std::vector<T> init_step;
+  init_step.push_back(static_cast<T>(2));
+  TensorFromVector(init_step, ctx, tensor_step);
+  tensor_step->Resize({1});
+
+  ctx.Wait();
+
+  auto place = ctx.GetPlace();
+  auto out = scope->Var("Out");
+  auto tensor_out = out->GetMutable<f::LoDTensor>();
+
+  // run
+  auto op = f::OpRegistry::CreateOp(op_type, {{"Start", {"Start"}},
+                                              {"End", {"End"}},
+                                              {"Step", {"Step"}}},
+                                    {{"Out", {"Out"}}}, {});
+
+  op->Run(*scope, place);
+
+  std::vector<T> out_vec;
+  TensorToVector(*tensor_out, ctx, &out_vec);
+  ctx.Wait();
+
+  EXPECT_EQ(static_cast<int>(out_vec.size()), static_cast<int>(5));
+  EXPECT_EQ(static_cast<T>(out_vec[0]), static_cast<T>(1.0));
+  EXPECT_EQ(static_cast<T>(out_vec[1]), static_cast<T>(3.0));
+  EXPECT_EQ(static_cast<T>(out_vec[2]), static_cast<T>(5.0));
+  EXPECT_EQ(static_cast<T>(out_vec[3]), static_cast<T>(7.0));
+  EXPECT_EQ(static_cast<T>(out_vec[4]), static_cast<T>(9.0));
+}
+
+
+TEST(range, NPU) {
+  f::Scope scope;
+  p::NPUDeviceContext ctx(p::NPUPlace(0));
+  Compare<int>(&scope, ctx, "range");
+}
--
GitLab
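
Note: the NPU kernel above delegates the output-length calculation to GetSize(start, end, step, &size) declared in range_op.h rather than computing it inline. As a rough illustration of what that helper has to produce, a minimal sketch is shown below; it is an assumption for exposition only (GetRangeSizeSketch is a made-up name, and the rounding shown is not necessarily Paddle's exact implementation).

#include <cmath>
#include <cstdint>

// Illustrative sketch only -- not the GetSize defined in range_op.h.
// Counts the elements visited starting at `start`, stepping by `step`,
// and stopping before `end`.
template <typename T>
void GetRangeSizeSketch(T start, T end, T step, int64_t* size) {
  // e.g. start=1, end=10, step=2 gives ceil(9 / 2) = 5 elements
  // {1, 3, 5, 7, 9}, matching the expectations in range_op_npu_test.cc.
  *size = static_cast<int64_t>(
      std::ceil(std::abs((end - start) / static_cast<double>(step))));
}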