未验证 提交 4bdeabb8 编写于 作者: L Leonardo-Ding 提交者: GitHub

[ARM] add expand_as op kernel,test=develop (#4047)

上级 b2e7d97c
......@@ -25,6 +25,8 @@
#include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
#include <gflags/gflags.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
using paddle::lite::profile::Timer;
......
......@@ -7,6 +7,7 @@ add_kernel(squeeze_compute_host Host basic SRCS squeeze_compute.cc DEPS ${lite_k
add_kernel(unsqueeze_compute_host Host basic SRCS unsqueeze_compute.cc DEPS ${lite_kernel_deps})
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_compute_host Host basic SRCS expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_as_compute_host Host basic SRCS expand_as_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/host/expand_as_compute.h"

#include <cstring>
#include <vector>
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
template <typename T, PrecisionType PType>
void ExpandAsCompute<T, PType>::Run() {
  // Expands input X so that its shape matches Target's shape by tiling each
  // dimension. Per-dimension repeat counts are derived as target_dim / x_dim.
  auto& param = this->template Param<operators::ExpandAsParam>();
  const auto* x = param.X;
  auto* out = param.Out;
  const auto* target = param.Target;
  const T* src = x->template data<T>();
  T* dst = out->template mutable_data<T>();

  // Repeat count for each dimension.
  // NOTE(review): integer division assumes every target dim is an exact
  // multiple of the matching x dim -- confirm upstream shape checks.
  std::vector<int> expand_times;
  for (size_t i = 0; i < target->dims().size(); ++i) {
    expand_times.push_back(static_cast<int>(target->dims()[i] / x->dims()[i]));
  }

  int dims = static_cast<int>(target->dims().size());
  DDim in_shape = x->dims();

  // Phase 1: expand the innermost dimension, copying every source row
  // expand_times[pos] times from src into dst.
  int pos = dims - 1;
  int outer_num = in_shape.count(0, pos);
  int inner_num = in_shape[pos];
  for (int j = 0; j < outer_num; ++j) {
    for (int k = 0; k < expand_times[pos]; ++k) {
      memcpy(dst + (j * expand_times[pos] + k) * inner_num,
             src + j * inner_num,
             sizeof(T) * inner_num);
    }
  }
  inner_num *= expand_times[pos];

  // Phase 2: expand the remaining dimensions in place, inner to outer.
  // Iterating j and k backwards guarantees each source chunk is read before
  // any destination chunk can overwrite it.
  for (int i = dims - 2; i >= 0; --i) {
    int outer = in_shape.count(0, i);
    inner_num *= in_shape[i];
    for (int j = outer - 1; j >= 0; --j) {
      for (int k = expand_times[i] - 1; k >= 0; --k) {
        T* to = dst + (j * expand_times[i] + k) * inner_num;
        const T* from = dst + j * inner_num;
        // Skip the degenerate self-copy (expand_times[i] == 1, or
        // j == k == 0): memcpy on identical regions is undefined behavior.
        if (to != from) {
          memcpy(to, from, sizeof(T) * inner_num);
        }
      }
    }
    inner_num *= expand_times[i];
  }
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
// Alias for the float / kFloat instantiation of the expand_as host kernel.
using expand_as_float =
paddle::lite::kernels::host::ExpandAsCompute<float, PRECISION(kFloat)>;
// Register expand_as on the Host place for float data with any layout;
// X, Target and Out are all bound as float host tensors.
REGISTER_LITE_KERNEL(expand_as, kHost, kFloat, kAny, expand_as_float, def)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.BindInput("Target",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
// Host kernel for the expand_as operator: tiles the input tensor X so that
// its shape matches the shape of the Target tensor.
template <typename T, PrecisionType PType>
class ExpandAsCompute
    : public KernelLite<TARGET(kHost), PType, DATALAYOUT(kAny)> {
 public:
  virtual ~ExpandAsCompute() = default;

  // Performs the expansion; the algorithm lives in the .cc file.
  void Run() override;
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -34,6 +34,7 @@ add_operator(fake_quant extra SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op
add_operator(fake_dequant extra SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS})
add_operator(conv_transpose_op basic SRCS conv_transpose_op.cc DEPS ${op_DEPS})
add_operator(expand_op_lite basic SRCS expand_op.cc DEPS ${op_DEPS})
add_operator(expand_as_op_lite basic SRCS expand_as_op.cc DEPS ${op_DEPS})
add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS})
add_operator(unsqueeze_op_lite basic SRCS unsqueeze_op.cc DEPS ${op_DEPS})
add_operator(stack_op basic SRCS stack_op.cc DEPS ${op_DEPS})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/expand_as_op.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
bool ExpandAsOpLite::CheckShape() const {
  // Validates that X can be tiled to Target's shape: all three tensors must
  // be present, X and Target must have equal rank, and the rank must not
  // exceed 6.
  CHECK_OR_FALSE(param_.X);
  CHECK_OR_FALSE(param_.Target);
  CHECK_OR_FALSE(param_.Out);
  int target_size = param_.Target->dims().size();
  int x_dims_size = param_.X->dims().size();
  // Fixed message: was "The number of expand_times size must be qual to ..."
  CHECK_EQ(target_size, x_dims_size)
      << "The rank of Input(Target) must be equal to the rank of Input(X).";
  CHECK_LE(param_.X->dims().size(), 6u)
      << "The rank of Input(X) must not be greater than 6.";
  return true;
}
bool ExpandAsOpLite::InferShapeImpl() const {
DDim out_dims(param_.X->dims());
for (size_t i = 0; i < param_.Target->dims().size(); ++i) {
// out_dims[i] *= param_.expand_times[i];
out_dims[i] = param_.Target->dims()[i];
}
param_.Out->Resize(out_dims);
return true;
}
bool ExpandAsOpLite::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
  // Resolve the tensors named in the op description from the scope.
  auto x_name = opdesc.Input("X").front();
  auto target_name = opdesc.Input("Target").front();
  auto out_name = opdesc.Output("Out").front();
  param_.X = GetVar<lite::Tensor>(scope, x_name);
  param_.Target = GetVar<lite::Tensor>(scope, target_name);
  param_.Out = GetMutableVar<lite::Tensor>(scope, out_name);
  return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(expand_as, paddle::lite::operators::ExpandAsOpLite);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/op_lite.h"
namespace paddle {
namespace lite {
namespace operators {
// Operator wrapper for expand_as: validates shapes, infers the output shape
// from the Target tensor, and binds X/Target/Out tensors from the scope.
class ExpandAsOpLite : public OpLite {
 public:
  ExpandAsOpLite() = default;
  explicit ExpandAsOpLite(const std::string &op_type) : OpLite(op_type) {}

  bool CheckShape() const override;
  bool InferShapeImpl() const override;
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
  std::string DebugString() const override { return "expand_as"; }

 private:
  mutable ExpandAsParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -1287,6 +1287,13 @@ struct ExpandParam : ParamBase {
std::vector<int> expand_times{};
};
/// ----------------------- expand as operators ----------------------
// Parameters for the expand_as op: Out is X tiled so its shape matches
// Target's shape.
struct ExpandAsParam : ParamBase {
const lite::Tensor* X{};       // input tensor to be expanded
const lite::Tensor* Target{};  // tensor whose shape the output must match
lite::Tensor* Out{};           // expanded output tensor
};
/// ----------------------- matmul operators ----------------------
struct MatMulParam : ParamBase {
const lite::Tensor* X{};
......
......@@ -86,6 +86,7 @@ endif()
lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# Match sibling kernel tests: include ${huawei_ascend_npu_kernels} in DEPS.
lite_cc_test(test_kernel_expand_as_compute SRCS expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
namespace paddle {
namespace lite {
// Arena test case for expand_as: builds an input X and a Target tensor, and
// RunBaseline() computes the reference output by mapping every output element
// back to its source element in X.
class ExpandAsComputeTester : public arena::TestCase {
protected:
// common attributes for this op.
std::string x_ = "X";
std::string out_ = "Out";
std::string target_ = "Target";
DDim dims_;  // shape of input X
DDim target_dims_;  // shape of Target; also the expected output shape
public:
ExpandAsComputeTester(const Place& place,
const std::string& alias,
DDim dims,
DDim target_dims)
: TestCase(place, alias), dims_(dims), target_dims_(target_dims) {}
// Reference implementation: tiles X so each dimension i is repeated
// target->dims()[i] / input->dims()[i] times, writing into `out_`.
void RunBaseline(Scope* scope) override {
const auto* input = scope->FindTensor(x_);
CHECK(input);
auto* out = scope->NewTensor(out_);
CHECK(out);
const auto* target = scope->FindTensor(target_);
DDim out_shape(input->dims());
DDim in_shape = input->dims();
// Per-dimension repeat counts derived from the Target / input shape ratio.
// NOTE(review): integer division assumes exact divisibility -- confirm.
std::vector<int> expand_times_;
for (size_t i = 0; i < target->dims().size(); ++i) {
int times = target->dims()[i] / input->dims()[i];
expand_times_.push_back(times);
}
for (size_t i = 0; i < expand_times_.size(); ++i) {
out_shape[i] *= expand_times_[i];
}
out->Resize(out_shape);
float* out_data = out->mutable_data<float>();
const float* input_data = input->data<float>();
// Row-major strides for the input and output shapes.
std::vector<int> in_stride(in_shape.size(), 1),
out_stride(out_shape.size(), 1);
for (int i = in_shape.size() - 2; i >= 0; --i) {
in_stride[i] = in_shape[i + 1] * in_stride[i + 1];
}
for (int i = out_shape.size() - 2; i >= 0; --i) {
out_stride[i] = out_shape[i + 1] * out_stride[i + 1];
}
// For each output element, fold its multi-index modulo the input shape to
// locate the source element.
for (size_t out_id = 0; out_id < out_shape.production(); ++out_id) {
int in_id = 0;
for (int i = expand_times_.size() - 1; i >= 0; --i) {
int in_j = (out_id / out_stride[i]) % in_shape[i];
in_id += in_j * in_stride[i];
}
out_data[out_id] = input_data[in_id];
}
}
// Describes the op under test for the arena framework.
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("expand_as");
op_desc->SetInput("X", {x_});
op_desc->SetInput("Target", {target_});
op_desc->SetOutput("Out", {out_});
}
// Fills X with 0..N-1; Target is filled too, but only its shape is used.
void PrepareData() override {
std::vector<float> in_data(dims_.production());
std::vector<float> target_data(target_dims_.production());
for (int i = 0; i < dims_.production(); ++i) {
in_data[i] = i;
}
for (int i = 0; i < target_dims_.production(); ++i) {
target_data[i] = i;
}
SetCommonTensor(x_, dims_, in_data.data());
SetCommonTensor(target_, target_dims_, target_data.data());
}
};
// Runs the 3-D case: a (3, 2, 4) input expanded by factors (2, 3, 1).
void test_expand_as_3dim(Place place, float abs_error) {
  const int kC = 3;
  const int kH = 2;
  const int kW = 4;
  std::unique_ptr<arena::TestCase> tester(new ExpandAsComputeTester(
      place, "def", DDim({kC, kH, kW}), DDim({kC * 2, kH * 3, kW * 1})));
  arena::Arena arena(std::move(tester), place, abs_error);
  arena.TestPrecision();
}
// Runs the 4-D case: a (2, 3, 2, 4) input expanded by factors (2, 3, 1, 4).
void test_expand_as_4dim(Place place, float abs_error) {
  const int kN = 2;
  const int kC = 3;
  const int kH = 2;
  const int kW = 4;
  std::unique_ptr<arena::TestCase> tester(
      new ExpandAsComputeTester(place,
                                "def",
                                DDim({kN, kC, kH, kW}),
                                DDim({kN * 2, kC * 3, kH * 1, kW * 4})));
  arena::Arena arena(std::move(tester), place, abs_error);
  arena.TestPrecision();
}
// Precision entry point: picks a target place based on the build config and
// runs the 3-D and 4-D expand_as cases against the reference baseline.
TEST(ExpandAs, precision) {
float abs_error = 1e-5;
Place place;
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2;  // Using fp16 in NPU
#elif defined(LITE_WITH_ARM)
place = TARGET(kHost);
#elif defined(LITE_WITH_X86)
place = TARGET(kHost);
#else
// No supported place in this build configuration; skip the test.
return;
#endif
test_expand_as_3dim(place, abs_error);
test_expand_as_4dim(place, abs_error);
}
} // namespace lite
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册