Unverified commit b445941f, authored by zhupengyang, committed by GitHub

move logical_compute to host and add ut (#3424)

Parent 92c6f80b
......
@@ -111,18 +111,23 @@ class KernelRegistry final {
KernelRegistryForTarget<TARGET(kCUDA),
PRECISION(kFloat),
DATALAYOUT(kNHWC)> *, //
+KernelRegistryForTarget<TARGET(kCUDA),
+PRECISION(kAny),
+DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kCUDA),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kCUDA),
PRECISION(kInt8),
DATALAYOUT(kNHWC)> *, //
KernelRegistryForTarget<TARGET(kX86),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kX86),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
......
@@ -141,9 +146,7 @@ class KernelRegistry final {
KernelRegistryForTarget<TARGET(kHost),
PRECISION(kInt64),
DATALAYOUT(kNCHW)> *, //
-KernelRegistryForTarget<TARGET(kCUDA),
-PRECISION(kAny),
-DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kARM),
PRECISION(kAny),
DATALAYOUT(kAny)> *, //
......
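The variant list above is what each REGISTER_LITE_KERNEL invocation is resolved against; a hedged sketch of the relationship (the kHost/kAny/kAny entry used by the kernels moved in this diff lies outside the visible hunk):

// A registration of the form
//   REGISTER_LITE_KERNEL(logical_and, kHost, kAny, kAny, KernelClass, def)
// is matched at compile time to the variant
//   KernelRegistryForTarget<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)>
// which is why the list enumerates every (target, precision, layout)
// combination that kernels may register with.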
......
@@ -88,7 +88,6 @@ add_kernel(gru_compute_arm ARM extra SRCS gru_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(beam_search_decode_compute_arm ARM extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_compute.cc DEPS ${lite_kernel_deps} math_arm)
-add_kernel(logical_compute_arm ARM extra SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(while_compute_arm ARM extra SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
......
@@ -8,4 +8,5 @@ add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kerne
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
add_kernel(compare_compute_host Host extra SRCS compare_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(logical_compute_host Host extra SRCS logical_compute.cc DEPS ${lite_kernel_deps})
add_kernel(ctc_align_compute_host Host extra SRCS ctc_align_compute.cc DEPS ${lite_kernel_deps})
......
@@ -12,44 +12,34 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/logical_compute.h"
#include <vector>
#include "lite/api/paddle_place.h"
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/kernels/host/logical_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
-namespace arm {
+namespace host {
-#define LOGICAL_FUNCTOR(name, op) \
-template <typename T> \
-struct _##name##Functor { \
-inline bool operator()(const T& a, const T& b) const { return a op b; } \
+#define LOGICAL_FUNCTOR(name, op) \
+struct _##name##Functor { \
+inline bool operator()(const bool& a, const bool& b) const { \
+return a op b; \
+} \
};
LOGICAL_FUNCTOR(LogicalAnd, &&);
LOGICAL_FUNCTOR(LogicalOr, ||);
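// For reference, each LOGICAL_FUNCTOR invocation above expands into a
// bool-only functor; e.g. LOGICAL_FUNCTOR(LogicalAnd, &&) yields:
//   struct _LogicalAndFunctor {
//     inline bool operator()(const bool& a, const bool& b) const {
//       return a && b;
//     }
//   };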
-template <typename T>
struct _LogicalXorFunctor {
-inline bool operator()(const T& a, const T& b) const {
+inline bool operator()(const bool& a, const bool& b) const {
return (a || b) && !(a && b);
}
};
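// (a || b) && !(a && b) is boolean XOR, i.e. true iff exactly one input is
// true: (0,0)->0, (0,1)->1, (1,0)->1, (1,1)->0.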
-template <typename T>
struct _LogicalNotFunctor {
-inline bool operator()(const T& a) const { return !a; }
+inline bool operator()(const bool& a) const { return !a; }
};
-// template<typename Functor>
-template <template <typename T> class Functor>
-void BinaryLogicalCompute<Functor>::PrepareForRun() {}
-template <template <typename T> class Functor>
+template <class Functor>
-// template<typename Functor>
void BinaryLogicalCompute<Functor>::Run() {
auto& param = this->Param<operators::LogicalParam>();
......
@@ -57,72 +47,103 @@ void BinaryLogicalCompute<Functor>::Run() {
bool* z = param.Out->template mutable_data<bool>();
const bool* x = param.X->template data<bool>();
const bool* y = param.Y->template data<bool>();
-using LogicalFunctor = Functor<bool>;
for (int i = 0; i < count; ++i) {
-z[i] = LogicalFunctor()(x[i], y[i]);
+z[i] = Functor()(x[i], y[i]);
}
}
-template <template <typename> class Functor>
-void UnaryLogicalCompute<Functor>::PrepareForRun() {}
-template <template <typename> class Functor>
+template <class Functor>
void UnaryLogicalCompute<Functor>::Run() {
auto& param = this->Param<operators::LogicalParam>();
const size_t count = param.X->numel();
bool* z = param.Out->template mutable_data<bool>();
const auto x = param.X->template data<bool>();
-using LogicalFunctor = Functor<bool>;
for (int i = 0; i < count; ++i) {
-z[i] = LogicalFunctor()(x[i]);
+z[i] = Functor()(x[i]);
}
}
-} // namespace arm
+} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(logical_xor,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::BinaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalXorFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::BinaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalXorFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindInput("Y",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(logical_and,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::BinaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalAndFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::BinaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalAndFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindInput("Y",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(logical_or,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::BinaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalOrFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::BinaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalOrFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindInput("Y",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(logical_not,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::UnaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalNotFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::UnaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalNotFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
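A minimal sketch of driving the relocated kernel directly, mirroring how Paddle-Lite kernel unit tests commonly exercise kernels; the tensors x, y, and out are assumed to be lite::Tensor objects already resized and filled with bool data:

// Sketch only: assumes x, y, out are prepared lite::Tensor objects.
paddle::lite::kernels::host::BinaryLogicalCompute<
    paddle::lite::kernels::host::_LogicalAndFunctor>
    logical_and;
paddle::lite::operators::LogicalParam param;
param.X = &x;      // bool input
param.Y = &y;      // bool input, same shape as x
param.Out = &out;  // receives x[i] && y[i] element-wise
logical_and.SetParam(param);
logical_and.Run();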
......
@@ -13,41 +13,33 @@
// limitations under the License.
#pragma once
#include <stdint.h>
#include "lite/backends/arm/math/type_trans.h"
#include "lite/core/kernel.h"
#include "lite/operators/logical_op.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
-namespace arm {
-// template <typename Functor>
-template <template <typename> class Functor>
+namespace host {
+template <class Functor>
class BinaryLogicalCompute
-: public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+: public KernelLite<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
using param_t = operators::LogicalParam;
-void PrepareForRun() override;
void Run() override;
~BinaryLogicalCompute() {}
};
-template <template <typename> class Functor>
-class UnaryLogicalCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+template <class Functor>
+class UnaryLogicalCompute
+: public KernelLite<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
using param_t = operators::LogicalParam;
-void PrepareForRun() override;
void Run() override;
~UnaryLogicalCompute() {}
};
-} // namespace arm
+} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
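// These class templates are instantiated with the concrete functors at
// registration time in logical_compute.cc:
//   BinaryLogicalCompute<_LogicalAndFunctor>  -> logical_and
//   BinaryLogicalCompute<_LogicalOrFunctor>   -> logical_or
//   BinaryLogicalCompute<_LogicalXorFunctor>  -> logical_xor
//   UnaryLogicalCompute<_LogicalNotFunctor>   -> logical_not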
......
@@ -20,7 +20,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM AND NOT LIT
#lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-#lite_cc_test(test_kernel_logical_xor_compute SRCS logical_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+lite_cc_test(test_kernel_logical_compute SRCS logical_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
......
@@ -20,86 +20,118 @@
namespace paddle {
namespace lite {
-bool _logical_xor_func(const bool& a, const bool& b) {
-return (a || b) && !(a && b);
-}
-bool _logical_and_func(const bool& a, const bool& b) { return (a && b); }
-template <bool (*T)(const bool&, const bool&)>
-class LogicalXorTester : public arena::TestCase {
+struct _logical_and_func {
+inline bool operator()(const bool& a, const bool& b) const { return a && b; }
+};
+struct _logical_or_func {
+inline bool operator()(const bool& a, const bool& b) const { return a || b; }
+};
+struct _logical_xor_func {
+inline bool operator()(const bool& a, const bool& b) const {
+return (a || b) && !(a && b);
+}
+};
+struct _logical_not_func {
+inline bool operator()(const bool& a, const bool& b) const { return !a; }
+};
+template <class Functor>
+class LogicalTester : public arena::TestCase {
protected:
-std::string input_x_ = "x";
-std::string input_y_ = "y";
-std::string output_ = "out";
-DDim dims_{{3, 5, 4, 4}};
+std::string op_type_ = "logical_xor";
+std::string x_ = "x";
+std::string y_ = "y";
+std::string out_ = "out";
+DDim dims_{{2, 3, 4, 5}};
public:
-LogicalXorTester(const Place& place, const std::string& alias, DDim dims)
-: TestCase(place, alias), dims_(dims) {}
+LogicalTester(const Place& place,
+const std::string& alias,
+const std::string& op_type)
+: TestCase(place, alias), op_type_(op_type) {}
void RunBaseline(Scope* scope) override {
-auto* out = scope->NewTensor(output_);
-CHECK(out);
+auto* x = scope->FindTensor(x_);
+const bool* x_data = x->data<bool>();
+const Tensor* y = nullptr;
+const bool* y_data = nullptr;
+if (op_type_ != "logical_not") {
+y = scope->FindTensor(y_);
+y_data = y->data<bool>();
+}
+auto* out = scope->NewTensor(out_);
out->Resize(dims_);
bool* out_data = out->mutable_data<bool>();
-auto* x = scope->FindTensor(input_x_);
-const bool* x_data = x->data<bool>();
-auto* y = scope->FindTensor(input_y_);
-const bool* y_data = y->data<bool>();
for (int i = 0; i < dims_.production(); i++) {
-// out_data[i] = (x_data[i] || y_data[i]) && !((x_data[i] && y_data[i]));
-out_data[i] = T(x_data[i], y_data[i]);
+bool y_tmp = (y_data == nullptr) ? true : y_data[i];
+out_data[i] = Functor()(x_data[i], y_tmp);
}
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("logical_xor");
op_desc->SetInput("X", {input_x_});
op_desc->SetInput("Y", {input_y_});
op_desc->SetOutput("Out", {output_});
op_desc->SetType(op_type_);
op_desc->SetInput("X", {x_});
if (op_type_ != "logical_not") {
op_desc->SetInput("Y", {y_});
}
op_desc->SetOutput("Out", {out_});
}
void PrepareData() override {
-// std::vector<bool> data(dims_.production());
-// std::vector<char> datay(dims_.production());
-bool* data;
-bool* datay;
-data = reinterpret_cast<bool*>(malloc(dims_.production() * sizeof(bool)));
-datay = reinterpret_cast<bool*>(malloc(dims_.production() * sizeof(bool)));
-LOG(INFO) << "dims_.production()"
-<< ":::" << dims_.production();
-for (int i = 0; i < dims_.production(); i++) {
-data[i] = 1;
-datay[i] = 1;
+bool* dx = new bool[dims_.production()];
+for (int64_t i = 0; i < dims_.production(); i++) {
+dx[i] = (i % 3 == 0);
}
+SetCommonTensor(x_, dims_, dx);
+delete[] dx;
-SetCommonTensor(input_x_, dims_, data);
-SetCommonTensor(input_y_, dims_, datay);
if (op_type_ != "logical_not") {
bool* dy = new bool[dims_.production()];
for (int64_t i = 0; i < dims_.production(); i++) {
dy[i] = (i % 2 == 0);
}
SetCommonTensor(y_, dims_, dy);
delete dy;
}
}
};
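// The input patterns above exercise all four (x, y) combinations; for the
// first few elements:
//   i : 0 1 2 3 4 5
//   x : 1 0 0 1 0 0   (i % 3 == 0)
//   y : 1 0 1 0 1 0   (i % 2 == 0)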
-void test_logical(Place place) {
-DDimLite dims{{3, 5, 4, 4}};
-std::unique_ptr<arena::TestCase> logical_xor_tester(
-new LogicalXorTester<_logical_xor_func>(place, "def", dims));
-arena::Arena arena_xor(std::move(logical_xor_tester), place, 1);
+void TestLogical(Place place, float abs_error) {
+std::unique_ptr<arena::TestCase> logical_and_tester(
+new LogicalTester<_logical_and_func>(place, "def", "logical_and"));
+arena::Arena arena_and(std::move(logical_and_tester), place, abs_error);
+arena_and.TestPrecision();
-arena_xor.TestPrecision();
+std::unique_ptr<arena::TestCase> logical_or_tester(
+new LogicalTester<_logical_or_func>(place, "def", "logical_or"));
+arena::Arena arena_or(std::move(logical_or_tester), place, abs_error);
+arena_or.TestPrecision();
-std::unique_ptr<arena::TestCase> logical_and_tester(
-new LogicalXorTester<_logical_and_func>(place, "def", dims));
-arena::Arena arena_and(std::move(logical_and_tester), place, 1);
+std::unique_ptr<arena::TestCase> logical_xor_tester(
+new LogicalTester<_logical_xor_func>(place, "def", "logical_xor"));
+arena::Arena arena_xor(std::move(logical_xor_tester), place, abs_error);
+arena_xor.TestPrecision();
-arena_and.TestPrecision();
+std::unique_ptr<arena::TestCase> logical_not_tester(
+new LogicalTester<_logical_not_func>(place, "def", "logical_not"));
+arena::Arena arena_not(std::move(logical_not_tester), place, abs_error);
+arena_not.TestPrecision();
}
TEST(Logical, precision) {
-// #ifdef LITE_WITH_X86
-// // Place place(TARGET(kX86));
-// // #endif
-#ifdef LITE_WITH_ARM
-Place place(TARGET(kARM));
-test_logical(place);
+Place place;
+float abs_error = 1e-5;
+#if defined(LITE_WITH_ARM)
+place = TARGET(kHost);
+#else
+return;
#endif
+TestLogical(place, abs_error);
}
} // namespace lite
......