Unverified commit d9c3ae01, authored by: cc, committed by: GitHub

Add clip op, test=develop (#3840)

Parent commit: 13072661
......@@ -127,5 +127,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
split_merge_lod_tenosr.cc
reduce_prod.cc
lstm.cc
clip.cc
DEPS ${lite_kernel_deps} context tensor)
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/arm/math/clip.h"
#include <algorithm>
#include <limits>
#include <memory>
#include "lite/backends/arm/math/funcs.h"
#include "lite/backends/arm/math/saturate.h"
namespace paddle {
namespace lite {
namespace arm {
namespace math {
// Element-wise clamp of `num` floats: output[i] = min(max(input[i], min), max).
// The comparison order matches the original kernel, so NaN inputs collapse to
// `min` (NaN > min is false) rather than propagating.
void clip_kernel_fp32(
    const float* input, int64_t num, float min, float max, float* output) {
  for (int64_t i = 0; i < num; ++i) {
    const float lo = input[i] > min ? input[i] : min;  // max(input[i], min)
    output[i] = lo < max ? lo : max;                   // min(lo, max)
  }
}
} // namespace math
} // namespace arm
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
namespace arm {
namespace math {
// Clamps each of the `num` elements of `input` into [min, max] and writes the
// result into `output`. Reads input[i] before writing output[i], so in-place
// use (input == output) appears safe — see clip.cc.
void clip_kernel_fp32(
    const float* input, int64_t num, float min, float max, float* output);
} // namespace math
} // namespace arm
} // namespace lite
} // namespace paddle
......@@ -25,6 +25,7 @@
#include "lite/backends/arm/math/axpy.h"
#include "lite/backends/arm/math/beam_search.h"
#include "lite/backends/arm/math/box_coder.h"
#include "lite/backends/arm/math/clip.h"
#include "lite/backends/arm/math/col_im_transform.h"
#include "lite/backends/arm/math/concat.h"
#include "lite/backends/arm/math/conv_block_utils.h"
......
......@@ -78,6 +78,7 @@ add_kernel(assign_value_compute_arm ARM basic SRCS assign_value_compute.cc DEPS
add_kernel(conditional_block_compute_arm ARM extra SRCS conditional_block_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(collect_fpn_proposals_compute_arm ARM extra SRCS collect_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(distribute_fpn_proposals_compute_arm ARM extra SRCS distribute_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(clip_compute_arm ARM extra SRCS clip_compute.cc DEPS ${lite_kernel_deps} math_arm)
# for OCR specific
add_kernel(gru_unit_compute_arm ARM extra SRCS gru_unit_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/clip_compute.h"
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Runs the clip kernel: out = min(max(x, min), max), element-wise.
// The scalar `min`/`max` attributes are the defaults; when the optional
// one-element Min/Max tensors are bound, their first value wins.
void ClipCompute::Run() {
  auto& param = Param<operators::ClipParam>();

  float min_val = param.min;
  float max_val = param.max;
  if (param.min_tensor != nullptr) {
    min_val = param.min_tensor->data<float>()[0];
  }
  if (param.max_tensor != nullptr) {
    max_val = param.max_tensor->data<float>()[0];
  }

  const float* in_data = param.x->data<float>();
  float* out_data = param.out->mutable_data<float>();
  lite::arm::math::clip_kernel_fp32(
      in_data, param.x->numel(), min_val, max_val, out_data);
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the ARM float/NCHW kernel for the `clip` op under alias "def".
// "Min"/"Max" are optional one-element tensor inputs that, when bound,
// override the scalar min/max attributes (see ClipCompute::Run).
REGISTER_LITE_KERNEL(
    clip, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::ClipCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Min", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Max", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/clip_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// ARM float kernel for the `clip` op. The actual clamping is done in Run()
// (clip_compute.cc), which delegates to lite::arm::math::clip_kernel_fp32.
class ClipCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  using param_t = operators::ClipParam;

  // Resolves min/max (scalar attribute or optional one-element tensor) and
  // clamps every element of the input into [min, max].
  void Run() override;

  virtual ~ClipCompute() = default;
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -112,6 +112,7 @@ add_operator(crf_decoding_op_lite extra SRCS crf_decoding_op.cc DEPS ${op_DEPS})
add_operator(ctc_align_op_lite extra SRCS ctc_align_op.cc DEPS ${op_DEPS})
add_operator(max_pool_with_index_op extra SRCS max_pool_with_index_op.cc DEPS ${op_DEPS})
add_operator(pixel_shuffle_op extra SRCS pixel_shuffle_op.cc DEPS ${op_DEPS})
add_operator(clip_op extra SRCS clip_op.cc DEPS ${op_DEPS})
# for OCR specific
add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/clip_op.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
// Only verifies the required input/output tensors are bound; clip is
// element-wise, so there are no shape constraints to enforce here.
bool ClipOpLite::CheckShape() const {
  CHECK_OR_FALSE(param_.x);
  CHECK_OR_FALSE(param_.out);
  return true;
}
// Element-wise op: the output carries exactly the input's dims and LoD.
bool ClipOpLite::InferShapeImpl() const {
  param_.out->Resize(param_.x->dims());
  param_.out->set_lod(param_.x->lod());
  return true;
}
// Binds tensors from the scope and reads the scalar attributes.
bool ClipOpLite::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
  // X and Out are required (the `false` flag).
  AttachInput(op_desc, scope, "X", false, &param_.x);
  // Min/Max are optional (the `true` flag); when present, the kernel lets
  // them override the scalar attributes (see ClipCompute::Run).
  AttachInput(op_desc, scope, "Min", true, &param_.min_tensor);
  AttachInput(op_desc, scope, "Max", true, &param_.max_tensor);
  AttachOutput(op_desc, scope, "Out", false, &param_.out);
  // NOTE(review): assumes the "min"/"max" attributes are always present in
  // the op desc, even when Min/Max tensors are supplied (the unit test sets
  // dummy 0.f attrs in that case) — confirm against model exporters.
  param_.min = op_desc.GetAttr<float>("min");
  param_.max = op_desc.GetAttr<float>("max");
  return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(clip, paddle::lite::operators::ClipOpLite);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/operators/op_params.h"
#include "lite/utils/all.h"
namespace paddle {
namespace lite {
namespace operators {
// Operator definition for clip: out = min(max(x, min), max), element-wise.
class ClipOpLite : public OpLite {
 public:
  ClipOpLite() {}

  explicit ClipOpLite(const std::string &op_type) : OpLite(op_type) {}

  // Checks that required tensors are bound.
  bool CheckShape() const override;

  // Output shape/LoD mirror the input's.
  bool InferShapeImpl() const override;

  // Binds X/Out (required) and Min/Max (optional) plus min/max attributes.
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;

  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }

  std::string DebugString() const override { return "clip"; }

 private:
  // mutable: AttachImpl/InferShapeImpl are const but populate the param.
  mutable ClipParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -1602,6 +1602,15 @@ struct WhereIndexParam : ParamBase {
lite::Tensor* output{nullptr};
};
// Parameters for the clip op: out = min(max(x, min), max).
struct ClipParam : ParamBase {
  Tensor* x{};           // input tensor (required)
  Tensor* min_tensor{};  // optional one-element tensor overriding `min`
  Tensor* max_tensor{};  // optional one-element tensor overriding `max`
  Tensor* out{};         // output tensor; same dims/LoD as `x`
  float min{};           // lower bound from the "min" attribute
  float max{};           // upper bound from the "max" attribute
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -63,6 +63,7 @@ if(LITE_BUILD_EXTRA)
lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_ctc_align_compute SRCS ctc_align_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_clip_compute SRCS clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# for training kernel
if (LITE_WITH_TRAIN)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
namespace paddle {
namespace lite {
// Arena test case for the `clip` op: builds a 4-D input, runs the kernel
// under test, and compares against the element-wise baseline below.
class ClipComputeTester : public arena::TestCase {
 protected:
  // Scope variable names referenced by the op desc.
  std::string x_ = "x";
  std::string out_ = "out";
  std::string min_tensor_ = "min_tensor";
  std::string max_tensor_ = "max_tensor";
  float min_{};
  float max_{};
  // When true, min/max are fed through the optional "Min"/"Max" tensor
  // inputs and the scalar attributes are set to dummy zeros.
  bool use_minmax_tensor_{};
  DDim x_dims_;

 public:
  ClipComputeTester(const Place& place,
                    const std::string& alias,
                    int n,
                    int c,
                    int h,
                    int w,
                    float min,
                    float max,
                    bool use_minmax_tensor)
      : TestCase(place, alias) {
    x_dims_ = DDim(std::vector<int64_t>({n, c, h, w}));
    min_ = min;
    max_ = max;
    use_minmax_tensor_ = use_minmax_tensor;
  }

  // Reference implementation: clamp each element into [min_, max_].
  void RunBaseline(Scope* scope) override {
    auto* x = scope->FindTensor(x_);
    auto* out = scope->NewTensor(out_);
    CHECK(out);
    out->Resize(x->dims());
    const auto* x_data = x->data<float>();
    auto* out_data = out->mutable_data<float>();
    for (int i = 0; i < x->numel(); i++) {
      if (x_data[i] < min_)
        out_data[i] = min_;
      else if (x_data[i] > max_)
        out_data[i] = max_;
      else
        out_data[i] = x_data[i];
    }
  }

  // Fix: marked `override` for consistency with RunBaseline/PrepareData so
  // the compiler rejects any future signature drift against the base class.
  void PrepareOpDesc(cpp::OpDesc* op_desc) override {
    op_desc->SetType("clip");
    op_desc->SetInput("X", {x_});
    op_desc->SetOutput("Out", {out_});
    if (use_minmax_tensor_) {
      op_desc->SetInput("Min", {min_tensor_});
      op_desc->SetInput("Max", {max_tensor_});
      // The attributes must still exist (AttachImpl reads them
      // unconditionally); the tensors take precedence at run time.
      op_desc->SetAttr("min", 0.f);
      op_desc->SetAttr("max", 0.f);
    } else {
      op_desc->SetAttr("min", min_);
      op_desc->SetAttr("max", max_);
    }
  }

  void PrepareData() override {
    // Deterministic input covering both signs and values beyond [min_, max_].
    std::vector<float> x_data(x_dims_.production());
    for (int i = 0; i < x_dims_.production(); i++) {
      float sign = i % 3 == 0 ? -1.0f : 1.0f;
      x_data[i] = sign * static_cast<float>(i % 128) * 0.013f + 0.001;
    }
    SetCommonTensor(x_, x_dims_, x_data.data());
    if (use_minmax_tensor_) {
      std::vector<float> min_data = {min_};
      SetCommonTensor(
          min_tensor_, DDim(std::vector<int64_t>({1})), min_data.data());
      std::vector<float> max_data = {max_};
      SetCommonTensor(
          max_tensor_, DDim(std::vector<int64_t>({1})), max_data.data());
    }
  }
};
// Sweeps a grid of NCHW shapes, with min/max given either as attributes or
// as the optional Min/Max tensor inputs, and checks kernel vs. baseline.
TEST(Clip, precision) {
  LOG(INFO) << "test clip op";
#ifdef LITE_WITH_ARM
  Place place(TARGET(kARM));
  const float min_val = -1;
  const float max_val = 1;
  for (int batch : {1, 3}) {
    for (int channel : {3, 5}) {
      for (int height : {5, 6}) {
        for (int width : {6, 7}) {
          for (bool with_minmax_tensor : {true, false}) {
            std::unique_ptr<arena::TestCase> tester(
                new ClipComputeTester(place,
                                      "def",
                                      batch,
                                      channel,
                                      height,
                                      width,
                                      min_val,
                                      max_val,
                                      with_minmax_tensor));
            arena::Arena arena(std::move(tester), place, 2e-5);
            arena.TestPrecision();
          }
        }
      }
    }
  }
#endif
}
} // namespace lite
} // namespace paddle
Markdown is supported.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment.