Merge branch 'shixiaowei02/calib' into 'incubate/lite'

add calib kernel See merge request inference/paddlelite!34

Merge branch 'shixiaowei02/calib' into 'incubate/lite'
add calib kernel See merge request inference/paddlelite!34
dfbc4b50 · shixiaowei02 · d3c0d4be · 32e977af · dfbc4b50 · d3c0d4be
12 changed files
--- a/paddle/fluid/lite/arm/math/CMakeLists.txt
+++ b/paddle/fluid/lite/arm/math/CMakeLists.txt
@@ -16,7 +16,7 @@ cc_library(math_arm SRCS
    elementwise.cc
    concat.cc
    sgemv.cc
-    type_trans.cpp
+    type_trans.cc
    conv_impl.cc
    conv_direct_3x3s1.cc
    conv_direct_3x3s2.cc

--- a/paddle/fluid/lite/arm/math/type_trans.cpp
+++ b/paddle/fluid/lite/arm/math/type_trans.cpp
--- a/paddle/fluid/lite/core/op_registry.cc
+++ b/paddle/fluid/lite/core/op_registry.cc
@@ -96,6 +96,7 @@ KernelRegistry::KernelRegistry()
  INIT_FOR(kX86, kAny, kAny);

  INIT_FOR(kARM, kFloat, kNCHW);
+  INIT_FOR(kARM, kInt8, kNCHW);
  INIT_FOR(kARM, kAny, kNCHW);
  INIT_FOR(kARM, kAny, kAny);
 #undef INIT_FOR

--- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt
@@ -16,6 +16,7 @@ cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_a
 cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm)
 cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm)
 cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm)
+cc_library(calib_compute_arm SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm)
 cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)

 lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm)
@@ -30,6 +31,7 @@ lite_cc_test(test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm)
 lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm)
 lite_cc_test(test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm)
 lite_cc_test(test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm)
+lite_cc_test(test_calib_compute_arm SRCS calib_compute_test.cc DEPS calib_compute_arm)
 lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm)

 set(arm_kernels

--- a/paddle/fluid/lite/kernels/arm/calib_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/calib_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/kernels/arm/calib_compute.h"
+#include <vector>
+#include "paddle/fluid/lite/arm/math/type_trans.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Converts param.input into param.output, switching between float32 and
+// int8 according to param.in_dtype / param.out_dtype.
+//
+//   kFloat -> kInt8 : lite::arm::math::fp32_to_int8
+//   kInt8  -> kFloat: lite::arm::math::int8_to_fp32
+//
+// Every other dtype combination is reported through LOG(FATAL).
+void CalibCompute::Run() {
+  auto& param = this->Param<operators::CalibParam>();
+  // A single scale is shared by all numel() elements. Pointing the math
+  // routines at a local avoids building a one-element std::vector (and its
+  // heap allocation) on every kernel invocation.
+  const float scale = param.in_scale;
+  if (param.in_dtype == PRECISION(kFloat) &&
+      param.out_dtype == PRECISION(kInt8)) {
+    const auto* din = param.input->data<float>();
+    auto* dout = param.output->mutable_data<signed char>();
+    lite::arm::math::fp32_to_int8(din, dout, &scale, 1, 1,
+                                  param.input->numel());
+    return;
+  }
+  if (param.in_dtype == PRECISION(kInt8) &&
+      param.out_dtype == PRECISION(kFloat)) {
+    const auto* din = param.input->data<signed char>();
+    auto* dout = param.output->mutable_data<float>();
+    lite::arm::math::int8_to_fp32(din, dout, &scale, 1, 1,
+                                  param.input->numel());
+    return;
+  }
+  LOG(FATAL) << "Unsupport Dtype.";
+}
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers CalibCompute as the "calib" kernel for target kARM, precision
+// kInt8 and layout kNCHW under the alias `def`. The op's "Input" and "Out"
+// slots are both bound to ARM tensor types.
+REGISTER_LITE_KERNEL(calib, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::CalibCompute, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/calib_compute.h
+++ b/paddle/fluid/lite/kernels/arm/calib_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/operators/calib_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// ARM kernel of the calib op, declared with int8 precision. It carries no
+// state of its own; Run() is defined in calib_compute.cc.
+class CalibCompute : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::CalibParam;
+
+  ~CalibCompute() override = default;
+
+  void Run() override;
+};
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/paddle/fluid/lite/kernels/arm/calib_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/calib_compute_test.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/kernels/arm/calib_compute.h"
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <random>
+#include <utility>
+#include <vector>
+#include "paddle/fluid/lite/arm/math/funcs.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Returns start + (rand() % (end - start)), i.e. a pseudo-random integer in
+// [start, end) when end > start. Callers must not pass end == start, which
+// would divide by zero.
+static int get_rand(int start, int end) {
+  const int span = end - start;
+  const int draw = rand();  // NOLINT
+  return (draw % span) + start;
+}
+
+// Reference int8 -> float32 dequantization.
+//
+// The input is read as (axis_size * outer_size) consecutive rows of
+// inner_size elements each. Row r is multiplied by scale[r % axis_size] and
+// written to the matching position of dout.
+static void int8_to_fp32_basic(const int8_t* din, float* dout,
+                               const float* scale, int axis_size,
+                               int64_t outer_size, int64_t inner_size) {
+  const int row_count = axis_size * outer_size;
+  for (int row = 0; row < row_count; ++row) {
+    const float row_scale = scale[row % axis_size];
+    const int8_t* src = din + row * inner_size;
+    float* dst = dout + row * inner_size;
+    for (int col = 0; col < inner_size; ++col) {
+      dst[col] = src[col] * row_scale;
+    }
+  }
+}
+
+// Reference float32 -> int8 quantization.
+//
+// The input is read as (axis_size * outer_size) consecutive rows of
+// inner_size elements each. Every element of row r is divided by
+// scale[r % axis_size], rounded with roundf() and stored as int8.
+//
+// Results outside the int8 range saturate to [-128, 127]; converting an
+// out-of-range float straight to int8_t is undefined behaviour. Values that
+// already fit are unaffected by the clamp.
+static void fp32_to_int8_basic(const float* din, int8_t* dout,
+                               const float* scale, int axis_size,
+                               int64_t outer_size, int64_t inner_size) {
+  int loop_size = axis_size * outer_size;
+  for (int i = 0; i < loop_size; ++i) {
+    float inv_scale = 1.f / scale[i % axis_size];
+    // int64_t index: inner_size is 64-bit, so an int counter could wrap.
+    for (int64_t j = 0; j < inner_size; ++j) {
+      const float rounded = roundf(din[j] * inv_scale);
+      const float clamped = std::min(127.f, std::max(-128.f, rounded));
+      dout[j] = static_cast<int8_t>(clamped);
+    }
+    dout += inner_size;
+    din += inner_size;
+  }
+}
+
+// Reference implementation of the calib kernel, built on the *_basic helpers
+// in this file. It handles the same two dtype pairs (float -> int8 and
+// int8 -> float), with one scale shared by all numel() elements, and reports
+// anything else through LOG(FATAL).
+void calib_ref(const operators::CalibParam& param) {
+  const float scale = param.in_scale;
+  const bool float_to_int8 = param.in_dtype == PRECISION(kFloat) &&
+                             param.out_dtype == PRECISION(kInt8);
+  const bool int8_to_float = param.in_dtype == PRECISION(kInt8) &&
+                             param.out_dtype == PRECISION(kFloat);
+  if (float_to_int8) {
+    const auto* src = param.input->data<float>();
+    auto* dst = param.output->mutable_data<signed char>();
+    fp32_to_int8_basic(src, dst, &scale, 1, 1, param.input->numel());
+  } else if (int8_to_float) {
+    const auto* src = param.input->data<signed char>();
+    auto* dst = param.output->mutable_data<float>();
+    int8_to_fp32_basic(src, dst, &scale, 1, 1, param.input->numel());
+  } else {
+    LOG(FATAL) << "Unsupport Dtype.";
+  }
+}
+
+// The registry must hand back at least one non-null "calib" kernel for
+// ARM / int8 / NCHW.
+TEST(calib_arm, retrive_op) {
+  auto kernels =
+      KernelRegistry::Global()
+          .Create<TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)>("calib");
+  ASSERT_FALSE(kernels.empty());
+  ASSERT_TRUE(kernels.front());
+}
+
+// A default-constructed kernel reports the precision and target it was
+// declared with.
+TEST(calib_arm, init) {
+  CalibCompute kernel;
+  ASSERT_EQ(kernel.precision(), PRECISION(kInt8));
+  ASSERT_EQ(kernel.target(), TARGET(kARM));
+}
+
+// Runs the ARM calib kernel in the int8 -> float32 direction over several
+// NCHW shapes and compares each element with calib_ref().
+TEST(calib_arm, int8_to_fp32) {
+  DeviceInfo::Init();
+  for (auto n : {1, 2}) {
+    for (auto c : {6, 32 /*, 128*/}) {
+      for (auto h : {9, 18 /*, 56 , 112, 224, 512*/}) {
+        for (auto w : {9, 18 /*, 56, 112, 224, 512*/}) {
+          Tensor x;
+          Tensor output;
+          Tensor output_ref;
+          // set the dims of input, output, ref output tensors
+          x.Resize({n, c, h, w});
+          output.Resize({n, c, h, w});
+          output_ref.Resize({n, c, h, w});
+          // Initialize the input with real int8 values in [-127, 127].
+          // The buffer is accessed as `signed char`, matching what the
+          // kernel reads; plain `char` may be unsigned, which makes storing
+          // negative values ill-defined.
+          auto* x_data = x.mutable_data<signed char>();
+          auto* output_data = output.mutable_data<float>();
+          for (int i = 0; i < x.dims().production(); i++) {
+            const int sign = i % 3 == 0 ? -1 : 1;
+            x_data[i] = static_cast<signed char>(sign * (i % 128));
+          }
+          // prepare kernel params and run
+          CalibCompute calib;
+          std::unique_ptr<KernelContext> ctx(new KernelContext);
+          ctx->As<ARMContext>();
+          calib.SetContext(std::move(ctx));
+          operators::CalibParam param;
+          // Start at 1 so the scale is never 0; a zero scale would turn
+          // every output into 0 and make the comparison meaningless.
+          param.in_scale = get_rand(1, 100) * 0.1f;
+          param.in_dtype = PRECISION(kInt8);
+          param.out_dtype = PRECISION(kFloat);
+          param.input = &x;
+          param.output = &output;
+          calib.SetParam(param);
+          calib.Launch();
+          // invoking ref implementation and compare results
+          param.output = &output_ref;
+          calib_ref(param);
+          auto* output_ref_data = output_ref.mutable_data<float>();
+          for (int i = 0; i < output.dims().production(); i++) {
+            EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// References the calib kernel (kARM / kInt8 / kNCHW, alias `def`) from this
+// test binary. NOTE(review): presumably this keeps the kernel registration
+// linked in so KernelRegistry can find it; confirm against the macro.
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, def);
--- a/paddle/fluid/lite/operators/CMakeLists.txt
+++ b/paddle/fluid/lite/operators/CMakeLists.txt
@@ -21,6 +21,7 @@ cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS})
 cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite)
 cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS})
 cc_library(concat_op_lite SRCS concat_op.cc DEPS ${op_DEPS})
+cc_library(calib_op_lite SRCS calib_op.cc DEPS ${op_DEPS})
 cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS})
 cc_library(transpose_op_lite SRCS transpose_op.cc DEPS ${op_DEPS})
 cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS})
@@ -46,6 +47,7 @@ set(ops_lite
        activation_ops_lite
        dropout_op_lite
        concat_op_lite
+        calib_op_lite
        split_op_lite
        transpose_op_lite
        fake_quant
@@ -64,6 +66,7 @@ lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite m
 lite_cc_test(test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite)
 lite_cc_test(test_batch_norm_op_lite SRCS batch_norm_op_test.cc DEPS batch_norm_op_lite memory_lite)
 lite_cc_test(test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite)
+lite_cc_test(test_calib_op_lite SRCS calib_op_test.cc DEPS calib_op_lite memory_lite ARM_DEPS calib_compute_arm)
 lite_cc_test(test_fusion_elementwise_activation_ops_lite 
             SRCS fusion_elementwise_activation_ops_test.cc 
             DEPS fusion_elementwise_activation_ops_lite memory_lite)

--- a/paddle/fluid/lite/operators/calib_op.cc
+++ b/paddle/fluid/lite/operators/calib_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/operators/calib_op.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+// Checks that both the input and the output tensor pointers are set.
+// Returns true when both CHECK_OR_FALSE conditions hold.
+bool CalibOpLite::CheckShape() const {
+  CHECK_OR_FALSE(param_.input);
+  CHECK_OR_FALSE(param_.output);
+  return true;
+}
+// The conversion is element-wise, so the output simply takes the input's
+// dims. Always returns true.
+bool CalibOpLite::InferShape() const {
+  const auto& input_dims = param_.input->dims();
+  param_.output->Resize(input_dims);
+  return true;
+}
+
+// Fills param_ from the op description and the scope.
+//
+// Tensors come from the variables named by input slot "Input" and output
+// slot "Out". The int attributes "in_dtype" and "out_dtype" are read
+// unconditionally and cast to lite::PrecisionType. The float attribute
+// "in_scale" is optional: when it is absent, param_.in_scale keeps whatever
+// value it already had.
+//
+// Always returns true; missing variables or tensors are reported via CHECK.
+bool CalibOpLite::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
+  auto x_var = scope->FindVar(opdesc.Input("Input").front());
+  auto output_var = scope->FindVar(opdesc.Output("Out").front());
+  CHECK(x_var);
+  CHECK(output_var);
+  // CalibParam::input is a pointer-to-const, so no const_cast is required.
+  param_.input = &(x_var->Get<lite::Tensor>());
+  param_.output = output_var->GetMutable<lite::Tensor>();
+  param_.in_dtype =
+      static_cast<lite::PrecisionType>(opdesc.GetAttr<int>("in_dtype"));
+  param_.out_dtype =
+      static_cast<lite::PrecisionType>(opdesc.GetAttr<int>("out_dtype"));
+  if (opdesc.HasAttr("in_scale")) {
+    param_.in_scale = opdesc.GetAttr<float>("in_scale");
+  }
+  CHECK(param_.input) << "Input(X) of CalibOp should not be null.";
+  CHECK(param_.output) << "Output(Out) of CalibOp should not be null.";
+  return true;
+}
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+// Registers CalibOpLite under the op type name "calib".
+REGISTER_LITE_OP(calib, paddle::lite::operators::CalibOpLite);
--- a/paddle/fluid/lite/operators/calib_op.h
+++ b/paddle/fluid/lite/operators/calib_op.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "paddle/fluid/lite/core/compatible_tensor.h"
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_lite.h"
+#include "paddle/fluid/lite/core/scope.h"
+#include "paddle/fluid/lite/operators/op_params.h"
+#include "paddle/fluid/lite/utils/all.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+/*
+ * The data types used by the two adjacent layers in the model should
+ * be the same. When the two operators accept different data types,
+ * we may need to implicitly add a data type conversion operator.
+ * Currently, this operator only supports mutual conversion of int8
+ * and float32 types.
+ */
+// Operator wrapper for "calib": holds a CalibParam, fills it in AttachImpl()
+// and hands it to the selected kernel in AttachKernel().
+class CalibOpLite : public OpLite {
+ public:
+  CalibOpLite() {}
+
+  explicit CalibOpLite(const std::string &type) : OpLite(type) {}
+
+  // Verifies that the input and output tensors are attached.
+  bool CheckShape() const override;
+
+  // Resizes the output to the input's dims.
+  bool InferShape() const override;
+
+  // Marked `override` like the other virtuals here, so a signature drift
+  // from OpLite::AttachImpl becomes a compile error instead of silently
+  // declaring a new, never-called function.
+  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
+
+  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
+
+  std::string DebugString() const override { return "calib"; }
+
+ private:
+  // mutable: InferShape() is const but resizes the tensor behind
+  // param_.output.
+  mutable CalibParam param_;
+};
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
--- a/paddle/fluid/lite/operators/calib_op_test.cc
+++ b/paddle/fluid/lite/operators/calib_op_test.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/operators/calib_op.h"
+#include <gtest/gtest.h>
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+#ifdef LITE_WITH_ARM
+// Attaches a calib op to a small scope and checks that an ARM int8 kernel
+// can be created for it. The kernel itself is not executed here.
+TEST(calib_op_lite, TestARM) {
+  // Variables backing the op's input and output.
+  Scope scope;
+  auto* x = scope.Var("Input")->GetMutable<Tensor>();
+  auto* output = scope.Var("output")->GetMutable<Tensor>();
+  x->Resize(DDim(std::vector<int64_t>{1, 10, 20}));
+  output->Resize(DDim(std::vector<int64_t>{1, 10, 20}));
+
+  // Fill the input with 0, 1, 2, ... and zero the output.
+  const int element_count = 10 * 20;
+  auto* x_values = x->mutable_data<float>();
+  for (int idx = 0; idx < element_count; ++idx) {
+    x_values[idx] = idx;
+  }
+  auto* output_values = output->mutable_data<float>();
+  for (int idx = 0; idx < element_count; ++idx) {
+    output_values[idx] = 0.;
+  }
+
+  // Describe an int8 -> float calib op with scale 10.
+  cpp::OpDesc desc;
+  desc.SetType("calib");
+  desc.SetInput("Input", {"Input"});
+  desc.SetOutput("Out", {"output"});
+  desc.SetAttr("in_dtype", static_cast<int>(PRECISION(kInt8)));
+  desc.SetAttr("out_dtype", static_cast<int>(PRECISION(kFloat)));
+  desc.SetAttr("in_scale", 10.0f);
+
+  CalibOpLite op("calib");
+
+  op.SetValidPlaces({Place{TARGET(kARM), PRECISION(kInt8)}});
+  op.Attach(desc, &scope);
+  auto created = op.CreateKernels({Place{TARGET(kARM), PRECISION(kInt8)}});
+  ASSERT_FALSE(created.empty());
+}
+#endif
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+// Only ARM builds reference the calib kernel (kARM / kInt8 / kNCHW, alias
+// `def`), matching the LITE_WITH_ARM guard around the test above.
+#ifdef LITE_WITH_ARM
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, def);
+#endif
--- a/paddle/fluid/lite/operators/op_params.h
+++ b/paddle/fluid/lite/operators/op_params.h
@@ -48,6 +48,14 @@ struct IoCopyParam {
  lite::Tensor* y{};
 };

+// Parameters of the calib (precision conversion) op.
+//
+// Every member has a default so a freshly constructed CalibParam never holds
+// indeterminate values; in particular the op only assigns in_scale when the
+// "in_scale" attribute is present.
+struct CalibParam {
+  const lite::Tensor* input{};
+  lite::Tensor* output{};
+  // Scale applied during conversion; 1.0 leaves magnitudes unchanged.
+  float in_scale{1.f};
+  // Value-initialized (underlying value 0) until set by the op or a caller.
+  PrecisionType in_dtype{};
+  PrecisionType out_dtype{};
+};
+
 /// -------------------------- NN operators ------------------------------------

 struct FcParam {