From 0cfbd26640f07b9e0d1f8ff168c5b139b14723fb Mon Sep 17 00:00:00 2001
From: juncaipeng <52520497+juncaipeng@users.noreply.github.com>
Date: Wed, 28 Aug 2019 19:27:52 +0800
Subject: [PATCH] Modify cast op and remove warning in argmax_test (#1894)

* modify cast op, test=develop

* modify cast op and remove warning in argmax_test, test=develop
---
 lite/kernels/arm/cast_compute.cc          | 16 +++-
 lite/operators/CMakeLists.txt             |  2 +-
 lite/tests/kernels/CMakeLists.txt         |  1 +
 lite/tests/kernels/argmax_compute_test.cc |  2 +-
 lite/tests/kernels/cast_compute_test.cc   | 89 +++++++++++++++++++++++
 5 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 lite/tests/kernels/cast_compute_test.cc
diff --git a/lite/kernels/arm/cast_compute.cc b/lite/kernels/arm/cast_compute.cc
index ad4cc82d3a..e67151650a 100644
--- a/lite/kernels/arm/cast_compute.cc
+++ b/lite/kernels/arm/cast_compute.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "lite/kernels/arm/cast_compute.h"
+#include <algorithm>
 #include "lite/arm/math/funcs.h"
 
 namespace paddle {
@@ -20,6 +21,11 @@ namespace lite {
 namespace kernels {
 namespace arm {
 
+template <class in_type, class out_type>  // unary op for std::transform
+out_type TransOp(in_type in) {
+  return static_cast<out_type>(in);  // cast to the destination type
+}
+
 void CastCompute::PrepareForRun() {}
 
 void CastCompute::Run() {
@@ -28,11 +34,17 @@ void CastCompute::Run() {
 
   auto input_dims = param.X->dims();
 
-  if (param.in_dtype == param.out_dtype && param.in_dtype == 2 ||
-      param.in_dtype == 0) {
+  // BOOL = 0;INT16 = 1;INT32 = 2;INT64 = 3;FP16 = 4;FP32 = 5;FP64 = 6;
+  // SIZE_T = 19;UINT8 = 20;INT8 = 21;
+  if (param.in_dtype == param.out_dtype && param.in_dtype == 2) {
     const auto* x_data = param.X->data<float>();
     auto* o_data = param.Out->mutable_data<float>();
     memcpy(o_data, x_data, sizeof(float) * param.X->numel());
+  } else if (param.in_dtype == 21 && param.out_dtype == 5) {  // int8->float32
+    const char* x_data_begin = param.X->data<char>();
+    const char* x_data_end = x_data_begin + param.X->numel();
+    float* out_data = param.Out->mutable_data<float>();
+    std::transform(x_data_begin, x_data_end, out_data, TransOp<char, float>);
   } else {
     LOG(FATAL) << "other has not been implemented";
   }
diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt
index 89185beaf7..81d4d05e36 100644
--- a/lite/operators/CMakeLists.txt
+++ b/lite/operators/CMakeLists.txt
@@ -59,6 +59,7 @@ add_operator(shape_op_lite basic SRCS shape_op.cc DEPS ${op_DEPS})
 add_operator(sequence_expand_op_lite basic SRCS sequence_expand_op.cc DEPS ${op_DEPS})
 add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS})
 add_operator(im2sequence_op basic SRCS im2sequence_op.cc DEPS ${op_DEPS})
+add_operator(cast_op_lite basic SRCS cast_op.cc DEPS ${op_DEPS})
 add_operator(assign_op basic SRCS assign_op.cc DEPS ${op_DEPS})
 
 # for OCR specific
@@ -81,7 +82,6 @@ add_operator(beam_search_op extra SRCS beam_search_op.cc DEPS ${op_DEPS})
 add_operator(sequence_pool_op_lite extra SRCS sequence_pool_op.cc DEPS ${op_DEPS})
 add_operator(lod_reset_op extra SRCS lod_reset_op.cc DEPS ${op_DEPS})
 add_operator(is_empty extra SRCS is_empty_op.cc DEPS ${op_DEPS})
-add_operator(cast_op_lite extra SRCS cast_op.cc DEPS ${op_DEPS})
 add_operator(slice_op_lite extra SRCS slice_op.cc DEPS ${op_DEPS})
 add_operator(write_to_array_op extra SRCS write_to_array_op.cc DEPS ${op_DEPS})
 add_operator(topk_op extra SRCS topk_op.cc DEPS ${op_DEPS})
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
index de3a95dea6..559f7ee76b 100644
--- a/lite/tests/kernels/CMakeLists.txt
+++ b/lite/tests/kernels/CMakeLists.txt
@@ -13,6 +13,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
     lite_cc_test(test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_conv2d_transpose_compute SRCS conv2d_transpose_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     #lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     #lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
diff --git a/lite/tests/kernels/argmax_compute_test.cc b/lite/tests/kernels/argmax_compute_test.cc
index 5323259dff..49cbd91071 100644
--- a/lite/tests/kernels/argmax_compute_test.cc
+++ b/lite/tests/kernels/argmax_compute_test.cc
@@ -43,7 +43,7 @@ class ArgmaxComputeTester : public arena::TestCase {
   void RunBaseline(Scope* scope) override {
     auto* out = scope->NewTensor(output_);
     CHECK(out);
-    int nchw[] = {dims_[0], dims_[1], dims_[2], dims_[3]};
+    int64_t nchw[] = {dims_[0], dims_[1], dims_[2], dims_[3]};
     std::vector<int64_t> output_shape(nchw, nchw + 4);
     output_shape.erase(output_shape.begin() + axis_);
     DDim output_dims(output_shape);
diff --git a/lite/tests/kernels/cast_compute_test.cc b/lite/tests/kernels/cast_compute_test.cc
new file mode 100644
index 0000000000..f000ea1d71
--- /dev/null
+++ b/lite/tests/kernels/cast_compute_test.cc
@@ -0,0 +1,89 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/core/arena/framework.h"
+
+namespace paddle {
+namespace lite {
+
+// Arena test case for the "cast" op, converting an int8 tensor to float32.
+class CastComputeTester : public arena::TestCase {
+ protected:
+  // common attributes for this op.
+  std::string input_ = "x";
+  std::string output_ = "out";
+  int in_dtype_ = 21;  // INT8
+  int out_dtype_ = 5;  // FP32
+  DDim x_dims_{{2, 2, 2, 2}};
+
+ public:
+  CastComputeTester(const Place& place, const std::string& alias)
+      : TestCase(place, alias) {}
+
+  // Reference result: each input element converted with static_cast<float>.
+  void RunBaseline(Scope* scope) override {
+    auto* out = scope->NewTensor(output_);
+    CHECK(out);
+    out->Resize(x_dims_);
+    auto* output_data = out->mutable_data<float>();
+
+    auto* x = scope->FindTensor(input_);
+    CHECK(x);
+    const auto* x_data = x->data<char>();
+
+    const int64_t numel = x_dims_.production();
+    for (int64_t i = 0; i < numel; i++) {
+      output_data[i] = static_cast<float>(x_data[i]);
+    }
+  }
+
+  // Describes the op under test: X -> Out with the in/out dtype attributes.
+  void PrepareOpDesc(cpp::OpDesc* op_desc) {
+    op_desc->SetType("cast");
+    op_desc->SetInput("X", {input_});
+    op_desc->SetOutput("Out", {output_});
+    op_desc->SetAttr("in_dtype", in_dtype_);
+    op_desc->SetAttr("out_dtype", out_dtype_);
+  }
+
+  // Fills the input with small values of mixed sign (every third is negated).
+  void PrepareData() override {
+    const int64_t numel = x_dims_.production();
+    std::vector<char> x_data(numel);
+    for (int64_t i = 0; i < numel; i++) {
+      // Stay in integer arithmetic: converting a negative float to char is
+      // undefined behaviour on targets where plain char is unsigned.
+      const int magnitude = static_cast<int>(i % 128);
+      x_data[i] = static_cast<char>(i % 3 == 0 ? -magnitude : magnitude);
+    }
+    SetCommonTensor(input_, x_dims_, x_data.data());
+  }
+};
+
+TEST(Cast, precision) {
+  LOG(INFO) << "test cast op";
+#ifdef LITE_WITH_ARM  // the precision check below is only built for ARM
+  Place place(TARGET(kARM));
+  // The tester is moved into Arena; 2e-5 is presumably the error tolerance.
+  std::unique_ptr<arena::TestCase> tester(new CastComputeTester(place, "def"));
+  arena::Arena arena(std::move(tester), place, 2e-5);
+  arena.TestPrecision();
+#endif
+}
+
+}  // namespace lite
+}  // namespace paddle
-- 
GitLab