From 6fa059b21091efbc64ca6997200640e06231b78b Mon Sep 17 00:00:00 2001
From: hanbuhe
Date: Mon, 13 Aug 2018 17:19:29 +0800
Subject: [PATCH] added test_tensor_quant executable

---
 src/fpga/fpga_quantilization.cpp            | 48 +++++++++++++--------
 src/operators/kernel/fpga/concat_kernel.cpp |  4 +-
 src/operators/kernel/fpga/conv_kernel.cpp   | 38 ++++++++++++++++
 test/CMakeLists.txt                         |  8 ++++
 test/fpga/test_tensor_quant.cpp             |  2 +-
 5 files changed, 79 insertions(+), 21 deletions(-)
 create mode 100644 src/operators/kernel/fpga/conv_kernel.cpp

diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
index 5bbf4f254d..34033a60a6 100644
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
@@ -37,45 +37,57 @@ static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
   }
 }
 
+template <typename Dtype>
+static Dtype find_max(Dtype* data, int num) {
+  Dtype max = 0;
+  for (int i = 0; i < num; ++i) {
+    max = std::max(max, data[i]);
+  }
+  return max;
+}
+
 template <typename Dtype>
 framework::Tensor* quantilize_filter(framework::Tensor* filter) {
   float scale = 0;
-  float max = 0f;
+  float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
 
   const int batch_size = filter->dims()[0];
   const int channel = filter->dims()[1];
   const int height = filter->dims()[2];
   const int width = filter->dims()[3];
 
+  int8_t* int_data = nullptr;
+  int8_t* tmp_data = new int8_t[filter->numel()];
+
   // 32bit filter -> 8bit filter;
   if (filter->type() == typeid(float)) {
     float* float_data = filter->data<float>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      max = std::max(max, float_data[i]);
-    }
+    float max = find_max<float>(float_data, filter->numel());
 
-    float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
-    float float_range = max;
-    scale = (float_range / fix_range);
+    scale = (max / fix_range);
 
-    framework::Tensor* filter = filter;
     framework::Tensor* quant_filter = new framework::Tensor();
-    int8_t* temp = new int8_t[filter->numel()];
-    int8_t* int_data = quant_filter->mutable_data<int8_t>();
+
+    int_data = quant_filter->mutable_data<int8_t>(filter->dims());
     for (int i = 0; i < filter->numel(); ++i) {
-      temp[i] = (int8_t)float_data[i] * scale;
+      tmp_data[i] = static_cast<int8_t>(float_data[i] / scale);
     }
-    quant_filter.scale = scale;
-    // NCHW -> NHWC;
-    chw_to_hwc(temp, int_data, in_batch_size, channel, height, width);
-    return quantFilter;
-  } else if (filter->type() == typeid(int8_t)) {
-    // model is already quantilized
-    int8_t* int_data = filter->data<int8_t>();
+    filter = quant_filter;
+  } else {
+    int8_t max = find_max<int8_t>(filter->data<int8_t>(), filter->numel());
+    scale = (max / fix_range);
+
+    int_data = filter->data<int8_t>();
     for (int i = 0; i < filter->numel(); ++i) {
-      max = std::max(max, int_data[i]);
+      tmp_data[i] = int_data[i];
     }
+    int_data = filter->mutable_data<int8_t>();
   }
+  // NCHW -> NHWC;
+  chw_to_hwc(tmp_data, int_data, batch_size, channel, height, width);
+  delete[] tmp_data;
+  *(filter->fpga_args().scale_pointer()) = scale;
   return filter;
 }
diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp
index ae1270b146..c6e04787a5 100644
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -36,18 +36,18 @@ void ConcatKernel<FPGA, float>::Compute(const ConcatParam &param) const {
     auto out_channel = out_dim[3];
 
     auto out_offset = 0;
-
     for (int i = 0; i < inputs.size(); ++i) {
       auto input = inputs[i];
       auto channels = input->dims()[3];
       out_offset += channels;
       auto src = input->data<half>();
       for (int j = 0; j < pixels; ++j) {
-        auto dst = out->data<half>() + out_offset;
+        auto dst = out->mutable_data<half>() + out_offset;
         memory::Copy(dst, src, sizeof(half));
       }
     }
   }
+template class ConcatKernel<FPGA, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/fpga/conv_kernel.cpp
new file mode 100644
index 0000000000..91d0f393fc
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef CONV_OP
+
+#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
+  return true;
+}
+
+template <>
+void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {
+  // ConvCompute<float>(param);
+}
+
+template class ConvKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ee1d12bfd6..f4a14f1bc4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -160,4 +160,12 @@ else ()
 
     #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
+
+
 endif()
+
+if(FPGA)
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+endif()
diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp
index 1e30b9be55..3835c395a4 100644
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -20,7 +20,7 @@ int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
   bool optimize = false;
   if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time2 = time();
+    auto time1 = time();
     DLOG << "load cost: " << time_diff(time1, time1) << "ms";
     std::vector<float> input;
     std::vector<int64_t> dims{1, 3, 224, 224};
-- 
GitLab
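
For reference, below is a minimal standalone sketch (not part of the patch) of the symmetric int8 scheme that quantilize_filter implements, assuming the stored scale is meant as the dequantization multiplier, as the old `float_range / fix_range` formula suggests: scale = max / 127 is saved alongside the tensor, and values are quantized by dividing by that scale so that +-max maps to +-127. The helper find_abs_max and the sample values are illustrative only; it uses an absolute-value maximum, whereas the patch's find_max tracks only the signed maximum and would under-scale a filter whose largest magnitude is negative.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative helper (not in the patch): absolute-value maximum of an array.
static float find_abs_max(const float* data, int num) {
  float max = 0.0f;
  for (int i = 0; i < num; ++i) {
    max = std::max(max, std::abs(data[i]));
  }
  return max;
}

int main() {
  const float fix_range = (1 << (8 - 1)) - 1;  // 127, the int8 maximum
  std::vector<float> filter = {0.5f, -1.25f, 2.0f, -0.75f};  // made-up weights

  // scale is the dequantization multiplier: float ~= int8 * scale.
  float max = find_abs_max(filter.data(), static_cast<int>(filter.size()));
  float scale = max / fix_range;

  // Quantize by dividing by scale, so +-max maps to +-127.
  std::vector<int8_t> quant(filter.size());
  for (size_t i = 0; i < filter.size(); ++i) {
    quant[i] = static_cast<int8_t>(std::round(filter[i] / scale));
  }

  for (size_t i = 0; i < filter.size(); ++i) {
    std::cout << filter[i] << " -> " << static_cast<int>(quant[i])
              << " (dequantized: " << quant[i] * scale << ")\n";
  }
  return 0;
}

With these sample values, max = 2.0 and scale is roughly 0.0157, so 2.0 quantizes to 127 and -1.25 to -79; a consumer of the filter can multiply the int8 data by the stored scale to recover approximate float magnitudes.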