added test_tensor_quant executable

6fa059b2 · hanbuhe · a5042501 · 6fa059b2 · 6fa059b2 · 6fa059b2
5 changed files
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
@@ -37,45 +37,57 @@ static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
  }
 }

+template <typename Dtype>
+static Dtype find_max(Dtype* data, int num) {
+  Dtype max = 0;
+  for (int i = 0; i < num; ++i) {
+    max = std::max(max, data[i]);
+  }
+  return max;
+}
+
 template <typename Dtype>
 framework::Tensor* quantilize_filter(framework::Tensor* filter) {
  float scale = 0;
-  float max = 0f;
+  float fix_range = static_cast<float>((1 << (8 - 1)) - 1);

  const int batch_size = filter->dims()[0];
  const int channel = filter->dims()[1];
  const int height = filter->dims()[2];
  const int width = filter->dims()[3];

+  int8_t* int_data = nullptr;
+  int8_t* tmp_data = new int[filter->numel()];
+
  // 32bit filter -> 8bit filter;
  if (filter->type() == typeid(float)) {
    float* float_data = filter->data<float>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      max = std::max(max, float_data[i]);
-    }
+    float max = find_max(float_data, filter->numel());

-    float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
-    float float_range = max;
-    scale = (float_range / fix_range);
+    scale = (max / fix_range);

    framework::Tensor* filter = filter;
    framework::Tensor* quant_filter = new framework::Tensor();
-    int8_t* temp = new int8_t[filter->numel()];
-    int8_t* int_data = quant_filter->mutable_data<int8_t>();
+
+    int_data = quant_filter->mutable_data<int8_t>();
    for (int i = 0; i < filter->numel(); ++i) {
-      temp[i] = (int8_t)float_data[i] * scale;
+      tmp_data[i] = (int8_t)float_data[i] * scale;
    }
-    quant_filter.scale = scale;
-    // NCHW -> NHWC;
-    chw_to_hwc<int8_t>(temp, int_data, in_batch_size, channel, height, width);
-    return quantFilter;
-  } else if (filter->type() == typeid(int8_t)) {
-    // model is already quantilized
-    int8_t* int_data = filter->data<int8_t>();
+    filter = quant_filter;
+  } else {
+    int8_t max = find_max(filter->data<int8_t>(), filter->numel());
+    scale = (max / fix_range);
+
+    int_data = filter->data<int8_t>();
    for (int i = 0; i < filter->numel(); ++i) {
-      max = std::max(max, int_data[i]);
+      tmp_data[i] = int_data[i];
    }
+    int_data = filter->mutable_data<int8_t>();
  }
+  // NCHW -> NHWC;
+  chw_to_hwc<int8_t>(tmp_data, int_data, batch_size, channel, height, width);
+  delete tmp_data;
+  *(filter->fpga_args().scale_pointer()) = scale;
  return filter;
 }


--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -36,18 +36,18 @@ void ConcatKernel<FPGA, float>::Compute(const ConcatParam &param) const {
  auto out_channel = out_dim[3];

  auto out_offset = 0;
-
  for (int i = 0; i < inputs.size(); ++i) {
    auto input = inputs[i];
    auto channels = input->dims()[3];
    out_offset += channels;
    auto src = input->data<half>();
    for (int j = 0; j < pixels; ++j) {
-      auto dst = out->data<half>() + out_offset;
+      auto dst = out->mutable_data<half>() + out_offset;
      memory::Copy(dst, src, sizeof(half));
    }
  }
 }
+template class ConcatKernel<FPGA, float>;

 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef CONV_OP
+
+#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
+  return true;
+}
+
+template <>
+void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {
+  // ConvCompute<float>(param);
+}
+
+template class ConvKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -160,4 +160,12 @@ else ()

    #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)

+
+
+endif()
+
+if(FPGA)
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+
 endif()
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -20,7 +20,7 @@ int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  bool optimize = false;
  if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time2 = time();
+    auto time1 = time();
    DLOG << "load cost: " << time_diff(time1, time1) << "ms";
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};